import numpy as np
from environments import Environment
from abc import ABC, abstractmethod
from utils import sigmoid

class SyntheticLabeler(ABC):
    """Abstract base for labelers that compare two trajectories by reward.

    Subclasses implement :meth:`generate_preference`; the shared helper
    :meth:`_trajectory_reward` scores a trajectory through the wrapped
    environment.
    """

    def __init__(self, env):
        """Store the environment used to score trajectories.

        Args:
            env: Object exposing ``get_cumulative_reward(traj)``.
        """
        self.env = env

    def _trajectory_reward(self, traj):
        """Return ``self.env.get_cumulative_reward(traj)`` for ``traj``."""
        return self.env.get_cumulative_reward(traj)

    @abstractmethod
    def generate_preference(self, traj1, traj2, n=1):
        """Produce a preference label for the pair ``(traj1, traj2)``.

        Args:
            traj1: First trajectory of the pair.
            traj2: Second trajectory of the pair.
            n: Number of comparisons to label. Defaults to 1, matching the
                concrete labelers, so a pair can be labeled through the
                base-class interface without passing ``n``.

        Returns:
            Defined by the subclass. The labelers in this module return how
            many of the ``n`` comparisons prefer ``traj1``.
        """

class DeterministicLabeler(SyntheticLabeler):
    """Labeler whose preference depends only on which reward is larger."""

    def __init__(self, env):
        """Forward ``env`` to the base-class initializer."""
        super().__init__(env)

    def generate_preference(self, traj1, traj2, n=1):
        """Return ``n`` when ``traj1`` out-scores ``traj2``, otherwise zero.

        Rewards come from ``self._trajectory_reward`` and are compared with a
        strict ``>``, so equal rewards also produce zero.

        Args:
            traj1: First trajectory of the pair.
            traj2: Second trajectory of the pair.
            n: Multiplier applied to the 0/1 comparison outcome.
        """
        reward_first, reward_second = map(
            self._trajectory_reward, (traj1, traj2)
        )
        # 1 if the first trajectory strictly wins, else 0.
        first_wins = int(reward_first > reward_second)
        return first_wins * n
    
class StochasticLabeler(SyntheticLabeler):
    """Labeler that samples preferences from the reward difference."""

    def __init__(self, env):
        """Forward ``env`` to the base-class initializer."""
        super().__init__(env)

    def generate_preference(self, traj1, traj2, n=1):
        """Draw a binomial sample with ``n`` trials for the trajectory pair.

        The success probability is ``sigmoid`` applied to the reward of
        ``traj1`` minus the reward of ``traj2``; the draw uses NumPy's global
        random state via ``np.random.binomial``.

        Args:
            traj1: First trajectory of the pair.
            traj2: Second trajectory of the pair.
            n: Number of binomial trials.

        Returns:
            The value returned by ``np.random.binomial(n, p)``.
        """
        reward_gap = (
            self._trajectory_reward(traj1) - self._trajectory_reward(traj2)
        )
        # Equivalent formula noted by the author:
        # 1 / (1 + np.exp(-reward_gap))
        prefer_first_prob = sigmoid(reward_gap)
        return np.random.binomial(n, prefer_first_prob)

