import numpy as np

class SyntheticRelStoEnv:
    """
    Synthetic bandit environment with unit-variance Gaussian rewards.

    Each arm i pays out a draw from N(mu_on[i], 1), produced by NumPy's
    global random state (np.random).

    Attributes:
      mu_on: Indexable collection of per-arm online means, stored as given
    """
    def __init__(self, mu_on):
        """
        Store the per-arm means; no copy or conversion is performed.

        Parameters:
          mu_on: Indexable collection of online means, one entry per arm
        """
        self.mu_on = mu_on

    def sample_reward(self, i):
        """
        Draw a single reward for arm i.

        Parameters:
          i: Index of the arm to pull (used to index mu_on)

        Returns:
          float: One sample from N(mu_on[i], 1)
        """
        arm_mean = self.mu_on[i]
        return np.random.normal(loc=arm_mean, scale=1)

class RealDataRelStoEnv:
    """
    Real Data Environment:
      - sample_reward(i): Supports two modes:
          * "data" mode: Samples sample_size ratings from the corresponding movie ratings list and computes the mean as the reward
          * "normal" mode: Samples reward from normal distribution N(mu_on[i], 1) based on given mu_on[i]

    Randomness comes from NumPy's global random state (np.random).

    Attributes:
      arm_ids: List mapping arm indices to movieIds
      movie_rewards_dict: Dictionary mapping movie IDs to lists of ratings
      mu_on: Expected rewards for each arm (used in normal mode)
      feedback_mode: Mode selection, 'data' or 'normal'
      sample_size: Number of ratings to sample in data mode
    """
    def __init__(self, arm_ids, movie_rewards_dict, mu_on=None, feedback_mode="data", sample_size=10):
        """
        Initialize the real data environment.

        Parameters:
          arm_ids: List mapping algorithm internal index to movieId
          movie_rewards_dict: Dictionary { movieId: [ratings] }
          mu_on: List or None, expected rewards for each arm (used in normal mode)
          feedback_mode: String, 'data' or 'normal'
          sample_size: Integer, number of ratings to sample in data mode

        Raises:
          ValueError: If feedback_mode is not 'data' or 'normal', if mu_on is
            missing in normal mode, or if sample_size is not a positive
            integer in data mode
        """
        # Validate everything up front so a rejected configuration never
        # leaves a half-initialized instance behind.
        if feedback_mode not in ("data", "normal"):
            raise ValueError("Invalid feedback_mode: choose 'data' or 'normal'")
        # mu_on is required in normal mode
        if feedback_mode == "normal" and mu_on is None:
            raise ValueError("mu_on must be provided when using normal feedback mode.")
        # sample_size only matters in data mode. A value of 0 would make every
        # reward NaN (mean of an empty sample), and negative or non-integer
        # values would only fail later inside NumPy, so reject them here.
        if feedback_mode == "data" and (
            not isinstance(sample_size, (int, np.integer)) or sample_size < 1
        ):
            raise ValueError("sample_size must be a positive integer when using data feedback mode.")

        self.arm_ids = arm_ids
        self.movie_rewards_dict = movie_rewards_dict
        self.mu_on = mu_on
        self.feedback_mode = feedback_mode
        self.sample_size = sample_size

    def sample_reward(self, i):
        """
        Sample reward for the specified arm:
          - data mode: Sample sample_size ratings and compute their mean
          - normal mode: Sample from normal distribution N(mu_on[i], 1)

        In data mode, an arm whose movie ID is missing from
        movie_rewards_dict, or whose ratings list is empty, prints a warning
        and yields the default reward 0.5.

        Parameters:
          i: Index of the arm

        Returns:
          float: Reward value, mean of sampled ratings in data mode, or sample from normal distribution in normal mode

        Raises:
          ValueError: If feedback_mode was changed to an unsupported value
            after construction
        """
        if self.feedback_mode == "data":
            mid = self.arm_ids[i]
            if mid not in self.movie_rewards_dict:
                print(f"Warning: Movie ID {mid} not found in movie_rewards_dict, returning default reward 0.5")
                return 0.5

            ratings = self.movie_rewards_dict[mid]
            # len() rather than truthiness: ratings may be a NumPy array,
            # whose truth value is ambiguous.
            if len(ratings) == 0:
                print(f"Warning: No ratings available for Movie ID {mid}, returning default reward 0.5")
                return 0.5

            # Sample without replacement when enough ratings exist; otherwise
            # fall back to sampling with replacement to reach sample_size.
            with_replacement = len(ratings) < self.sample_size
            sampled_ratings = np.random.choice(
                ratings, size=self.sample_size, replace=with_replacement
            )

            # Compute mean as reward
            return np.mean(sampled_ratings)

        if self.feedback_mode == "normal":
            # Sample from normal distribution N(mu_on[i], 1)
            return np.random.normal(loc=self.mu_on[i], scale=1)

        # Only reachable if feedback_mode was mutated after __init__; fail
        # loudly instead of silently returning None as a reward.
        raise ValueError("Invalid feedback_mode: choose 'data' or 'normal'")