import numpy as np

class GaussianEnvironment:
    '''
    Multi-armed environment whose rewards are Gaussian samples.

    Pulling an arm draws a reward from a normal distribution centred on that
    arm's entry in `means` with standard deviation `noise_std`. The
    environment keeps per-arm pull counts, cumulative rewards and empirical
    average rewards, and after every round exposes the index of the arm with
    the highest empirical average.
    '''

    def __init__(self, means, n_arms, noise_std):
        '''
        Input: per-arm mean rewards (indexed by arm), number of arms and
        standard deviation of the reward noise.

        The per-arm statistics are allocated with `n_arms` entries.
        # NOTE(review): presumably n_arms == len(means); this is not checked here.
        '''
        self.means = means
        self.noise_std = noise_std
        # Feedback of the most recent round (placeholders until `round` is called).
        self.obs_x = 0          # last reward realization
        self.obs_x_mean = 0     # mean reward of the last pulled arm
        self.first = None       # arm currently ranked first
        # Per-arm running statistics.
        self.pulls = np.zeros(n_arms)
        self.rewards = np.zeros(n_arms)
        self.avg_rewards = np.zeros(n_arms)

    def reset(self):
        '''
        Bring the environment back to the state it had right after construction.

        `means` and `noise_std` are left untouched; only the last-round feedback
        and the per-arm statistics are cleared.
        '''
        self.obs_x = 0
        self.obs_x_mean = 0
        self.first = None
        # Rebind to fresh zero arrays of the same shape and dtype. Scaling the
        # old arrays by zero would keep any NaN/inf entry alive, because
        # 0 * nan and 0 * inf both evaluate to nan.
        self.pulls = np.zeros_like(self.pulls)
        self.rewards = np.zeros_like(self.rewards)
        self.avg_rewards = np.zeros_like(self.avg_rewards)

    def round(self, arm):
        '''
        Input: arm pulled by the learner
        Output: first arm in the ranking, reward realization obtained by the learner and mean reward obtained by the learner

        The function performs a single round of the learning procedure in the environment. Notice that
        the arm which gained the first place is returned, as this feedback is sufficient for our algorithms.
        Moreover, the realization and the mean of the rewards are returned for code organization reasons.
        '''
        # Sample the noisy reward of the pulled arm and remember its true mean.
        self.obs_x = np.random.normal(self.means[arm], self.noise_std)
        self.obs_x_mean = self.means[arm]

        # Update the running statistics of the pulled arm only.
        self.pulls[arm] += 1
        self.rewards[arm] += self.obs_x
        self.avg_rewards[arm] = self.rewards[arm] / self.pulls[arm]

        # The leader is the arm with the highest empirical average; np.argmax
        # returns the lowest index when several arms are tied.
        # NOTE(review): arms that were never pulled take part with an average
        # of 0, so they outrank pulled arms whose average is negative -
        # confirm this is the intended ranking.
        self.first = np.argmax(self.avg_rewards)
        return self.first, self.obs_x, self.obs_x_mean

    