import numpy as np

class ENV_REAL:
    """Bandit-style environment backed by a logged dataset.

    The constructor reads three things from ``df`` by column position:

    * column 0     -> ``yt_temp`` (stored as-is),
    * column 1     -> arm label, cast to ``int`` and shifted by -1 to give
                      ``true_arm``,
    * columns 2..d+1 -> feature matrix ``xt``.

    ``yt_mean`` is initialised to ``xt @ theta.T`` where ``theta`` stacks the
    three ``beta_*_true`` arguments row-wise.  ``gen_reward`` then fills the
    one-hot reward matrix ``yt`` and masks ``yt_mean``.

    Parameters
    ----------
    df : object exposing ``.iloc[...]`` with ``.values`` (e.g. a pandas
        DataFrame) laid out as described above.
    beta_1_true, beta_2_true, beta_3_true : array-like
        Per-arm coefficient vectors; presumably each has length ``d`` so that
        ``xt @ theta.T`` is defined -- TODO confirm against callers.
    K : int
        Number of arms; also stored as ``mu``.
    T : int
        Number of rounds; ``yt`` has ``T`` rows.
    delta, seed, sigma, gap :
        Stored on the instance unchanged; not otherwise used in this class.
    d : int
        Number of feature columns read from ``df``.
    T_bic :
        Stored as ``n_bic``.
    """

    def __init__(self, df, beta_1_true, beta_2_true, beta_3_true, K, T, delta, d, seed, sigma, T_bic, gap):
        self.K = K
        self.T = T
        self.delta = delta
        self.d = d
        self.reg_sq = d * np.log(T/d)
        self.mu = self.K
        self.seed = seed
        self.sigma = sigma
        self.n_bic = T_bic
        self.gap = gap
        self.min_eig = 0.1
        self.df = df
        self.beta_1_true = beta_1_true
        self.beta_2_true = beta_2_true
        self.beta_3_true = beta_3_true

        # true parameters, one row per beta vector
        self.theta = np.array([self.beta_1_true, self.beta_2_true, self.beta_3_true])

        # data matrix: the d feature columns that follow the reward/arm columns
        self.xt = np.array(self.df.iloc[:, 2:(self.d+2)].values)
        # true arm: labels in column 1 are shifted by -1 to become array indices.
        # The builtin ``int`` is used because the ``np.int`` alias was removed
        # from NumPy (it was only ever an alias for the builtin).
        self.true_arm = np.array(self.df.iloc[:, 1].values).astype(int) - 1

        # reward matrix
        self.yt = np.zeros((self.T, self.K))
        self.yt_temp = np.array(self.df.iloc[:, 0].values)
        self.yt_mean = self.xt @ self.theta.T

    def gen_reward(self):
        """Fill ``yt`` with one-hot rewards and mask ``yt_mean`` accordingly.

        For each of the first ``T`` rows, ``yt[t, i]`` becomes 1 where
        ``i == true_arm[t]`` and 0 elsewhere, and ``yt_mean[t, i]`` is set to
        0 for every ``i`` in ``range(K)`` other than ``true_arm[t]``.  A row
        whose arm index lies outside ``range(K)`` ends up all zero in both
        matrices.  Rows of ``yt_mean`` beyond ``T`` are left untouched.

        Raises
        ------
        IndexError
            If fewer than ``T`` arm labels are available.
        """
        if self.true_arm.shape[0] < self.T:
            # Fail loudly instead of letting a short array broadcast silently.
            raise IndexError(
                "true_arm has %d entries but T is %d" % (self.true_arm.shape[0], self.T)
            )

        arms = self.true_arm[:self.T]
        # is_true[t, i] is True exactly when arm i is the logged arm of round t.
        is_true = arms[:, None] == np.arange(self.K)

        self.yt[...] = is_true
        # Basic slicing yields a view, so the masked assignment writes through
        # to ``self.yt_mean`` itself.
        self.yt_mean[:self.T, :self.K][~is_true] = 0
