import numpy as np

class ENV:
    """Synthetic linear contextual-bandit environment with ``K`` arms.

    Each arm ``i`` has a parameter vector ``theta[i]`` of dimension ``d``.
    Calling :meth:`dgp` fills, for each of the ``T`` rounds, a context
    ``xt[t]``, the noiseless rewards ``yt_mean[t]``, the noisy rewards
    ``yt[t]`` and the index of the arm with the highest noiseless reward
    ``true_arm[t]``.
    """

    def __init__(self, K, T, delta, d, seed, sigma, T_bic, gap):
        """Store the configuration and allocate zero-filled data arrays.

        Args:
            K: Number of arms (rows of ``theta``, columns of ``yt``).
            T: Number of rounds (rows of ``xt`` and ``yt``).
            delta: Stored unchanged; not read anywhere in this class
                (presumably a confidence level -- confirm with callers).
            d: Dimension of the contexts and of each arm parameter.
            seed: Base seed; arm ``i`` is drawn after seeding the global
                NumPy RNG with ``seed + i``.
            sigma: Standard deviation of the Gaussian reward noise.
            T_bic: Stored as ``n_bic``; not read anywhere in this class.
            gap: Stored unchanged; not read anywhere in this class.
        """
        self.K = K
        self.T = T
        self.delta = delta
        self.d = d
        # d * log(T / d); not read inside this class.
        self.reg_sq = d * np.log(T/d)
        # Set equal to the number of arms; not read inside this class.
        self.mu = self.K
        self.seed = seed
        self.sigma = sigma
        self.n_bic = T_bic
        self.gap = gap
        # Fixed constant; not read inside this class.
        self.min_eig = 0.1

        # true parameters, one row per arm
        self.theta = np.zeros((self.K, self.d))

        # data matrix, one context per round
        self.xt = np.zeros((self.T, self.d))
        # reward matrices: noisy rewards and their noiseless means
        self.yt = np.zeros((self.T, self.K))
        self.yt_mean = np.zeros((self.T, self.K))
        # index of the best arm per round (stored in a float array)
        self.true_arm = np.zeros(self.T)

    def dgp(self):
        """Run the data-generating process, filling the arrays in place.

        Side effect: reseeds NumPy's *global* random state once per arm
        (``np.random.seed(self.seed + i)``). The contexts and the noise are
        then drawn from the stream left by the last arm's seed, so the
        output is reproducible for a fixed configuration whenever ``K >= 1``.

        Returns:
            None. Results are written to ``theta``, ``xt``, ``yt_mean``,
            ``yt`` and ``true_arm``.
        """
        # Step 1: arm parameters -- uniform on [0, 1)^d, scaled to unit norm.
        for i in range(self.K):
            np.random.seed(self.seed + i)
            raw_theta = np.random.uniform(0, 1, self.d)
            self.theta[i] = raw_theta / np.linalg.norm(raw_theta)

        for t in range(self.T):
            # Step 2: context -- uniform on [0, 1)^d, always scaled to unit
            # norm (whether the raw norm is above or below 1).
            raw_x = np.random.uniform(0, 1, self.d)
            x = raw_x / np.linalg.norm(raw_x)
            self.xt[t] = x

            # Step 3: rewards for every arm at once. Drawing the K noise
            # terms in a single call consumes the global RNG stream in the
            # same order as K consecutive scalar draws.
            means = self.theta.dot(x)
            self.yt_mean[t] = means
            self.yt[t] = means + np.random.normal(0, self.sigma, self.K)

            # Step 4: arm with the highest noiseless reward.
            self.true_arm[t] = np.argmax(means)
            
