import numpy as np
from joblib import Parallel, delayed
from math import sqrt,log,exp
import random
from BanditTools import *

# Arms
# Mean reward of each arm; one Gaussian arm is created per entry.
mean = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

# Second argument passed to every Gaussian arm of the bandit.
# NOTE(review): presumably the standard deviation -- confirm against
# BanditTools.Gaussian.
sigma = 1

# Stand-alone arms, not referenced in the visible part of this script (the
# bandit is built from `arms`); kept so that any code relying on these
# module-level names keeps working.
arm1 = Gaussian(mean[0], sigma)
arm2 = Gaussian(mean[1], sigma)
arm3 = Gaussian(mean[2], sigma)

# Derived from `mean` rather than hard-coded, so editing the list of means
# cannot leave the arm count out of sync with it.
nbArms = len(mean)

arms = [Gaussian(mu, sigma) for mu in mean]

bandit = MAB(arms)

# Simulation budget.
N_exp = 100              # number of independent runs
timeHorizon = 1_000_000  # number of steps in each run
Npoints = 5000           # maximum number of checkpoints

# Checkpoint times: Npoints log-spaced values between 1 and timeHorizon, cast
# to integers by logspace (dtype=int) and then de-duplicated and sorted by
# np.unique, so the final array can hold fewer than Npoints entries.
tsave = np.unique(
    np.logspace(
        start=np.log10(1),
        stop=np.log10(timeHorizon),
        num=Npoints,
        dtype=int,
    )
)

################
################ AST_UCB
################

# Per-arm parameter lists (one entry per arm).
L = [0.1] * nbArms
delta = [0.05] * nbArms

# Prior means: each true mean shifted by the matching entry of `delta`.
mean_prior = [mu + shift for mu, shift in zip(mean, delta)]

# 1000 for every arm; handed to the runner together with the prior bandit.
N_sample4 = [1000] * nbArms

# NOTE(review): the prior arms receive sigma**2 whereas the bandit arms receive
# sigma. Numerically identical while sigma == 1 -- confirm which one
# BanditTools.Gaussian expects before changing sigma.
prior_distrib4 = MAB([Gaussian(mu0, sigma**2) for mu0 in mean_prior])

# Same list object as `L` (alias, not a copy).
dist4 = L

# NOTE(review): `learner` is not defined in the visible part of this script;
# presumably it is provided by `from BanditTools import *` -- confirm.
Regret = OneBanditOneLearnerMultipleRuns(
    bandit,
    learner,
    timeHorizon,
    N_exp,
    prior_distrib4,
    N_sample4,
    dist4,
    sigma,
    tsave,
)

# Store the checkpoint times and the runner's output in one compressed archive.
filename = "regret_AST_UCB_Sim3prior3.npz"

np.savez_compressed(filename, tsave=tsave, Regret=Regret)