import numpy as np
from joblib import Parallel, delayed
from math import sqrt,log,exp
import random
from BanditTools import *

# ---------------------------------------------------------------------------
# Bandit instance: Gaussian arms that all share the same `sigma`.
# ---------------------------------------------------------------------------
mean = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
sigma = 1

# Stand-alone handles on the first three arms (not referenced again in the
# visible part of this script).
arm1, arm2, arm3 = (Gaussian(mu, sigma) for mu in mean[:3])

nbArms = 6
arms = [Gaussian(mu, sigma) for mu in mean[:nbArms]]
bandit = MAB(arms)

# ---------------------------------------------------------------------------
# Experiment settings
# ---------------------------------------------------------------------------
N_exp = 100            # number of independent runs
timeHorizon = 1000000  # number of steps in each run
Npoints = 5000         # maximum number of time points requested

# Log-spaced integer time points between 1 and timeHorizon. Casting to int
# creates repeated values, which np.unique removes, so fewer than Npoints
# entries can remain.
tsave = np.unique(
    np.logspace(0.0, np.log10(timeHorizon), num=Npoints, dtype=int)
)

learner = KL_UCB_Transfer(nbArms, sigma)

################
################ No Prior
################

# Baseline configuration: zero prior samples and a zero distance entry for
# every arm.
N_sample1 = [0] * nbArms
dist1 = [0] * nbArms

# NOTE(review): the second Gaussian argument is sigma**2 here, while the arms
# of `bandit` are built with sigma. Both are equal as long as sigma == 1;
# confirm which one is intended before changing sigma.
prior_distrib1 = MAB([Gaussian(0, sigma**2) for _ in range(nbArms)])

Regret = OneBanditOneLearnerMultipleRuns(
    bandit,
    learner,
    timeHorizon,
    N_exp,
    prior_distrib1,
    N_sample1,
    dist1,
    sigma,
    tsave,
)

# Store the time points together with the result of the runs.
filename = "regret_KL_UCB_Transfer_Sim1noprior.npz"
np.savez_compressed(filename, tsave=tsave, Regret=Regret)


################
################ Prior 1
################

# delta: per-arm shift applied to the true means to obtain the prior means.
# L: per-arm values forwarded to the runner as `dist2`
#    (presumably a bound on the shift; verify against BanditTools).
delta = [0.2, 0.2, 0.2, 0.2, 0.2, 0.4]
L = [0.4] * nbArms
mean_prior = [mu + shift for mu, shift in zip(mean, delta)]

N_sample2 = [1000] * nbArms
prior_distrib2 = MAB([Gaussian(mu_prior, sigma**2) for mu_prior in mean_prior])
dist2 = L

Regret = OneBanditOneLearnerMultipleRuns(
    bandit,
    learner,
    timeHorizon,
    N_exp,
    prior_distrib2,
    N_sample2,
    dist2,
    sigma,
    tsave,
)

# Store the time points together with the result of the runs.
filename = "regret_KL_UCB_Transfer_Sim1prior1.npz"
np.savez_compressed(filename, tsave=tsave, Regret=Regret)


################
################ Prior 2
################

# delta: per-arm shift applied to the true means to obtain the prior means.
# L: per-arm values forwarded to the runner as `dist3`
#    (presumably a bound on the shift; verify against BanditTools).
delta = [0.1] * nbArms
L = [0.2] * nbArms
mean_prior = [mu + shift for mu, shift in zip(mean, delta)]

N_sample3 = [1000] * nbArms
prior_distrib3 = MAB([Gaussian(mu_prior, sigma**2) for mu_prior in mean_prior])
dist3 = L

Regret = OneBanditOneLearnerMultipleRuns(
    bandit,
    learner,
    timeHorizon,
    N_exp,
    prior_distrib3,
    N_sample3,
    dist3,
    sigma,
    tsave,
)

# Store the time points together with the result of the runs.
filename = "regret_KL_UCB_Transfer_Sim1prior2.npz"
np.savez_compressed(filename, tsave=tsave, Regret=Regret)


################
################ Prior 3
################

# L: per-arm values forwarded to the runner as `dist4`
#    (presumably a bound on the shift; verify against BanditTools).
# delta: per-arm shift applied to the true means to obtain the prior means.
L = [0.1, 0.1, 0.1, 0.1, 0.1, 0.1]
delta = [0.05, 0.05, 0.05, 0.05, 0.05, 0.05]
mean_prior = [m + d for m, d in zip(mean, delta)]
# Show the prior means used for this configuration. The previous statement
# printed `mean_test`, a name this script never defines, which raised
# NameError and stopped the script before this experiment and the next ran.
print(mean_prior)
N_sample4 = [1000 for _ in range(nbArms)]
prior_distrib4 = MAB([Gaussian(mean_prior[i], sigma**2) for i in range(nbArms)])
dist4 = L
Regret = OneBanditOneLearnerMultipleRuns(
    bandit, learner, timeHorizon, N_exp,
    prior_distrib4, N_sample4, dist4, sigma, tsave,
)

# Store the time points together with the result of the runs.
filename = "regret_KL_UCB_Transfer_Sim1prior3.npz"

np.savez_compressed(filename,
                    tsave=tsave,
                    Regret=Regret)


################
################ Prior 4
################

# delta is all zeros here, so the prior means equal the true means.
# L: per-arm values forwarded to the runner as `dist5`
#    (presumably a bound on the shift; verify against BanditTools).
delta = [0] * nbArms
L = [0.05] * nbArms
mean_prior = [mu + shift for mu, shift in zip(mean, delta)]

N_sample5 = [1000] * nbArms
prior_distrib5 = MAB([Gaussian(mu_prior, sigma**2) for mu_prior in mean_prior])
dist5 = L

Regret = OneBanditOneLearnerMultipleRuns(
    bandit,
    learner,
    timeHorizon,
    N_exp,
    prior_distrib5,
    N_sample5,
    dist5,
    sigma,
    tsave,
)

# Store the time points together with the result of the runs.
filename = "regret_KL_UCB_Transfer_Sim1prior4.npz"
np.savez_compressed(filename, tsave=tsave, Regret=Regret)
