import numpy as np
from joblib import Parallel, delayed
from math import sqrt,log,exp
import random
from BanditTools import *

# Arms: one mean per arm, all sharing the same sigma defined below.
mean = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

sigma = 1

# NOTE(review): arm1..arm3 are not referenced anywhere else in the visible
# part of this script; they are kept in case code outside this view uses them.
arm1, arm2, arm3 = (Gaussian(mu, sigma) for mu in mean[:3])

nbArms = 6

# One Gaussian arm per entry of `mean` (the first nbArms entries).
arms = [Gaussian(mu, sigma) for mu in mean[:nbArms]]

# The bandit wraps the arms; the same learner object is passed to every
# experiment section of this script.
bandit = MAB(arms)

learner = KL_UCB_Transfer(nbArms, sigma)

N_exp = 100          # Number of runs
timeHorizon = 10000  # Number of steps in each run
Npoints = 5000       # Maximum number of saved points

# Log-spaced integer time indices between 1 and timeHorizon. The integer cast
# produces repeated values, which np.unique removes (and sorts), so tsave
# holds at most Npoints entries.
tsave = np.unique(
    np.logspace(
        np.log10(1),
        np.log10(timeHorizon),
        num=Npoints,
        dtype=int,
    )
)

################
################ No Prior
################

# Baseline experiment: every per-arm input is zero.
# NOTE(review): presumably N_sample1 is the number of prior samples per arm
# and dist1 a per-arm distance bound -- confirm against BanditTools.
N_sample1 = [0] * nbArms
# NOTE(review): the arms are built with Gaussian(mean, sigma) but this prior
# uses sigma**2; both are equal while sigma == 1 -- confirm which
# parameterization Gaussian expects before changing sigma.
prior_distrib1 = MAB([Gaussian(0, sigma**2) for _ in range(nbArms)])
dist1 = [0] * nbArms
Regret = OneBanditOneLearnerMultipleRuns(
    bandit, learner, timeHorizon, N_exp,
    prior_distrib1, N_sample1, dist1, sigma, tsave,
)

filename = "regret_KL_UCB_Transfer_Sim2noprior.npz"

# Save the time grid together with the result of the runs.
np.savez_compressed(filename, tsave=tsave, Regret=Regret)

################
################ Prior 1
################

# Only the last arm gets non-zero entries in L and N_sample2.
L = [0, 0, 0, 0, 0, 0.004]
delta = 0.001
# Prior means equal the arm means, except the last one which is shifted
# upwards by delta.
mean_prior = mean[:-1] + [mean[-1] + delta]
# NOTE(review): presumably the number of prior samples per arm -- confirm
# against BanditTools.
N_sample2 = [0, 0, 0, 0, 0, 1000]
# NOTE(review): sigma**2 here vs. sigma for the arms; equal while sigma == 1.
prior_distrib2 = MAB([Gaussian(mu, sigma**2) for mu in mean_prior[:nbArms]])
dist2 = L
Regret = OneBanditOneLearnerMultipleRuns(
    bandit, learner, timeHorizon, N_exp,
    prior_distrib2, N_sample2, dist2, sigma, tsave,
)

filename = "regret_KL_UCB_Transfer_Sim2prior1.npz"

# Save the time grid together with the result of the runs.
np.savez_compressed(filename, tsave=tsave, Regret=Regret)

################
################ Prior 2
################

# Only the last arm gets non-zero entries in L and N_sample3.
L = [0, 0, 0, 0, 0, 0.21]
delta = 0.01
# Prior means equal the arm means, except the last one which is shifted
# downwards by delta.
mean_prior = mean[:-1] + [mean[-1] - delta]
# NOTE(review): presumably the number of prior samples per arm -- confirm
# against BanditTools.
N_sample3 = [0, 0, 0, 0, 0, 1000]
# NOTE(review): sigma**2 here vs. sigma for the arms; equal while sigma == 1.
prior_distrib3 = MAB([Gaussian(mu, sigma**2) for mu in mean_prior[:nbArms]])
dist3 = L
Regret = OneBanditOneLearnerMultipleRuns(
    bandit, learner, timeHorizon, N_exp,
    prior_distrib3, N_sample3, dist3, sigma, tsave,
)

filename = "regret_KL_UCB_Transfer_Sim2prior2.npz"

# Save the time grid together with the result of the runs.
np.savez_compressed(filename, tsave=tsave, Regret=Regret)