from NSLinearBandit import NSLinearBandit
from Environment_kappa import Environment
from plot import plot_regret, plot_times
import time
from utils import dsigmoid,sigmoid
import argparse

import numpy as np

import os,pickle

# Fixed seed so that repeated runs are reproducible; the value is also embedded
# in the results file name. A randomly drawn seed would be overwritten by this
# assignment anyway, so no draw is made before seeding.
seed = 205
np.random.seed(seed)


# Command-line interface: --T and --xi are mandatory, --PB is optional.
parser = argparse.ArgumentParser(description="Run bandit experiment with specific parameters.")
parser.add_argument("--T", type=int, required=True, help="Horizon length")
parser.add_argument("--xi", type=float, required=True, help="Exploration parameter xi")
parser.add_argument("--PB", type=str, required=False, help="Problem type PB")

args = parser.parse_args()

# Reject a non-positive horizon up front. The script later evaluates
# 1/(2*T) and T**(-xi): T == 0 raises ZeroDivisionError and a negative T with a
# fractional xi produces a complex number, so fail here with a usage message.
if args.T < 1:
    parser.error("--T must be a positive integer")


# --- Experiment configuration ------------------------------------------------
xi = args.xi  # from --xi; used below to derive the change probability

# Problem size: theta has d entries, and num_actions is passed to the bandit
# and to every algorithm.
d = 10
num_actions = 100

# Bounds passed to the bandit (mean_*, theta_bound, action_bound) and to
# OPKB / the environment (B).
mean_low, mean_high = -1, 1
theta_bound = 1
action_bound = 1
B = 1

# Flags. `continuous` is forwarded to the bandit and the environment;
# `deterministic` and `cyclic` only appear in the results file name.
deterministic = False
continuous = False
cyclic = False

noise_variance = 0.01

# Draw a parameter vector with d entries, each uniform on [0, theta_bound).
theta = np.random.uniform(low=0, high=theta_bound, size=d)

# If the draw lies outside the ball of radius theta_bound, shrink it so that
# its Euclidean norm ends up just below the bound (factor 0.999).
theta_norm = np.linalg.norm(theta)
if theta_norm > theta_bound:
    theta *= theta_bound / theta_norm * 0.999

T = args.T  # horizon, from --T

n_mc = 15  # forwarded to run_experiment as n_mc

# change_prob is T^(-xi); xi == 0 is special-cased to 0 rather than T^0 == 1.
change_prob = T ** (-xi) if xi != 0 else 0


# Short aliases used as constructor arguments for the algorithms.
S, L = theta_bound, action_bound
R = np.sqrt(noise_variance)  # square root of the noise variance

k_mu = 1 / 4
c_mu = dsigmoid(L * S)

reward_bound = L * S
r_lambda_LB = d
delta = 1 / (2 * T)



# Algorithms under comparison. Each one is imported and constructed in turn,
# then appended to `algorithms` in this fixed order.
from MASTER import MASTER

algorithms = []
algorithms.append(MASTER(num_actions, T, d, delta, r_lambda_LB, S, L, R, 'LB'))


from DAL_LBs import DAL_LB

algorithms.append(
    DAL_LB(
        num_actions=num_actions,
        horizon=T,
        noise_variance=noise_variance,
        d=d,
        delta=delta,
        r_lambda=r_lambda_LB,
        S=S,
        L=L,
        R=R,
    )
)


from LB_WeightUCB_new import LB_WeightUCB

algorithms.append(
    LB_WeightUCB(
        num_actions=num_actions,
        horizon=T,
        d=d,
        delta=delta,
        r_lambda=r_lambda_LB,
        S=S,
        L=L,
        R=R,
    )
)


from OPKBLin import OPKB

# Kept under its own name as well as being added to the list.
opkb = OPKB(num_actions=num_actions, horizon=T, B=B)
algorithms.append(opkb)


# Collect the bandit settings in one place, then build the bandit from them.
bandit_config = dict(
    num_actions=num_actions,
    noise_variance=noise_variance,
    d=d,
    theta=theta,
    mean_low=mean_low,
    mean_high=mean_high,
    reward_bound=reward_bound,
    theta_bound=theta_bound,
    action_bound=action_bound,
    continuous=continuous,
)
bandit = NSLinearBandit(**bandit_config)


# Environment that runs every algorithm in `algorithms` against `bandit`.
# NOTE(review): reward_bound is given B here, whereas the bandit receives the
# `reward_bound` variable (L*S); both equal 1 with the current settings.
environment = Environment(
    bandit=bandit,
    algorithms=algorithms,
    T=T,
    change_prob=change_prob,
    continuous=continuous,
    reward_bound=B,
)


# Run the experiment over n_mc repetitions and report the wall-clock duration.
time_start = time.time()
results = environment.run_experiment(n_mc=n_mc)
(avg_regret,
 std_regret,
 avg_timings,
 avg_detections,
 avg_detection_delays) = results

elapsed = time.time() - time_start
print('Total running time: ', elapsed)

# Base name for the results file: it encodes the bandit class and the
# experiment settings. The continuous variant leaves out xi / cyclic / deter.
if continuous:
    file_name=f'{bandit.__class__.__name__}_T_{T}_d_{d}_A_{num_actions}_N_{n_mc}_cont_seed_{seed}'
else:
    file_name=f'{bandit.__class__.__name__}_T_{T}_d_{d}_A_{num_actions}_N_{n_mc}_xi_{xi}_cyclic_{cyclic}_deter_{deterministic}_seed_{seed}'

# Currently appends nothing; presumably a hook for an optional suffix.
file_name += ""


# Make sure the output directory exists. exist_ok=True replaces the separate
# existence check, so two runs started at the same time cannot race between
# the check and the creation and fail with FileExistsError.
os.makedirs('results', exist_ok=True)

# Persist the aggregated statistics returned by run_experiment.
with open(f'results/{file_name}.pkl', 'wb') as f:
    pickle.dump({
        'avg_regret': avg_regret,
        'std_regret': std_regret,
        'avg_timings': avg_timings,
        'avg_detections': avg_detections,
        'avg_detection_delays': avg_detection_delays
    }, f)

# Plot regret and timings, labelled with the same name as the results file.
horizon = environment.T
plot_regret(avg_regret, std_regret, horizon, exp_name=file_name)
plot_times(avg_timings, exp_name=file_name)


