from NSSCBBandit import NSSCBBandit
from Environment_kappa import Environment
from plot import plot_regret, plot_times
import time
from utils import dsigmoid,sigmoid
import argparse


import numpy as np

import os,pickle

# Fixed seed so that repeated runs are reproducible; the value is also embedded
# in the name of the results file written at the end of the script.
# To randomise a run, replace the constant with e.g. np.random.randint(0, 1000).
seed = 205
np.random.seed(seed)


# Command-line interface of the experiment script.
# Each entry is (flag, value type, required?, help text).
# NOTE(review): --PB is accepted but not read in the visible part of this
# script -- confirm whether it is still needed.
_CLI_OPTIONS = (
    ("--T", int, True, "Horizon length"),
    ("--xi", float, True, "Exploration parameter xi"),
    ("--PB", str, False, "Problem type PB"),
)

parser = argparse.ArgumentParser(
    description="Run bandit experiment with specific parameters."
)
for _flag, _type, _required, _help in _CLI_OPTIONS:
    parser.add_argument(_flag, type=_type, required=_required, help=_help)

args = parser.parse_args()


# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------

# Values supplied on the command line.
T = args.T    # horizon length
xi = args.xi  # exponent that sets the per-round change probability below

# Fixed problem settings.
d = 10                # length of the parameter vector theta
num_actions = 100
mean_low, mean_high = -1, 1
theta_bound = 3       # bound enforced on ||theta|| below
action_bound = 1
B = 1                 # forwarded to Environment as reward_bound
reward_bound = 1      # forwarded to the bandit as reward_bound
noise_variance = 0.01
n_mc = 15             # passed to run_experiment(n_mc=...)

# Mode switches; they also select the format of the results file name.
deterministic = False
continuous = False
cyclic = False

# Draw theta uniformly from the cube [-theta_bound, theta_bound]^d and, if it
# lands outside the Euclidean ball of radius theta_bound, shrink it to sit just
# inside that ball (factor .9999 keeps the norm strictly below the bound).
theta = np.random.uniform(-theta_bound, theta_bound, d)
theta_norm = np.linalg.norm(theta)
if theta_norm > theta_bound:
    theta *= theta_bound / theta_norm * .9999

# xi == 0 is special-cased to a zero change probability; T**0 would be 1.
change_prob = T ** (-xi) if xi != 0 else 0

# Constants handed to the learning algorithms.
S = theta_bound   # parameter-norm bound
L = action_bound  # action-norm bound
R = 1
k_mu = 1 / 4
c_mu = dsigmoid(L * S)  # presumably the sigmoid derivative at L*S -- see utils

r_lambda_SCB = d * np.log(T) / (4 * c_mu)
delta = 1 / T


# Build the list of algorithms to compare. Objects are constructed in the same
# order as before (MASTER, SCB_WeightUCB, OFUGLB, DAL), and each import stays
# directly ahead of its first use, so any work done at import or construction
# time happens in the original sequence.
from MASTER import MASTER

algorithms = [
    MASTER(num_actions, T, d, 2 * delta, d * np.log(T), S, L, R, 'SCB', k_mu, c_mu),
]

from SCB_WeightUCB_new import SCB_WeightUCB

algorithms.append(
    SCB_WeightUCB(
        num_actions=num_actions,
        horizon=T,
        d=d,
        delta=delta,
        r_lambda=r_lambda_SCB,
        S=S,
        L=L,
        R=R,
        k_mu=k_mu,
        c_mu=c_mu,
    )
)

from DAL import DAL
from OFUGLB_fast import OFUGLB

# DAL wraps the OFUGLB instance as its base algorithm; note that it is given
# its own delta, 1/T**(1/6), rather than the 1/T used above.
base = OFUGLB(
    num_actions=num_actions,
    horizon=T,
    dim=d,
    param_norm_ub=S,
    arm_norm_ub=L,
    failure_level=delta,
)
algorithms.append(DAL(base_algorithm=base, T=T, delta=1 / T ** (1 / 6)))







# Bandit instance built from the sampled theta and the configured bounds.
bandit = NSSCBBandit(
    d=d,
    num_actions=num_actions,
    theta=theta,
    theta_bound=theta_bound,
    action_bound=action_bound,
    mean_low=mean_low,
    mean_high=mean_high,
    reward_bound=reward_bound,
    noise_variance=noise_variance,
    continuous=continuous,
)

# Environment that runs every algorithm in `algorithms` against `bandit` for T
# rounds. It receives B (not reward_bound) as its reward bound; both are 1 here.
environment = Environment(
    bandit=bandit,
    algorithms=algorithms,
    T=T,
    change_prob=change_prob,
    reward_bound=B,
    continuous=continuous,
)


# Run the experiment with n_mc passed through and report the wall-clock time
# of the whole call.
time_start = time.time()

(
    avg_regret,
    std_regret,
    avg_timings,
    avg_detections,
    avg_detection_delays,
) = environment.run_experiment(n_mc=n_mc)

elapsed = time.time() - time_start
print('Total running time: ', elapsed)


# ---------------------------------------------------------------------------
# Persist and plot the results
# ---------------------------------------------------------------------------

# The file name encodes the experiment settings. `continuous` takes precedence
# over the other modes and omits xi and cyclic from the name.
# NOTE(review): the default (non-deterministic) name labels the value of
# `cyclic` as "worst_"; it is kept as is so existing result files still match
# -- confirm whether "cyclic_" was intended.
_name_prefix = f'{bandit.__class__.__name__}_T_{T}_d_{d}_A_{num_actions}_N_{n_mc}'
if continuous:
    file_name = f'{_name_prefix}_cont_seed_{seed}'
elif deterministic:
    file_name = f'{_name_prefix}_xi_{xi}_cyclic_{cyclic}_deter_{deterministic}_seed_{seed}'
else:
    file_name = f'{_name_prefix}_xi_{xi}_worst_{cyclic}_seed_{seed}'

# exist_ok=True avoids the check-then-create race when several runs of a
# parameter sweep start at the same time and all try to create the directory.
os.makedirs('results', exist_ok=True)

with open(f'results/{file_name}.pkl', 'wb') as f:
    pickle.dump({
        'avg_regret': avg_regret,
        'std_regret': std_regret,
        'avg_timings': avg_timings,
        'avg_detections': avg_detections,
        'avg_detection_delays': avg_detection_delays
    }, f)

plot_regret(avg_regret, std_regret, environment.T, exp_name=file_name)
plot_times(avg_timings, exp_name=file_name)


