#test.py

from NSLinearBanditBall import NSLinearBanditBall
from Environment_kappa import Environment
from plot import plot_regret, plot_times
import time
from utils import dsigmoid,sigmoid
import argparse

import numpy as np

import os,pickle

# Fixed seed so that every run is reproducible and the seed recorded in the
# result file name identifies the run. The previous random draw
# (np.random.randint(0, 1000)) was overwritten immediately and the generator
# was re-seeded right after, so it had no effect and has been removed.
seed = 205
np.random.seed(seed)


# Command-line interface of the experiment script.
parser = argparse.ArgumentParser(description="Run bandit experiment with specific parameters.")
parser.add_argument("--T", type=int, required=True, help="Horizon length (positive integer)")
# xi is used below as the exponent of the change probability T**(-xi);
# xi == 0 is special-cased to a change probability of 0.
parser.add_argument("--xi", type=float, required=True,
                    help="Non-stationarity exponent xi: change probability is T**(-xi) (0 means no change)")
# The value is forwarded as `radius` to the bandit; it is None when omitted.
parser.add_argument("--rad", type=float, required=False,
                    help="Radius forwarded to the bandit (`radius` argument)")
# NOTE(review): --PB is not read anywhere in the visible part of this script.
parser.add_argument("--PB", type=str, required=False, help="Problem type PB")

args = parser.parse_args()

# Later code computes 1/(2*T) and T**(-xi); a non-positive horizon would
# raise ZeroDivisionError or produce meaningless values, so reject it here
# with a regular usage error.
if args.T <= 0:
    parser.error("--T must be a positive integer")


# ---- Values taken from the command line ----
xi = args.xi        # exponent of the change probability T**(-xi)
radius = args.rad   # forwarded as `radius` to the bandit; None if --rad is omitted

# ---- Problem size ----
d = 5               # length of the sampled theta vector
num_actions = 100

# ---- Bounds ----
mean_low, mean_high = -1, 1
theta_bound = 1     # theta is rescaled below so that its norm stays under this
action_bound = 1
B = 1               # passed to Environment as reward_bound

# ---- Noise ----
noise_variance = 0.1

# ---- Mode flags ----
continuous = True
# The two flags below only appear in the result file name of the
# non-continuous branch within this script.
deterministic = False
cyclic = False

# Sample the initial parameter vector with entries uniform in [0, theta_bound).
theta = np.random.uniform(0, theta_bound, d)

# If the sample falls outside the ball of radius theta_bound, shrink it so
# that its norm ends up just inside (factor 0.999).
theta_norm = np.linalg.norm(theta)
if theta_norm > theta_bound:
    theta *= theta_bound / theta_norm * 0.999

T = args.T  # horizon length

n_mc = 30   # forwarded as n_mc to environment.run_experiment

# Change probability T**(-xi); xi == 0 is a special case that yields 0
# (rather than T**0 == 1).
change_prob = T ** (-xi) if xi != 0 else 0


# Short aliases handed to the algorithm constructors.
S = theta_bound                  # bound on the parameter norm
L = action_bound                 # action bound
R = np.sqrt(noise_variance)      # square root of the noise variance

reward_bound = L * S

# NOTE(review): k_mu and c_mu are not used in the visible part of this
# script; presumably bounds on the sigmoid derivative — confirm before removing.
k_mu = 0.25
c_mu = dsigmoid(reward_bound)

r_lambda_LB = d                  # passed as r_lambda to both algorithms
delta = 1 / (2 * T)              # passed as delta to both algorithms





# Algorithms compared in the experiment. Each import stays directly before
# the construction that needs it, keeping the original execution order.
from DAL_LBs import DAL_LB

algorithms = [
    DAL_LB(
        num_actions=num_actions,
        horizon=T,
        noise_variance=noise_variance,
        d=d,
        delta=delta,
        r_lambda=r_lambda_LB,
        S=S,
        L=L,
        R=R,
    ),
]

from LB_WeightUCB_new import LB_WeightUCB

algorithms.append(
    LB_WeightUCB(
        num_actions=num_actions,
        horizon=T,
        d=d,
        delta=delta,
        r_lambda=r_lambda_LB,
        S=S,
        L=L,
        R=R,
    )
)



# Keyword arguments for the bandit instance, collected in one place.
bandit_kwargs = dict(
    num_actions=num_actions,
    noise_variance=noise_variance,
    d=d,
    theta=theta,
    mean_low=mean_low,
    mean_high=mean_high,
    reward_bound=reward_bound,
    theta_bound=theta_bound,
    action_bound=action_bound,
    continuous=continuous,
    radius=radius,
    prob=change_prob,
)
bandit = NSLinearBanditBall(**bandit_kwargs)

# Keyword arguments for the environment that runs every algorithm on the bandit.
# NOTE(review): the environment receives reward_bound=B while the bandit
# receives reward_bound=L*S; both are 1 with the current settings.
environment_kwargs = dict(
    bandit=bandit,
    algorithms=algorithms,
    T=T,
    change_prob=change_prob,
    continuous=continuous,
    reward_bound=B,
)
environment = Environment(**environment_kwargs)


# Run n_mc repetitions of the experiment and report the wall-clock duration.
time_start = time.time()

experiment_output = environment.run_experiment(n_mc=n_mc)
avg_regret, std_regret, avg_timings, avg_detections = experiment_output

elapsed_seconds = time.time() - time_start
print('Total running time: ', elapsed_seconds)

# Build the experiment tag used for the pickle file and the plots.
# Both variants share the same prefix and end with the seed.
run_prefix = f'{bandit.__class__.__name__}_T_{T}_d_{d}_A_{num_actions}_N_{n_mc}'

if continuous:
    # This script defines S = theta_bound and L = action_bound, so the "L"
    # label carries the action bound and the "S" label the theta bound.
    # The previous version had the two swapped; with both bounds equal to 1
    # the generated name is unchanged.
    file_name = (
        f'{run_prefix}_cont_L_{action_bound}_S_{theta_bound}'
        f'_rad_{radius}_var_{noise_variance}_xi_{xi}_seed_{seed}'
    )
else:
    file_name = (
        f'{run_prefix}_xi_{xi}_cyclic_{cyclic}_deter_{deterministic}_seed_{seed}'
    )


# Create the output directory if needed. exist_ok=True avoids the
# check-then-create race of `if not os.path.exists(...)`, which can raise
# FileExistsError when several runs of this script start at the same time.
os.makedirs('results', exist_ok=True)

# Persist the aggregated experiment outputs under the experiment tag.
with open(f'results/{file_name}.pkl', 'wb') as f:
    pickle.dump({
        'avg_regret': avg_regret,
        'std_regret': std_regret,
        'avg_timings': avg_timings,
        'avg_detections': avg_detections,
    }, f)




# Plot the regret (average and standard deviation) over the environment's
# horizon, labelled with the experiment tag.
plot_regret(
    avg_regret,
    std_regret,
    environment.T,
    exp_name=file_name,
)

# Plot the timings of the algorithms under the same experiment tag.
plot_times(
    avg_timings,
    exp_name=file_name,
)



