
from NSKernelBandit import NSKernelBandit
from Environment_KB import Environment
from plot import plot_regret, plot_times
import time
import argparse
import os,pickle

import numpy as np

# Fixed seed so that repeated runs are reproducible; the value is also
# embedded in the result file name further down.  The previous random draw
# (np.random.randint(0, 1000)) was overwritten immediately and had no
# effect, so it has been dropped.
seed = 205
np.random.seed(seed)


# Command-line interface: --T and --xi are mandatory, --PB is optional.
parser = argparse.ArgumentParser(
    description="Run bandit experiment with specific parameters."
)
parser.add_argument(
    "--T",
    type=int,
    required=True,
    help="Horizon length",
)
parser.add_argument(
    "--xi",
    type=float,
    required=True,
    help="Exploration parameter xi",
)
# NOTE(review): --PB is accepted but not read anywhere in the visible part
# of this script.
parser.add_argument(
    "--PB",
    type=str,
    required=False,
    help="Problem type PB",
)

args = parser.parse_args()


# ---- Values supplied on the command line ----
T = args.T
xi = args.xi

# ---- Fixed problem constants ----
d = 10
num_actions = 100
mean_low, mean_high = -1, 1
theta_bound = 1
action_bound = 1
B = 0.8
noise_variance = 0.01
reward_method = "kernel_sum"

# ---- Mode flags (they select the result file name further down) ----
deterministic = False
continuous = False
cyclic = False

# Random vector rescaled to Euclidean norm `theta_bound`.  It is not read
# again in the visible script, but drawing it advances the global NumPy RNG,
# so it must stay to keep later random draws unchanged.
theta = np.random.uniform(-1, 1, d)
theta *= theta_bound / np.linalg.norm(theta)

# Passed to run_experiment as `n_mc`.
n_mc = 15
q = 25

# Change probability handed to the Environment: T^(-xi), or exactly 0 when
# xi is 0.
change_prob = T ** (-xi) if xi != 0 else 0


# Bandit instance that is later handed to the Environment.
bandit = NSKernelBandit(
    num_actions=num_actions,
    noise_variance=noise_variance,
    d=d,
    mean_low=mean_low,
    mean_high=mean_high,
    reward_bound=B,
    reward_generation_method=reward_method,
)

# Starts empty; the algorithm instances are appended further down.
algorithms = []




def _make_config():
    """Return a fresh copy of the settings dict shared by both configs.

    A new dict (including a new nested 'kernel' dict) is built on every
    call, so `config` and `config_red` remain independent objects.
    """
    return {
        'tol': 0.1,
        'window': 0,
        'kernel': {
            'name': 'rbf',
            'length_scale': 0.2,
        },
        'lambda': noise_variance,
        'v': noise_variance ** 0.5,
        'seed': None,
    }


# Passed to GP_UCB_W below.
config = _make_config()

# Passed to DAL_KB below; currently holds the same values as `config`.
config_red = _make_config()





# Each algorithm is imported and instantiated in turn; the import /
# construction order is kept as: DAL_KB, GP_UCB_W, OPKB.
from DAL_KBs import DAL_KB

reds_cd = DAL_KB(
    num_actions=num_actions,
    horizon=T,
    noise_variance=noise_variance,
    B=B,
    config=config_red,
)

from GP_UCB_W import GP_UCB_W

gp_ucb_w = GP_UCB_W(
    num_actions=num_actions,
    horizon=T,
    xi=xi,
    B=B,
    config=config,
)

from OPKB import OPKB

opkb = OPKB(num_actions=num_actions, horizon=T, B=B)

# Append in construction order, then flip: the list handed to the
# Environment is ordered OPKB, GP_UCB_W, DAL_KB.
algorithms.extend([reds_cd, gp_ucb_w, opkb])
algorithms.reverse()


# Environment that runs every algorithm in `algorithms` on `bandit` for a
# horizon of T rounds.
environment = Environment(
    bandit=bandit,
    algorithms=algorithms,
    T=T,
    change_prob=change_prob,
    # Forward the script-level flag instead of a literal False: the result
    # file name also depends on `continuous`, so a hard-coded value here
    # could silently disagree with how the results are labelled.
    continuous=continuous,
    reward_bound=B,
)


# Time the whole Monte-Carlo experiment.  perf_counter() is monotonic, so
# the reported duration cannot be distorted by system clock adjustments
# the way a time.time() difference can.
time_start = time.perf_counter()

(
    avg_regret,
    std_regret,
    avg_timings,
    avg_detections,
    avg_detection_delays,
) = environment.run_experiment(n_mc=n_mc)

print('Total running time: ', time.perf_counter() - time_start)

# Base name for the pickle and the plots.  The `continuous` naming takes
# precedence over the other two variants.
# NOTE(review): the default variant labels the `cyclic` flag as "worst"
# while the deterministic variant labels it "cyclic" -- confirm this is
# intended before renaming anything, since existing result files use it.
if continuous:
    file_name = f'{bandit.__class__.__name__}_T_{T}_d_{d}_A_{num_actions}_N_{n_mc}_cont_seed_{seed}'
elif deterministic:
    file_name = f'{bandit.__class__.__name__}_T_{T}_d_{d}_A_{num_actions}_N_{n_mc}_xi_{xi}_cyclic_{cyclic}_deter_{deterministic}_seed_{seed}'
else:
    file_name = f'{bandit.__class__.__name__}_T_{T}_d_{d}_A_{num_actions}_N_{n_mc}_xi_{xi}_worst_{cyclic}_seed_{seed}'

# Create the output directory if needed.  exist_ok=True avoids the
# check-then-create race of a separate os.path.exists() test (e.g. when
# several runs of this script start at the same time).
os.makedirs('results', exist_ok=True)

# Persist the aggregated experiment outputs next to the plots.
with open(f'results/{file_name}.pkl', 'wb') as f:
    pickle.dump(
        {
            'avg_regret': avg_regret,
            'std_regret': std_regret,
            'avg_timings': avg_timings,
            'avg_detections': avg_detections,
            'avg_detection_delays': avg_detection_delays,
        },
        f,
    )

# Produce the regret and timing figures, labelled with the same experiment
# name that was used for the pickle file.
plot_regret(
    avg_regret,
    std_regret,
    environment.T,
    exp_name=file_name,
)
plot_times(
    avg_timings,
    exp_name=file_name,
)

