from NSGenLinearBandit import NSGenLinearBandit
from Environment_kappa import Environment
from plot import plot_regret, plot_times
import time
from utils import dsigmoid,sigmoid
import argparse


import numpy as np

import os,pickle

import warnings
import traceback

def warn_with_traceback(message, category, filename, lineno, file=None, line=None):
    """Report a warning together with the call stack that led to it.

    The signature mirrors ``warnings.showwarning`` so the function can be
    installed as a replacement for it.

    Args:
        message: Warning instance or message text.
        category: Warning class; its ``__name__`` appears in the header line.
        filename: File in which the warning was raised.
        lineno: Line number at which the warning was raised.
        file: Writable text stream that receives the report. When ``None``
            the report is printed to standard output.
        line: Accepted for signature compatibility; not used.
    """
    header = f"{filename}:{lineno}: {category.__name__}: {message}\n"
    # format_stack() captures the stack at this point, which shows the call
    # chain that was active when the warning was reported.
    report = header + ''.join(traceback.format_stack())
    # print() falls back to sys.stdout when file is None.
    print(report, file=file)


# Install the hook so warnings are reported through warn_with_traceback.
warnings.showwarning = warn_with_traceback



# Fixed seed so NumPy's global RNG is reproducible across runs; the value is
# also embedded in the result file name.
# (Replace 205 with np.random.randint(0, 1000) to draw a fresh seed per run.)
seed = 205
np.random.seed(seed)


# Command-line interface: --T and --xi are mandatory, --PB is optional.
parser = argparse.ArgumentParser(
    description="Run bandit experiment with specific parameters."
)
parser.add_argument(
    "--T",
    type=int,
    required=True,
    help="Horizon length",
)
parser.add_argument(
    "--xi",
    type=float,
    required=True,
    help="Exploration parameter xi",
)
# Optional flag; args.PB is None when it is omitted.
parser.add_argument("--PB", type=str, help="Problem type PB")

args = parser.parse_args()


# --- Problem configuration ---
xi = args.xi  # exponent from the CLI; the change probability is derived from it

d = 10
num_actions = 100
mean_low, mean_high = -1, 1
theta_bound = 1
action_bound = 1
B = 1
noise_variance = 0.01

# Experiment-variant switches (they also select the result file name).
deterministic = False
continuous = False
cyclic = False

# Draw theta uniformly from the cube [-theta_bound, theta_bound]^d.
theta = np.random.uniform(low=-theta_bound, high=theta_bound, size=d)

# If the draw falls outside the Euclidean ball of radius theta_bound, shrink
# it so that its norm ends up just inside the bound (factor 0.999).
theta_norm = np.linalg.norm(theta)
if theta_norm > theta_bound:
    theta *= theta_bound / theta_norm * 0.999


T = args.T  # horizon length from the CLI

n_mc = 15  # forwarded to run_experiment; presumably the number of Monte-Carlo repetitions

# xi == 0 is special-cased to "no changes": T ** 0 would otherwise give 1.
change_prob = T ** (-xi) if xi != 0 else 0


# --- Hyper-parameters handed to the algorithms ---
S, L = theta_bound, action_bound   # bounds on the parameter and action norms
R = np.sqrt(noise_variance)        # noise standard deviation
reward_bound = 1

k_mu = 0.25
c_mu = dsigmoid(L * S)             # presumably the link derivative at L*S; see utils.dsigmoid

delta = 1 / (2 * T)

# Regularisation levels: plain d, and d scaled by 1 / c_mu^2 for the
# weighted variant.
r_lambda_GLB = d
r_lambda_GLB_Weigh = d / c_mu ** 2





from MASTER import MASTER

# Algorithms handed to the Environment; they are constructed in this order.
algorithms = [
    MASTER(num_actions, T, d, delta, r_lambda_GLB, S, L, R, 'GLB', k_mu, c_mu),
]

from DAL_GLBs import DAL_GLB

algorithms.append(
    DAL_GLB(
        num_actions=num_actions,
        horizon=T,
        noise_variance=noise_variance,
        d=d,
        delta=delta,
        r_lambda=r_lambda_GLB,
        S=S,
        L=L,
        R=R,
        k_mu=k_mu,
        c_mu=c_mu,
    )
)

from GLB_WeightUCB_new import GLB_WeightUCB

# The weighted variant uses its own regularisation level (r_lambda_GLB_Weigh).
algorithms.append(
    GLB_WeightUCB(
        num_actions=num_actions,
        horizon=T,
        d=d,
        delta=delta,
        r_lambda=r_lambda_GLB_Weigh,
        S=S,
        L=L,
        R=R,
        k_mu=k_mu,
        c_mu=c_mu,
    )
)


# Bandit instance built from the sampled theta and the problem constants.
bandit = NSGenLinearBandit(
    num_actions=num_actions, noise_variance=noise_variance, d=d, theta=theta,
    mean_low=mean_low, mean_high=mean_high,
    reward_bound=reward_bound, theta_bound=theta_bound, action_bound=action_bound,
    continuous=continuous,
)

# Environment that runs every algorithm on the bandit over T rounds.
# NOTE(review): reward_bound is taken from B here but from reward_bound for
# the bandit; both are set to 1 in this script.
environment = Environment(
    bandit=bandit, algorithms=algorithms,
    T=T, change_prob=change_prob,
    continuous=continuous, reward_bound=B,
)

# Wall-clock timing around the whole experiment.
time_start = time.time()

(
    avg_regret,
    std_regret,
    avg_timings,
    avg_detections,
    avg_detection_delays,
) = environment.run_experiment(n_mc=n_mc)

elapsed = time.time() - time_start
print('Total running time: ', elapsed)

# Stem of the result files. The continuous setting takes precedence over the
# deterministic / non-deterministic variants.
# NOTE(review): the default branch labels the `cyclic` flag as "worst_" while
# the deterministic branch labels it "cyclic_" - confirm this is intended.
if continuous:
    file_name = f'{bandit.__class__.__name__}_T_{T}_d_{d}_A_{num_actions}_N_{n_mc}_cont_seed_{seed}'
elif deterministic:
    file_name = f'{bandit.__class__.__name__}_T_{T}_d_{d}_A_{num_actions}_N_{n_mc}_xi_{xi}_cyclic_{cyclic}_deter_{deterministic}_seed_{seed}'
else:
    file_name = f'{bandit.__class__.__name__}_T_{T}_d_{d}_A_{num_actions}_N_{n_mc}_xi_{xi}_worst_{cyclic}_seed_{seed}'

# Create the output directory. exist_ok=True avoids the check-then-create
# race when several runs of this script start at the same time.
os.makedirs('results', exist_ok=True)

# Persist the aggregated statistics returned by run_experiment.
with open(f'results/{file_name}.pkl', 'wb') as f:
    pickle.dump({
        'avg_regret': avg_regret,
        'std_regret': std_regret,
        'avg_timings': avg_timings,
        'avg_detections': avg_detections,
        'avg_detection_delays': avg_detection_delays
    }, f)

# Plots are labelled with the same stem as the pickle file.
plot_regret(avg_regret, std_regret, environment.T, exp_name=file_name)
plot_times(avg_timings, exp_name=file_name)


