import sys
import numpy as np
import pandas as pd
from Env import FiniteStateFiniteActionMDP
import pickle
from FedQ import FedQlearning_gen
from fed_adv import FedQlearning_gen_adv
# Job index taken from the first command-line argument; it encodes both the
# algorithm variant and the replication number.
n = int(sys.argv[1])

# task: 0,1,2,3,4,5 -> ucb-h, ucb-b, fed-h, fed-b, ucb-a, fed-a
# idd:  replication id within a task (10 replications per task)
task, idd = divmod(n, 10)

# Experiment hyper-parameters.
c = 2.0  # UCB exploration parameter
total_episodes = 500000  # Total episodes for training
num_agents = 10

# Dimensions handed to the MDP constructor
# (presumably horizon, number of states, number of actions -- confirm in Env).
H = 20
S = 20
A = 5

# Seed the global NumPy RNG before the environment is built, so the seed is
# the same for every job at construction time.
np.random.seed(1)
mdp_env = FiniteStateFiniteActionMDP(H=H, S=S, A=A)

# Throw away a job-dependent number of draws from the global RNG so that each
# job index continues from a different position in the seeded stream.
# Kept as one scalar draw per iteration so the consumed stream is unchanged.
burn_in_draws = 5 * n * (10**5) + 1
for _ in range(burn_in_draws):
    np.random.randint(S)
# Select the learner variant from the task id, train it, and pickle the
# outcome to ./result/result_<n>.
#
# task 0..3 -> FedQlearning_gen      (odd task: is_bern, task >= 2: is_fed)
# task 4..  -> FedQlearning_gen_adv  (task == 5: is_fed)
is_adv = task > 3
if not is_adv:
    is_bern = task % 2 == 1
    is_fed = task >= 2
    fed_q = FedQlearning_gen(
        mdp_env, c, total_episodes, num_agents,
        is_bern=is_bern, is_fed=is_fed,
        cb=2.0, using_bern_min=1000, using_bern_samp=1000,
    )
else:
    is_bern = False
    is_fed = task == 5
    fed_q = FedQlearning_gen_adv(
        mdp_env, total_episodes, num_agents,
        is_fed=is_fed, is_ber=False, is_adv=is_adv,
        using_adv_min=1000,
    )

best_value, best_Q, value_fed, global_Q_fed = fed_q.learn()

# Both variants are saved the same way, so the dump is done once here.
# The context manager guarantees the file handle is closed even if pickling
# raises part-way through.
# NOTE: the ./result directory must already exist; open() does not create it.
with open('./result/result_' + str(n), 'wb') as f:
    pickle.dump((best_value, best_Q, value_fed, global_Q_fed, fed_q), f)