import sys
import numpy as np
import pandas as pd
from Env import FiniteStateFiniteActionMDP
import pickle
from QHoeffding import Qlearning_gen
from Qadv import Qlearning_gen_adv
from Qearly import Qlearning_gen_early
from FedQ_EarlySettled_simple import FedQearly_gen
from QHoeffdinglow import Qlearning_gen_low
from QBernstein import Qlearning_genb
from QBernsteinlow import Qlearning_genb_low

# Single integer command-line argument. It both selects which learner is run
# (through `task`) and controls how far NumPy's global RNG is advanced before
# learning starts (see the loop at the end of this section).
n = int(sys.argv[1])

# Tens part of n: compared against 0..5 by the task dispatch further down;
# any other value falls through to that dispatch's final `else` branch.
task = n//10 
# Remainder of n modulo 10. Not read anywhere in the visible code
# (presumably a repetition index within a task -- TODO confirm).
idd = n%10

# Seed NumPy's global RNG with the same value for every invocation.
np.random.seed(1)
# Passed to every learner constructor.
total_episodes = 2000000
# Only passed to FedQearly_gen.
num_agents = 1
# c1, c2, c3 and beta are forwarded to the learner constructors; their meaning
# is defined by those classes (presumably bonus/threshold constants -- verify
# against the learner modules).
c1 = np.sqrt(2)
c2 = 2
c3 = 1
beta = 0.05
# Not read anywhere in the visible code.
n0 = 1
# Only passed to Qlearning_gen_adv.
using_adv_min = 200

# Presumably horizon, number of states and number of actions -- confirm in Env.
H, S, A = 7, 10, 5
# The environment is built right after seeding and before the RNG is advanced
# below.
# NOTE(review): if the constructor draws from NumPy's global RNG, every value
# of n gets the same environment -- confirm in Env.
mdp_env = FiniteStateFiniteActionMDP(H=H, S=S, A=A) 
# Draw and discard n*10**7 + 1 integers from [0, S) so that the global RNG
# state reached here depends on n (presumably to give each run its own random
# stream -- TODO confirm intent).
# NOTE(review): this is a Python-level loop, so its running time grows
# linearly with n.
for _ in range(n*(10**7) + 1):
    np.random.randint(S)

# Build the learner selected by `task` (the tens part of the command-line
# integer n). Only the selected learner is constructed; the argument order
# differs between constructors, so each call is spelled out explicitly.
if task == 0:
    learner = Qlearning_gen(mdp_env, c1, total_episodes)
elif task == 1:
    learner = Qlearning_gen_adv(
        mdp_env, total_episodes, c1, c2, c3, using_adv_min
    )
elif task == 2:
    learner = FedQearly_gen(
        mdp_env, c1, c2, c3, beta, total_episodes, num_agents
    )
elif task == 3:
    learner = Qlearning_gen_low(mdp_env, c1, total_episodes)
elif task == 4:
    learner = Qlearning_genb_low(mdp_env, c1, c2, total_episodes)
elif task == 5:
    learner = Qlearning_genb(mdp_env, c1, c2, total_episodes)
else:
    # Every other task value (6 and above, or negative) runs this learner.
    learner = Qlearning_gen_early(mdp_env, c1, c2, c3, total_episodes, beta)

# Run the selected learner; learn() returns a pair that is stored as-is.
best_Q, global_Q = learner.learn()

# Persist the results together with the learner object itself. The `with`
# block guarantees the file handle is closed even if pickling fails.
# NOTE(review): the output directory must already exist; open() raises
# FileNotFoundError otherwise, and that would only surface after learn() has
# finished.
with open('./result_single_large/result_' + str(n), 'wb') as f:
    pickle.dump((best_Q, global_Q, learner), f)
