import sys
import numpy as np
import pandas as pd
from Env_deterministic import FiniteStateFiniteActionMDP_deterministic
import pickle
from QHoeffding import Qlearning_gen
from Qadv import Qlearning_gen_adv
from Qearly import Qlearning_gen_early
from AMB import Qlearning_gen_AMB
# Job index supplied as the first command-line argument.
n = int(sys.argv[1])

# task (n // 10) selects which learner is run further down; idd (n % 10) is
# computed but not read anywhere else in the visible script.
task, idd = divmod(n, 10)

# Seed the global NumPy RNG before anything that may draw from it.
np.random.seed(1)

# Experiment hyper-parameters handed to the learner constructors.
total_episodes = 300000
c1, c2, c3 = np.sqrt(2), 2, 1
beta = 0.05
using_adv_min = 200

# Environment dimensions, passed as H, S and A to the environment constructor.
H, S, A = 5, 3, 2
mdp_env = FiniteStateFiniteActionMDP_deterministic(H=H, S=S, A=A)

# Discard a job-dependent number of draws from the global RNG (the results
# are thrown away). Presumably this gives each job index its own RNG state
# before learning starts -- confirm with the experiment design.
burn_in_draws = n * (10 ** 7) + 1
for _ in range(burn_in_draws):
    np.random.randint(S)

# Build the learner selected by ``task``:
#   0 -> Qlearning_gen, 1 -> Qlearning_gen_adv, 2 -> Qlearning_gen_early,
#   any other value -> Qlearning_gen_AMB.
# Only the constructor call differs between the variants; each constructor
# takes its arguments in its own order, so the calls are spelled out.
if task == 0:
    learner = Qlearning_gen(mdp_env, c1, total_episodes)
elif task == 1:
    learner = Qlearning_gen_adv(mdp_env, total_episodes, c1, c2, c3, using_adv_min)
elif task == 2:
    learner = Qlearning_gen_early(mdp_env, c1, c2, c3, total_episodes, beta)
else:
    learner = Qlearning_gen_AMB(mdp_env, c1, total_episodes)

# learn() is unpacked into five values for every variant; the fifth
# (raw_gap) is not written to the result file.
best_value, best_Q, value, global_Q, raw_gap = learner.learn()

# Persist the first four results together with the learner object itself.
# The context manager closes the file even if pickling raises.
with open('./result_deterministic/result_' + str(n), 'wb') as f:
    pickle.dump((best_value, best_Q, value, global_Q, learner), f)