
import os
import pickle
import sys

import numpy as np
import pandas as pd

from Env import FiniteStateFiniteActionMDP
from FedQ import FedQlearning_gen
n = int(sys.argv[1])

task = n//10 #0,1,2,3: ucb-h, ucb-b, fed-h, fed-b, ucb-a, fed-a
idd = n%10# 10 replications

np.random.seed(1)
c = 2.0  # UCB exploration parameter
total_episodes = 30000  # Total episodes for training
num_agents = 10

H, S, A = 5, 3, 2
mdp_env = FiniteStateFiniteActionMDP(H=H, S=S, A=A) 
for _ in range(5*n*(10**5) + 1):
    np.random.randint(S)
is_bern=False
is_fed = False
if task%2 == 1:
    is_bern = True
if task>=2:
    is_fed = True

fed_q = FedQlearning_gen(mdp_env, c, total_episodes, num_agents, is_bern= is_bern, is_fed=is_fed,
                          cb = 2.0, using_bern_min = 1000, using_bern_samp = 1000)
best_value, best_Q, value_fed, global_Q_fed = fed_q.learn()
f = open('./result/result_' + str(n), 'wb')
pickle.dump((best_value, best_Q, value_fed, global_Q_fed, fed_q), f)
f.close()


