import sys
import os
from tqdm.notebook import tqdm
import random
from env import *
from algorithms import *
import time

import matplotlib.pyplot as plt


def Agent(agent_name, env, K, c=5e-3, kappa=0.05):
    """Construct the agent registered under ``agent_name``.

    Args:
        agent_name: One of ``"RRL-MNL"``, ``"UCRL-MNL"``, ``"ORRL-MNL"``,
            ``"UCRL-MNL+"`` or ``"Optimal Policy"``.
        env: Environment object, passed unchanged as the first positional
            argument of the agent constructor.
        K: Passed unchanged as the second positional argument of the agent
            constructor (presumably the number of episodes -- confirm
            against the agent classes).
        c: Forwarded as ``c=`` to every agent except ``"Optimal Policy"``.
        kappa: Forwarded as ``kappa=`` to ``"RRL-MNL"`` and ``"UCRL-MNL"``
            only; the other agents do not receive it.

    Returns:
        The newly constructed agent instance.

    Raises:
        ValueError: If ``agent_name`` is not one of the supported names.
    """
    if agent_name == "RRL-MNL":
        return RRL_MNL(env, K, kappa=kappa, c=c)
    if agent_name == "UCRL-MNL":
        return UCRL_MNL(env, K, kappa=kappa, c=c)
    if agent_name == "ORRL-MNL":
        return ORRL_MNL(env, K, c=c)
    if agent_name == "UCRL-MNL+":
        return UCRL_MNL_PLUS(env, K, c=c)
    if agent_name == "Optimal Policy":
        return Optimal_Policy(env, K)

    # Fail loudly on a typo instead of hitting an unbound local variable.
    supported = ("RRL-MNL", "UCRL-MNL", "ORRL-MNL", "UCRL-MNL+", "Optimal Policy")
    raise ValueError(
        f"Unknown agent_name {agent_name!r}; expected one of: "
        + ", ".join(supported)
    )


# Experiment configuration: K is handed to every agent constructor, nStates
# lists the environment sizes to sweep, kappa is forwarded to Agent().
K = 10000
nStates = [8]
kappa = 0.1

# NOTE(review): `np` is not imported explicitly among the visible imports;
# presumably it is re-exported by one of the star imports -- verify.
for nState in nStates:
    # H is passed to the environment as its `epLen` argument.
    H = 3 * nState
    env = make_riverSwim(epLen=H, nState=nState)
    runs = 10
    # NOTE(review): `seeds` is not read anywhere in the visible code; the
    # RNGs below are seeded with 41 * run instead.
    seeds = [1234 * i for i in range(1, runs + 1)]

    algo_list = [
        'ORRL-MNL',
        'RRL-MNL',
        'UCRL-MNL',
        'UCRL-MNL+',
        'Optimal Policy',
    ]

    for agent_name in algo_list:
        # One output directory per (setting, algorithm) pair.
        out_dir = f"./data/S={nState}, H={H}, kappa={kappa}/{agent_name}"

        # Make sure the output directory is there, reporting which case hit.
        if os.path.exists(out_dir):
            print("Directory " , out_dir ,  " already exists")
        else:
            os.makedirs(out_dir)
            print("Directory " , out_dir ,  " Created ")

        run_returns = []

        # Run indices start at 11 and end at 10 + runs (inclusive).
        for run in range(11, 11 + runs):
            # Seed both RNGs from the run index so each run is repeatable.
            random.seed(41 * run)
            np.random.seed(41 * run)

            agent = Agent(agent_name, env, K, c=3e-3, kappa=kappa)
            episodic_return = agent.run()
            run_returns.append(episodic_return)

            # Saved as <out_dir>/<agent_name><run>.npy
            np.save(f"{out_dir}/{agent_name}{run}.npy", episodic_return)
            