import argparse
import numpy as np
import torch
import gym
from stable_baselines3 import SAC as stableSAC
from stable_baselines3 import PPO as stablePPO
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack, VecVideoRecorder
from stable_baselines3.common.policies import ActorCriticPolicy
from SAC import SAC
from PPO import PPO
import argparse

# Command-line options for the evaluation script. Parsing happens at import
# time, so `args` is available to everything below.
parser = argparse.ArgumentParser(
    description='Evaluate saved policies and append their scores to a log file.')
parser.add_argument('--turns', type=int, default=1,
                    help='number of evaluation episodes to run per model')
parser.add_argument('--log_file', type=str, default='scores.txt',
                    help='name of the results file; it is appended directly to --log-directory')
# Both directory options are joined to file names by plain string
# concatenation, so a non-empty value needs its own trailing path separator.
parser.add_argument('--log-directory', type=str, default='',
                    help='prefix prepended verbatim to the log file name '
                         '(include a trailing path separator)')
parser.add_argument('--models-directory', type=str, default='',
                    help='prefix prepended verbatim to every model file name '
                         '(include a trailing path separator)')
args = parser.parse_args()

# Used for Asteroids, BeamRider, Breakout and SpaceInvaders: an episode ends when info['lives'] reaches 0.
def evaluate_policy_terminal(env, model, sb3 = False, turns = 1, max_steps = 10000):
    """Evaluate a policy on RAM observations until the agent runs out of lives.

    An episode stops when ``info['lives'] == 0`` or after ``max_steps`` steps,
    whichever comes first. The ``done`` flag returned by ``env.step`` is
    ignored on purpose; only the life counter ends an episode here.

    Args:
        env: Environment exposing ``reset()``, a 4-tuple ``step(action)`` whose
            ``info`` dict contains ``'lives'``, and ``ale.getRAM()``
            (per the original comment: a plain gym NoFrameskip-v4 Atari env).
        model: Policy to evaluate. With ``sb3=True`` it must provide
            ``predict(obs, deterministic=True)``; otherwise
            ``get_action(tensor, with_log_prob=False)``.
        sb3: Selects which of the two model interfaces above is used.
        turns: Number of episodes to run.
        max_steps: Per-episode step cap (defaults to the previous fixed 10000).

    Returns:
        Tuple ``(mean_score, score_stderr, mean_length, length_stderr)`` where
        each stderr is ``np.std(values) / sqrt(turns)``.
    """
    scoresarr = np.zeros(turns)
    lengthsarr = np.zeros(turns)
    for j in range(turns):
        # The observation returned by reset()/step() is unused: the policy is
        # fed the emulator RAM instead.
        env.reset()
        ep_r = 0.0
        sess_length = 0
        terminal = False
        while not terminal and sess_length < max_steps:
            # Take deterministic actions at test time
            s = env.ale.getRAM()
            if sb3:
                action, _ = model.predict(s, deterministic=True)
            else:
                action = model.get_action(torch.as_tensor(s, dtype=torch.float32), with_log_prob=False)
            _, r, _, info = env.step(action)
            ep_r += float(r)
            sess_length += 1
            terminal = (info['lives'] == 0)
        scoresarr[j] = ep_r
        lengthsarr[j] = sess_length
    return np.mean(scoresarr), np.std(scoresarr)/np.sqrt(turns), np.mean(lengthsarr), np.std(lengthsarr)/np.sqrt(turns)

# Used for Pong: an episode ends when the environment reports done.
def evaluate_policy_done(env, model, sb3 = False, turns = 1, max_steps = 10000):
    """Evaluate a policy on RAM observations until the environment reports done.

    An episode stops when ``env.step`` returns ``done=True`` or after
    ``max_steps`` steps, whichever comes first. The ``info`` dict is not
    consulted, so environments that do not report ``'lives'`` work as well.

    Args:
        env: Environment exposing ``reset()``, a 4-tuple ``step(action)`` and
            ``ale.getRAM()`` (per the original comment: a plain gym
            NoFrameskip-v4 Atari env).
        model: Policy to evaluate. With ``sb3=True`` it must provide
            ``predict(obs, deterministic=True)``; otherwise
            ``get_action(tensor, with_log_prob=False)``.
        sb3: Selects which of the two model interfaces above is used.
        turns: Number of episodes to run.
        max_steps: Per-episode step cap (defaults to the previous fixed 10000).

    Returns:
        Tuple ``(mean_score, score_stderr, mean_length, length_stderr)`` where
        each stderr is ``np.std(values) / sqrt(turns)``.
    """
    scoresarr = np.zeros(turns)
    lengthsarr = np.zeros(turns)
    for j in range(turns):
        # The observation returned by reset()/step() is unused: the policy is
        # fed the emulator RAM instead.
        env.reset()
        ep_r = 0.0
        sess_length = 0
        done = False
        while not done and sess_length < max_steps:
            # Take deterministic actions at test time
            s = env.ale.getRAM()
            if sb3:
                action, _ = model.predict(s, deterministic=True)
            else:
                action = model.get_action(torch.as_tensor(s, dtype=torch.float32), with_log_prob=False)
            _, r, done, _ = env.step(action)
            ep_r += float(r)
            sess_length += 1
        scoresarr[j] = ep_r
        lengthsarr[j] = sess_length
    return np.mean(scoresarr), np.std(scoresarr)/np.sqrt(turns), np.mean(lengthsarr), np.std(lengthsarr)/np.sqrt(turns)

# Used for Qbert: an episode ends only when info['lives'] is 0 and the environment reports done.
def evaluate_policy_terminaldone(env, model, sb3 = False, turns = 1, max_steps = 10000):
    """Evaluate a policy on RAM observations until lives are 0 AND env is done.

    An episode stops when a single step reports both ``info['lives'] == 0``
    and ``done=True``, or after ``max_steps`` steps, whichever comes first.

    Args:
        env: Environment exposing ``reset()``, a 4-tuple ``step(action)`` whose
            ``info`` dict contains ``'lives'``, and ``ale.getRAM()``
            (per the original comment: a plain gym NoFrameskip-v4 Atari env).
        model: Policy to evaluate. With ``sb3=True`` it must provide
            ``predict(obs, deterministic=True)``; otherwise
            ``get_action(tensor, with_log_prob=False)``.
        sb3: Selects which of the two model interfaces above is used.
        turns: Number of episodes to run.
        max_steps: Per-episode step cap (defaults to the previous fixed 10000).

    Returns:
        Tuple ``(mean_score, score_stderr, mean_length, length_stderr)`` where
        each stderr is ``np.std(values) / sqrt(turns)``.
    """
    scoresarr = np.zeros(turns)
    lengthsarr = np.zeros(turns)
    for j in range(turns):
        # The observation returned by reset()/step() is unused: the policy is
        # fed the emulator RAM instead.
        env.reset()
        ep_r = 0.0
        sess_length = 0
        terminaldone = False
        while not terminaldone and sess_length < max_steps:
            # Take deterministic actions at test time
            s = env.ale.getRAM()
            if sb3:
                action, _ = model.predict(s, deterministic=True)
            else:
                action = model.get_action(torch.as_tensor(s, dtype=torch.float32), with_log_prob=False)
            _, r, done, info = env.step(action)
            ep_r += float(r)
            sess_length += 1
            # Both conditions must hold on the same step to end the episode.
            terminaldone = ((info['lives'] == 0) and done)
        scoresarr[j] = ep_r
        lengthsarr[j] = sess_length
    return np.mean(scoresarr), np.std(scoresarr)/np.sqrt(turns), np.mean(lengthsarr), np.std(lengthsarr)/np.sqrt(turns)

def evaluate_policy_gym(env, model, sb3=False, turns = 1, max_steps = 1000):
    """Evaluate a policy on an environment using its regular observations.

    An episode stops when ``env.step`` returns ``done=True`` or after
    ``max_steps`` steps, whichever comes first.

    Args:
        env: Environment exposing ``reset()`` (returning the first
            observation) and a 4-tuple ``step(action)``.
        model: Policy to evaluate. With ``sb3=True`` it must provide
            ``predict(obs, deterministic=True)`` whose first element is the
            action; otherwise ``select_action(obs, deterministic=True)``.
        sb3: Selects which of the two model interfaces above is used.
        turns: Number of episodes to run.
        max_steps: Per-episode step cap (defaults to the previous fixed 1000).

    Returns:
        Tuple ``(mean_score, score_stderr, mean_length, length_stderr)`` where
        each stderr is ``np.std(values) / sqrt(turns)``.
    """
    scoresarr = np.zeros(turns)
    lengthsarr = np.zeros(turns)
    for j in range(turns):
        s, done, ep_r = env.reset(), False, 0
        sess_length = 0
        while (not done) and (sess_length < max_steps):
            # Take deterministic actions at test time
            if sb3:
                a = model.predict(s, deterministic=True)[0]
            else:
                a = model.select_action(s, deterministic=True)
            s, r, done, _ = env.step(a)
            ep_r += r
            sess_length += 1
        scoresarr[j] = ep_r
        lengthsarr[j] = sess_length
    return np.mean(scoresarr), np.std(scoresarr)/np.sqrt(turns), np.mean(lengthsarr), np.std(lengthsarr)/np.sqrt(turns)

if __name__ == '__main__':
    airlfname = 'airl'
    gailfname = 'gail'
    bcfname = 'bc'
    rilefname = 'rile'
    filename = args.log_directory + args.log_file

    # -------------------------------------------------------------------------
    # 'AsteroidsNoFrameskip-v4'
    # -------------------------------------------------------------------------
    env_id = 'AsteroidsNoFrameskip-v4'

    # RILE
    #*************************************************************************
    env = gym.make(env_id)
    agent = PPO(128, env.action_space.n, hidden_shape=(256, 256), log_std_scale=-100,
                linear=True, continuous=False, steps_per_epoch=256)
    agent.load(args.models_directory + rilefname + '_' + env_id+'_100k')
    scores, std, sess_length, sess_std = evaluate_policy_terminal(env, agent, sb3=False, turns=int(args.turns))
    print("RILE : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("RILE 100k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " + str(
            sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # *************************************************************************

    # GAIL
    # *************************************************************************

    env = gym.make(env_id)
    agent = stablePPO.load(args.models_directory + gailfname + '_' + env_id+'_100k')
    scores, std, sess_length, sess_std = evaluate_policy_terminal(env, agent, sb3=True, turns=int(args.turns))
    print("GAIL 100k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("GAIL 100k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " + str(
            sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # *************************************************************************

    # AIRL
    # *************************************************************************

    env = gym.make(env_id)
    agent = stablePPO.load(args.models_directory + airlfname + '_' + env_id + '_100k')
    scores, std, sess_length, sess_std = evaluate_policy_terminal(env, agent, sb3=True, turns=int(args.turns))
    print("AIRL 100k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("AIRL 100k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " + str(
            sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # *************************************************************************

    # BC
    # *************************************************************************

    env = gym.make(env_id)
    agent = ActorCriticPolicy.load(args.models_directory + bcfname + '_' + env_id + '_100k.zip')
    scores, std, sess_length, sess_std = evaluate_policy_terminal(env, agent, sb3=True, turns=int(args.turns))
    print("BC 100k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("BC 100k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " + str(
            sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # -------------------------------------------------------------------------
    # -------------------------------------------------------------------------
    # -------------------------------------------------------------------------

    # -------------------------------------------------------------------------
    # 'BeamRiderNoFrameskip-v4'
    # -------------------------------------------------------------------------
    env_id = 'BeamRiderNoFrameskip-v4'

    # RILE
    # *************************************************************************

    env = gym.make(env_id)
    agent = PPO(128, env.action_space.n, hidden_shape=(256, 256), log_std_scale=-100,
                linear=True, continuous=False, steps_per_epoch=256)
    agent.load(args.models_directory + rilefname + '_' + env_id + '_100k')
    scores, std, sess_length, sess_std = evaluate_policy_terminal(env, agent, sb3=False, turns=int(args.turns))
    print("RILE : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("RILE 100k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " + str(
            sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # *************************************************************************

    # GAIL
    # *************************************************************************

    env = gym.make(env_id)
    agent = stablePPO.load(args.models_directory + gailfname + '_' + env_id + '_100k')
    scores, std, sess_length, sess_std = evaluate_policy_terminal(env, agent, sb3=True, turns=int(args.turns))
    print("GAIL 100k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("GAIL 100k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " + str(
            sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # *************************************************************************

    # AIRL
    # *************************************************************************

    env = gym.make(env_id)
    agent = stablePPO.load(args.models_directory + airlfname + '_' + env_id + '_100k')
    scores, std, sess_length, sess_std = evaluate_policy_terminal(env, agent, sb3=True, turns=int(args.turns))
    print("AIRL 100k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("AIRL 100k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " + str(
            sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # *************************************************************************

    # BC
    # *************************************************************************

    env = gym.make(env_id)
    agent = ActorCriticPolicy.load(args.models_directory + bcfname + '_' + env_id + '_100k.zip')
    scores, std, sess_length, sess_std = evaluate_policy_terminal(env, agent, sb3=True, turns=int(args.turns))
    print("BC 100k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("BC 100k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " + str(
            sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # -------------------------------------------------------------------------
    # -------------------------------------------------------------------------
    # -------------------------------------------------------------------------

    # -------------------------------------------------------------------------
    # 'BreakoutNoFrameskip-v4'
    # -------------------------------------------------------------------------
    env_id = 'BreakoutNoFrameskip-v4'

    # RILE
    # *************************************************************************
 
    env = gym.make(env_id)
    agent = PPO(128, env.action_space.n, hidden_shape=(256, 256), log_std_scale=-100,
                linear=True, continuous=False, steps_per_epoch=256)
    agent.load(args.models_directory + rilefname + '_' + env_id + '_100k')
    scores, std, sess_length, sess_std = evaluate_policy_terminal(env, agent, sb3=False, turns=int(args.turns))
    print("RILE : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("RILE 100k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " + str(
            sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # *************************************************************************

    # GAIL
    # *************************************************************************

    env = gym.make(env_id)
    agent = stablePPO.load(args.models_directory + gailfname + '_' + env_id + '_100k')
    scores, std, sess_length, sess_std = evaluate_policy_terminal(env, agent, sb3=True, turns=int(args.turns))
    print("GAIL 100k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("GAIL 100k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " + str(
            sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # *************************************************************************

    # AIRL
    # *************************************************************************

    env = gym.make(env_id)
    agent = stablePPO.load(args.models_directory + airlfname + '_' + env_id + '_100k')
    scores, std, sess_length, sess_std = evaluate_policy_terminal(env, agent, sb3=True, turns=int(args.turns))
    print("AIRL 100k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("AIRL 100k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " + str(
            sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # *************************************************************************

    # BC
    # *************************************************************************
    
    env = gym.make(env_id)
    agent = ActorCriticPolicy.load(args.models_directory + bcfname + '_' + env_id + '_100k.zip')
    scores, std, sess_length, sess_std = evaluate_policy_terminal(env, agent, sb3=True, turns=int(args.turns))
    print("BC 100k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("BC 100k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " + str(
            sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # -------------------------------------------------------------------------
    # -------------------------------------------------------------------------
    # -------------------------------------------------------------------------

    # -------------------------------------------------------------------------
    # 'PongNoFrameskip-v4'
    # -------------------------------------------------------------------------
    env_id = 'PongNoFrameskip-v4'

    # RILE
    # *************************************************************************
    
    env = gym.make(env_id)
    agent = PPO(128, env.action_space.n, hidden_shape=(256, 256), log_std_scale=-100,
                linear=True, continuous=False, steps_per_epoch=256)
    agent.load(args.models_directory + rilefname + '_' + env_id + '_100k')
    scores, std, sess_length, sess_std = evaluate_policy_done(env, agent, sb3=False, turns=int(args.turns))
    print("RILE : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("RILE 100k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " + str(
            sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # *************************************************************************

    # GAIL
    # *************************************************************************
    
    env = gym.make(env_id)
    agent = stablePPO.load(args.models_directory + gailfname + '_' + env_id + '_100k')
    scores, std, sess_length, sess_std = evaluate_policy_done(env, agent, sb3=True, turns=int(args.turns))
    print("GAIL 100k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("GAIL 100k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " + str(
            sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # *************************************************************************

    # AIRL
    # *************************************************************************
  
    env = gym.make(env_id)
    agent = stablePPO.load(args.models_directory + airlfname + '_' + env_id + '_100k')
    scores, std, sess_length, sess_std = evaluate_policy_done(env, agent, sb3=True, turns=int(args.turns))
    print("AIRL 100k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("AIRL 100k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " + str(
            sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # *************************************************************************

    # BC
    # *************************************************************************
    env = gym.make(env_id)
    agent = ActorCriticPolicy.load(args.models_directory + bcfname + '_' + env_id + '_100k.zip')
    scores, std, sess_length, sess_std = evaluate_policy_done(env, agent, sb3=True, turns=int(args.turns))
    print("BC 100k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("BC 100k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " + str(
            sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # -------------------------------------------------------------------------
    # -------------------------------------------------------------------------
    # -------------------------------------------------------------------------

    # -------------------------------------------------------------------------
    # 'QbertNoFrameskip-v4'
    # -------------------------------------------------------------------------
    env_id = 'QbertNoFrameskip-v4'

    # RILE
    # *************************************************************************

    env = gym.make(env_id)
    agent = PPO(128, env.action_space.n, hidden_shape=(256, 256), log_std_scale=-100,
                linear=True, continuous=False, steps_per_epoch=256)
    agent.load(args.models_directory + rilefname + '_' + env_id + '_100k')
    scores, std, sess_length, sess_std = evaluate_policy_terminaldone(env, agent, sb3=False, turns=int(args.turns))
    print("RILE : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("RILE 100k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " + str(
            sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # *************************************************************************

    # GAIL
    # *************************************************************************
 
    env = gym.make(env_id)
    agent = stablePPO.load(args.models_directory + gailfname + '_' + env_id + '_100k')
    scores, std, sess_length, sess_std = evaluate_policy_terminaldone(env, agent, sb3=True, turns=int(args.turns))
    print("GAIL 100k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("GAIL 100k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " + str(
            sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # *************************************************************************

    # AIRL
    # *************************************************************************

    env = gym.make(env_id)
    agent = stablePPO.load(args.models_directory + airlfname + '_' + env_id + '_100k')
    scores, std, sess_length, sess_std = evaluate_policy_terminaldone(env, agent, sb3=True, turns=int(args.turns))
    print("AIRL 100k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("AIRL 100k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " + str(
            sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # *************************************************************************

    # BC
    # *************************************************************************

    env = gym.make(env_id)
    agent = ActorCriticPolicy.load(args.models_directory + bcfname + '_' + env_id + '_100k.zip')
    scores, std, sess_length, sess_std = evaluate_policy_terminaldone(env, agent, sb3=True, turns=int(args.turns))
    print("BC 100k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("BC 100k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " + str(
            sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # -------------------------------------------------------------------------
    # -------------------------------------------------------------------------
    # -------------------------------------------------------------------------

    # -------------------------------------------------------------------------
    # 'SpaceInvadersNoFrameskip-v4'
    # -------------------------------------------------------------------------
    env_id = 'SpaceInvadersNoFrameskip-v4'

    # RILE
    # *************************************************************************

    env = gym.make(env_id)
    agent = PPO(128, env.action_space.n, hidden_shape=(256, 256), log_std_scale=-100,
                linear=True, continuous=False, steps_per_epoch=256)
    agent.load(args.models_directory + rilefname + '_' + env_id + '_100k')
    scores, std, sess_length, sess_std = evaluate_policy_terminal(env, agent, sb3=False, turns=int(args.turns))
    print("RILE : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("RILE 100k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " + str(
            sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # *************************************************************************

    # GAIL
    # *************************************************************************

    env = gym.make(env_id)
    agent = stablePPO.load(args.models_directory + gailfname + '_' + env_id + '_100k')
    scores, std, sess_length, sess_std = evaluate_policy_terminal(env, agent, sb3=True, turns=int(args.turns))
    print("GAIL 100k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("GAIL 100k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " + str(
            sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # *************************************************************************

    # AIRL
    # *************************************************************************

    env = gym.make(env_id)
    agent = stablePPO.load(args.models_directory + airlfname + '_' + env_id + '_100k')
    scores, std, sess_length, sess_std = evaluate_policy_terminal(env, agent, sb3=True, turns=int(args.turns))
    print("AIRL 100k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("AIRL 100k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " + str(
            sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # *************************************************************************

    # BC
    # *************************************************************************

    env = gym.make(env_id)
    agent = ActorCriticPolicy.load(args.models_directory + bcfname + '_' + env_id + '_100k.zip')
    scores, std, sess_length, sess_std = evaluate_policy_terminal(env, agent, sb3=True, turns=int(args.turns))
    print("BC 100k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("BC 100k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " + str(
            sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # -------------------------------------------------------------------------
    # -------------------------------------------------------------------------
    # -------------------------------------------------------------------------


    # -------------------------------------------------------------------------
    # 'Hopper-v3'
    # -------------------------------------------------------------------------
    env_id = 'Hopper-v3'

    # RILE
    # *************************************************************************
    env = gym.make(env_id)
    agent = SAC(state_dim=env.observation_space.shape[0], action_dim=env.action_space.shape[0], hid_shape=(256, 256))
    agent.load_model_from_file(args.models_directory + rilefname + '_' + env_id + '_1k')
    scores, std, sess_length, sess_std = evaluate_policy_gym(env, agent, sb3=False, turns=int(args.turns))
    print("RILE 1k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("\nRILE 1k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " +
                   str(sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std

    env = gym.make(env_id)
    agent = SAC(state_dim=env.observation_space.shape[0], action_dim=env.action_space.shape[0], hid_shape=(256, 256))
    agent.load_model_from_file(args.models_directory + rilefname + '_' + env_id + '_5k')
    scores, std, sess_length, sess_std = evaluate_policy_gym(env, agent, sb3=False, turns=int(args.turns))
    print("RILE 5k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("\nRILE 5k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " +
                   str(sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # *************************************************************************

    # GAIL
    # *************************************************************************
    env = gym.make(env_id)
    agent = stableSAC.load(args.models_directory + gailfname + '_' + env_id + '_1k')
    scores, std, sess_length, sess_std = evaluate_policy_gym(env, agent, sb3=True, turns=int(args.turns))
    print("GAIL 1k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("GAIL 1k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " +
                   str(sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std


    env = gym.make(env_id)
    agent = stableSAC.load(args.models_directory + gailfname + '_' + env_id + '_5k')
    scores, std, sess_length, sess_std = evaluate_policy_gym(env, agent, sb3=True, turns=int(args.turns))
    print("GAIL 5k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("GAIL 5k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " +
                   str(sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # *************************************************************************

    # AIRL
    # *************************************************************************
    env = gym.make(env_id)
    agent = stableSAC.load(args.models_directory + airlfname + '_' + env_id + '_1k')
    scores, std, sess_length, sess_std = evaluate_policy_gym(env, agent, sb3=True, turns=int(args.turns))
    print("AIRL 1k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("AIRL 1k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " +
                   str(sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std


    env = gym.make(env_id)
    agent = stableSAC.load(args.models_directory + airlfname + '_' + env_id + '_5k')
    scores, std, sess_length, sess_std = evaluate_policy_gym(env, agent, sb3=True, turns=int(args.turns))
    print("AIRL 5k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("AIRL 5k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " +
                   str(sess_std) + "\n\n")
        del file
    del env, agent, scores, std, sess_length, sess_std
    # *************************************************************************

    # BC
    # *************************************************************************
    env = gym.make(env_id)
    agent = ActorCriticPolicy.load(args.models_directory + bcfname + '_' + env_id + '_1k.zip')
    scores, std, sess_length, sess_std = evaluate_policy_gym(env, agent, sb3=True, turns=int(args.turns))
    print("BC 1k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("BC 1k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " +
                   str(sess_std) + "\n")
        del file
    del env, agent, scores, std, sess_length, sess_std


    env = gym.make(env_id)
    agent = ActorCriticPolicy.load(args.models_directory + bcfname + '_' + env_id + '_5k')
    scores, std, sess_length, sess_std = evaluate_policy_gym(env, agent, sb3=True, turns=int(args.turns))
    print("BC 5k : ", env_id, scores, std, sess_length, sess_std)
    with open(filename, 'a') as file:
        file.write("BC 5k : " + env_id + " " + str(scores) + " " + str(std) + " " + str(sess_length) + " " +
                   str(sess_std))
        del file
    del env, agent, scores, std, sess_length, sess_std
    # -------------------------------------------------------------------------
    # -------------------------------------------------------------------------
    # -------------------------------------------------------------------------

    # -------------------------------------------------------------------------
    # 'Humanoid-v3' and 'Walker2d-v3'
    # -------------------------------------------------------------------------
    # Both environments are evaluated with the same eight checkpoints, in the
    # same order, so the runs are described once as data rather than repeated
    # per model. Each entry is:
    #   (algorithm label, model-file prefix, size label, checkpoint file suffix,
    #    text written before the record, text written after the record)
    # The lead/tail strings keep the existing layout of the results file: RILE
    # records start with a newline, the AIRL 5k record ends with two newlines
    # and the BC 5k record ends with none.
    # NOTE(review): the BC 1k checkpoint is loaded from '..._1k.zip' but BC 5k
    # from '..._5k' with no extension — kept as-is; confirm against the file
    # names on disk.
    run_specs = (
        ("RILE", rilefname, "1k", "_1k", "\n", "\n"),
        ("RILE", rilefname, "5k", "_5k", "\n", "\n"),
        ("GAIL", gailfname, "1k", "_1k", "", "\n"),
        ("GAIL", gailfname, "5k", "_5k", "", "\n"),
        ("AIRL", airlfname, "1k", "_1k", "", "\n"),
        ("AIRL", airlfname, "5k", "_5k", "", "\n\n"),
        ("BC", bcfname, "1k", "_1k.zip", "", "\n"),
        ("BC", bcfname, "5k", "_5k", "", ""),
    )

    for env_id in ('Humanoid-v3', 'Walker2d-v3'):
        for algo_label, name_prefix, size_label, ckpt_suffix, line_lead, line_tail in run_specs:
            env = gym.make(env_id)
            model_path = args.models_directory + name_prefix + '_' + env_id + ckpt_suffix

            if algo_label == "RILE":
                # RILE checkpoints are restored into the project's own SAC class,
                # sized from the environment's observation and action spaces, and
                # evaluated with sb3=False.
                agent = SAC(state_dim=env.observation_space.shape[0],
                            action_dim=env.action_space.shape[0],
                            hid_shape=(256, 256))
                agent.load_model_from_file(model_path)
                is_sb3 = False
            elif algo_label == "BC":
                # BC checkpoints are stored as a bare Stable-Baselines3 policy.
                agent = ActorCriticPolicy.load(model_path)
                is_sb3 = True
            else:
                # GAIL and AIRL checkpoints are Stable-Baselines3 SAC models.
                agent = stableSAC.load(model_path)
                is_sb3 = True

            scores, std, sess_length, sess_std = evaluate_policy_gym(env, agent, sb3=is_sb3,
                                                                     turns=int(args.turns))
            # Release the environment explicitly instead of relying on `del` alone.
            env.close()

            run_tag = algo_label + " " + size_label + " : "
            print(run_tag, env_id, scores, std, sess_length, sess_std)
            # str() is applied explicitly so numeric formatting matches what print() shows.
            with open(filename, 'a') as log:
                log.write(line_lead + run_tag + env_id + " " + str(scores) + " " + str(std) + " " +
                          str(sess_length) + " " + str(sess_std) + line_tail)
            # Drop the model and results before the next checkpoint is loaded.
            del env, agent, scores, std, sess_length, sess_std
    # -------------------------------------------------------------------------
    # -------------------------------------------------------------------------
    # -------------------------------------------------------------------------