from datetime import datetime, timezone

import numpy

from SimEnv import ENVIRONMENTS
from modules.train.TrainHelper import *
from modules.utils.Log import Logger
from modules.utils.EvalCallback import EvalCallback

# Print numpy floats with two digits of precision.
numpy.set_printoptions(precision=2)

# --- Experiment configuration -------------------------------------------
# The selected ENVIRONMENTS entry unpacks into a display name and the object
# that hands out the environment, the agents and the oracle.
env_name, simEnv = ENVIRONMENTS.CLIFF
env = simEnv.env()
agents = simEnv.agents()
oracle = simEnv.oracle()

# Total episode count is (number of batches) x (episodes per batch).
training_parameters = dict(
    number_episodes=simEnv.n_batch() * simEnv.batch_size(),
    batch_episode=simEnv.batch_size(),
)

# Run-time switches forwarded to train() further down.
verbose = True
backup_agents = True

# NOTE(review): the two settings below are not referenced in the visible
# part of this script -- confirm whether they are still needed.
render_tests = False
n_tests = 10

# Train every agent sequentially against the same environment and oracle.
for agent in agents:
    # TODO parallelize eventually
    print("\n############\n\n", "Training agent: ", agent.name(), "\n")

    # A fresh logger is created for each agent. The `now` tag is the current
    # UTC time formatted like "Jan-05_13:45:10"; a timezone-aware call is used
    # because datetime.utcnow() is deprecated (the formatted text is the same).
    logger = Logger(parent_folder_name='results',
                    env_name=env_name,
                    agent_name=agent.name(),
                    oracle_name=oracle.name(),
                    now=datetime.now(timezone.utc).strftime("%b-%d_%H:%M:%S"))

    # Close the logger even when callback construction or training raises, so
    # a failed run does not leave it open.
    try:
        eval_callback = EvalCallback(simEnv._singleton_env(), logger.save_path())

        train(env, logger, training_parameters, agent, oracle,
              verbose=verbose, save=backup_agents, eval_callback=eval_callback)
    finally:
        logger.close()