import numpy as np
from domains import Simple_Grid, Taxi_Domain
from Learning import *
import random 
from log import Log_experiments
from abstraction import Abstraction

# ____________ main Parameters ___________________________
# Index of this run: it scales the seed of Python's `random` module and is the
# last component of the output file name.
trial = 1
random.seed(23 * trial)

approach_name = 'lambda'

# `hyper_param` is not defined explicitly in this file; presumably it is
# supplied by the star import from Learning -- TODO confirm.
map_name = hyper_param.map_name
file_name = "_".join((map_name, approach_name, str(trial)))
step_max = hyper_param.step_max      # upper bound on steps within one episode
episodes = hyper_param.episode_max   # number of episodes to run
env = hyper_param.env
#_________________________________________________________


#env = Simple_Grid (map_name, [0,0], [43,43])

# Build the learner with `tdlambda`, sized from the environment's state and
# action counts. The same object is reachable under both names below.
agent = agent_con_qlearning = tdlambda(
    env,
    state_size=env._state_size,
    action_size=env._action_size,
)

# Collector that receives one record per episode from the training loop.
log = Log_experiments()
# NOTE(review): `episodes` was already read from the same attribute in the
# parameter section above; this second assignment looks redundant.
episodes = hyper_param.episode_max

# Run `episodes` training episodes. Within an episode the next action is
# chosen before the update, so agent.train receives the full
# (state, new_state, action, new_action, reward) transition.
for i in range(episodes):
    state = env.reset()
    done = False
    # Bind `success` before stepping so the logging and printing below cannot
    # raise NameError when the step loop never executes (step_max <= 0).
    # False is assumed to be the right "no success" value -- confirm against
    # what env.step returns.
    success = False
    reward = 0   # sum of per-step rewards for this episode
    epoch = 0    # number of steps taken in this episode
    new_action = agent.policy(state)
    while (not done) and (epoch < step_max):
        env.update_visited(state)
        action = new_action
        # `pitfall` is returned by env.step but is not used in this script.
        new_state, r, done, success, pitfall = env.step(action)
        new_action = agent.policy(new_state)
        agent.train(state, new_state, action, new_action, r)
        state = new_state
        reward += r
        epoch += 1
    # Called once per episode; presumably decays the exploration rate that is
    # printed below as agent._epsilon -- TODO confirm.
    agent.decay()
    log.log_episode(reward, success, epoch)

    # Tab-separated "label: value" summary of the finished episode.
    summary_fields = (
        ("episode", i),
        ("reward", reward),
        ("epochs", epoch),
        ("epsilon", round(agent._epsilon, 3)),
        ("success", success),
    )
    print("_______________________________")
    print("\t".join(label + ": " + str(value) for label, value in summary_fields))


# Pass the run's file name to the logger's save routine, then request its
# learning plot with the arguments 500 and "success". What 500 controls is
# defined by Log_experiments.plot_learning, which is not visible here.
log.save_execution(file_name)
log.plot_learning(500, "success")
