from GridWorld import GridWorld
from library import *
from matplotlib import pyplot as plt


# NOTE(review): this default-constructed environment is rebound to a configured
# GridWorld further down, before `env` is used anywhere in this script, so this
# instance looks redundant unless GridWorld() has side effects — confirm.
env = GridWorld()
# Coordinates of the candidate terminal states: four interior cells, followed
# by the cells whose first coordinate is 1 or 11, then those whose second
# coordinate is 1 or 11.  The skipped indices (6, or 7 on the last line) are
# reproduced exactly as they were listed by hand.
T_states = (
    [(3, 3), (3, 9), (9, 3), (9, 9)]
    + [(1, j) for j in (1, 2, 3, 4, 5, 7, 8, 9, 10, 11)]
    + [(11, j) for j in (1, 2, 3, 4, 5, 7, 8, 9, 10)]
    + [(i, 1) for i in (2, 3, 4, 5, 7, 8, 9, 10)]
    + [(i, 11) for i in (2, 3, 4, 5, 6, 8, 9, 10, 11)]
)
# Duplicate every coordinate into a [pos, pos] pair, the same shape as the
# entries of the `goals` list handed to GridWorld in this script.
T_states = [[cell, cell] for cell in T_states]

# Environment settings: forwarded to the GridWorld constructor under the
# keyword arguments of the same names.
goal_reward, step_reward = 1, 0
slip_prob = 0.1

# Learner settings: forwarded to GOAL(...) under the keyword arguments of the
# same names (presumably discount factor, exploration rate, learning rate and
# iteration budget — confirm against `library`).
gamma = 0.9
epsilon, alpha = 0.1, 0.1
maxiter = 500

### Learning base tasks
# goals=[(3,3),(3,9)]
# goals = [[pos,pos] for pos in goals]
# Goal cells of the base task: every listed cell has second coordinate 11 and
# first coordinate 3..11, with 7 skipped exactly as in the original
# hand-written list.  Each goal is given as a [pos, pos] pair.
goals = [[(i, 11), (i, 11)] for i in (3, 4, 5, 6, 8, 9, 10, 11)]

# Build the configured environment and draw the result of env.env_R().
env = GridWorld(
    goals=goals,
    goal_reward=goal_reward,
    step_reward=step_reward,
    slip_prob=slip_prob,
)
env.render(R=env.env_R())

# Run the learner (the original comment labels GOAL as
# "Goal_Oriented_Q_learning").  Only `A` and `stats1` are used below; `R` is
# kept so the unpacking matches GOAL's three return values.
R, A, stats1 = GOAL(
    env,
    gamma=gamma,
    Q_optimal=True,
    epsilon=epsilon,
    alpha=alpha,
    maxiter=maxiter,
)

# Draw the outputs of EQ_P(A) and EQ_V(A) on the grid.
env.render(P=EQ_P(A), V=EQ_V(A))

# Plot the 'R' series recorded by the learner.  Without an explicit show()
# the figure is never displayed when this file is run as a plain script.
plt.plot(stats1['R'])
plt.show()

# goals=[(3,3),(9,3)]
# goals = [[pos,pos] for pos in goals]
# env = GridWorld(goals=goals, goal_reward=goal_reward, step_reward=step_reward, slip_prob=slip_prob)
# env.render(R=env.env_R())
# B,stats2 = Goal_Oriented_Q_learning(env, gamma=gamma, epsilon=epsilon, alpha=alpha, maxiter=maxiter) #Goal_Oriented_Q_learning

# Q=AND(A, B)
# env.render( P=EQ_P(A), V = EQ_V(A))
# env.render( P=EQ_P(B), V = EQ_V(B))
# env.render( P=EQ_P(Q), V = EQ_V(Q))
# # goal = '[(4, 11), (4, 11)]'
# # env.render( P=EQ_P(A,goal=goal), V = EQ_V(A,goal=goal))


