from GridWorld import *
from library import *
import matplotlib.pyplot as plt
import numpy as np


# --- Experiment configuration -------------------------------------------
# Learning hyperparameters; `alpha` and `epsilon` are forwarded to
# Safe_Q_learning below.
alpha = 0.1
epsilon = 0.1
# NOTE(review): `gamma` is assigned here but is not passed to any call in
# this script -- confirm whether the learner is expected to receive it.
gamma = 1

# Grid cells handed to the environment as goal cells and as `T_states`
# (presumably terminal states -- verify against GridWorld).
goals = [(1, 4)]
T_states = [(1, 4), (2, 4)]
# NOTE(review): `start_position` is defined but the environment below is
# built with start_position=None -- confirm this is intentional.
start_position = (3, 4)

# --- Environment --------------------------------------------------------
# Every non-goal outcome (step, wall, lava) carries the same -0.1 reward;
# reaching a goal gives +1. Slipping is disabled (slip_prob=0.0).
env = GridWorld(
    goals=goals,
    T_states=T_states,
    start_position=None,
    step_reward=-0.1,
    wall_reward=-0.1,
    lava_reward=-0.1,
    goal_reward=1,
    slip_prob=0.0,
)
# Optional: draw the bare environment and save it as a figure.
# env.render()
# plt.savefig("images/lavaworld.pdf",bbox_inches='tight')

# --- Training -----------------------------------------------------------
# Run the learner with the "minmax" penalty type for at most 10000
# iterations (maxiter); returns the learned Q and a stats object.
Q, stats = Safe_Q_learning(
    env,
    penalty_type="minmax",
    epsilon=epsilon,
    alpha=alpha,
    maxiter=10000,
    p=True,
)
# Alternative run with an empty penalty type and a convergence setting:
# Q,stats = Safe_Q_learning(env, penalty_type="", epsilon=epsilon, alpha=alpha, converge=1, p=False)

# --- Visualisation ------------------------------------------------------
# Render the result of Q_P(Q) together with Q itself, including a colour bar.
# Alternative view passing Q_V(Q) as V instead of the raw Q:
# env.render(P=Q_P(Q),V=Q_V(Q),show_color_bar=True)
env.render(
    P=Q_P(Q),
    Q=Q,
    show_color_bar=True,
)  # ,cmap = 'YlOrRd')
plt.show()