# Imports
import srl_example_setup

from simple_rl.tasks import GymMDP, MiniGridMDP
from simple_rl.agents import QLearningPlanAgent, QLearningAgent
from simple_rl.run_experiments import run_agents_on_mdp


# Run Experiment
# Environment id handed to MiniGridMDP. 'MiniGrid-DoorKey-5x5-v0' was the
# alternative id kept around in this script; swap it in here to use it.
env_name = 'MiniGrid-DoorKey-16x16-v0'
mdp = MiniGridMDP(env_name)

# The agent is constructed from the action set reported by the MDP.
actions = mdp.get_actions()
agent = QLearningPlanAgent(actions)

# Disabled: seed the agent from a hand-written plan before the experiment.
# The plan is executed action by action on the MDP to record the visited
# states and received rewards, which are then handed to the agent.
#
# plan = [4, 0, 0, 15, 3, 3, 1, 1]
# state_seq = [mdp.get_init_state()]
# reward_seq = []
# for action in plan:
#     reward, state = mdp.execute_agent_action(action)
#     state_seq.append(state)
#     reward_seq.append(reward)
#
# print ("State seq: ", state_seq)
# print ("Reward seq: ", reward_seq)
#
# # Initialize the Q function with the plan
# agent.initialize_with_plan(plan, state_seq, reward_seq)

# Run the experiment with this single agent on the MDP.
agents = [agent]
run_agents_on_mdp(agents, mdp)





