# Imports
import srl_example_setup

from simple_rl.tasks import GymMDP, MiniGridMDP
from simple_rl.agents import QLearningPlanAgent, QlearningAgent
from simple_rl.run_experiments import run_agents_on_mdp


# Run Experiment
# Build the MiniGrid task and a Q-learning agent over its action set.
mdp = MiniGridMDP('MiniGrid-DoorKey-5x5-v0')
agent = QLearningPlanAgent(mdp.get_actions())

# Action sequence used to seed the agent's Q function. It is empty here, so
# the rollout loop below does nothing and the agent is initialized with an
# empty plan, a single-element state sequence, and no rewards.
plan = []

# state_seq starts with the initial state and gains one state per executed
# action, so it is always one element longer than reward_seq.
state_seq = [mdp.get_init_state()]
reward_seq = []

# Execute the plan sequence to get the state and reward sequence
for action in plan:
    # simple_rl's MDP.execute_agent_action returns (reward, next_state), in
    # that order; unpacking it as (state, reward) would swap the two
    # sequences handed to the agent below.
    reward, state = mdp.execute_agent_action(action)
    state_seq.append(state)
    reward_seq.append(reward)

# NOTE(review): the MDP is not reset between the rollout above and the
# experiment below. With a non-empty plan, confirm that run_agents_on_mdp
# starts its first episode from the initial state.

# Initialize the Q function with the plan
agent.initialize_with_plan(plan, state_seq, reward_seq)

run_agents_on_mdp([agent], mdp)





