from __future__ import annotations

from minigrid.core.constants import COLOR_NAMES
from minigrid.core.grid import Grid
from minigrid.core.mission import MissionSpace
from minigrid.core.world_object import Door, Goal, Key, Wall, Box, Ball, Lava
from minigrid.manual_control import ManualControl
from minigrid.minigrid_env import MiniGridEnv

import numpy as np
import pygame


class TwoDoorsEnv(MiniGridEnv):
    """Small grid world in which the agent chooses one of two doors.

    Layout built by ``_gen_grid`` (coordinates are ``(x, y)``):

    * the grid is surrounded by walls;
    * a single wall cell sits at ``(2, 2)``;
    * ``door1`` is at ``(2, 1)`` and ``door2`` is at ``(2, 3)``;
    * the goal is at ``(3, 2)``.

    The first time a door is observed open, the reward configured for that
    door by ``identity`` is paid out and the *other* door is marked locked.
    Every step also carries a fixed penalty and, optionally, a small bonus
    proportional to the agent's x coordinate.
    """

    # Scale of the x-position shaping bonus and size of the per-step penalty.
    POSITION_REWARD_SCALE = 0.05
    STEP_PENALTY = 0.1

    def __init__(
        self,
        size=5,
        agent_start_pos=(1, 2),
        agent_start_dir=0,
        agent_view_size=3,
        max_steps: int | None = None,
        identity=1,  # identity of the reward function
        position_reward=True,
        **kwargs,
    ):
        """Configure the environment.

        Args:
            size: Side length passed to the base class as ``grid_size``.
            agent_start_pos: Fixed ``(x, y)`` start cell, or ``None`` to let
                ``place_agent`` choose one.
            agent_start_dir: Start direction used with ``agent_start_pos``.
            agent_view_size: Forwarded to the base class.
            max_steps: Step limit forwarded to the base class; ``None``
                means 100.
            identity: Selects the reward function. ``1`` rewards opening
                ``door1``; ``2`` rewards opening ``door2``.
            position_reward: If true, add the x-position shaping bonus on
                every step. If false, the reward returned by the base
                class is replaced by 0 before the other terms are applied.
            **kwargs: Forwarded unchanged to ``MiniGridEnv.__init__``.

        Raises:
            ValueError: If ``identity`` is neither 1 nor 2.
        """
        # Validate up front: without this, an unknown identity left
        # ``door_rewards`` undefined and only failed once a door was opened.
        if identity == 1:
            self.door_rewards = [1, 0]  # reward for opening each door
        elif identity == 2:
            self.door_rewards = [0, 1]  # reward for opening each door
        else:
            raise ValueError(f"identity must be 1 or 2, got {identity!r}")
        self.identity = identity

        self.agent_start_pos = agent_start_pos
        self.agent_start_dir = agent_start_dir
        self.agent_view_size = agent_view_size

        self.position_reward = position_reward

        mission_space = MissionSpace(mission_func=self._gen_mission)

        if max_steps is None:
            max_steps = 100

        # Track whether each door's one-off reward has already been paid.
        self.door1_opened = False
        self.door2_opened = False

        super().__init__(
            mission_space=mission_space,
            grid_size=size,
            agent_view_size=agent_view_size,
            # Set this to True for maximum speed
            see_through_walls=True,
            max_steps=max_steps,
            **kwargs,
        )

    @staticmethod
    def _gen_mission():
        """Return the fixed mission string for this environment."""
        return "What Two Doors Will You Open?"

    def _gen_grid(self, width, height):
        """Build the grid: outer walls, centre wall, two doors and the goal.

        Args:
            width: Grid width in cells.
            height: Grid height in cells.
        """
        # Create an empty grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Both doors start closed and unlocked; colours come from COLOR_NAMES.
        self.door1 = Door(COLOR_NAMES[3], is_open=False, is_locked=False)
        self.door2 = Door(COLOR_NAMES[5], is_open=False, is_locked=False)
        self.goal = Goal()

        # Single wall cell between the two doors.
        self.grid.wall_rect(2, 2, 1, 1)

        # Place the three objects (2 doors and one goal)
        self.grid.set(2, 1, self.door1)
        self.grid.set(2, 3, self.door2)
        self.grid.set(3, 2, self.goal)

        # Place the agent
        if self.agent_start_pos is not None:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = self.agent_start_dir
        else:
            self.place_agent()

        self.mission = self._gen_mission()

    def step(self, action):
        """Advance one step and apply this environment's reward terms.

        Args:
            action: Action forwarded to ``MiniGridEnv.step``.

        Returns:
            The ``(obs, reward, terminated, truncated, info)`` tuple from
            the base class with ``reward`` adjusted as described below.
        """
        obs, reward, terminated, truncated, info = super().step(action)

        if self.position_reward:
            # Small bonus that grows with the agent's x coordinate.
            reward += self.POSITION_REWARD_SCALE * (
                self.agent_pos[0] / self.grid.width
            )
        else:
            # NOTE(review): this discards whatever reward the base class
            # returned for this step - confirm that is intended.
            reward = 0

        # Constant per-step penalty.
        reward -= self.STEP_PENALTY

        # Pay each door's reward only the first time it is seen open, and
        # mark the other door locked. No key is placed in this grid, so the
        # locked door presumably cannot be opened afterwards.
        if self.door1.is_open and not self.door1_opened:
            reward += self.door_rewards[0]
            self.door1_opened = True
            self.door2.is_locked = True

        if self.door2.is_open and not self.door2_opened:
            reward += self.door_rewards[1]
            self.door2_opened = True
            self.door1.is_locked = True

        return obs, reward, terminated, truncated, info

    def reset(self, seed: int | None = None, options: dict | None = None):
        """Reset the episode and clear the door-opened flags.

        Args:
            seed: Optional seed forwarded to the base class ``reset``.
            options: Optional reset options forwarded to the base class.

        Returns:
            The ``(obs, info)`` pair returned by the base class ``reset``.
        """
        self.door1_opened = False
        self.door2_opened = False

        # Forward seed/options; previously they were accepted but dropped.
        return super().reset(seed=seed, options=options)


def main():
    """Open the two-doors environment in a window under manual control."""
    # Identity 2 selects the reward function that pays out for the second door.
    two_doors = TwoDoorsEnv(render_mode="human", identity=2)

    # Turn off the environment's ``highlight`` rendering flag.
    two_doors.highlight = False

    # Hand the environment to the manual-control loop with a fixed seed.
    ManualControl(two_doors, seed=42).start()


# Start the manual-control demo only when this file is run as a script.
if __name__ == "__main__":
    main()
