from gym_minigrid.minigrid import *

class SimpleEnv(MiniGridEnv):
    """
    Grid environment whose agent moves directly in one of four directions.

    The action space is replaced by the four absolute moves defined in
    ``Actions``; each step tries to move the agent one cell in the chosen
    direction.
    """

    class Actions(IntEnum):
        """Absolute movement actions (see ``get_fwd_pos`` for the offsets)."""
        up      = 0
        right   = 1
        down    = 2
        left    = 3

    def __init__(self, grid_size=None, width=None, height=None, max_steps=100, see_through_walls=False, seed=1337, agent_view_size=7):
        """
        Forward all arguments to the base environment, then install the
        four-direction action set and an empty valid-position cache.
        """
        super().__init__(grid_size=grid_size, width=width, height=height, max_steps=max_steps, see_through_walls=see_through_walls, seed=seed, agent_view_size=agent_view_size)

        # Action enumeration for this environment
        self.actions = self.Actions

        # Actions are discrete integer values
        self.action_space = spaces.Discrete(len(self.actions))

        # Lazily filled by the valid_positions property; None means
        # "not computed yet" (an empty list is a legitimate cached value).
        self._valid_positions = None

    def reset(self, *args, **kwargs):
        """
        Reset the environment and drop the cached valid positions.

        Arguments and the return value are passed through to the base class
        unchanged.
        """
        # NOTE(review): the base class is assumed to be able to rebuild
        # self.grid on reset; clearing the cache keeps valid_positions from
        # describing a previous layout. Confirm against MiniGridEnv.reset.
        self._valid_positions = None
        return super().reset(*args, **kwargs)

    def copy(self):
        """
        Return a deep copy of this environment.
        """
        from copy import deepcopy
        return deepcopy(self)

    @property
    def valid_positions(self):
        """
        List of ``(i, j)`` coordinates whose grid cell is empty (``None``).

        The list is computed on first access and cached afterwards.
        """
        # Compare against None rather than truthiness so that an empty
        # result is cached too instead of being rescanned on every access.
        if self._valid_positions is None:
            self._valid_positions = [
                (i, j)
                for i in range(self.width)
                for j in range(self.height)
                if self.grid.get(i, j) is None
            ]
        return self._valid_positions

    def get_fwd_pos(self, action):
        """
        Return the position one cell away from the agent in the direction
        given by ``action``.

        Raises AssertionError if ``action`` is not one of ``self.actions``.
        """
        if action == self.actions.up:
            fwd_pos = self.agent_pos + np.array((0, -1))
        elif action == self.actions.right:
            fwd_pos = self.agent_pos + np.array((1, 0))
        elif action == self.actions.down:
            fwd_pos = self.agent_pos + np.array((0, 1))
        elif action == self.actions.left:
            fwd_pos = self.agent_pos + np.array((-1, 0))
        else:
            # Raised explicitly (not via an assert statement) so the check
            # is still performed when Python runs with -O.
            raise AssertionError("unknown action")
        return fwd_pos

    def step(self, action):
        """
        Apply ``action`` and return ``(obs, reward, done, info)``.

        The agent moves into the target cell when it is empty or can be
        overlapped. Entering a 'goal' cell ends the episode with the reward
        from ``_reward``; entering a 'lava' cell ends it with zero reward.
        The episode also ends once ``max_steps`` steps have been taken.
        """
        self.step_count += 1

        # No per-step penalty (the -1 / self.max_steps alternative is disabled)
        reward = 0
        done = False

        fwd_pos = self.get_fwd_pos(action)
        fwd_cell = self.grid.get(*fwd_pos)

        # Move only into free or overlappable cells
        if fwd_cell is None or fwd_cell.can_overlap():
            self.agent_pos = fwd_pos

        if fwd_cell is not None:
            if fwd_cell.type == 'goal':
                done = True
                reward = self._reward()
            elif fwd_cell.type == 'lava':
                done = True

        if self.step_count >= self.max_steps:
            done = True

        obs = self.gen_obs()

        return obs, reward, done, {}

    def _reward(self):
        """
        Compute the reward to be given upon success
        """

        # step() increments step_count before calling this, so the divisor
        # is at least 1 on that path.
        return 1 / self.step_count
