from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np
import gym
import cv2

from easydict import EasyDict as edict
import marlgrid.envs
import marlgrid

def make_environment(env_cfg, lock=None):
    """Build the fully wrapped MarlGrid environment described by ``env_cfg``.

    The grid world is created from the config and then wrapped twice:
    first so observations are rescaled, then so the per-agent dict
    interface is converted to per-agent tuples.

    Args:
        env_cfg: config object. ``env_cfg.env_name`` must start with
            ``'MarlGrid'``; the remaining fields are consumed by
            ``create_grid_world_env``.
        lock: accepted for call-site compatibility; not used by this function.

    Returns:
        The environment wrapped in ``DictObservationNormalizationWrapper``
        and then in ``SimplifiedObservationWrapper``.

    Raises:
        AssertionError: if ``env_cfg.env_name`` does not start with
            ``'MarlGrid'``.
    """
    assert env_cfg.env_name.startswith('MarlGrid')

    # The normalization wrapper iterates over dict observations, so it has to
    # sit beneath the wrapper that converts those dicts into tuples.
    # GridWorldEvaluatorWrapper is currently not applied.
    return SimplifiedObservationWrapper(
        DictObservationNormalizationWrapper(
            create_grid_world_env(env_cfg)))

def create_grid_world_env(env_cfg):
    """Register a MarlGrid environment built from ``env_cfg`` and return it.

    The environment name is derived from the config via ``get_env_name``;
    every other argument is forwarded to ``marlgrid.envs.register_env``.
    Three values are fixed here rather than read from the config:
    ``comm_dim=2``, ``n_adversaries=0`` and ``use_gym_env=False``.

    Args:
        env_cfg: config object exposing the attributes read below
            (``num_agents``, ``grid_size``, ``view_size``, ...).

    Returns:
        Whatever ``marlgrid.envs.register_env`` returns (used by callers in
        this file as the environment instance).
    """
    register_kwargs = dict(
        env_name=get_env_name(env_cfg),
        n_agents=env_cfg.num_agents,
        grid_size=env_cfg.grid_size,
        view_size=env_cfg.view_size,
        view_tile_size=env_cfg.view_tile_size,
        comm_dim=2,
        comm_len=env_cfg.comm_len,
        discrete_comm=env_cfg.discrete_comm,
        n_adversaries=0,
        observation_style=env_cfg.observation_style,
        observe_position=env_cfg.observe_position,
        observe_self_position=env_cfg.observe_self_position,
        observe_done=env_cfg.observe_done,
        observe_self_env_act=env_cfg.observe_self_env_act,
        observe_t=env_cfg.observe_t,
        neutral_shape=env_cfg.neutral_shape,
        can_overlap=env_cfg.can_overlap,
        use_gym_env=False,
        # Settings passed through as a nested dict rather than as
        # top-level keyword arguments.
        env_configs={
            'max_steps': env_cfg.max_steps,
            'team_reward_multiplier': env_cfg.team_reward_multiplier,
            'team_reward_type': env_cfg.team_reward_type,
            'team_reward_freq': env_cfg.team_reward_freq,
            'seed': env_cfg.seed,
            'active_after_done': env_cfg.active_after_done,
            'discrete_position': env_cfg.discrete_position,
        },
        env_type=env_cfg.env_type,
    )
    return marlgrid.envs.register_env(**register_kwargs)


def get_env_name(env_cfg):
    """Compose the environment name that encodes the options in ``env_cfg``.

    The name starts with ``'MarlGridDoors-'``, followed by one short tag per
    enabled option (in a fixed order), and ends with
    ``'<grid_size>x<grid_size>-v0'``.

    Args:
        env_cfg: config object; ``env_cfg.env_type`` must be ``'d'``.

    Returns:
        The assembled environment name as a string.

    Raises:
        AssertionError: if ``env_cfg.env_type`` is not ``'d'``.
    """
    assert env_cfg.env_type == 'd'

    parts = ['MarlGridDoors-']

    if env_cfg.num_blind_agents > 0:
        parts.append(f'{env_cfg.num_blind_agents}Blind')
    if env_cfg.active_after_done:
        parts.append('Active')
    if not env_cfg.neutral_shape:
        parts.append('Tri')

    # 'Di' is only added when some position observation is enabled as well.
    if env_cfg.discrete_position and (
            env_cfg.observe_position or env_cfg.observe_self_position):
        parts.append('Di')

    # Plain on/off observation flags, each mapped to its tag, in name order.
    flag_tags = (
        ('observe_door', 'Door'),
        ('observe_position', 'Pos'),
        ('observe_self_position', 'Selfpos'),
        ('observe_self_env_act', 'Selfenv'),
        ('observe_done', 'D'),
        ('observe_t', 'T'),
    )
    parts.extend(tag for attr, tag in flag_tags if getattr(env_cfg, attr))

    if env_cfg.comm_len > 0:
        # Non-discrete communication gets an extra 'cont' marker.
        comm_suffix = '' if env_cfg.discrete_comm else 'cont'
        parts.append(f'{env_cfg.comm_len}C{comm_suffix}')

    if env_cfg.team_reward_type != 'none':
        parts.append(f'TR{env_cfg.team_reward_type}')
    if env_cfg.team_reward_freq != 'none':
        parts.append(f'TRF{env_cfg.team_reward_freq}')

    # A view size of 7 is left out of the name; any other value is spelled out.
    if env_cfg.view_size != 7:
        parts.append(f'{env_cfg.view_size}Vs')

    parts.append(f'{env_cfg.grid_size}x{env_cfg.grid_size}-v0')
    return ''.join(parts)


class DictObservationNormalizationWrapper(gym.Wrapper):
    """Rescale per-agent observations with ``2 * (x / 255 - 0.5)``.

    With inputs in ``[0, 255]`` this maps values to ``[-1, 1]`` (the input
    range is assumed, not checked). The rescaling is applied to the
    observations returned by both ``reset`` and ``step`` so that the first
    observation of an episode is on the same scale as all later ones.
    """

    def __init__(self, env):
        super().__init__(env)

    @staticmethod
    def _normalize_obs(obs_dict):
        """Rescale the observations in ``obs_dict`` in place and return it.

        The ``'global'`` entry is left untouched. For entries that are
        themselves dicts only the ``'pov'`` field is rescaled; any other
        entry is rescaled as a whole.
        """
        for key, obs in obs_dict.items():
            if key == 'global':
                continue

            if isinstance(obs, dict):
                obs['pov'] = 2. * ((obs['pov'] / 255.) - 0.5)
            else:
                # Re-assigning an existing key is safe while iterating.
                obs_dict[key] = 2. * ((obs / 255.) - 0.5)
        return obs_dict

    def reset(self, **kwargs):
        """Reset the wrapped env and return its rescaled observations."""
        return self._normalize_obs(self.env.reset(**kwargs))

    def step(self, action):
        """Step the wrapped env; only the observations are modified.

        Returns:
            ``(obs_dict, rew_dict, done_dict, info_dict)`` as produced by the
            wrapped env, with ``obs_dict`` rescaled.
        """
        obs_dict, rew_dict, done_dict, info_dict = self.env.step(action)
        return self._normalize_obs(obs_dict), rew_dict, done_dict, info_dict


class SimplifiedObservationWrapper(gym.Wrapper):
    """
    Expose the dict-based multi-agent env through list/tuple interfaces.

    Actions are taken as a sequence indexed by agent, observations are
    returned as a list of ``(pov, selfpos)`` tuples, rewards as a numpy array
    and done flags as a list, all ordered by agent index. Agent entries in
    the wrapped env's dicts are addressed by the keys ``'agent_0'``,
    ``'agent_1'``, ...
    """

    def __init__(self, env):
        super().__init__(env)
        n_agents = len(self.agents)

        # One (pov, selfpos) Tuple space per agent, built from the wrapped
        # env's dict observation space.
        self.simplified_obs_space = [
            gym.spaces.Tuple(
                (self.observation_space['pov'],
                 self.observation_space['selfpos']))
            for _ in range(n_agents)
        ]
        # The wrapped env's action space, repeated once per agent.
        self.new_action_space = [self.action_space for _ in range(n_agents)]

        self.action_space = tuple(self.new_action_space)
        self.observation_space = self.simplified_obs_space
        # Small constant added to every agent's reward on every step.
        self.step_penalty = -0.0001

    def _agent_keys(self):
        """Return the per-agent dict keys, ordered by agent index."""
        return [f'agent_{idx}' for idx in range(len(self.agents))]

    def dict_to_tup(self, obs_dict):
        """Convert an observation dict to a list of ``(pov, selfpos)`` tuples."""
        return [
            (obs_dict[key]['pov'], obs_dict[key]['selfpos'])
            for key in self._agent_keys()
        ]

    def step(self, action):
        """Step the wrapped env with an agent-indexed sequence of actions.

        Returns:
            ``(obs_tups, reward_arr, done_arr, info_dict)`` where
            ``reward_arr`` holds each agent's reward plus ``step_penalty``
            and every entry of ``done_arr`` is the env's ``'__all__'`` flag.
        """
        joint_action = {
            key: action[idx] for idx, key in enumerate(self._agent_keys())
        }
        obs_dict, rew_dict, done_dict, info_dict = self.env.step(joint_action)

        agent_keys = self._agent_keys()
        rewards = np.zeros(len(agent_keys))
        dones = []
        for idx, key in enumerate(agent_keys):
            rewards[idx] = rew_dict[key] + self.step_penalty
            # Every agent shares the episode-level termination flag.
            dones.append(done_dict['__all__'])

        return self.dict_to_tup(obs_dict), rewards, dones, info_dict

    def reset(self):
        """Reset the wrapped env and return the initial observation tuples."""
        return self.dict_to_tup(self.env.reset())