from envs.multiagentenv import MultiAgentEnv
from utils.dict2namedtuple import convert
import numpy as np


class NStepMatrixGame(MultiAgentEnv):
    def __init__(self, **kwargs): # batch_size=None, 
        # Unpack arguments from sacred
        args = kwargs.get('env_args', kwargs)
        if isinstance(args, dict):
            args = convert(args)

        # Define the agents
        self.n_agents = 2

        self.episode_limit = args.steps
        # print(args)
        if getattr(args, 'state_type', None):
            print('args.state_type', args.state_type)
            self.state_type = args.state_type
        else:
            self.state_type = 'obs'

        # print('self.state_type', self.state_type)
        # Define the internal state
        self.steps = 0

        r_matrix = [[1,1],[1,1]]
        self.payoff_values = [r_matrix for _ in range(self.episode_limit)]
        self.final_step_diff =[[1,1],[1,4]]

        self.branches = 4
        self.branch = 0
        self.state_num = self.branches * (self.episode_limit + 1)

        

        self.n_actions = len(self.payoff_values[0])

        self.good_branches = args.good_branches

    def reset(self):
        """ Returns initial observations and states"""
        self.steps = 0
        self.branch = 0
        return self.get_obs(), self.get_state()

    def step(self, actions):
        """ Returns reward, terminated, info """
        current_branch = 0
        if (actions[0], actions[1]) == (0,0):
            current_branch = 0
        if (actions[0], actions[1]) == (0,1):
            current_branch = 1
        if (actions[0], actions[1]) == (1,0):
            current_branch = 2
        if (actions[0], actions[1]) == (1,1):
            current_branch = 3

        if self.steps == 0:
            self.branch = current_branch

        info = {}

        info["good_payoff"] = 0
        info["branch"] = self.branch

        if self.good_branches == 4:
            reward = 1 if self.branch == current_branch else 0 # Need to follow your branch
        elif self.good_branches == 2:
            reward = 1 if self.branch in [0,3] and self.branch == current_branch else 0
        else:
            raise Exception("Environment not setup to handle {} good branches".format(self.good_branches))

        if self.episode_limit > 1 and self.steps == self.episode_limit - 1 and self.branch == 0:
            info["good_payoff"] = 1
            reward = self.final_step_diff[actions[0]][actions[1]]
        elif self.episode_limit > 1 and self.steps == self.episode_limit - 1 and self.branch == 3:
            reward = 1

        self.steps += 1

        if self.steps < self.episode_limit and reward > 0:
            terminated = False
        else:
            terminated = True

        print(self.steps, reward, actions)

        info["episode_limit"] = False

        # How often the joint-actions are taken
        info["action_00"] = 0
        info["action_01"] = 0
        info["action_10"] = 0
        info["action_11"] = 0
        if (actions[0], actions[1]) == (0, 0):
            info["action_00"] = 1
        if (actions[0], actions[1]) == (0, 1):
            info["action_01"] = 1
        if (actions[0], actions[1]) == (1, 0):
            info["action_10"] = 1
        if (actions[0], actions[1]) == (1, 1):
            info["action_11"] = 1

        return reward, terminated, info

    def get_obs(self):
        """ Returns all agent observations in a list """
        if self.episode_limit == 1:
            one_hot_step = np.zeros(self.episode_limit + 1)
            one_hot_step[self.steps] = 1
        else:
            one_hot_step = np.zeros(self.episode_limit + 1 + self.branches)
            one_hot_step[self.steps] = 1
            one_hot_step[self.episode_limit + 1 + self.branch] = 1
        return [np.copy(one_hot_step) for _ in range(self.n_agents)]

    def get_obs_agent(self, agent_id):
        """ Returns observation for agent_id """
        return self.get_obs()[agent_id]

    def get_obs_size(self):
        """ Returns the shape of the observation """
        return len(self.get_obs_agent(0))

    def get_state(self):
        if self.state_type == 'obs':
            return self.get_obs_agent(0)
        else:
            s = self.steps * self.branches + self.branch
            # print(self.state_num,  s )
            return [s for _ in range(self.n_agents)]

    def get_state_size(self):
        """ Returns the shape of the state"""
        if self.state_type == 'obs':
            return self.get_obs_size()
        else:
            return 2

    def get_avail_actions(self):
        avail_actions = []
        for agent_id in range(self.n_agents):
            avail_agent = self.get_avail_agent_actions(agent_id)
            avail_actions.append(avail_agent)
        return avail_actions

    def get_avail_agent_actions(self, agent_id):
        """ Returns the available actions for agent_id """
        return np.ones(self.n_actions)

    def get_total_actions(self):
        """ Returns the total number of actions an agent could ever take """
        return self.n_actions

    def get_stats(self):
        return None

    def render(self):
        raise NotImplementedError

    def close(self):
        pass

    def seed(self):
        raise NotImplementedError