import re
import gym
import numpy as np
from pantheonrl.common.multiagentenv import SimultaneousEnv, DummyEnv

# Style ids 1 through 8 (inclusive). Used as an environment's `style_list`
# when the first number found in its layout name is 0.
STYLE_LIST = [*range(1, 9)]

class LBFMultiEnv(SimultaneousEnv):
    """Two-player simultaneous-move wrapper around a registered gym environment.

    The wrapped environment is built with ``gym.make(layout_name)``. It is
    expected to expose indexable per-agent ``observation_space`` and
    ``action_space`` attributes, and its ``step`` is expected to return
    per-agent observations, per-agent rewards and an iterable of per-agent
    done flags.
    """

    def __init__(self, layout_name: str, ego_agent_idx: int = 0,
                 baselines: bool = False):
        """
        layout_name: id handed to ``gym.make``. The first run of digits in it
            is read as the partner style (actually the target food level);
            when that number is ``0`` the env exposes ``style_list``.
        ego_agent_idx: slot of the ego agent in the base environment's joint
            action / observation ordering. ``0`` keeps the base ordering, any
            other value puts the ego agent in slot 1.
        baselines: when true, seed NumPy's global RNG with 0.
        """
        super().__init__()

        if baselines:
            np.random.seed(0)

        self.base_env = gym.make(layout_name)

        # Spaces are taken from agent slot 0.
        # NOTE(review): assumes both agents share the same spaces - confirm.
        self.observation_space = self.base_env.observation_space[0]
        self.action_space = self.base_env.action_space[0]

        # First number in the layout name; actually the target food level.
        # re.search avoids the IndexError that findall(...)[0] raised for
        # names that contain no digits.
        style_match = re.search(r'\d+', layout_name)
        partner_style = style_match.group() if style_match else None
        if partner_style == '0':
            # Copy so mutating one env's list cannot leak into the
            # module-level constant shared by every other env.
            self.style_list = list(STYLE_LIST)
        # NOTE(review): style_list is intentionally left undefined for other
        # styles, as before - confirm whether callers expect a value there.

        # Must be set before the reset below, which orders observations by it.
        self.ego_agent_idx = ego_agent_idx

        self.multi_reset()

    def _ego_first(self, per_agent):
        """Reorder a two-element per-agent sequence so the ego entry is first.

        The input is returned untouched when the ego agent already occupies
        slot 0; otherwise a new ``(slot 1, slot 0)`` tuple is returned.
        """
        if self.ego_agent_idx == 0:
            return per_agent
        return (per_agent[1], per_agent[0])

    def multi_step(self, ego_action, alt_action):
        """
        Step the base environment with the joint action of both agents.

        ego_action: action of the ego agent (slot ``self.ego_agent_idx``)
        alt_action: action of the other agent

        returns:
            observations: per-agent observations, ego agent first
            rewards: per-agent rewards, ego agent first
            done: True only when every per-agent done flag is true
            info: the base environment's info, passed through unchanged
        """
        # The base env expects actions in slot order, not ego-first order.
        if self.ego_agent_idx == 0:
            joint_action = (ego_action, alt_action)
        else:
            joint_action = (alt_action, ego_action)

        obs, reward, done, info = self.base_env.step(joint_action)

        return self._ego_first(obs), self._ego_first(reward), all(done), info

    def multi_reset(self):
        """Reset the base environment and return its observations, ego first."""
        return self._ego_first(self.base_env.reset())

    def render(self, mode='human', close=False):
        """Rendering is a no-op for this wrapper."""
        pass

    def set_instruction_list(self, instruction_list):
        """Store ``instruction_list`` on the environment as ``instruction_list``."""
        self.instruction_list = instruction_list