import minigrid  # registers the new environments
import gymnasium

# Make sure the BabyAI environments are known to gymnasium before any env is
# created: if the probe id below is missing from the registry, register the
# minigrid environments explicitly.
if "BabyAI-GoToRedBallGrey-v0" not in gymnasium.envs.registry.keys():
    minigrid.register_minigrid_envs()

import os
import numpy as np
from verl.environments.base import BaseEnv

import json
def read_json(path):
    """Parse the UTF-8 encoded JSON file at ``path`` and return the decoded object."""
    with open(path, mode='r', encoding='utf8') as handle:
        return json.load(handle)

# Human-readable list of the high-level actions shown to the model.  It is
# substituted for the "<action_space>" placeholder of the prompts in
# BabyAIEnv.__init__.  The text is part of the prompt, so it is data, not
# documentation: do not reformat it.
# NOTE(review): this text says "pick up {obj} {id}" while the action keys built
# in BabyAIEnv._process_obs are spelled "pickup <obj> <id>"; "drop",
# "go to goal" and "check available actions" are also produced there but are
# not listed here -- confirm whether that is intentional.
ACTION_SPACE = """
- turn right 
- turn left 
- move forward 
- go to {obj} {id} 
- pick up {obj} {id} 
- go through {door} {id}: <door> must be an open door. 
- toggle and go through {door} {id}: <door> can be a closed door or a locked door. If you want to open a locked door, you need to carry a key that is of the same color as the locked door. 
- toggle: there is a closed or locked door right in front of you and you can toggle it. 
"""

class BabyAIEnv(BaseEnv):

    def __init__(self, env_config, special_settings, add_examples=True):
        """Create the gymnasium environment named in ``env_config`` and fill in the prompts.

        Args:
            env_config: mapping that must provide the keys "game_name", "seed",
                "obs_to_reward", "game_id" and "difficulty".
            special_settings: forwarded unchanged to the base-class constructor.
            add_examples: when true, the "<examples>" placeholder is replaced by
                the examples read from the vanilla-prompt JSON file; otherwise
                by the text "no example.".

        ``self.user_prompt`` and ``self.user_prompt_for_deepthink`` are read
        here before being assigned, so they are presumably created by the base
        class -- verify against ``BaseEnv``.
        """
        super().__init__(special_settings)
        self.env_config = env_config
        self.env_name = "babyai"
        self.max_step = 50

        # Build the underlying environment from the config.
        self.env = gymnasium.make(env_config["game_name"])
        self.seed = env_config["seed"]

        # Reward bookkeeping: points earned so far and the observation
        # patterns that grant them.
        self.points = 0
        self.obs_to_reward = env_config["obs_to_reward"]
        self.num_obs_to_reward = len(self.obs_to_reward)

        # Start the first episode.
        init_obs, _info = self.env.reset(seed=self.seed)

        # Task metadata and the textual view of the initial state.
        self.task_id = env_config["game_id"]
        self.task_name = env_config["game_name"]
        self.difficulty = env_config["difficulty"]
        self.task_description = self._process_goal(self.env.unwrapped.mission)
        self.init_obs, self.action_space = self._process_obs(init_obs)

        # Placeholders shared by both prompts, substituted in this order.
        shared_fields = (
            ("<action_space>", ACTION_SPACE),
            ("<Task>", self.task_description),
            ("<Init Obs>", self.init_obs),
        )

        # Main prompt.
        prompt = self.user_prompt
        for placeholder, value in shared_fields:
            prompt = prompt.replace(placeholder, value)
        self.user_prompt = prompt

        # Few-shot examples (or an explicit "none" marker).
        if add_examples:
            prompt_data = read_json("./verl/environments/babyai/babyai_vanilla_prompt.json")
            examples_text = ''.join(prompt_data["examples"])
        else:
            examples_text = "no example."
        self.user_prompt = self.user_prompt.replace("<examples>", examples_text)

        # Prompt used when asking for an action.
        deepthink_prompt = self.user_prompt_for_deepthink
        for placeholder, value in shared_fields:
            deepthink_prompt = deepthink_prompt.replace(placeholder, value)
        self.user_prompt_for_deepthink = deepthink_prompt

    def _process_goal(self, goal):
        if "then" in goal: 
            goal = goal.replace("then", "and")
        if "after you" in goal:
            goal = goal.replace("after you", "and")
        return goal

    def _get_next_pos(self, pos, action, dir): # get the next position after taking an action
        if action == 0:
            dir = (dir-1)%4
            
        elif action == 1:
            dir = (dir+1)%4
        
        elif action == 2:
            dir_vec = DIR_TO_VEC[dir]
            pos = tuple(pos + dir_vec)
            
        return pos, dir
    
    def _find_path(self, init_pos, goal, all_objs, all_barriers, init_dir, xrange, yrange, arrive=False):
        """Breadth-first search for a sequence of primitive actions towards ``goal``.

        The search runs over ``(position, direction)`` states using the
        primitive actions 2 (forward), 0 and 1 (the two turns), expanded in
        that order.

        Args:
            init_pos: starting cell; must be hashable (a tuple) because states
                are stored in a set.
            goal: target cell ``(x, y)``.
            all_objs, all_barriers: lists of dicts with at least "name" and
                "abs_pos".  A cell is impassable when an entry located there
                has "wall", "box", "lava", "ball" or "key" in its name.  Doors
                (in any state) are not treated as obstacles here.
            init_dir: starting direction index into ``DIR_TO_VEC``.
            xrange, yrange: containers of the x / y coordinates the path may use.
            arrive: if true, the search ends when the agent stands on ``goal``;
                otherwise it ends when ``goal`` is the cell directly in front
                of the agent.

        Returns:
            The list of primitive actions (possibly empty), or ``None`` when no
            state satisfying the end condition can be reached.
        """
        from collections import deque  # local import: O(1) pops from the queue head

        # Impassable cells, computed once instead of re-scanning every object
        # for every candidate move.
        blocking_kinds = ("wall", "box", "lava", "ball", "key")
        blocked = {
            tuple(obj["abs_pos"])
            for obj in all_objs + all_barriers
            if any(kind in obj["name"] for kind in blocking_kinds)
        }

        start = (init_pos, init_dir)
        parents = dict()  # state -> (previous state, action taken from it)
        visited = set()
        frontier = deque([start])

        while frontier:
            pos, heading = frontier.popleft()
            # States are marked when dequeued (not when enqueued) and a queued
            # state may have its parent overwritten; this keeps the choice
            # between equally short paths the same as before.
            visited.add((pos, heading))

            if arrive:
                reached = pos[0] == goal[0] and pos[1] == goal[1]
            else:
                ahead = DIR_TO_VEC[heading]
                reached = goal[0] - pos[0] == ahead[0] and goal[1] - pos[1] == ahead[1]

            if reached:
                # Walk the parent links back to the start to recover the actions.
                path = []
                state = (pos, heading)
                while state != start:
                    state, action = parents[state]
                    path.append(action)
                path.reverse()
                return path

            for action in (2, 0, 1):
                new_pos, new_heading = self._get_next_pos(pos, action, heading)
                # These checks do not depend on the object list, so they are
                # applied even when there are no objects at all.
                if new_pos[0] not in xrange or new_pos[1] not in yrange:
                    continue
                if (new_pos, new_heading) in visited:
                    continue
                if new_pos in blocked:
                    continue
                frontier.append((new_pos, new_heading))
                parents[(new_pos, new_heading)] = ((pos, heading), action)

        return None

    def _process_obs(self, obs):
        """Translate a raw observation into a text description and an action table.

        Args:
            obs: observation dict; ``obs["image"]`` is indexed as
                ``[view column, view row, channel]`` with the channels decoded
                through ``IDX_TO_OBJECT`` / ``IDX_TO_COLOR`` / ``IDX_TO_STATE``,
                and ``obs["direction"]`` is an index into ``DIR_TO_VEC``.

        Returns:
            ``(description, possible_actions)``.  ``description`` is the text
            shown to the model.  ``possible_actions`` maps an action phrase to
            the list of primitive action ids to execute (0 turn left, 1 turn
            right, 2 forward, 3 pickup, 4 drop, 5 toggle); the special entry
            "check available actions" maps to an empty list.

        Side effects:
            ``self.error_message`` is replaced by a dict that maps action
            phrases which are currently impossible to an explanation.

        Every visible non-wall object gets one index per name, in order of
        distance.  The same index is used in the description, in the action
        phrases and in the error messages, so "red ball 2" always denotes the
        same object.
        """
        env = self.env.unwrapped
        view_size = env.agent_view_size
        pos = env.agent_pos
        f_vec = env.dir_vec
        r_vec = env.right_vec

        # Absolute coordinates of the top-left cell of the agent's view and of
        # the diagonally opposite cell; together they bound the visible square.
        top_left = pos + f_vec * (view_size - 1) - r_vec * (view_size // 2)
        far_corner = top_left + (view_size - 1) * (r_vec - f_vec)
        # Inclusive coordinate ranges of the view, used to keep planned paths
        # inside the area the agent can actually see.
        xrange = range(min(top_left[0], far_corner[0]), max(top_left[0], far_corner[0]) + 1)
        yrange = range(min(top_left[1], far_corner[1]), max(top_left[1], far_corner[1]) + 1)

        grid = obs["image"]
        heading = obs["direction"]
        facing = DIR_TO_VEC[heading]
        rightward = DIR_TO_VEC[(heading + 1) % 4]

        def offsets(abs_pos):
            # (steps ahead of the agent, steps to its right) for an absolute cell
            rel = (abs_pos[0] - pos[0], abs_pos[1] - pos[1])
            return np.dot(facing, rel), np.dot(rightward, rel)

        # ------------------------- scan the visible cells -------------------------
        all_objs = []
        all_barriers = []  # walls / boxes on the straight line the agent faces
        for vis_j in range(view_size):
            for vis_i in range(view_size):
                abs_i, abs_j = top_left - (f_vec * vis_j) + (r_vec * vis_i)
                distance = abs(pos[0] - abs_i) + abs(pos[1] - abs_j)

                if abs_i < 0 or abs_j < 0:
                    continue

                if distance == 0:  # the agent's own cell; avoids counting the carried object
                    continue

                obj_type = IDX_TO_OBJECT[grid[vis_i, vis_j, 0]]
                obj_color = IDX_TO_COLOR[grid[vis_i, vis_j, 1]]
                obj_state = IDX_TO_STATE[grid[vis_i, vis_j, 2]]

                if obj_type in ("door", "key", "ball", "box", "goal", "lava", "wall"):
                    if obj_type == "door":
                        obj_name = obj_color + " " + obj_state + " " + obj_type
                    else:
                        obj_name = obj_color + " " + obj_type
                    all_objs.append({"name": obj_name, "abs_pos": (abs_i, abs_j), "dis": distance})

                if obj_type in ("box", "wall"):
                    rel = (abs_i - pos[0], abs_j - pos[1])
                    # 2-D cross product written out by hand (np.cross on 2-D
                    # vectors is deprecated); zero means the cell lies on the
                    # line through the agent along its facing direction.
                    if facing[0] * rel[1] - facing[1] * rel[0] == 0:
                        all_barriers.append({"name": obj_type, "abs_pos": (abs_i, abs_j), "dis": np.dot(facing, rel)})

        # ------------------------- textual description -------------------------
        all_objs.sort(key=lambda item: item["dis"])  # nearest first
        if len(all_objs) > 0:
            name_counts = dict()
            parts = ["In front of you in this room, you can see several objects: "]
            for obj in all_objs:
                name = obj["name"]
                if "wall" in name:
                    continue
                name_counts[name] = name_counts.get(name, 0) + 1
                # "<name> <index>" is reused below for every action phrase.
                obj["label"] = name + " " + str(name_counts[name])

                front_dis, right_dis = offsets(obj["abs_pos"])
                if right_dis == 0:
                    where = "right in front of you " + str(int(front_dis)) + " steps away. "
                elif right_dis > 0:
                    where = str(int(front_dis)) + " steps in front of you and " + str(int(right_dis)) + " steps to your right. "
                else:
                    where = str(int(front_dis)) + " steps in front of you and " + str(int(-right_dis)) + " steps to your left. "
                parts.append("There is a " + obj["label"] + " " + where + " ")
            obj_description = "".join(parts)
        else:
            obj_description = "You cannot see any objects within sight."

        barrier_description = "The room has walls around you. "
        if len(all_barriers) > 0:
            all_barriers.sort(key=lambda item: item["dis"])
            nearest = all_barriers[0]
            barrier_description += "You are facing a " + nearest["name"] + " " + str(nearest["dis"]) + " steps away. "

        carrying = env.carrying
        if carrying is not None:
            carry_description = "You are carrying a " + carrying.color + " " + carrying.type + "."
        else:
            carry_description = "You are not carrying anything."

        description = obj_description + barrier_description + carry_description

        # ------------------------- available actions -------------------------
        possible_actions = {"turn left": [0], "turn right": [1]}
        error_message = {}  # fine-grained explanation for actions that cannot be executed

        path_cache = dict()

        def plan(target_pos, arrive):
            # Primitive-action path to the target (a fresh list each call), or
            # None.  Results are cached because the same target is requested by
            # more than one action family below.
            key = (target_pos, arrive)
            if key not in path_cache:
                path_cache[key] = self._find_path(
                    pos, target_pos, all_objs, all_barriers, heading, xrange, yrange, arrive=arrive
                )
            found = path_cache[key]
            return None if found is None else list(found)

        # Moving forward is possible unless a wall / box is directly ahead.
        if len(all_barriers) == 0 or all_barriers[0]["dis"] > 1:
            possible_actions["move forward"] = [2]
        else:
            error_message["move forward"] = "There is a barrier in front of you, you can't move forward."

        # Pick up an object (only with empty hands).  The resulting sequence is
        # not guaranteed to succeed in the environment.
        if carrying is None:
            for obj in all_objs:
                name = obj["name"]
                if "wall" in name or "door" in name or "goal" in name:
                    continue
                phrase = "pickup " + obj["label"]
                path = plan(obj["abs_pos"], False)
                if path is not None:
                    possible_actions[phrase] = path + [3]
                else:
                    error_message[phrase] = "You cannot pickup " + obj["label"] + ", as there is no path leading to it."

        # Drop the carried object onto the cell ahead.
        if carrying is not None:
            drop_pos = tuple(pos + DIR_TO_VEC[heading])
            occupied = any(
                obj["abs_pos"] == drop_pos
                and any(kind in obj["name"] for kind in ("wall", "box", "lava", "ball", "key"))
                for obj in all_objs
            )
            if not occupied:
                possible_actions["drop"] = [4]
            else:
                error_message["drop"] = "You cannot drop the object, as there is already an object in front of you."
        else:
            error_message["drop"] = "You cannot drop the object, as you are not carrying anything."

        # Doors: walk through open ones, toggle closed / locked ones first.
        for obj in all_objs:
            name = obj["name"]
            if "door" not in name:
                continue
            label = obj["label"]
            go_through = "go through " + label
            toggle_through = "toggle and go through " + label

            if "open door" in name:
                path = plan(obj["abs_pos"], True)
                if path is not None:
                    possible_actions[go_through] = path
                else:
                    error_message[go_through] = "You cannot go through " + label + ", as there is no path leading to it."

            elif "closed door" in name:
                path = plan(obj["abs_pos"], False)
                if path is not None:
                    possible_actions[toggle_through] = path + [5, 2]
                else:
                    error_message[toggle_through] = "You cannot toggle and go through " + label + ", as there is no path leading to it."
                if path == []:  # already facing the door
                    possible_actions["toggle"] = [5]
                error_message[go_through] = "You cannot go through " + label + ", as it is closed. You should toggle it first."

            elif "locked door" in name:
                if carrying is None or carrying.type != 'key':
                    error_message[toggle_through] = "You cannot toggle and go through " + label + ", as you are not carrying a key."
                    continue
                if carrying.color != name.split(" ")[0]:
                    error_message[toggle_through] = "You cannot toggle and go through " + label + ", as the color of the key you are carrying does not match the color of door."
                    continue
                path = plan(obj["abs_pos"], False)
                if path is not None:
                    possible_actions[toggle_through] = path + [5, 2]
                else:
                    error_message[toggle_through] = "You cannot toggle and go through " + label + ", as there is no path leading to it."
                if path == []:  # already facing the door
                    possible_actions["toggle"] = [5]

        # Walk onto a goal cell.
        for obj in all_objs:
            if "goal" not in obj["name"]:
                continue
            path = plan(obj["abs_pos"], True)
            if path is not None:
                possible_actions["go to goal"] = path
            else:
                error_message["go to goal"] = "You cannot go to the goal, as there is no path leading to it."

        # Walk up to (and face) any other object.
        for obj in all_objs:
            name = obj["name"]
            if "wall" in name or "goal" in name:
                continue
            phrase = "go to " + obj["label"]
            path = plan(obj["abs_pos"], False)
            if path is not None:
                possible_actions[phrase] = path
            else:
                error_message[phrase] = "You cannot go to " + obj["label"] + ", as there is no path leading to it."

        # Special action that only lists the available actions.
        possible_actions["check available actions"] = []
        self.error_message = error_message
        return description, possible_actions


    def _get_action_space(self):
        return list(self.action_space.keys()) # return a list of valid actions
    
    def check_action_is_valid(self, action):
        action_space = self.action_space
        if "check" in action:
            return True, None
        if action == "":
            return False, "No change in state."
        if action not in action_space:
            if action in self.error_message:
                return False, self.error_message[action]
            else:
                return False, "The action is not recognized. Please check valid actions."
        else:
            return True, None
    
    def _match_style(self, obs, pattern):
        pattern = pattern.strip()
        split_token = "**"
        if "**" not in pattern:
            split_token = "*"
        pattern_list = pattern.strip().split(split_token)
        all_obs = obs.split(".")
        for obs_temp in all_obs:
            flag = True
            for p in pattern_list:
                p = p.strip(".")
                if p not in obs_temp:
                    flag = False
            if flag:
                return True
        return False
    
    def update_points(self, obs):
        if self.obs_to_reward is None:
            return
        if len(self.obs_to_reward) == 0:
            return
        
        if isinstance(self.obs_to_reward[0], list):
            need_to_award = False   
            path_length = len(self.obs_to_reward[0])
            for i in range(path_length):
                for obs_temp in self.obs_to_reward:
                    if self._match_style(obs, obs_temp[i]):
                        need_to_award = True
                        break
                
                if need_to_award:
                    self.points += 1
                    for obs_temp in self.obs_to_reward:
                        obs_temp.remove(obs_temp[i])
                    break
                
        else:
            for pattern in self.obs_to_reward:
                if self._match_style(obs, pattern):
                    self.points += 1
                    self.obs_to_reward.remove(pattern)
                    break


    def step(self, action):
        action = action.lower()
        action = action.strip()
        is_valid, error = self.check_action_is_valid(action)
        if not is_valid:
            observation = error
            env_done = self.done

        elif action == "check available actions" or "check" in action:
            observation = "You can take the following actions: " + ", ".join(self._get_action_space())
            env_done = self.done

        else:
            action_list = self.action_space[action]

            if action_list == []:
                # print(action)
                observation = "No change in state."
                env_done = self.done
            else:
                for action_step in action_list:
                    obs, _reward, env_done, _truncated, _infos = self.env.step(action_step)
                    observation, new_action_space = self._process_obs(obs)
                    self.update_points(observation)
                    self.action_space = new_action_space
               
        score = self.points / self.num_obs_to_reward

        if env_done:
            done = False if score <= 0.5 else True
        else:
            done = True if (score == 1) else False
        # done = (score == 1)

        score = score * 10  # 0~10

        # update stage
        self.current_step += 1
        if self.current_step >= self.max_step:
            self.gameDone = True
        elif done:
            self.gameDone = True

        self.reward = score - self.score
        self.score = score

        # judge task state
        if self.gameDone:
            if score > 0:
                self.done = True
            else:
                self.over = True

        return observation, self.reward, self.score, self.gameDone
    
    def reset(self):
        """Clear the earned points, re-read the reward patterns and reset the base class.

        ``self.obs_to_reward`` is re-bound to the object stored under
        ``env_config["obs_to_reward"]`` (no copy is made); the base-class
        ``reset`` is then called with the stored seed.
        """
        self.obs_to_reward = self.env_config["obs_to_reward"]
        self.points = 0  # reward points earned so far
        super().reset(self.seed)



# Decoding tables for the three channels of an observation-image cell
# (object type, colour, door state), as used by BabyAIEnv._process_obs.
# NOTE(review): these presumably mirror the encoding tables shipped with
# minigrid -- confirm they stay in sync with the installed version.
IDX_TO_OBJECT = dict(enumerate((
    "unseen",
    "empty",
    "wall",
    "floor",
    "door",
    "key",
    "ball",
    "box",
    "goal",
    "lava",
    "agent",
)))

STATE_TO_IDX = {"open": 0, "closed": 1, "locked": 2}

# Inverse of STATE_TO_IDX.
IDX_TO_STATE = {index: state for state, index in STATE_TO_IDX.items()}

IDX_TO_COLOR = dict(enumerate(("red", "green", "blue", "purple", "yellow", "grey")))

# Unit step for each direction index, in order:
# 0 right (+x), 1 down (+y), 2 left (-x), 3 up (-y).
DIR_TO_VEC = [np.array(step) for step in ((1, 0), (0, 1), (-1, 0), (0, -1))]