import gym
import numpy as np
from .render.game import Game
from gym import spaces
from .items import Tomato, Lettuce, Onion, Plate, Knife, Delivery, Agent, Food, DirtyPlate, BadLettuce
import copy
import time

# Four unit offsets on the grid.
# NOTE(review): presumably indexed by the primitive actions
# ["right", "down", "left", "up"] listed in the Overcooked docstring — confirm.
DIRECTION = [
    (0, 1),
    (1, 0),
    (0, -1),
    (-1, 0),
]

# Names of every item kind; the position of a name in this list is the integer
# code used for that item in the layout grids.
ITEMNAME = [
    "space",
    "counter",
    "agent",
    "tomato",
    "lettuce",
    "plate",
    "knife",
    "delivery",
    "onion",
    "dirtyplate",
    "badlettuce",
]

# Reverse lookup of ITEMNAME: item name -> integer code.
ITEMIDX = {item_name: item_code for item_code, item_name in enumerate(ITEMNAME)}

# The agent appearance was changed to "robot" for the first entry; the
# original palette was ["blue", "magenta", "green", "yellow"].
AGENTCOLOR = ["robot", "blue", "green", "yellow"]

# Names of the supported recipes.
# NOTE(review): presumably indexed by the integer `task` argument — confirm.
TASKLIST = [
    "tomato salad",
    "lettuce salad",
    "onion salad",
    "lettuce-tomato salad",
    "onion-tomato salad",
    "lettuce-onion salad",
    "lettuce-onion-tomato salad",
]
from collections import Counter


class Overcooked(gym.Env):

    """
    Overcooked Domain Description
    ------------------------------
    Agent with primitive actions ["right", "down", "left", "up"]
    TASKLIST = ["tomato salad", "lettuce salad", "onion salad", "lettuce-tomato salad", "onion-tomato salad", "lettuce-onion salad", "lettuce-onion-tomato salad"]
    
    1) Agent is allowed to pick up/put down food/plate on the counter;
    2) Agent is allowed to chop food into pieces if the food is on the cutting board counter;
    3) Agent is allowed to deliver food to the delivery counter;
    4) Only unchopped food is allowed to be chopped;
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second' : 5
        }

    def __init__(self, grid_dim, task, rewardList, map_type = "A", n_agent = 2, obs_radius = 2, mode = "vector", debug = False):

        """
        Parameters
        ----------
        grid_dim : tuple(int, int)
            The size of the grid world([7, 7]/[9, 9]).
        task : int
            The index of the target recipe.
        rewardList : dictionary
            The list of the reward.
            e.g rewardList = {"subtask finished": 10, "correct delivery": 200, "wrong delivery": -5, "step penalty": -0.1}
        map_type : str 
            The type of the map(A/B/C).
        n_agent: int
            The number of the agents.
        obs_radius: int
            The observation radius of the agents.
        mode: string
            The type of the observation(vector/image).
        debug : bool
            Whether to print the debug information.
        """
        # Initialize the step counter
        self.step_count = 0


        self.xlen, self.ylen = grid_dim
        if debug:
            self.game = Game(self)
        self.task = task
        self.rewardList = rewardList
        self.mapType = map_type
        self.debug = debug
        self.n_agent = n_agent
        self.mode = mode
        self.obs_radius = obs_radius

        self.right_side_plate = None
        self.left_side_lettuce = None

        self.collaborated = False

        self.usedirtyplate = False


        # print(self.obs_radius)
        # time.sleep(10000)


        self.layout_pomap = None


        # Additionally maintain an extra reward variable
        self.reward = [None, None, None]

        map = []

        if self.xlen == 3 and self.ylen == 3:
            if self.n_agent == 2:
                if self.mapType == "A":
                    map =  [[1, 3, 1],
                            [7, 2, 6],
                            [1, 5, 2]] 
                elif self.mapType == "B":
                    map =  [[1, 3, 1],
                            [7, 2, 6],
                            [1, 5, 2]] 
                elif self.mapType == "C":
                    map =  [[1, 3, 1],
                            [7, 2, 6],
                            [1, 5, 2]]
            elif self.n_agent == 3:
                if self.mapType == "A":
                    map =  [[1, 3, 2],
                            [7, 2, 6],
                            [1, 5, 2]]
                elif self.mapType == "B":
                    map =  [[1, 3, 2],
                            [7, 2, 6],
                            [1, 5, 2]]
                elif self.mapType == "C":
                    map =  [[1, 3, 2],
                            [7, 2, 6],
                            [1, 5, 2]]
        elif self.xlen == 5 and self.ylen == 5:
            if self.n_agent == 2:
                if self.mapType == "A":
                    map =  [[1, 1, 1, 1, 1],
                            [6, 2, 0, 2, 1],
                            [3, 0, 0, 0, 1],
                            [7, 0, 0, 0, 1],
                            [1, 5, 1, 1, 1]] 
                elif self.mapType == "B":
                    map =  [[1, 8, 1, 1, 1],
                            [6, 2, 1, 0, 1],
                            [3, 0, 5, 2, 6],
                            [7, 0, 5, 0, 1],
                            [1, 4, 1, 1, 1]] 
                elif self.mapType == "C":
                    map =  [[1, 1, 1, 5, 1],
                            [6, 2, 1, 2, 1],
                            [3, 0, 5, 0, 6],
                            [7, 0, 0, 0, 1],
                            [1, 1, 1, 1, 1]] 
            elif self.n_agent == 3:
                if self.mapType == "A":
                    map =  [[1, 1, 5, 1, 1],
                            [6, 2, 0, 2, 1],
                            [3, 0, 0, 0, 6],
                            [7, 0, 2, 0, 1],
                            [1, 1, 5, 1, 1]] 
                elif self.mapType == "B":
                    map =  [[1, 1, 1, 1, 1],
                            [6, 2, 1, 2, 1],
                            [3, 0, 5, 2, 6],
                            [7, 0, 5, 0, 1],
                            [1, 1, 1, 1, 1]]  
                elif self.mapType == "C":
                    map =  [[1, 1, 1, 5, 1],
                            [6, 2, 1, 2, 1],
                            [3, 0, 5, 0, 6],
                            [7, 2, 0, 0, 1],
                            [1, 1, 1, 1, 1]] 
        elif self.xlen == 3 and self.ylen == 5:
            if self.n_agent == 2:
                if self.mapType == "A":
                    map =  [[1, 1, 1, 1, 1],
                            [6, 2, 0, 2, 1],
                            [3, 0, 0, 0, 1],
                            [7, 0, 0, 0, 1],
                            [1, 5, 1, 1, 1]] 
                elif self.mapType == "B":
                    # print('------------')
                    # print('------------')
                    # print('------------')
                    # print('------------')
                    # print('------------')
                    map =  [[1, 8, 1, 1, 1],
                            [6, 2, 1, 0, 1],
                            [3, 0, 5, 2, 6]]  
                    # map =  [[1, 8, 1, 1, 1],
                    #         [6, 2, 1, 0, 1],
                    #         [3, 0, 5, 2, 6],
                    #         [7, 0, 5, 0, 1],
                    #         [1, 4, 1, 1, 1]] 
                elif self.mapType == "C":
                    map =  [[1, 1, 1, 5, 1],
                            [6, 2, 1, 2, 1],
                            [3, 0, 5, 0, 6],
                            [7, 0, 0, 0, 1],
                            [1, 1, 1, 1, 1]] 
            elif self.n_agent == 3:
                if self.mapType == "A":
                    map =  [[1, 1, 5, 1, 1],
                            [6, 2, 0, 2, 1],
                            [3, 0, 0, 0, 6],
                            [7, 0, 2, 0, 1],
                            [1, 1, 5, 1, 1]] 
                elif self.mapType == "B":
                    map =  [[1, 1, 1, 1, 1],
                            [6, 2, 1, 2, 1],
                            [3, 0, 5, 2, 6],
                            [7, 0, 5, 0, 1],
                            [1, 1, 1, 1, 1]]  
                elif self.mapType == "C":
                    map =  [[1, 1, 1, 5, 1],
                            [6, 2, 1, 2, 1],
                            [3, 0, 5, 0, 6],
                            [7, 2, 0, 0, 1],
                            [1, 1, 1, 1, 1]] 
        elif self.xlen == 7 and self.ylen == 7:
            if self.n_agent == 2:
                if self.mapType == "A":
                    map =  [[1, 1, 1, 1, 1, 3, 1],
                            [6, 0, 2, 0, 0, 0, 4],
                            [6, 0, 0, 0, 0, 0, 8],
                            [7, 0, 0, 0, 0, 0, 1],
                            [1, 0, 0, 0, 0, 0, 1],
                            [1, 0, 2, 0, 0, 0, 5],
                            [1, 1, 1, 1, 1, 5, 1]]
                elif self.mapType == "B":
                    # ITEMIDX= {"space": 0, "counter": 1, "agent": 2, "tomato": 3, "lettuce": 4, "plate": 5, "knife": 6, "delivery": 7, "onion": 8}
                    # map =  [[1, 4, 1, 0, 1, 1, 1],
                    #         [1, 0, 1, 0, 1, 0, 1],
                    #         [8, 0, 1, 0, 1, 0, 1],
                    #         [1, 0, 7, 1, 1, 0, 1],
                    #         [3, 0, 2, 6, 2, 0, 1],
                    #         [1, 0, 0, 6, 0, 0, 1],
                    #         [1, 1, 5, 1, 5, 1, 1]] 


                    """
                    # 下面这个地图是可以用的
                    """
                    # map =  [[1, 7, 1, 0, 1, 4, 1],
                    #         [1, 0, 1, 0, 1, 0, 1],
                    #         [1, 0, 1, 0, 1, 0, 1],
                    #         [1, 0, 4, 1, 7, 0, 1],
                    #         [1, 0, 2, 6, 2, 0, 1],
                    #         [1, 0, 0, 6, 0, 0, 1],
                    #         [1, 1, 5, 1, 5, 1, 1]] 
                    
                    """
                    # 下面这个地图是可以用的，Negativegain，意味着即便robot是100%可信的，人的最优策略也不是信任robot
                    """
                    # map =  [[1, 1, 1, 0, 1, 4, 1],
                    #         [1, 0, 1, 0, 1, 0, 7],
                    #         [1, 0, 1, 0, 1, 0, 1],
                    #         [1, 0, 4, 1, 1, 0, 1],
                    #         [1, 0, 2, 6, 2, 0, 1],
                    #         [7, 0, 0, 6, 0, 0, 1],
                    #         [1, 1, 5, 1, 5, 1, 1]] 
                    


                    """
                    # 下面是一个新的地图，生菜+番茄
                    """
                    # map =  [[1, 7, 1, 0, 1, 4, 1],
                    #         [1, 0, 1, 0, 1, 0, 3],
                    #         [1, 0, 1, 0, 1, 0, 1],
                    #         [1, 0, 4, 1, 7, 0, 1],
                    #         [1, 0, 2, 6, 2, 0, 1],
                    #         [1, 0, 0, 6, 0, 0, 1],
                    #         [1, 3, 5, 1, 5, 1, 1]] 


                    # """
                    # # 下面是一个新的地图，中间是空的counter，人机合作有优势
                    # """
                    # map =  [[1, 5, 1, 0, 1, 4, 1],
                    #         [1, 0, 1, 0, 1, 0, 1],
                    #         [1, 0, 1, 0, 1, 0, 1],
                    #         [1, 0, 4, 1, 1, 0, 1],
                    #         [1, 0, 2, 1, 2, 0, 1],
                    #         [7, 0, 0, 1, 0, 0, 5],
                    #         [1, 6, 1, 1, 7, 6, 1]]  

                    """
                    # 增加一个dirtyplate(index是9)
                    """
                    map =  [[1, 5, 1, 0, 1, 4, 1],
                            [1, 0, 1, 0, 1, 0, 1],
                            [1, 0, 1, 0, 1, 0, 1],
                            [1, 0, 4, 1, 1, 0, 5],
                            [1, 0, 2, 1, 2, 0, 9],
                            [7, 0, 0, 1, 0, 0, 1],
                            [1, 6, 1, 1, 7, 6, 1]]  
                    
                    map =  [[1, 4, 1, 0, 1, 1, 1],
                            [1, 0, 1, 0, 1, 0, 1],
                            [1, 0, 1, 0, 1, 0, 1],
                            [1, 0, 1, 0, 1, 0, 5],
                            [1, 0, 1, 1, 1, 0, 10],
                            [5, 0, 2, 1, 2, 0, 4],
                            [1, 7, 6, 1, 6, 7, 1]]  
                    



                    """
                    # 下面是一个新的地图，中间是空的counter，人自己完成任务更快
                    """
                    # map =  [[1, 1, 1, 0, 1, 7, 1],
                    #         [1, 0, 1, 0, 4, 0, 5],
                    #         [1, 0, 1, 0, 1, 0, 1],
                    #         [1, 0, 1, 1, 1, 0, 1],
                    #         [4, 0, 2, 1, 2, 0, 1],
                    #         [5, 0, 0, 1, 0, 0, 1],
                    #         [1, 6, 7, 1, 1, 6, 1]]


                    """
                    # 下面是一个新的地图，中间是空的counter，人必须和机器人合作才能完成任务
                    """
                    # map =  [[1, 1, 1, 0, 1, 7, 1],
                    #         [1, 0, 1, 0, 5, 0, 5],
                    #         [1, 0, 1, 0, 1, 0, 1],
                    #         [4, 0, 1, 1, 1, 0, 1],
                    #         [4, 0, 2, 1, 2, 0, 1],
                    #         [1, 0, 0, 1, 0, 0, 6],
                    #         [1, 1, 7, 1, 1, 6, 1]]
                    


                    """
                    # 多recipe地图，生菜+番茄
                    """
                    # map =  [[1, 7, 1, 0, 1, 4, 1],
                    #         [5, 0, 1, 0, 1, 0, 3],
                    #         [1, 0, 1, 0, 1, 0, 1],
                    #         [1, 0, 4, 1, 5, 0, 1],
                    #         [1, 0, 2, 1, 2, 0, 1],
                    #         [1, 0, 0, 1, 0, 0, 1],
                    #         [1, 6, 3, 1, 7, 6, 1]]
                    




 
                elif self.mapType == "C":
                    map =  [[1, 1, 1, 1, 1, 3, 1],
                            [6, 0, 2, 1, 2, 0, 4],
                            [6, 0, 0, 1, 0, 0, 8],
                            [7, 0, 0, 1, 0, 0, 1],
                            [1, 0, 0, 1, 0, 0, 1],
                            [1, 0, 0, 0, 0, 0, 5],
                            [1, 1, 1, 1, 1, 5, 1]]
            elif self.n_agent == 3:
                if self.mapType == "A":
                    map =  [[1, 1, 1, 1, 1, 3, 1],
                            [6, 0, 2, 0, 2, 0, 4],
                            [6, 0, 0, 0, 0, 0, 8],
                            [7, 0, 0, 0, 0, 0, 1],
                            [1, 0, 0, 0, 0, 0, 1],
                            [1, 0, 2, 0, 0, 0, 5],
                            [1, 1, 1, 1, 1, 5, 1]]
                elif self.mapType == "B":
                    map =  [[1, 1, 1, 1, 1, 3, 1],
                            [6, 0, 2, 1, 2, 0, 4],
                            [6, 0, 0, 1, 0, 0, 8],
                            [7, 0, 0, 1, 0, 0, 1],
                            [1, 0, 0, 1, 0, 0, 1],
                            [1, 0, 2, 1, 0, 0, 5],
                            [1, 1, 1, 1, 1, 5, 1]] 
                elif self.mapType == "C":
                    map =  [[1, 1, 1, 1, 1, 3, 1],
                            [6, 0, 2, 1, 2, 0, 4],
                            [6, 0, 0, 1, 0, 0, 8],
                            [7, 0, 0, 1, 0, 0, 1],
                            [1, 0, 0, 1, 0, 0, 1],
                            [1, 0, 2, 0, 0, 0, 5],
                            [1, 1, 1, 1, 1, 5, 1]]
                    
# ITEMIDX= {"space": 0, "counter": 1, "agent": 2, "tomato": 3, "lettuce": 4, "plate": 5, "knife": 6, "delivery": 7, "onion": 8}

        elif self.xlen == 9 and self.ylen == 9:
            if self.n_agent == 2:
                if self.mapType == "A":
                    map =  [[1, 1, 1, 1, 1, 1, 1, 4, 1],
                            [6, 0, 2, 0, 0, 0, 2, 0, 4],
                            [6, 0, 0, 0, 0, 0, 0, 0, 1],
                            [7, 0, 0, 0, 0, 0, 0, 0, 1],
                            [1, 0, 0, 0, 0, 0, 0, 0, 1],
                            [1, 0, 0, 0, 0, 0, 0, 0, 1],
                            [1, 0, 0, 0, 0, 0, 0, 0, 1],
                            [1, 0, 0, 0, 0, 0, 0, 0, 5],
                            [1, 1, 1, 1, 1, 1, 1, 5, 1]]
                elif self.mapType == "B":
                    # map =  [[1, 1, 1, 1, 1, 1, 1, 3, 1],
                    #         [6, 0, 2, 0, 1, 0, 2, 0, 4],
                    #         [6, 0, 0, 0, 1, 0, 0, 0, 8],
                    #         [7, 0, 0, 0, 1, 0, 0, 0, 1],
                    #         [1, 0, 0, 0, 1, 0, 0, 0, 1],
                    #         [1, 0, 0, 0, 1, 0, 0, 0, 1],
                    #         [1, 0, 0, 0, 1, 0, 0, 0, 1],
                    #         [1, 0, 0, 0, 1, 0, 0, 0, 5],
                    #         [1, 1, 1, 1, 1, 1, 1, 5, 1]]

                    




                    """这是原始的"""
                    # map =  [[1, 1, 5, 1, 0, 1, 1, 4, 1],
                    #         [1, 0, 0, 1, 0, 1, 0, 0, 1],
                    #         [1, 0, 0, 1, 0, 1, 0, 0, 1],
                    #         [1, 0, 0, 1, 0, 1, 0, 0, 1],
                    #         [1, 0, 0, 1, 0, 1, 0, 0, 1],
                    #         [1, 0, 0, 4, 1, 7, 0, 0, 1],
                    #         [1, 0, 2, 0, 1, 0, 2, 0, 1],
                    #         [7, 0, 0, 0, 1, 0, 0, 0, 1],
                    #         [1, 6, 1, 1, 1, 6, 5, 1, 1]]
                    

                    """
                    # 增加一个dirtyplate(index是9)
                    """
                    # ITEMIDX= {"space": 0, "counter": 1, "agent": 2, "tomato": 3, "lettuce": 4, "plate": 5, "knife": 6, "delivery": 7, "onion": 8}
                    
                    map =  [[1, 5, 1, 1, 0, 1, 1, 4, 1],
                            [1, 0, 0, 1, 0, 1, 0, 0, 1],
                            [1, 0, 0, 1, 0, 1, 0, 0, 1],
                            [1, 0, 0, 1, 0, 1, 0, 0, 1],
                            [1, 0, 0, 1, 0, 5, 0, 0, 1],
                            [1, 0, 0, 4, 1, 1, 0, 0, 1],
                            [9, 0, 2, 0, 1, 0, 2, 0, 9],
                            [1, 0, 0, 0, 1, 0, 0, 0, 1],
                            [1, 1, 6, 7, 1, 1, 6, 7, 1]]


                elif self.mapType == "C":
                    map =  [[1, 1, 1, 1, 1, 1, 1, 3, 1],
                            [6, 0, 2, 0, 1, 0, 2, 0, 4],
                            [6, 0, 0, 0, 1, 0, 0, 0, 8],
                            [7, 0, 0, 0, 1, 0, 0, 0, 1],
                            [1, 0, 0, 0, 1, 0, 0, 0, 1],
                            [1, 0, 0, 0, 1, 0, 0, 0, 1],
                            [1, 0, 0, 0, 1, 0, 0, 0, 1],
                            [1, 0, 0, 0, 0, 0, 0, 0, 5],
                            [1, 1, 1, 1, 1, 1, 1, 5, 1]]
            elif self.n_agent == 3:
                if self.mapType == "A":
                    map =  [[1, 1, 1, 1, 1, 1, 1, 3, 1],
                            [6, 0, 2, 0, 0, 0, 2, 0, 4],
                            [6, 0, 0, 0, 0, 0, 0, 0, 8],
                            [7, 0, 0, 0, 0, 0, 0, 0, 1],
                            [1, 0, 0, 0, 0, 0, 0, 0, 1],
                            [1, 0, 0, 0, 0, 0, 0, 0, 1],
                            [1, 0, 0, 0, 0, 0, 0, 0, 1],
                            [1, 0, 2, 0, 0, 0, 0, 0, 5],
                            [1, 1, 1, 1, 1, 1, 1, 5, 1]]
                elif self.mapType == "B":
                    map =  [[1, 1, 1, 1, 1, 1, 1, 3, 1],
                            [6, 0, 2, 0, 1, 0, 2, 0, 4],
                            [6, 0, 0, 0, 1, 0, 0, 0, 8],
                            [7, 0, 0, 0, 1, 0, 0, 0, 1],
                            [1, 0, 0, 0, 1, 0, 0, 0, 1],
                            [1, 0, 0, 0, 1, 0, 0, 0, 1],
                            [1, 0, 0, 0, 1, 0, 0, 0, 1],
                            [1, 0, 2, 0, 1, 0, 0, 0, 5],
                            [1, 1, 1, 1, 1, 1, 1, 5, 1]]
                elif self.mapType == "C":
                    map =  [[1, 1, 1, 1, 1, 1, 1, 3, 1],
                            [6, 0, 2, 0, 1, 0, 2, 0, 4],
                            [6, 0, 0, 0, 1, 0, 0, 0, 8],
                            [7, 0, 0, 0, 1, 0, 0, 0, 1],
                            [1, 0, 0, 0, 1, 0, 0, 0, 1],
                            [1, 0, 0, 0, 1, 0, 0, 0, 1],
                            [1, 0, 0, 0, 1, 0, 0, 0, 1],
                            [1, 0, 2, 0, 0, 0, 0, 0, 5],
                            [1, 1, 1, 1, 1, 1, 1, 5, 1]]
                    



        elif self.xlen == 11 and self.ylen == 11:
            if self.mapType == "A":
                map =  [[1, 1, 1, 1, 7, 1, 1, 1, 1, 5, 1],
                        [1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 5],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6],
                        [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 4],
                        [1, 1, 1, 1, 7, 1, 1, 1, 1, 4, 1]]
                
                map =  [[1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 7],
                        [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4],
                        [1, 1, 1, 1, 1, 4, 1, 1, 1, 10, 1]]
                
                # map =  [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 5],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7],
                #         [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1],
                #         [1, 10, 4, 4, 1, 1, 1, 1, 1, 1, 1]]
                

            elif self.mapType == "B":

# ITEMIDX= {"space": 0, "counter": 1, "agent": 2, "tomato": 3, "lettuce": 4, "plate": 5, "knife": 6, "delivery": 7, "onion": 8}

                # map =  [[1, 5, 1, 1, 1, 0, 1, 1, 5, 1, 1],
                #         [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1],
                #         [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1],
                #         [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1],
                #         [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1],
                #         [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1],
                #         [6, 0, 0, 2, 4, 0, 1, 2, 0, 0, 1],
                #         [1, 0, 0, 0, 7, 1, 1, 0, 0, 0, 1],
                #         [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1],
                #         [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 4],
                #         [1, 1, 1, 1, 1, 1, 1, 1, 6, 7, 1]]

                # 把盘子放更近一点
                map =  [[1, 5, 1, 1, 1, 0, 1, 1, 1, 1, 1],
                        [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1],
                        [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1],
                        [1, 0, 0, 0, 1, 0, 5, 0, 0, 0, 1],
                        [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1],
                        [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1],
                        [6, 0, 0, 2, 4, 0, 1, 2, 0, 0, 1],
                        [1, 0, 0, 0, 7, 1, 1, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 4],
                        [1, 1, 1, 1, 1, 1, 1, 1, 6, 7, 1]]
                

            elif self.mapType == "C":
                map =  [[1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6],
                        [1, 5, 9, 1, 1, 1, 1, 1, 1, 1, 1]]

                # map =  [[1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 1],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7],
                #         [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 7],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                #         [5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                #         [1, 5, 9, 1, 1, 1, 1, 1, 1, 1, 1]]

                # map =  [[1, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1],
                #         [6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7],
                #         [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 7],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                #         [5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                #         [1, 5, 9, 1, 1, 1, 1, 1, 1, 1, 1]]
                

                # map =  [[1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 1],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7],
                #         [1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1],
                #         [1, 0, 0, 1, 1, 1, 1, 1, 1, 2, 7],
                #         [1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 6],
                #         [1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1],
                #         [1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1],
                #         [5, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1],
                #         [5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                #         [1, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1]]


                # map =  [[1, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1],
                #         [6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                #         [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 7],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                #         [5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6],
                #         [1, 5, 9, 1, 1, 1, 1, 1, 1, 1, 1]]


                # map =  [[1, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1],
                #         [6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                #         [1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1],
                #         [1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1],
                #         [1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 7],
                #         [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1],
                #         [1, 0, 1, 0, 0, 0, 0, 0, 1, 2, 7],
                #         [1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1],
                #         [1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1],
                #         [5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6],
                #         [1, 5, 9, 1, 1, 1, 1, 1, 1, 1, 1]]                    

        elif self.xlen == 15 and self.ylen == 15:
            if self.mapType == "A":
                
                map =  [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 7],
                        [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 4],
                        [1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 10, 1]]

            if self.mapType == "A2":
                
                map =  [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 7],
                        [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6],
                        [1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 4],
                        [1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 4, 1]]

            if self.mapType == "A_lowuncertainty":

                map =  [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 7],
                        [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1],
                        [1, 1, 1, 1, 1, 10, 1, 7, 4, 4, 6, 1, 1, 1, 1]]





            if self.mapType == "B":
                map =  [[1, 4, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 5, 1],
                        [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 5, 6, 7, 1, 7, 6, 1, 1, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 2, 1, 2, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
                        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
                

                map =  [[1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,],
                        [1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 10, 0, 0, 1],
                        [1, 0, 2, 1, 0, 0, 1, 0, 1, 0, 0, 4, 2, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 4, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 1, 1, 1, 0, 1, 5, 1, 1, 0, 0, 1],
                        [1, 0, 0, 1, 5, 6, 7, 1, 7, 6, 1, 1, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
                        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]                


                map =  [[1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,],
                        [1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 4, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 10, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 4, 0, 0, 1],
                        [1, 0, 0, 1, 1, 1, 1, 0, 1, 5, 1, 1, 0, 0, 1],
                        [1, 0, 0, 1, 5, 6, 7, 1, 7, 6, 1, 1, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 2, 1, 2, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
                        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]    


                # map =  [[1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,],
                #         [1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1],
                #         [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                #         [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                #         [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                #         [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                #         [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                #         [1, 0, 0, 1, 0, 0, 4, 0, 1, 0, 0, 1, 0, 0, 1],
                #         [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 10, 0, 0, 1],
                #         [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 4, 0, 0, 1],
                #         [1, 0, 0, 1, 1, 1, 1, 0, 1, 5, 1, 1, 0, 0, 1],
                #         [1, 0, 0, 6, 5, 1, 7, 1, 7, 6, 1, 1, 0, 0, 1],
                #         [1, 0, 0, 0, 0, 0, 2, 1, 2, 0, 0, 0, 0, 0, 1],
                #         [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
                #         [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]   

            if self.mapType == "B2":
                map =  [[1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,],
                        [1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 4, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 1, 1, 1, 0, 1, 5, 1, 1, 0, 0, 1],
                        [1, 0, 0, 1, 5, 6, 7, 1, 7, 1, 4, 6, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 2, 1, 2, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
                        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 10, 1]]


            if self.mapType == "B_lowuncertainty":    
                map =  [[1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,],
                        [1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 4, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                        [1, 0, 0, 1, 1, 1, 1, 0, 1, 5, 1, 1, 0, 0, 1],
                        [1, 0, 0, 7, 1, 5, 1, 1, 1, 4, 1, 7, 0, 0, 1],
                        [1, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 1],
                        [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
                        [1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 6, 1, 10, 1]]





















                
                # map =  [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6],
                #         [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                #         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1],
                #         [1, 1, 1, 1, 1, 1, 10, 4, 4, 1, 1, 1, 1, 1, 1]]
                            

        self.initMap = map
        self.map = copy.deepcopy(self.initMap)

        self.oneHotTask = []

        
        for t in TASKLIST:
            if t in self.task:
            # if t == self.task:
                self.oneHotTask.append(1)

            else:
                self.oneHotTask.append(0)

        # 统计每个元素的出现次数
        counter = Counter(self.task)

        # 生成出现次数向量
        self.taskCompletionStatus = [counter[element] if element in counter else 0 for element in TASKLIST]


        self._createItems()
        self.n_agent = len(self.agent)

        #action: move(up, down, left, right), stay
        self.action_space = spaces.Discrete(5)

        #Observation: agent(pos[x,y]) dim = 2
        #    knife(pos[x,y]) dim = 2
        #    delivery (pos[x,y]) dim = 2
        #    plate(pos[x,y]) dim = 2
        #    food(pos[x,y]/status) dim = 3

        self._initObs()
        # print('~~~~~~')
        # print(len(self._get_obs()))
        # print(len(self._get_obs()[0]))
        # print(self.observation_space.shape)
        self.observation_space = spaces.Box(low=0, high=1, shape=(len(self._get_obs()[0]),), dtype=np.float32)



    def _createItems(self):
        """Scan ``self.map`` and instantiate one entity object per occupied cell.

        Every per-type list (``self.agent``, ``self.knife``, ...) is reset and
        then filled while walking the grid with the first index (``x``) in the
        outer loop and the second index (``y``) in the inner loop, so e.g.
        ``self.plate[0]`` is simply the first plate cell met during that scan.
        Afterwards ``self.itemDic`` (name -> list) and the flat
        ``self.itemList`` (all lists concatenated in ``itemDic`` order) are
        rebuilt. Cells whose code matches none of the handled entities are
        ignored.
        """
        self.agent = []
        self.knife = []
        self.delivery = []
        self.tomato = []
        self.lettuce = []
        self.badlettuce = []
        self.onion = []
        self.plate = []
        self.dirtyplate = []

        # Cell code -> (destination list, constructor) for every entity that
        # is built from its grid position alone.
        positional_kinds = {
            ITEMIDX["knife"]: (self.knife, Knife),
            ITEMIDX["delivery"]: (self.delivery, Delivery),
            ITEMIDX["tomato"]: (self.tomato, Tomato),
            ITEMIDX["lettuce"]: (self.lettuce, Lettuce),
            ITEMIDX["badlettuce"]: (self.badlettuce, BadLettuce),
            ITEMIDX["onion"]: (self.onion, Onion),
            ITEMIDX["plate"]: (self.plate, Plate),
            ITEMIDX["dirtyplate"]: (self.dirtyplate, DirtyPlate),
        }
        agent_code = ITEMIDX["agent"]

        for x in range(self.xlen):
            for y in range(self.ylen):
                cell = self.map[x][y]

                if cell == agent_code:
                    # Agents also need a colour: the k-th agent found gets
                    # AGENTCOLOR[k] (k == number of agents created so far).
                    self.agent.append(Agent(x, y, color=AGENTCOLOR[len(self.agent)]))
                    continue

                entry = positional_kinds.get(cell)
                if entry is not None:
                    bucket, factory = entry
                    bucket.append(factory(x, y))

        # Both a dict and a flat list are kept; the insertion order of this
        # dict fixes the order of self.itemList.
        self.itemDic = {
            "tomato": self.tomato,
            "lettuce": self.lettuce,
            "badlettuce": self.badlettuce,
            "onion": self.onion,
            "plate": self.plate,
            "dirtyplate": self.dirtyplate,
            "knife": self.knife,
            "delivery": self.delivery,
            "agent": self.agent,
        }
        self.itemList = [item for group in self.itemDic.values() for item in group]


    # Initialise the vector-format observation.
    def _initObs(self):
        obs = []
        for item in self.itemList:

            x = item.x / self.xlen
            y = item.y / self.ylen
            obs.append(x)
            obs.append(y)

            if isinstance(item, Food):
                obs.append(item.cur_chopped_times / item.required_chopped_times)

            # 切菜板是否装着东西，盘子里是否装着东西
            if isinstance(item, Plate):
                if item.containing:
                    obs.append(1)
                else:
                    obs.append(0)

            if isinstance(item, DirtyPlate):
                if item.containing:
                    obs.append(1)
                else:
                    obs.append(0)

            if isinstance(item, Knife):
                if item.holding:
                    obs.append(1)
                else:
                    obs.append(0)     

            if isinstance(item, Agent):
                if item.holding:
                    obs.append(1)
                else:
                    obs.append(0)


        # oneHotTask = [0, 0, 0, 0, 1, 0, 0]
        # +=的意思应该就是拼接吧
        obs += self.oneHotTask

        # obs += self.taskCompletionStatus 

        # 最后初始化的obs是[3/7, 1/7, 1/3, 3/7, 1/7, 1/3, ..., 3/7, 1/7, 1/3, 0, 0, 0, 0, 1, 0, 0]
        # 这个表征方式有点弱啊，不容易学的出来

        # 让每一个agent的obs都是一致的
        for agent in self.agent:
            agent.obs = obs
        return [np.array(obs)] * self.n_agent


    # The initial vector state is identical to the initial observation.
    # (The state representation is meant to be refined further by hand.)
    def _get_vector_state(self):
        state = []
        # print('++++++++++++++++++++++++++++++++++++++')
        # print('++++++++++++++++++++++++++++++++++++++')
        # print('++++++++++++++++++++++++++++++++++++++')
        # print('++++++++++++++++++++++++++++++++++++++')
        # print(self.itemList)
        for item in self.itemList:
            x = item.x / self.xlen
            y = item.y / self.ylen
            state.append(x)
            state.append(y)
            if isinstance(item, Food):
                state.append(item.cur_chopped_times / item.required_chopped_times)



            """
            # 下面的代码貌似没生效，因为agent使用的是_get_vector_obs（这里面只对食物添加了第三维度-是否切好，对其他盘子之类的只有x，y）
            # 不对，并不是没有生效，下面代码在full observation的时候是生效了的，是不过在partial observation的时候会采用_get_vector_obs
            # 而恰恰就是在_get_vector_obs中，对agent.obs进行了赋值和修改——————agent.obs，在_get_vector_state只是返回了state vector，
            # 并未对agent.obs做出修改
            """
            # 切菜板是否装着东西，盘子里是否装着东西
            if isinstance(item, Plate):
                if item.containing:
                    state.append(1)
                else:
                    state.append(0)

            if isinstance(item, DirtyPlate):
                if item.containing:
                    state.append(1)
                else:
                    state.append(0)


            if isinstance(item, Knife):
                if item.holding:
                    state.append(1)
                else:
                    state.append(0)                

            if isinstance(item, Agent):
                if item.holding:
                    state.append(1)
                else:
                    state.append(0)

            # print(state)


        state += self.oneHotTask
        # state += self.taskCompletionStatus
        # print(state)
        return [np.array(state)] * self.n_agent




    def _get_vector_state_new(self):
    

        macro_obs = []

        for idx, agent in enumerate(self.agent):
            obs = []

            # === Part 1: Encode own agent ===
            obs.append(agent.x / self.xlen)
            obs.append(agent.y / self.ylen)

            # identity one-hot: self = [1, 0]
            obs.append(1)
            obs.append(0)

            # holding (is holding flag)
            obs.append(1 if agent.holding else 0)



            # === Part 2: Encode teammate agent ===
            for teammate in self.agent:
                if teammate == agent:
                    continue  # skip self
                dx = teammate.x - agent.x
                dy = teammate.y - agent.y
                rel_x = dx / self.xlen
                rel_y = dy / self.ylen
                obs.append(rel_x)
                obs.append(rel_y)

                # identity one-hot: teammate = [0, 1]
                obs.append(0)
                obs.append(1)

                # # teammate holding: only encode whether holding
                # obs.append(1 if teammate.holding else 0)

                # # teammate holding_idx one-hot
                # obs += get_one_hot_index(teammate.holding)

            # === Part 3: Encode items relative to own agent ===
            for item in self.itemList:
                if isinstance(item, Agent):
                    continue  # Agents already encoded separately

                dx = item.x - agent.x
                dy = item.y - agent.y
                rel_x = dx / self.xlen
                rel_y = dy / self.ylen
                obs.append(rel_x)
                obs.append(rel_y)

                # Food chopped progress
                if isinstance(item, Food):
                    obs.append(item.cur_chopped_times / item.required_chopped_times)

                # Plate containing
                if isinstance(item, Plate):
                    obs.append(1 if item.containing else 0)


                # DirtyPlate containing
                if isinstance(item, DirtyPlate):
                    obs.append(1 if item.containing else 0)


                # Knife holding
                if isinstance(item, Knife):
                    obs.append(1 if item.holding else 0)


            # === Part 4: oneHotTask ===
            # obs += self.oneHotTask


            macro_obs.append(obs)

        return macro_obs





    def _get_vector_state_new2(self):
        """
        Returns
        -------
        macro_vector_obs : list
            vector observation for each agent.
        """

        def _held_onehot(holder):
            """
            返回手持物品的 one-hot (3维: Lettuce, BadLettuce, Plate)，否则全 0
            """
            held = None
            # 1) 优先 holding_item
            if hasattr(holder, "holding_item") and holder.holding_item is not None:
                held = holder.holding_item
            # 2) 再看 holding 属性
            elif hasattr(holder, "holding") and holder.holding not in (None, False, 0, 0.0, ""):
                if not isinstance(holder.holding, (bool, int, float)):
                    held = holder.holding
            # 3) 回退：检查 itemList
            if held is None:
                for it in self.itemList:
                    if getattr(it, "holder", None) is holder:
                        held = it
                        break

            # === one-hot ===
            onehot = [0.0, 0.0, 0.0]
            if isinstance(held, Lettuce):
                onehot = [1.0, 0.0, 0.0]
            elif isinstance(held, BadLettuce):
                onehot = [0.0, 1.0, 0.0]
            elif isinstance(held, Plate):
                onehot = [0.0, 0.0, 1.0]
            return onehot

        macro_obs = []

        for idx, agent in enumerate(self.agent):
            obs = []

            # === Part 1: Encode own agent ===
            obs.append(agent.x / self.xlen)
            obs.append(agent.y / self.ylen)

            # identity one-hot: self = [1, 0]
            obs.append(1.0)
            obs.append(0.0)

            # holding flag + one-hot
            # obs.append(1.0 if agent.holding else 0.0)
            obs.extend(_held_onehot(agent))

            # === Part 2: Encode teammate agent ===
            for teammate in self.agent:
                if teammate is agent:
                    continue
                obs.append(teammate.x / self.xlen)
                obs.append(teammate.y / self.ylen)

                # identity one-hot: teammate = [0, 1]
                obs.append(0.0)
                obs.append(1.0)

                # obs.append(1.0 if teammate.holding else 0.0)
                obs.extend(_held_onehot(teammate))

            # === Part 3: Encode items relative to own agent ===
            for item in self.itemList:
                if isinstance(item, Agent):
                    continue  # Agents already encoded separately

                dx = item.x - agent.x
                dy = item.y - agent.y
                rel_x = dx / self.xlen
                rel_y = dy / self.ylen
                obs.append(rel_x)
                obs.append(rel_y)

                # Food chopped progress
                if isinstance(item, Food):
                    obs.append(item.cur_chopped_times / item.required_chopped_times)

                # Plate containing
                if isinstance(item, Plate):
                    obs.append(1.0 if item.containing else 0.0)

                # DirtyPlate containing
                if isinstance(item, DirtyPlate):
                    obs.append(1.0 if item.containing else 0.0)

                # Knife holding
                if isinstance(item, Knife):
                    obs.append(1.0 if item.holding else 0.0)

            # 保存
            macro_obs.append(obs)

        return macro_obs
    



    # def _get_macro_vector_obs_new2(self):
    #     """
    #     Returns
    #     -------
    #     macro_vector_obs : list
    #         vector observation for each agent.
    #     """

    #     def _norm_held_item_index(holder):
    #         """
    #         返回 holder 所持物品在 itemList 的归一化索引：
    #         0.0 表示未持有或找不到，(idx+1)/N 表示第 idx 个（1..N）。
    #         """
    #         idx = -1
    #         N = len(self.itemList)

    #         # 1) 若有显式 holding_item 对象，优先用它
    #         if hasattr(holder, "holding_item") and holder.holding_item is not None:
    #             try:
    #                 idx = self.itemList.index(holder.holding_item)
    #             except ValueError:
    #                 idx = -1

    #         # 2) 某些实现里 agent.holding 直接是对象（而不仅是布尔）
    #         elif hasattr(holder, "holding") and holder.holding not in (None, False, 0, 0.0, ""):
    #             # 避免 bool 被当对象
    #             if not isinstance(holder.holding, (bool, int, float)):
    #                 try:
    #                     idx = self.itemList.index(holder.holding)
    #                 except ValueError:
    #                     idx = -1
    #             else:
    #                 idx = -1

    #         # 3) 回退：在 itemList 里找 holder 标记（如 item.holder == agent）
    #         if idx < 0:
    #             for i, it in enumerate(self.itemList):
    #                 if getattr(it, "holder", None) is holder:
    #                     idx = i
    #                     break

    #         if N == 0 or idx < 0:
    #             return 0.0
    #         return (idx + 1) / N

    #     macro_obs = []

    #     for idx, agent in enumerate(self.agent):
    #         obs = []

    #         # === Part 1: Encode own agent ===
    #         obs.append(agent.x / self.xlen)
    #         obs.append(agent.y / self.ylen)

    #         # identity one-hot: self = [1, 0]
    #         obs.append(1)
    #         obs.append(0)

    #         # holding flag + 持有物品在 itemList 的归一化索引
    #         obs.append(1 if agent.holding else 0)
    #         obs.append(_norm_held_item_index(agent))

    #         # === Part 2: Encode teammate agent ===
    #         for teammate in self.agent:
    #             if teammate == agent:
    #                 continue  # skip self
    #             obs.append(teammate.x / self.xlen)
    #             obs.append(teammate.y / self.ylen)

    #             # identity one-hot: teammate = [0, 1]
    #             obs.append(0)
    #             obs.append(1)

    #             # teammate holding flag + 归一化索引
    #             obs.append(1 if teammate.holding else 0)
    #             obs.append(_norm_held_item_index(teammate))

    #         # === Part 3: Encode items relative to own agent ===
    #         for item in self.itemList:
    #             if isinstance(item, Agent):
    #                 continue  # Agents already encoded separately

    #             dx = item.x - agent.x
    #             dy = item.y - agent.y
    #             rel_x = dx / self.xlen
    #             rel_y = dy / self.ylen
    #             obs.append(rel_x)
    #             obs.append(rel_y)

    #             # Food chopped progress
    #             if isinstance(item, Food):
    #                 obs.append(item.cur_chopped_times / item.required_chopped_times)

    #             # Plate containing
    #             if isinstance(item, Plate):
    #                 obs.append(1 if item.containing else 0)

    #             # DirtyPlate containing
    #             if isinstance(item, DirtyPlate):
    #                 obs.append(1 if item.containing else 0)

    #             # Knife holding
    #             if isinstance(item, Knife):
    #                 obs.append(1 if item.holding else 0)

    #         # 保存
    #         macro_obs.append(obs)

    #     return macro_obs
    



    # Image-type state. Note: get_image_obs takes the agents' observable view radius into account.
    def _get_image_state(self):
        return [self.game.get_image_obs()] * self.n_agent



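    # _get_obs is the function that actually returns the observations. When the agents' observation radius is
    # limited it returns the vector built by _get_vector_obs; under full observation it returns the full-state
    # vector (originally _get_vector_state; see the body of _get_obs for the encoder currently selected).
    # Note that _get_vector_obs is called outside the radius check, so it runs regardless of the radius.
    # Under full observation, step/reset therefore return the full-state vector, while agent.obs is the one
    # written inside _get_vector_obs. As a result agent.obs has M dimensions whereas the observation returned
    # by step/reset has M+N dimensions, N being the extra Plate/Knife/Agent status values added to the state.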

    def _get_obs(self):
        """
        Returns
        -------
        obs : list
            observation for each agent.
        """

        vec_obs = self._get_vector_obs()

        # print("===========returned obs:", len(self._get_vector_state()[0]))
        # print('~~~~!!!')
        # print(vec_obs)
        if self.obs_radius > 0:
            if self.mode == "vector":
                return vec_obs
            elif self.mode == "image":
                return self._get_image_obs()
        # 如果radius是0的话，那么每一个agent都可以观察到全部的信息，即state
        # 我知道radius怎么控制的观测半径了，原来不是通过在get_vector_obs里进行判断，而是直接当radius=0的时候调用了另一个函数，get_vector_state()
        else:
            if self.mode == "vector":
                # return self._get_vector_state()
                return self._get_vector_state_new()
                # return self._get_vector_state_new2()
            elif self.mode == "image":
                return self._get_image_state()

    def _get_vector_obs(self):

        """
        Returns
        -------
        vector_obs : list
            vector observation for each agent.
        """

        po_obs = []

        # print('self.oneHotTask ', self.oneHotTask )


        # print('here to look at')
        # print(self.xlen)
        # print(self.ylen)
        # print(self.mapType)
        # print(self.agent)
        for agent in self.agent:
            obs = []
            idx = 0
            # print('here0')


            if self.xlen == 3 and self.ylen == 3:
                # print('here1')
                if self.mapType == "A":
                    agent.pomap =  [[1, 1, 1],
                                    [1, 0, 1],
                                    [1, 1, 1]]
                elif self.mapType == "B":
                    # print('here2')
                    agent.pomap =  [[1, 1, 1],
                                    [1, 0, 1],
                                    [1, 1, 1]]
                elif self.mapType == "C":
                    agent.pomap =  [[1, 1, 1],
                                    [1, 0, 1],
                                    [1, 1, 1]]
            elif self.xlen == 5 and self.ylen == 5:
                # print('here1')
                if self.mapType == "A":
                    agent.pomap =  [[1, 1, 1, 1, 1],
                                    [1, 0, 0, 0, 1],
                                    [1, 0, 0, 0, 1],
                                    [1, 0, 0, 0, 1],
                                    [1, 1, 1, 1, 1]]
                elif self.mapType == "B":
                    # print('here2')
                    agent.pomap =  [[1, 1, 1, 1, 1],
                                    [1, 0, 1, 0, 1],
                                    [1, 0, 1, 0, 1],
                                    [1, 0, 1, 0, 1],
                                    [1, 1, 1, 1, 1]]
                elif self.mapType == "C":
                    agent.pomap =  [[1, 1, 1, 1, 1],
                                    [1, 0, 1, 0, 1],
                                    [1, 0, 1, 0, 1],
                                    [1, 0, 0, 0, 1],
                                    [1, 1, 1, 1, 1]]
            elif self.xlen == 3 and self.ylen == 5:
                # print('here1')
                if self.mapType == "A":
                    agent.pomap =  [[1, 1, 1, 1, 1],
                                    [1, 0, 0, 0, 1],
                                    [1, 0, 0, 0, 1],
                                    [1, 0, 0, 0, 1],
                                    [1, 1, 1, 1, 1]]
                elif self.mapType == "B":
                    # print('here2')
                    # agent.pomap =  [[1, 1, 1, 1, 1],
                    #                 [1, 0, 1, 0, 1],
                    #                 [1, 0, 1, 0, 1],
                    #                 [1, 0, 1, 0, 1],
                    #                 [1, 1, 1, 1, 1]]
                    agent.pomap =  [[1, 1, 1, 1, 1],
                                    [1, 0, 1, 0, 1],
                                    [1, 0, 1, 0, 1]]
                elif self.mapType == "C":
                    agent.pomap =  [[1, 1, 1, 1, 1],
                                    [1, 0, 1, 0, 1],
                                    [1, 0, 1, 0, 1],
                                    [1, 0, 0, 0, 1],
                                    [1, 1, 1, 1, 1]]
            elif self.xlen == 7 and self.ylen == 7:
                if self.mapType == "A":
                    agent.pomap= [[1, 1, 1, 1, 1, 1, 1],
                                  [1, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 1],
                                  [1, 1, 1, 1, 1, 1, 1]]
                elif self.mapType == "B":
                    # agent.pomap= [[1, 1, 1, 1, 1, 1, 1],
                    #               [1, 0, 0, 1, 0, 0, 1],
                    #               [1, 0, 0, 1, 0, 0, 1],
                    #               [1, 0, 0, 1, 0, 0, 1],
                    #               [1, 0, 0, 1, 0, 0, 1],
                    #               [1, 0, 0, 1, 0, 0, 1],
                    #               [1, 1, 1, 1, 1, 1, 1]]
                    agent.pomap= [[1, 1, 1, 0, 1, 1, 1],
                                  [1, 0, 1, 0, 1, 0, 1],
                                  [1, 0, 1, 0, 1, 0, 1],
                                  [1, 0, 1, 1, 1, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1],
                                  [1, 1, 1, 1, 1, 1, 1]]
                    
                    agent.pomap= [[1, 1, 1, 0, 1, 1, 1],
                                  [1, 0, 1, 0, 1, 0, 1],
                                  [1, 0, 1, 0, 1, 0, 1],
                                  [1, 0, 1, 0, 1, 0, 1],
                                  [1, 0, 1, 1, 1, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1],
                                  [1, 1, 1, 1, 1, 1, 1]]


                elif self.mapType == "C":
                    agent.pomap= [[1, 1, 1, 1, 1, 1, 1],
                                  [1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 1],
                                  [1, 1, 1, 1, 1, 1, 1]]
                    
            elif self.xlen == 9 and self.ylen == 9:
                if self.mapType == "A":
                    agent.pomap= [[1, 1, 1, 1, 1, 1, 1, 1, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 1, 1, 1, 1, 1, 1, 1, 1]]
                elif self.mapType == "B":
                    # agent.pomap= [[1, 1, 1, 1, 1, 1, 1, 1, 1],
                    #               [1, 0, 0, 0, 1, 0, 0, 0, 1],
                    #               [1, 0, 0, 0, 1, 0, 0, 0, 1],
                    #               [1, 0, 0, 0, 1, 0, 0, 0, 1],
                    #               [1, 0, 0, 0, 1, 0, 0, 0, 1],
                    #               [1, 0, 0, 0, 1, 0, 0, 0, 1],
                    #               [1, 0, 0, 0, 1, 0, 0, 0, 1],
                    #               [1, 0, 0, 0, 1, 0, 0, 0, 1],
                    #               [1, 1, 1, 1, 1, 1, 1, 1, 1]]
                    
                    agent.pomap= [[1, 1, 1, 1, 0, 1, 1, 1, 1],
                                  [1, 0, 0, 1, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 1, 1, 0, 0, 1],
                                  [1, 0, 2, 0, 1, 0, 2, 0, 1],
                                  [1, 0, 0, 0, 1, 0, 0, 0, 1],
                                  [1, 1, 1, 1, 1, 1, 1, 1, 1]]
                elif self.mapType == "C":
                    agent.pomap= [[1, 1, 1, 1, 1, 1, 1, 1, 1],
                                  [1, 0, 0, 0, 1, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 1, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 1, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 1, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 1, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 1, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 1, 1, 1, 1, 1, 1, 1, 1]]


            elif self.xlen == 11 and self.ylen == 11:
                if self.mapType == "A":
                    agent.pomap= [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
                    
                    agent.pomap= [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
                    

                elif self.mapType == "B":

# ITEMIDX= {"space": 0, "counter": 1, "agent": 2, "tomato": 3, "lettuce": 4, "plate": 5, "knife": 6, "delivery": 7, "onion": 8}

                    agent.pomap= [[1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1],
                                  [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1],
                                  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
                    

                elif self.mapType == "C":
                    agent.pomap= [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]

                    # agent.pomap= [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                    #               [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #               [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #               [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                    #               [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #               [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #               [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #               [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #               [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #               [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #               [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
                    


                    # agent.pomap= [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                    #               [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #               [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #               [1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1],
                    #               [1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1],
                    #               [1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1],
                    #               [1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1],
                    #               [1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1],
                    #               [1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1],
                    #               [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #               [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
                    

                    # agent.pomap= [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                    #             [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #             [1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1],
                    #             [1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1],
                    #             [1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1],
                    #             [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1],
                    #             [1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1],
                    #             [1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1],
                    #             [1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1],
                    #             [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #             [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]  

            elif self.xlen == 15 and self.ylen == 15:
                if self.mapType == "A":
                    agent.pomap= [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
                
                if self.mapType == "A2":
                    agent.pomap= [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]

                if self.mapType == "A_lowuncertainty":
                    agent.pomap= [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
                    

                if self.mapType == "B":
                    agent.pomap= [[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,],
                                  [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
                    

                    agent.pomap= [[1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,],
                                  [1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]

                    # agent.pomap= [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                    #             [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #             [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #             [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #             [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #             [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #             [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #             [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                    #             [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #             [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #             [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #             [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #             [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #             [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                    #             [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]

                if self.mapType == "B2":    
                    agent.pomap= [[1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,],
                                  [1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]


                if self.mapType == "B_lowuncertainty":    
                    agent.pomap= [[1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,],
                                  [1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1],
                                  [1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
                                  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]




            self.layout_pomap = agent.pomap




            for item in self.itemList:

                x = item.x / self.xlen
                y = item.y / self.ylen
                obs.append(x)
                obs.append(y)

                if isinstance(item, Food):
                    obs.append(item.cur_chopped_times / item.required_chopped_times)

                # 切菜板是否装着东西，盘子里是否装着东西
                if isinstance(item, Plate):
                    if item.containing:
                        obs.append(1)
                    else:
                        obs.append(0)

                if isinstance(item, DirtyPlate):
                    if item.containing:
                        obs.append(1)
                    else:
                        obs.append(0)

                if isinstance(item, Knife):
                    if item.holding:
                        obs.append(1)
                    else:
                        obs.append(0)
                
                if isinstance(item, Agent):
                    if item.holding:
                        obs.append(1)
                    else:
                        obs.append(0)
                # print('Let us see the obs changes')
                # print(obs)
                agent.pomap[int(x * self.xlen)][int(y * self.ylen)] = ITEMIDX[item.rawName]
            agent.pomap[agent.x][agent.y] = ITEMIDX["agent"]
            # print('agent POMAP')
            # print('-------')
            # print(agent.pomap)
            obs += self.oneHotTask 
            # obs += self.taskCompletionStatus
            agent.obs = obs

            # print('obs: ', obs)
            po_obs.append(np.array(obs))
        return po_obs


    # Build the image observation for every agent.
    def _get_image_obs(self):

        """
        Returns
        -------
        image_obs : list
            image observation for each agent.
        """

        po_obs = []
        frame = self.game.get_image_obs()
        old_image_width, old_image_height, channels = frame.shape
        new_image_width = int((old_image_width / self.xlen) * (self.xlen + 2 * (self.obs_radius - 1)))
        new_image_height =  int((old_image_height / self.ylen) * (self.ylen + 2 * (self.obs_radius - 1)))
        color = (0,0,0)
        obs = np.full((new_image_height,new_image_width, channels), color, dtype=np.uint8)

        x_center = (new_image_width - old_image_width) // 2
        y_center = (new_image_height - old_image_height) // 2

        obs[x_center:x_center+old_image_width, y_center:y_center+old_image_height] = frame

        for idx, agent in enumerate(self.agent):
            agent_obs = self._get_PO_obs(obs, agent.x, agent.y, old_image_width, old_image_height)
            po_obs.append(agent_obs)
        return po_obs

    # Helper of _get_image_obs: crops a single agent's window out of the padded frame.
    def _get_PO_obs(self, obs, x, y, ori_width, ori_height):
        x1 = (x - 1) * int(ori_width / self.xlen)
        x2 = (x + self.obs_radius * 2) * int(ori_width / self.xlen)
        y1 = (y - 1) * int(ori_height / self.ylen)
        y2 = (y + self.obs_radius * 2) * int(ori_height / self.ylen)
        return obs[x1:x2, y1:y2]

    def _findItem(self, x, y, itemName):
        for item in self.itemDic[itemName]:
            if item.x == x and item.y == y:
                return item
        return None

    @property
    def state_size(self):
        return self.get_state().shape[0]

    @property
    def obs_size(self):
        return [self.observation_space.shape[0]] * self.n_agent

    @property
    def n_action(self):
        return [a.n for a in self.action_spaces]

    @property
    def action_spaces(self):
        return [self.action_space] * self.n_agent

    def get_avail_actions(self):
        return [self.get_avail_agent_actions(i) for i in range(self.n_agent)]

    def get_avail_agent_actions(self, nth):
        return [1] * self.action_spaces[nth].n

    def action_space_sample(self, i):
        return np.random.randint(self.action_spaces[i].n)
    
    def reset(self):

        """
        Start a new episode.

        Restores the map from ``initMap``, recreates the items, clears the
        per-episode flags and counters, recomputes the task completion
        vector and rebuilds the observations.

        Returns
        -------
        obs : list
            observation for each agent.
        """

        # Deep copy, so later writes to self.map leave initMap untouched.
        self.map = copy.deepcopy(self.initMap)
        self._createItems()

        # Items sitting at two fixed cells right after creation
        # (None when the current layout has nothing there).
        self.right_side_plate = self._findItem(5, 6, "plate")
        self.left_side_lettuce = self._findItem(3, 2, "lettuce")

        # Per-episode bookkeeping.
        self.collaborated = False
        self.usedirtyplate = False
        self.first_time_pickup_chopped_food = True
        self.step_count = 0

        # Reset taskCompletionStatus: for every recipe in TASKLIST, how many
        # times it occurs in self.task (Counter yields 0 for absent recipes).
        occurrences = Counter(self.task)
        self.taskCompletionStatus = [occurrences[recipe] for recipe in TASKLIST]

        self._initObs()
        if self.debug:
            self.game.on_cleanup()

        return self._get_obs()
    

    def soft_reset_obs_only(self):
        """
        伪 reset 函数：不重置环境状态，只重置 obs。
        
        Returns
        -------
        obs : list
            当前各 agent 的 observation（重新计算）
        """
        self.map = copy.deepcopy(self.initMap)
        self._createItems()
        
        self._initObs()
        return self._get_obs()




    def step(self, action):

        """
        Parameters
        ----------
        action: list
            action for each agent

        Returns
        -------
        obs : list
            observation for each agent.
        rewards : list
        terminate : list
        info : dictionary
        """

        # print('step了么')

        # 每调用一次step方法，计数加一
        self.step_count += 1


        # 执行任意一个action，都要花费一个step，都要先penalty一下
        self.reward = [self.rewardList[0]["step penalty"], self.rewardList[1]["step penalty"], self.rewardList[0]["step penalty"]]

        done = False


        # 如果步骤计数达到24，标记done为True并重置计数器
        # if self.step_count >= 200:
        if self.step_count >= 400:
            # 如果超过50步仍未完成，则视为失败
            # self.reward += self.rewardList["wrong delivery"]
            done = True
            self.step_count = 0



        info = {}
        info['cur_mac'] = action
        info['mac_done'] = [True] * self.n_agent
        info['collision'] = []

        all_action_done = False

        for agent in self.agent:
            agent.moved = False

        if self.debug:
            # print("in overcooked primitive actions:", action)
            x = 1



        

        
        while not all_action_done:

            """这里有个遍历agent的操作，谁执行了【最终动作】，把分加给谁。至于前序的【准备动作】则不加分。我觉得make sense"""

            for idx, agent in enumerate(self.agent):
                agent_action = action[idx]
                if agent.moved:
                    continue
                agent.moved = True

                if agent_action < 4:
                    target_x = agent.x + DIRECTION[agent_action][0]
                    target_y = agent.y + DIRECTION[agent_action][1]
                    target_name = ITEMNAME[self.map[target_x][target_y]]

                    # print('~~~~~~~~')
                    # print(self.map[target_x][target_y])
                    # print(ITEMNAME[self.map[target_x][target_y]])


                    """
                    # 下面这几行代码是我自己加的，为了保证食物能够始终供给
                    """

                    # print('agent.holding: ', agent.holding)
                    # print(isinstance(agent.holding, Food))
                    # print(isinstance(agent.holding, Lettuce))
                    # if isinstance(agent.holding, Food):
                    #     food = agent.holding
                    #     # 下面两行是刷新food
                    #     food.refresh()
                    #     print(food.x)
                    #     print(food.y)
                    #     self.map[food.x][food.y] = ITEMIDX[food.rawName]




                    """
                    # 把食物放到切菜板，加少量分
                    # 切菜，加少量分
                    # 装盘，加更多的分
                    """
                    # 说实在的，只有这个部分我没看懂    
                    # GPT解释说这是让agent和target agent互换位置的代码，但是不是说两个agent不能碰撞吗
                    if target_name == "agent":
                        target_agent = self._findItem(target_x, target_y, target_name)
                        if not target_agent.moved:
                            agent.moved = False
                            target_agent_action = action[AGENTCOLOR.index(target_agent.color)]
                            if target_agent_action < 4:
                                new_target_agent_x = target_agent.x + DIRECTION[target_agent_action][0]
                                new_target_agent_y = target_agent.y + DIRECTION[target_agent_action][1]
                                if new_target_agent_x == agent.x and new_target_agent_y == agent.y:
                                    target_agent.move(new_target_agent_x, new_target_agent_y)
                                    agent.move(target_x, target_y)
                                    agent.moved = True
                                    target_agent.moved = True
                    # 如果是space，那就直接移动
                    elif  target_name == "space":
                        self.map[agent.x][agent.y] = ITEMIDX["space"]
                        agent.move(target_x, target_y)
                        self.map[target_x][target_y] = ITEMIDX["agent"]

                    #pickup and chop
                    # 如果agent没有持有任何东西
                    elif not agent.holding:
                        # 如果是这四类可以移动的item
                        if target_name == "tomato" or target_name == "lettuce" or target_name == "badlettuce" or target_name == "plate" or target_name == "onion" or target_name == "dirtyplate":
                            item = self._findItem(target_x, target_y, target_name)
                            agent.pickup(item)
                            # 因为取走了这些可移动的item了，所以把地图中对应的位置变成counter
                            self.map[target_x][target_y] = ITEMIDX["counter"]

                            if target_name == "badlettuce":
                                self.reward[idx] += self.rewardList[idx]["pick up bad lettuce"]
                                self.reward[2] += self.rewardList[idx]["pick up bad lettuce"]
                            

                            # reward += self.rewardList["metatask finished"]
                        

                        elif target_name == "knife":
                            knife = self._findItem(target_x, target_y, target_name)
                            # 如果切菜板上面有盘子，则agent会取走盘子
                            if isinstance(knife.holding, Plate) or isinstance(knife.holding, DirtyPlate):
                                item = knife.holding
                                knife.release()
                                agent.pickup(item)
                                # reward += self.rewardList["metatask finished"]
                            # 如果切菜板上面是食物，则判断是否已经切好
                            elif isinstance(knife.holding, Food):
                                # 如果已经切好了，则取走
                                if knife.holding.chopped:
                                    item = knife.holding
                                    knife.release()
                                    agent.pickup(item)
                                    if self.first_time_pickup_chopped_food == True:
                                        self.reward[idx] += self.rewardList[idx]["goodtask finished"]
                                        self.reward[2] += self.rewardList[idx]["goodtask finished"]
                                        self.first_time_pickup_chopped_food = False
                                # 如果还没有切好，则切一次，判断是否切好，如果切好，判断所切的item是否属于当前task中的，是的话则赋予10的奖励
                                # ["tomato salad", "lettuce salad", "onion salad", "lettuce-tomato salad", "onion-tomato salad", "lettuce-onion salad", "lettuce-onion-tomato salad"]
                                else:
                                    knife.holding.chop()
                                    self.reward[idx] += self.rewardList[idx]["goodtask finished"]
                                    self.reward[2] += self.rewardList[idx]["goodtask finished"]

                                    # if isinstance(knife.holding, BadLettuce):
                                    #     self.reward[idx] += self.rewardList["penalize using bad lettuce"]
                                    #     self.reward[2] += self.rewardList["penalize using bad lettuce"]

                                    if knife.holding.chopped:
                                        # if knife.holding.rawName in self.task:
                                        for task in self.task:
                                            if knife.holding.rawName in task:
                                                # 不鼓励切菜和取走，只鼓励装盘
                                                self.reward[idx] += self.rewardList[idx]["minitask finished"]
                                                self.reward[2] += self.rewardList[idx]["minitask finished"]
                    #put down
                    # 如果agent当前已经持有东西
                    elif agent.holding:
                        # 如果移动的目标是counter，则会放下手中的东西
                        if target_name == "counter":
                            if agent.holding.rawName in ["tomato", "lettuce", "badlettuce", "onion", "plate", "dirtyplate"]:
                                # 把该counter变成agent手中持有的可移动item，这个rawName应该是一些数字
                                self.map[target_x][target_y] = ITEMIDX[agent.holding.rawName]
                            # 恢复非持物状态
                            agent.putdown(target_x, target_y)

                            self.reward[idx] += self.rewardList[idx]["metatask failed"]
                            self.reward[2] += self.rewardList[idx]["metatask failed"]
                        # 如果移动目标是盘子
                        elif target_name == "plate" or target_name == "dirtyplate":
                            # 如果手中拿的是食物，判断是否切好，未切好不能装盘
                            if isinstance(agent.holding, Food):
                                if agent.holding.chopped:
                                    if isinstance(agent.holding, BadLettuce):
                                        self.reward[idx] += self.rewardList[idx]["penalize using bad lettuce"]
                                        self.reward[2] += self.rewardList[idx]["penalize using bad lettuce"]

                                    # 给装盘一个较大的奖励
                                    if target_name == "plate":
                                        self.reward[idx] += self.rewardList[idx]["subtask finished"]
                                        self.reward[2] += self.rewardList[idx]["subtask finished"]
                                    else:
                                        """既要加分"""
                                        self.reward[idx] += self.rewardList[idx]["subtask finished"]
                                        self.reward[2] += self.rewardList[idx]["subtask finished"]
                                        """也要减分"""
                                        self.reward[idx] += self.rewardList[idx]["penalize using dirty plate"]
                                        self.reward[2] += self.rewardList[idx]["penalize using dirty plate"]
                                    plate = self._findItem(target_x, target_y, target_name)
                                    item = agent.holding
                                    # 放下手中的物品，恢复未持物状态
                                    agent.putdown(target_x, target_y)
                                    # 把食物装进盘子里
                                    plate.contain(item)
                            else:
                                self.reward[idx] += self.rewardList[idx]["metatask failed"]
                                self.reward[2] += self.rewardList[idx]["metatask failed"]
                        # 如果移动目标是切菜板
                        elif target_name == "knife":
                            knife = self._findItem(target_x, target_y, target_name)
                            # 如果切菜板是空的，则把agent手中的东西放置下来
                            if not knife.holding:
                                item = agent.holding
                                agent.putdown(target_x, target_y)
                                knife.hold(item)
                                if isinstance(item, Food):


                                    if item.chopped:
                                        # 把切好的菜放回去要减分
                                        self.reward[idx] += self.rewardList[idx]["metatask failed"]
                                        self.reward[2] += self.rewardList[idx]["metatask failed"]
                                    else:
                                        # if isinstance(item, BadLettuce):
                                        #     self.reward[idx] += self.rewardList["penalize using bad lettuce"]
                                        #     self.reward[2] += self.rewardList["penalize using bad lettuce"]
                                        # 只有把没切好的食物放在切菜板上才加分
                                        self.reward[idx] += self.rewardList[idx]["goodtask finished"]
                                        self.reward[2] += self.rewardList[idx]["goodtask finished"]
                                else:
                                    self.reward[idx] += self.rewardList[idx]["metatask failed"]
                                    self.reward[2] += self.rewardList[idx]["metatask failed"]
                            # 如果切菜板上是食物，agent手中拿的是盘子，则判断食物是否切好了，如果切好了则把食物装进盘子中
                            elif isinstance(knife.holding, Food) and (isinstance(agent.holding, Plate) or isinstance(agent.holding, DirtyPlate)):
                                item = knife.holding
                                if item.chopped:

                                    if isinstance(item, BadLettuce):
                                        self.reward[idx] += self.rewardList[idx]["penalize using bad lettuce"]
                                        self.reward[2] += self.rewardList[idx]["penalize using bad lettuce"]
                                    if isinstance(agent.holding, DirtyPlate):
                                        """既要加分"""
                                        self.reward[idx] += self.rewardList[idx]["subtask finished"]
                                        self.reward[2] += self.rewardList[idx]["subtask finished"]
                                        """也要减分"""
                                        self.reward[idx] += self.rewardList[idx]["penalize using dirty plate"]
                                        self.reward[2] += self.rewardList[idx]["penalize using dirty plate"]
                                    else:
                                        self.reward[idx] += self.rewardList[idx]["subtask finished"]
                                        self.reward[2] += self.rewardList[idx]["subtask finished"]
                                    knife.release()
                                    agent.holding.contain(item)
                                else:
                                    # 没切好就拿盘子装，减分
                                    self.reward[idx] += self.rewardList[idx]["metatask failed"]
                                    self.reward[2] += self.rewardList[idx]["metatask failed"]
                            elif isinstance(knife.holding, Food) and not (isinstance(agent.holding, Plate) or isinstance(agent.holding, DirtyPlate)):
                                # 切菜板上是食物，但是agent拿的不是盘子，是食物，减分
                                self.reward[idx] += self.rewardList[idx]["metatask failed"]
                                self.reward[2] += self.rewardList[idx]["metatask failed"]
                            # 如果切菜板上的是盘子，agent手中拿的是食物，则判断食物是否切好了，如果切好了，把食物放在盘子中，要注意此时agent需要先拿起盘子，再让盘子装起食物
                            elif (isinstance(knife.holding, Plate) or isinstance(knife.holding, DirtyPlate)) and isinstance(agent.holding, Food):
                                plate_item = knife.holding
                                food_item = agent.holding
                                if food_item.chopped:
                                    if isinstance(food_item, BadLettuce):
                                        self.reward[idx] += self.rewardList[idx]["penalize using bad lettuce"]
                                        self.reward[2] += self.rewardList[idx]["penalize using bad lettuce"]
                                    self.reward[idx] += self.rewardList[idx]["subtask finished"]
                                    self.reward[2] += self.rewardList[idx]["subtask finished"]
                                    knife.release()
                                    # a little different
                                    agent.pickup(plate_item)
                                    agent.holding.contain(food_item)
                                else:
                                    # 切菜板上是盘子，agent拿着没切好的食物，减分
                                    self.reward[idx] += self.rewardList[idx]["metatask failed"]
                                    self.reward[2] += self.rewardList[idx]["metatask failed"]
                            elif (isinstance(knife.holding, Plate) or isinstance(knife.holding, DirtyPlate)) and (isinstance(agent.holding, Plate) or isinstance(agent.holding, DirtyPlate)):
                                # 切菜板上是盘子，agent拿着盘子
                                self.reward[idx] += self.rewardList[idx]["metatask failed"]
                                self.reward[2] += self.rewardList[idx]["metatask failed"]
                        # 如果移动目标是配送站
                        elif target_name == "delivery":
                            # 如果agent手中拿的是盘子，起码大方向对了，需要继续判断
                            if (isinstance(agent.holding, Plate) or isinstance(agent.holding, DirtyPlate)):
                                # 如果盘子里装着food了

                                if agent.holding.containing:
                                    dishName = ""
                                    # 所有菜品名称，哪怕是组合菜，也都是按照lettuce，onion，tomato的顺序命名的，实际的组合顺序任意，但是菜名的顺序是这样的
                                    foodList = [Lettuce, BadLettuce, Onion, Tomato]

                                    # 先把foodInPlate变成[-1, -1, -1]
                                    foodInPlate = [-1] * len(foodList)
                                    # 遍历盘子中的东西，比如lettuce，tomato
                                    for f in range(len(agent.holding.containing)):
                                        for i in range(len(foodList)):
                                            # 如果是食物列表中的
                                            if isinstance(agent.holding.containing[f], foodList[i]):
                                                # 把foodInPlate对应的位置从-1变成盘子中物品的index，说明盘子中的这个物品是任务列表中所需要的item的其中之一
                                                foodInPlate[i] = f
                                    # 再次遍历
                                    for i in range(len(foodList)):
                                        # 如果盘子中的物品属于所需要的物品之一，则拼接到菜品名称中，用-分开
                                        if foodInPlate[i] > -1:
                                            dishName += agent.holding.containing[foodInPlate[i]].rawName + "-"
                                    # 最后加上salad后缀
                                    dishName = dishName[:-1] + " salad"
                                    # print(dishName)
                                    # if dishName == self.task:
                                    if dishName in self.task:

                                        # if dishName == "badlettuce salad":
                                        #     self.reward[idx] += self.rewardList["penalize using bad lettuce"]
                                        #     self.reward[2] += self.rewardList["penalize using bad lettuce"]


                                        """下面这几行代码是来判断，是否上菜的时候盘子是用的右侧的盘子，蔬菜用的是左侧的蔬菜"""
                                        # print(agent.holding)
                                        # print(agent.holding.containing)
                                        # print(self.right_side_plate)
                                        # print(self.left_side_lettuce)
                                        # print(agent.holding == self.right_side_plate)
                                        # print(agent.holding.containing[0] == self.left_side_lettuce)
                                        if agent.holding == self.right_side_plate and agent.holding.containing[0] == self.left_side_lettuce:
                                            self.collaborated = True


                                        item = agent.holding

                                        # if isinstance(agent.holding, DirtyPlate):
                                        #     self.usedirtyplate = True


                                        # agent放下手中的东西
                                        agent.putdown(target_x, target_y)

                                        # print(agent.holding)

                                        # 让上菜处hold一下agent手中的菜，其实就是上菜成功的意思
                                        # 如果注释掉，就是去掉上菜的视觉效果
                                        # self.delivery[0].hold(item)






                                        """下面的代码也是让蔬菜进行刷新"""
                                        food = item.containing
                                        # 盘子release，刷新
                                        item.release()
                                        item.refresh()
                                        self.map[item.x][item.y] = ITEMIDX[item.name]
                                        # 所有盘子中的food刷新，我发现有个核心代码模式就是，self.map[x][y] = ITEMIDX[name]，应该是改变或重置地图中某个位置的意思
                                        for f in food:
                                            f.refresh()
                                            self.map[f.x][f.y] = ITEMIDX[f.rawName]


                                        if self.usedirtyplate == False:
                                            self.reward[idx] += self.rewardList[idx]["correct delivery"]
                                            # self.reward[2] += self.rewardList["correct delivery"]
                                            # done = True
                                        else:
                                            """既要加分"""
                                            self.reward[idx] += self.rewardList[idx]["correct delivery"]
                                            # self.reward[2] += self.rewardList["correct delivery"]
                                            """也要减分"""
                                            self.reward[idx] += self.rewardList[idx]["penalize using dirty plate"]
                                            self.reward[2] += self.rewardList[idx]["penalize using dirty plate"]


                                        """下面是多任务菜单的代码，先取消掉"""
                                        # # print(dishName)
                                        # # print(self.taskCompletionStatus)
                                        # """如果是多recipe的，不能只是完成一个recipe就done了"""
                                        # index = TASKLIST.index(dishName)
                                        # if self.taskCompletionStatus[index] > 0:  # 确保不会减成负值
                                        #     self.taskCompletionStatus[index] -= 1
                                        #     # if self.collaborated == True:
                                        #     #     self.reward += self.rewardList["correct delivery"]
                                        #     #     print('Collaborated!')

                                        #     if self.usedirtyplate == False:
                                        #         self.reward[idx] += self.rewardList["correct delivery"]
                                        #     else:
                                        #         self.reward[idx] += self.rewardList["penalize using dirty plate"]

                                        #     # print(self.taskCompletionStatus[index])
                                        #     # print('Done one task')
                                        # else:
                                        #     self.reward[idx] += self.rewardList["wrong delivery"]
                                        #     # print('overdone')

                                        # # if all(value == 0 for value in self.taskCompletionStatus):
                                        # #     # if self.collaborated == True:
                                        # #     #     self.reward += self.rewardList["correct delivery"]

                                        # #     self.reward[idx] += self.rewardList["correct delivery"]

                                        #     # print('Completed!')
                                        #     """取消掉完成一次任务就done的设置"""
                                        #     # done = True



                                        #     # print(done)
                                        #     # print('!!!!!!!!!')
                                        # # print(self.taskCompletionStatus)
                                    else:
                                        self.reward[idx] += self.rewardList[idx]["wrong delivery"]
                                        self.reward[2] += self.rewardList[idx]["wrong delivery"]
                                        item = agent.holding
                                        agent.putdown(target_x, target_y)
                                        food = item.containing
                                        # 盘子release，刷新
                                        item.release()
                                        item.refresh()
                                        self.map[item.x][item.y] = ITEMIDX[item.name]
                                        # 所有盘子中的food刷新，我发现有个核心代码模式就是，self.map[x][y] = ITEMIDX[name]，应该是改变或重置地图中某个位置的意思
                                        for f in food:
                                            f.refresh()
                                            self.map[f.x][f.y] = ITEMIDX[f.rawName]
                                # 如果盘子里是空的，那是一种wrong delivery
                                else:
                                    self.reward[idx] += self.rewardList[idx]["wrong delivery"]
                                    self.reward[2] += self.rewardList[idx]["wrong delivery"]
                                    plate = agent.holding
                                    # agent放下手中的东西，手头变空
                                    agent.putdown(target_x, target_y)
                                    # 下面两行是刷新盘子
                                    plate.refresh()
                                    self.map[plate.x][plate.y] = ITEMIDX[plate.name]
                            # 如果把food直接deliver了，则是一种wrong delivery，此时会扣分，同时刷新（1）agent手中东西放下变空，（2）food刷新位置，
                            else:
                                self.reward[idx] += self.rewardList[idx]["wrong delivery"]
                                self.reward[2] += self.rewardList[idx]["wrong delivery"]
                                food = agent.holding
                                # agent放下手中的东西，手头变空
                                agent.putdown(target_x, target_y)
                                # 下面两行是刷新food
                                food.refresh()
                                self.map[food.x][food.y] = ITEMIDX[food.rawName]

                        # 如果移动目标是食物，则只有（1）agent手中拿着盘子，（2）食物已经切好了，才能执行put down的操作。pickup当然没问题，但是put down只有满足这个条件才能进行
                        elif target_name in ["tomato", "lettuce", "badlettuce" "onion"]:
                            item = self._findItem(target_x, target_y, target_name)
                            if item.chopped and (isinstance(agent.holding, Plate) or isinstance(agent.holding, DirtyPlate)):
                                self.reward[idx] += self.rewardList[idx]["subtask finished"]
                                self.reward[2] += self.rewardList[idx]["subtask finished"]
                                agent.holding.contain(item)
                                self.map[target_x][target_y] = ITEMIDX["counter"]
                            elif not item.chopped and (isinstance(agent.holding, Plate) or isinstance(agent.holding, DirtyPlate)):
                                # 如果食物没切好就想去装盘，减分
                                self.reward[idx] += self.rewardList[idx]["metatask failed"]
                                self.reward[2] += self.rewardList[idx]["metatask failed"]
                            elif isinstance(agent.holding, Food):
                                # 面向食物，agent手里拿的也是食物，减分
                                self.reward[idx] += self.rewardList[idx]["metatask failed"]
                                self.reward[2] += self.rewardList[idx]["metatask failed"]

            # for idx, agent in enumerate(self.agent)循环结束，所有agent的action都执行完了
            all_action_done = True

            # 只要还有agent没move，all_action_done就不能为True
            for agent in self.agent:
                if agent.moved == False:
                    all_action_done = False
        


        # print(np.array(self.map))
        # 返回经典4个变量
        """我知道了，这里是把reward直接复制了一份，赋予了两个agent，我可以单独维护每一个agent的reward"""
        # return self._get_obs(), [self.reward] * self.n_agent, done, info
        """[你的reward就是我的reward]"""
        # return self._get_obs(), [self.reward[0], self.reward[0]], done, info
        """[我自己的reward才是我自己的reward]"""
        # return self._get_obs(), [self.reward[0], self.reward[1]], done, info
        """[你的reward占据我的reward的一半]"""
        # return self._get_obs(), [self.reward[0] + self.reward[1], self.reward[0] + self.reward[1]], done, info
        """[左边这个reward是团队的reward，右边这个reward是左边的reward（它以为的）]"""
        """我目前的ABI Producing Agent就是靠这个reward训练出来的"""
        # return self._get_obs(), [self.reward[2], self.reward[0]*0.5 + self.reward[2]*0.5], done, info
        """[左右都是团队+个人reward，一半一半]，我的Ego agent就是这样训练出来的"""
        # return self._get_obs(), [self.reward[0]*0.5 + self.reward[2]*0.5, self.reward[0]*0.5 + self.reward[2]*0.5], done, info
        """让右侧极度benevolent"""
        # return self._get_obs(), [self.reward[0]*0.5 + self.reward[2]*0.5, self.reward[0]], done, info
        """让右侧极度NON benevolent"""
        # return self._get_obs(), [self.reward[0]*0.5 + self.reward[2]*0.5, self.reward[1]], done, info
        """让右侧极度NON benevolent"""
        # return self._get_obs(), [self.reward[0], self.reward[1]], done, info
        """让Ego agent以全队利益出发"""
        # return self._get_obs(), [self.reward[0]*0.5 + self.reward[2]*0.5, self.reward[1]], done, info


        """左high benevolence，右high benevolence"""
        # return self._get_obs(), [self.reward[0]*0.5 + self.reward[1]*0.5, self.reward[0]*0.5 + self.reward[1]*0.5], done, info
        """左low benevolence，右high benevolence"""
        # return self._get_obs(), [self.reward[0], self.reward[0]*0.5 + self.reward[1]*0.5], done, info
        """左high benevolence，右low benevolence"""
        # return self._get_obs(), [self.reward[0]*0.5 + self.reward[1]*0.5, self.reward[1]], done, info
        # return self._get_obs(), [self.reward[1], self.reward[1]], done, info
        """左low benevolence，右low benevolence"""
        return self._get_obs(), [self.reward[0], self.reward[1], self.reward[2]], done, info
    
        """团队reward，训练SP agent用的"""
        # return self._get_obs(), [self.reward[2], self.reward[2]], done, info


    # Render to the game interface.
    def render(self, intention=None, mode='human', abi_values=None):
        """
        Draw the environment by delegating to ``self.game.on_render``.

        Parameters
        ----------
        intention : list, optional
            Forwarded as the first argument of ``self.game.on_render``.
            When omitted (or ``None``), a fresh ``[(0, 0)]`` is used.
        mode : str
            Accepted so the signature keeps a ``mode`` argument; this method
            does not read it.
        abi_values : optional
            Forwarded unchanged as the second argument of
            ``self.game.on_render``.

        Returns
        -------
        Whatever ``self.game.on_render`` returns.
        """
        if intention is None:
            # Build the default per call. A list literal in the signature is
            # created once and shared by every call, so an in-place change
            # made by the renderer would leak into all later renders.
            intention = [(0, 0)]
        return self.game.on_render(intention, abi_values)
