import pdb
import re
import numpy as np
import traceback
from critic.ppo_llm_pomdp import Twosome
from llm2planner import llm2planner
from datetime import datetime
import random
RED = "\033[31m"
BLUE = "\033[34m"
GREEN = "\033[32m"
YELLOW = "\033[33m"
RESET = "\033[0m"
import os
import sys
import json
from copy import deepcopy

def extract_receptacles_and_task(text):
    """Pull the ``<word> <number>`` mentions and the task sentence out of ``text``.

    Args:
        text: Observation text that contains a ``Your task is to:`` sentence.

    Returns:
        A ``(receptacles, task)`` tuple: every ``<word> <number>`` match in
        order of appearance, and the text from ``Your task is to:`` to the end
        of that line.

    Raises:
        AttributeError: If ``text`` has no ``Your task is to:`` sentence.
    """
    found_receptacles = re.findall(r"\b\w+ \d+\b", text)
    task_match = re.search(r"Your task is to:.*", text)
    return found_receptacles, task_match.group(0)

def extract_object_and_receptacle(s, task_id):
    """Parse the object type and receptacle type out of a task sentence.

    Recognised phrasings per ``task_id``:
        1: put a X in Y. / put some X on Y.
        2: examine the X with the desklamp. / look at X under the desklamp.
        3: put a clean X in Y. / clean some X and put it in Y.
        4: heat some X and put it in Y. / put a hot X in Y.
        5: cool some X and put it in Y. / put a cool X in Y.
        6: put two X in Y. / find two X and put them in Y.

    Args:
        s: Task sentence; a leading ``Your task is to: `` and one trailing
            ``.`` are dropped before matching.
        task_id: Integer 1-6 selecting which phrasings to try.

    Returns:
        ``(objecttype, receptacletype)`` with surrounding whitespace stripped,
        or ``(None, None)`` when the sentence matches none of the phrasings
        for ``task_id`` (including an unknown ``task_id``).

    Raises:
        AssertionError: For task 2 when the parsed receptacle is not
            ``desklamp``.
    """
    # (prefix, separator) pairs, tried in order for the given task id.
    templates = {
        1: (('put a ', ' in '), ('put some ', ' on ')),
        2: (('examine the ', ' with the '), ('look at ', ' under the ')),
        3: (('put a clean ', ' in '), ('clean some ', ' and put it in ')),
        4: (('heat some ', ' and put it in '), ('put a hot ', ' in ')),
        5: (('cool some ', ' and put it in '), ('put a cool ', ' in ')),
        6: (('put two ', ' in '), ('find two ', ' and put them in ')),
    }

    if s.startswith("Your task is to: "):
        s = s[len("Your task is to: "):]
    if s.endswith('.'):
        s = s[:-1]

    for prefix, separator in templates.get(task_id, ()):
        if s.startswith(prefix) and separator in s:
            # Split on the first occurrence of the separator.
            split_at = s.find(separator)
            objecttype = s[len(prefix):split_at].strip()
            receptacletype = s[split_at + len(separator):].strip()
            break
    else:
        # Nothing matched: report that explicitly instead of failing on
        # None.strip() as the previous implementation did.
        return None, None

    if task_id == 2:
        assert receptacletype == "desklamp", \
            f"task 2 expects a desklamp, got {receptacletype!r}"

    return objecttype, receptacletype

def process_ob(ob):
    """Drop the leading ``You arrive at loc N.`` sentence from an observation.

    Args:
        ob: Observation text.

    Returns:
        The text after the first ``'. '`` when ``ob`` starts with
        ``'You arrive at loc '``; otherwise ``ob`` unchanged. ``ob`` is also
        returned unchanged when it has the prefix but no ``'. '`` to cut at.
    """
    if not ob.startswith('You arrive at loc '):
        return ob
    _, separator, remainder = ob.partition('. ')
    # Without this guard a missing '. ' used to yield find() == -1, and the
    # resulting slice silently removed the first character of the observation.
    return remainder if separator else ob

def ends_with_space_and_number(s):
    """Return True when ``s`` ends with whitespace followed by digits (e.g. ``'apple 2'``)."""
    return re.search(r'\s+\d+$', s) is not None

def remove_trailing_space_and_number(s):
    """Replace a trailing ``<whitespace><digits>`` suffix of ``s`` with ``'type '``.

    Despite the name, the suffix is substituted rather than merely removed:
    ``'apple 22'`` becomes ``'appletype '``. Strings without such a suffix are
    returned unchanged.
    """
    # Regular expression matching the whitespace and digits at the end of the string.
    trailing_index = re.compile(r'\s+\d+$')

    # Substitute the matched suffix with the 'type ' marker.
    return trailing_index.sub('type ', s)

def name2type(name):
    """Normalise an entity name to its type key of the form ``'<base>type '``.

    ``name`` may look like ``'apple'``, ``'apple 22'`` or ``'appletype '``.
    Returns None when ``name`` is not a string.
    """
    if not isinstance(name, str):
        return None
    suffix = 'type '
    candidate = remove_trailing_space_and_number(name)
    # A numbered name or an existing type key already carries the suffix.
    return candidate if candidate.endswith(suffix) else candidate + suffix


# Entity class: represents an object/receptacle in the environment, including its properties
class Entity:
    """Property bag for a single object or receptacle.

    Every known property starts as None and is then overwritten by the keyword
    arguments; keyword arguments outside the known set are stored as extra
    attributes. ``name`` and ``type`` must end up non-None.
    """

    def __init__(self, **kwargs):
        # Declared in a fixed order because to_dict() exposes the instance
        # attributes in insertion order.
        known_fields = (
            'name', 'loc', 'in_on',
            'ishot', 'iscool', 'isclean',
            'isopen', 'ison', 'istoggled',
            'pickupable', 'openable', 'toggleable',
            'heatable', 'coolable', 'cleanable',
            'isobject', 'isreceptacle', 'isreceptacleobject',
            'type', 'checked',
        )
        for field_name in known_fields:
            setattr(self, field_name, None)
        for field_name, field_value in kwargs.items():
            setattr(self, field_name, field_value)

        assert self.name is not None
        assert self.type is not None

    def to_dict(self):
        """Return a shallow copy of the instance attributes as a dict."""
        return dict(self.__dict__)

    def istype(self, etype: str):
        """Return whether this entity's ``type`` equals ``etype``."""
        return self.type == etype

# Entitys class stores all the entities in the environment
class Entitys:
    """Registry of seen entities, indexed both by name and by type.

    Attributes:
        entitys: Maps an entity name (e.g. ``'apple 2'``) to its Entity.
        entitys_type: Maps a type key (e.g. ``'appletype '``) to the list of
            entity names registered under it.
        was_near_receptacles: Set of names recorded by callers; not modified
            by this class itself.
    """

    def __init__(self) -> None:
        self.entitys = {}
        self.entitys_type = {}
        self.was_near_receptacles = set()

    def add_entity(self, entity: Entity):
        """Register ``entity`` by name and append its name to its type's list."""
        self.entitys[entity.name] = entity
        self.entitys_type.setdefault(entity.type, []).append(entity.name)

    def update_entity(self, entity: Entity):
        """Replace the stored entity of the same name; the type index is untouched."""
        self.entitys[entity.name] = entity

    def __contains__(self, entity_name):
        """Accept a full name ('apple 2'), a bare type ('apple') or a type key ('appletype ')."""
        if entity_name in self.entitys:
            return True
        return (entity_name + "type ") in self.entitys_type or entity_name in self.entitys_type

    def __getitem__(self, entity_name):
        """Look an entity up by its exact name (KeyError when unknown)."""
        return self.entitys[entity_name]

    def check_entity(self, entity_name):
        """Resolve ``entity_name`` to an Entity, a list of Entity, or None.

        An exact name yields the single Entity. Otherwise the argument is tried
        as a bare type ('apple') and then as a type key ('appletype '), each
        yielding the list of all entities of that type.
        """
        if entity_name in self.entitys:
            return self.entitys[entity_name]
        for type_key in (entity_name + "type ", entity_name):
            if type_key in self.entitys_type:
                return [self.entitys[member] for member in self.entitys_type[type_key]]
        return None

    def values(self):
        """Return a view over all stored Entity objects."""
        return self.entitys.values()


class CanContain:
    """Maps each ``rtype`` key to the list of ``otype`` values registered for it."""

    def __init__(self) -> None:
        self.can_contain = {}

    def add_can_contain(self, rtype, otype):
        """Record ``otype`` under ``rtype``, ignoring duplicates."""
        registered = self.can_contain.setdefault(rtype, [])
        if otype not in registered:
            registered.append(otype)

    def check_can_contain(self, rtype):
        """Return the list stored for ``rtype``, or None when there is none.

        ``rtype`` may be a bare name, a numbered name or a type key; it is
        normalised with ``name2type`` before the lookup.
        """
        return self.can_contain.get(name2type(rtype))
        

class CanbeContained:
    """Maps each ``otype`` key to the list of ``rtype`` values registered for it."""

    def __init__(self) -> None:
        self.canbe_contained = {}

    def add_canbe_contained(self, otype, rtype):
        """Record ``rtype`` under ``otype``, ignoring duplicates."""
        registered = self.canbe_contained.setdefault(otype, [])
        if rtype not in registered:
            registered.append(rtype)

    def check_canbe_contained(self, otype):
        """Return the list stored for ``otype``, or None when there is none.

        ``otype`` may be a bare name, a numbered name or a type key; it is
        normalised with ``name2type`` before the lookup.
        """
        return self.canbe_contained.get(name2type(otype))


class RandomCritic():
    """Baseline critic that picks a candidate skill index at random."""

    def __init__(self) -> None:
        pass

    def select_skill(self, obs, done, reward, chosen_skills):
        """Return a random index into ``chosen_skills``; the other arguments are ignored."""
        num_candidates = len(chosen_skills)
        return np.random.choice(num_candidates)

    def update(self):
        """No-op: this critic has nothing to learn."""
        return None
    
    
class TwoSomeCritic():
    """Critic that delegates skill selection to a ``Twosome`` score model."""

    def __init__(
        self,
        time_stamp,
        record_path,
        task_id=None,
    ) -> None:
        """Create the score model.

        ``time_stamp`` and ``task_id`` are accepted but not forwarded; only
        ``record_path`` reaches ``Twosome``.
        """
        self.score_model = Twosome(
            resume=False,
            load_path=None,
            record_path=record_path,
            infer=False
        )

    def select_skill(self, obs, done, reward, chosen_skills, task):
        """Ask the score model which candidate skill to run.

        Args:
            obs: Iterable of observation strings, joined with newlines.
            done: Iterable of flags, collapsed with ``any``.
            reward: Iterable of numbers, collapsed with ``sum``.
            chosen_skills: Iterable of string sequences; each is joined with ', '.
            task: Passed through unchanged.

        Returns:
            Whatever ``score_model.critic(message, return_value=True)`` returns.
        """
        text_obs = "\n".join(obs)
        is_done = any(done)
        total_reward = sum(reward)
        skill_texts = [', '.join(skill) for skill in chosen_skills]
        message = {
            "text_obs": text_obs,
            "done": is_done,
            "reward": total_reward,
            "chosen_skills": skill_texts,
            "task": task
        }
        return self.score_model.critic(message, return_value=True)

    def update(self):
        """Trigger an update step on the score model."""
        self.score_model.update()


class Agent:
    def __init__(self, env, task_id=1, llm_model_name="gpt-3.5-turbo") -> None:
        """Wire the agent to its environment, planner and critic.

        Args:
            env: Environment object used later by ``reset``/``step``.
            task_id: Forwarded to the planner and stored on the agent.
            llm_model_name: Model name forwarded to the planner.
        """
        self.env = env
        self.interaction_history = {
            field: [] for field in ('actions', 'observations', 'reward', 'done')
        }

        planner_settings = dict(
            model_name=llm_model_name,
            temperature=0.6,
            num_repeat_sample=3,
            time_stamp=datetime.now().strftime("%Y-%m-%d-%H-%M-%S"),
            task_id=task_id,
        )
        self.llm2planner = llm2planner(**planner_settings)
        self.llm2planner.warm_up()

        # The critic records into the planner's ``planners_path``; it is given
        # neither a time stamp nor a task id.
        self.critic = TwoSomeCritic(
            time_stamp=None,
            record_path=self.llm2planner.planners_path,
            task_id=None,
        )

        self.last_inter_idx = 0
        self.inter_idx = 0
        # One error counter per planner slot (three slots).
        self.error_planner = dict.fromkeys(range(3), 0)
        self.record = {}
        self.record_idx = 0
        self.task_id = task_id
        
    def _record(self, specific_task_name, is_success, success_rate):
        if specific_task_name not in self.record[self.record_idx]["Eps History"]:
            self.record[self.record_idx]["Eps History"][specific_task_name] = []
        self.record[self.record_idx]["Eps History"][specific_task_name].append(is_success)
        self.record[self.record_idx]["Success Rate"] = success_rate
        self.record[self.record_idx]["Program"] = deepcopy(self.old_program)
        
    def _store_record(self):
        """Write ``self.record`` as indented JSON to ``eps_record.json`` in the planner's path."""
        record_file = os.path.join(self.llm2planner.planners_path, 'eps_record.json')
        with open(record_file, 'w') as handle:
            json.dump(self.record, handle, indent=4)
        
    def name2type(self, name):
        """Normalise an entity name to its type key (``'<base>type '``).

        ``name`` may look like ``'apple'``, ``'apple 22'`` or ``'appletype '``;
        None is returned when it is not a string. This method delegates to the
        module-level ``name2type`` so the two cannot drift apart.
        """
        return name2type(name)
    
    def reset_evolve(self):
        self.evolve_eps = 0
        self.evolve_inter_history = {}
        
    def step_evolve(self):
        self.evolve_inter_history[self.evolve_eps] = {'actions': [], 'observations': [], 'reward': [], 'done': []}
        
    def evolve_text(self, success_rate):
        texts = []
        start_idx = len(self.evolve_inter_history) - 5 - 1
        end_idx = len(self.evolve_inter_history) - 1
        for idx, last_idx in enumerate(range(start_idx, end_idx, 1)):
            acts = self.evolve_inter_history[last_idx]['actions']
            obss = self.evolve_inter_history[last_idx]['observations']
            d = any(self.evolve_inter_history[last_idx]['reward'])
            text = f"Episode {idx + 1}:\n"
            text += f"Obs: {obss[0]}\n"
            for a, o in zip(acts[1:], obss[1:]):
                # a[0] is do nothing, we ignore it
                text += f"Act: {a}\nObs: {o}\n"
            text += f"Success or not? {d}\n\n"
            texts.append(text)
        return "\n".join(texts)

    def reset_var(self):
        """Forget per-episode state: seen entities, containment tables, held item, location, outcome and queued commands."""
        self.seen_entitys = Entitys()
        self.holding = self.location = None
        self.can_contain = CanContain()
        self.canbe_contained = CanbeContained()
        self.is_success = None
        self.reset_commands()
    
    def reset_commands(self):
        self.planner_idx = 0
        self.commands_str = [[] for _ in range(3)]
        self.commands = [[] for _ in range(3)]
    
    def reset(self):
        """Reset agent state and the environment, then record the first observation.

        Returns:
            ``(obs, infos, task_name, specific_task_name)`` where ``obs`` is the
            cleaned initial observation, ``task_name`` is built from two path
            components of ``infos['extra.gamefile'][0]`` and
            ``specific_task_name`` is the ``Your task is to: ...`` sentence.
        """
        self.reset_var()
        raw_obs, infos = self.env.reset()
        # Keep everything after the first blank-line-separated paragraph.
        paragraphs = raw_obs[0].split('\n\n')
        obs = process_ob('\n'.join(paragraphs[1:]))
        self.add_to_history(None, obs, 0, False)

        gamefile_parts = infos['extra.gamefile'][0].split('/')
        task_name = '/'.join(gamefile_parts[-3:-1])
        # Only the task sentence is used; the receptacle list is discarded.
        _receptacles, specific_task_name = extract_receptacles_and_task(obs)

        self.init_entitys()
        self.specific_task_name = specific_task_name
        return obs, infos, task_name, specific_task_name
    
    def step(self, action):
        """Send one action to the environment and refresh the entity tables.

        Returns:
            ``(observation, reward, done, info)``: the cleaned first
            observation, ``info['won'][0]`` as the reward, the first done flag,
            and the unmodified ``info`` object.
        """
        raw_obs, _env_reward, raw_done, info = self.env.step([action])
        # The reward handed back is the environment's 'won' flag, not its raw reward.
        result = (process_ob(raw_obs[0]), info['won'][0], raw_done[0], info)
        self.update_entitys()
        return result
    
    def init_new_entity(self, entity_name: str):
        """Build an Entity for ``entity_name`` from the environment's current facts.

        Every fact whose first argument is named ``entity_name`` contributes
        one property. A ``holds`` fact additionally updates ``self.holding``.

        Args:
            entity_name: Name of the entity to describe.

        Returns:
            A new ``Entity`` carrying the collected properties.

        Raises:
            Exception: If a fact has neither one nor two arguments, or if a
                fact about this entity has an unrecognised name. (A debugger
                breakpoint used to fire before the latter; it was removed so
                unattended runs fail cleanly instead of blocking.)
        """
        # Facts whose presence alone fixes a property value.
        fixed_values = {
            "not_receptacleatlocation": ("loc", None),
            "not_objectatlocation": ("loc", None),
            "openable": ("openable", True),
            "opened": ("isopen", True),
            "not_opened": ("isopen", False),
            "not_inreceptacle": ("in_on", None),
            "not_inreceptacleobject": ("in_on", None),
            "isreceptacleobject": ("isreceptacleobject", True),
            "isreceptacleobjectfull": ("isreceptacleobjectfull", True),
            "checked": ("checked", True),
            "full": ("full", True),
            "not_full": ("full", False),
            "isclean": ("isclean", True),
            "not_isclean": ("isclean", False),
            "cleanable": ("cleanable", True),
            "ishot": ("ishot", True),
            "not_ishot": ("ishot", False),
            "heatable": ("heatable", True),
            "iscool": ("iscool", True),
            "not_iscool": ("iscool", False),
            "coolable": ("coolable", True),
            "pickupable": ("pickupable", True),
            "moveable": ("moveable", True),
            "toggleable": ("toggleable", True),
            "ison": ("ison", True),
            "not_ison": ("ison", False),
            "istoggled": ("istoggled", True),
            # Both spellings are accepted; which one the environment emits is unconfirmed.
            "not_toggled": ("istoggled", False),
            "not_istoggled": ("istoggled", False),
            "sliceable": ("sliceable", True),
            "issliced": ("issliced", True),
        }
        # Facts whose second argument's name is the property value.
        second_arg_values = {
            "receptacleatlocation": "loc",
            "objectatlocation": "loc",
            "inreceptacle": "in_on",
            "inreceptacleobject": "in_on",
        }
        # Facts that flag the entity kind and carry its type as second argument.
        kind_flags = {
            "receptacletype": "isreceptacle",
            "objecttype": "isobject",
        }
        # Facts that are recognised but deliberately contribute nothing.
        ignored = frozenset((
            "wasinreceptacle",
            "examined",
            "cancontain",
            "holdsany",
            "holdsanyreceptacleobject",
        ))

        entity_info = {"name": entity_name}
        for proposition in self.env.batch_env.envs[0].state.facts:
            if len(proposition.arguments) == 2:
                v_entity, v_type = proposition.arguments
            elif len(proposition.arguments) == 1:
                # NOTE(review): v_type keeps its value from an earlier fact here;
                # the branches below assume facts that need it have two arguments.
                v_entity = proposition.arguments[0]
            else:
                raise Exception(f"Invalid proposition: {proposition}")

            if v_entity.name != entity_name:
                continue

            fact = proposition.name
            if fact in fixed_values:
                key, value = fixed_values[fact]
                entity_info[key] = value
            elif fact in second_arg_values:
                entity_info[second_arg_values[fact]] = v_type.name
            elif fact in kind_flags:
                entity_info[kind_flags[fact]] = True
                entity_info["type"] = v_type.name
            elif fact == "holds":
                self.holding = v_type.name  # agent holds v_type.name
            elif fact not in ignored:
                raise Exception(f"Not implemented for proposition: {proposition.name}")

        return Entity(**entity_info)
    
    def init_entitys(self):
        for entity_name in self.env.batch_env.envs[0].state.seen_entity:
            # entity_name is a str name
            if entity_name in self.seen_entitys:
                continue
            
            self.seen_entitys.add_entity(self.init_new_entity(entity_name))
        
        self.update_contain_and_loc()
        
    def update_entitys(self):
        # pdb.set_trace()
        if self.env.batch_env.envs[0].state.seen_entity is None:
            return # do nothing as nothing happened in last step
        for entity_name in self.env.batch_env.envs[0].state.seen_entity:
            # entity_name is a str name
            if entity_name in self.seen_entitys:
                self.seen_entitys.update_entity(self.update_entity(entity_name))
            else:
                self.seen_entitys.add_entity(self.init_new_entity(entity_name))

        self.update_contain_and_loc()
    
    def update_contain_and_loc(self):
        # update can_contain         
        for proposition in self.env.batch_env.envs[0].state.facts:
            if proposition.name == "cancontain":
                v_entity, v_type = proposition.arguments
                self.can_contain.add_can_contain(v_type.name, v_entity.name)
                self.canbe_contained.add_canbe_contained(v_entity.name, v_type.name)
            
            if proposition.name == "atlocation":
                v_entity, v_type = proposition.arguments
                self.location = v_type.name
    
    def update_entity(self, entity_name):
        return self.init_new_entity(entity_name)
        
    # Note down the history of interactions with the environment
    def add_to_history(self, action, observation, reward, done):
        self.interaction_history['actions'].append(action)
        self.interaction_history['observations'].append(observation)
        self.interaction_history['reward'].append(1 if reward else 0)
        self.interaction_history['done'].append(done)
        
        self.evolve_inter_history[self.evolve_eps]['actions'].append(action)
        self.evolve_inter_history[self.evolve_eps]['observations'].append(observation)
        self.evolve_inter_history[self.evolve_eps]['reward'].append(1 if reward else 0)
        self.evolve_inter_history[self.evolve_eps]['done'].append(done)
        
        if hasattr(self, "test_interaction_steps"):
            if self.test_interaction_steps == {}:
                self.test_eps = 0
                self.test_interaction_steps[self.test_eps] = 1
            else:
                if done:
                    self.test_eps += 1
                    self.test_interaction_steps[self.test_eps] = 0
                else:
                    self.test_interaction_steps[self.test_eps] += 1
        
    def get_history(self, start_idx, end_idx):
        a = self.interaction_history['actions'][start_idx:end_idx]
        o = self.interaction_history['observations'][start_idx:end_idx]
        r = self.interaction_history['reward'][start_idx:end_idx]
        d = self.interaction_history['done'][start_idx:end_idx]
        return a, o, r, d
        
    # Get an observation from the environment after performing an action, and add it to the history
    def observation(self, action):
        observation, reward, done, infos = self.step(action)
        self.add_to_history(action, observation, reward, done)
        print(f'Act: {action}\nObs: {observation}')
        if done:
            self.is_success = reward
            print('Done. Success:', reward)
        return observation

    def _observation(self, command):
        command['func'](**command['kwargs'])
            
    # Go to a receptacle and update the agent's location. It returns an observation in natural language.
    # For example, 'On the countertop 1, you see a candle 1, a cloth 2, and a soapbar 1.' = goto('countertop 1')
    def goto(self, receptacle):
        # if f'go to {receptacle}' in self.admissible_commands:
        if receptacle in self.seen_entitys.entitys.keys():
            if self.seen_entitys[receptacle].isobject:
                target = self.seen_entitys[receptacle].in_on
                self.commands_str[self.planner_idx].append(f'go to {target}')
                self.commands[self.planner_idx].append({'func': self._goto, "kwargs": {"receptacle": target}})
                return
                
        self.commands_str[self.planner_idx].append(f'go to {receptacle}')
        self.commands[self.planner_idx].append({'func': self._goto, "kwargs": {"receptacle": receptacle}})
        
    def _goto(self, receptacle):
        self.seen_entitys.was_near_receptacles.add(receptacle)
        # if self.location == self.seen_entitys[receptacle].loc:
        #     return 
        
        self.observation(f'go to {receptacle}')

    # Take an object from a receptacle if the agent is not holding anything. It returns an observation in natural language.
    # For example, 'You pick up the soapbar 1 from the towelholder 1.' = take('soapbar 1', 'towelholder 1')
    def take(self, object, receptacle):
        # if self.holding is None:
            # if f'take {object} from {receptacle}' in self.admissible_commands:
        self.commands_str[self.planner_idx].append(f'take {object} from {receptacle}')
        self.commands[self.planner_idx].append({'func': self._take, "kwargs": {"object": object, "receptacle": receptacle}})
            
            
    def _take(self, object, receptacle):
        # if self.holding is None:
        observation = self.observation(f'take {object} from {receptacle}')
        if not observation.startswith('Nothing happens.'):
            self.holding = object
        
    # Put an object in or on a receptacle if the agent is holding it. It returns an observation in natural language.
    # For example, 'You put the soapbar 1 in/on the cabinet 1.' = put('soapbar 1', 'cabinet 1')
    def put(self, object, receptacle):
        # if self.name2type(self.holding) == self.name2type(object):
            # if f'put {object} in/on {receptacle}' in self.admissible_commands:
                self.commands_str[self.planner_idx].append(f'put {self.holding} in/on {receptacle}')
                self.commands[self.planner_idx].append({'func': self._put, "kwargs": {"object": self.holding, "receptacle": receptacle}})
            
    def _put(self, object, receptacle):
        # if self.name2type(self.holding) == self.name2type(object):
            observation = self.observation(f'put {self.holding} in/on {receptacle}')
            if not observation.startswith('Nothing happens.'):
                self.holding = None

    # Open a receptacle and observe its contents. It returns an observation in natural language.
    # For example, 'You open the cabinet 1. The cabinet 1 is open. In it, you see a cloth 1.' = open_receptacle('cabinet 1')
    def open_receptacle(self, receptacle):
        # if f'open {receptacle}' in self.admissible_commands:
            self.commands_str[self.planner_idx].append(f'open {receptacle}')
            self.commands[self.planner_idx].append({'func': self._open_receptacle, "kwargs": {"receptacle": receptacle}})
        
    def _open_receptacle(self, receptacle):
        self.observation(f'open {receptacle}')
        
    # Close an opened receptacle. It returns an observation in natural language.
    # For example, 'You close the safe 1.' = close_receptacle('safe 1')
    def close_receptacle(self, receptacle):
        # if f'close {receptacle}' in self.admissible_commands:
            self.commands_str[self.planner_idx].append(f'close {receptacle}')
            self.commands[self.planner_idx].append({'func': self._close_receptacle, "kwargs": {"receptacle": receptacle}})

    def _close_receptacle(self, receptacle):
        self.observation(f'close {receptacle}')
    
    # Clean an object with a receptacle. It returns an observation in natural language.
    # For example, 'You clean the soapbar 1 using the sinkbasin 1.' = clean('soapbar 1', 'sinkbasin 1')
    def clean(self, object, receptacle):
        # if f'clean {object} with {receptacle}' in self.admissible_commands:
            self.commands_str[self.planner_idx].append(f'clean {object} with {receptacle}')
            self.commands[self.planner_idx].append({'func': self._clean, "kwargs": {"object": object, "receptacle": receptacle}})
        
    def _clean(self, object, receptacle):
        self.observation(f'clean {object} with {receptacle}')

    # Heat an object with a receptacle. It returns an observation in natural language.
    # For example, 'You heat the tomato 1 using the microwave 1.' = heat('tomato 1', 'microwave 1')
    def heat(self, object, receptacle):
        # if f'heat {object} with {receptacle}' in self.admissible_commands:
            self.commands_str[self.planner_idx].append(f'heat {object} with {receptacle}')
            self.commands[self.planner_idx].append({'func': self._heat, "kwargs": {"object": object, "receptacle": receptacle}})
        
    def _heat(self, object, receptacle):
        self.observation(f'heat {object} with {receptacle}')

    # Cool an object with a receptacle. It returns an observation in natural language.
    # For example, 'You cool the pan 2 using the fridge 1.' = cool('pan 2', 'fridge 1')
    def cool(self, object, receptacle):
        # if f'cool {object} with {receptacle}' in self.admissible_commands:
            self.commands_str[self.planner_idx].append(f'cool {object} with {receptacle}')
            self.commands[self.planner_idx].append({'func': self._cool, "kwargs": {"object": object, "receptacle": receptacle}})
        
    def _cool(self, object, receptacle):
        self.observation(f'cool {object} with {receptacle}')

    # Turn_on an object. It returns an observation in natural language.
    # For example, 'You turn on the desklamp 1.' = turn_on('desklamp 1')
    def turn_on(self, object):
        # if f'use {object}' in self.admissible_commands:
            self.commands_str[self.planner_idx].append(f'use {object}')
            self.commands[self.planner_idx].append({'func': self._turn_on, "kwargs": {"object": object}})
        
    def _turn_on(self, object):
        self.observation(f'use {object}')
    
    # Report agent's current state, including its location, what it's holding, and last three actions and observations.
    # This function should only be used for assertion.
    def report(self):
        msg = \
f'''The last three interactions before error were:
Act: {self.interaction_history["actions"][-3]}
Obs: {self.interaction_history["observations"][-3]}
Act: {self.interaction_history["actions"][-2]}
Obs: {self.interaction_history["observations"][-2]}
Act: {self.interaction_history["actions"][-1]}
Obs: {self.interaction_history["observations"][-1]}
I am at {self.location} and holding {self.holding}.
'''.strip()
        return msg
        
        
    def explore(self):
        # random find a seen receptacle, which was not closed before, and go to it
        # then open it if it's not opened
        receptacles = [entity.name for entity in self.seen_entitys.values() if (entity.isreceptacle and entity.name not in self.seen_entitys.was_near_receptacles)]
        entity_name = np.random.choice(receptacles)
        self.goto(entity_name)
        if self.seen_entitys[entity_name].openable and not self.seen_entitys[entity_name].isopen:
            self.open_receptacle(entity_name)
    
    # def _explore(self):
    #     # random find a seen receptacle, which was not closed before, and go to it
    #     # then open it if it's not opened
    #     receptacles = [entity.name for entity in self.seen_entitys.values() if (entity.isreceptacle and entity.name not in self.seen_entitys.was_near_receptacles)]
    #     entity_name = np.random.choice(receptacles)
    #     self._goto(entity_name)
    #     if self.seen_entitys[entity_name].openable and not self.seen_entitys[entity_name].isopen:
    #         self._open_receptacle(entity_name)
            
    def find_canbe_contained(self, objecttype: str):
        """Return whatever ``self.canbe_contained.check_canbe_contained`` reports for ``objecttype``.

        Args:
            objecttype: Object type (or name) to look up.
        """
        lookup = self.canbe_contained
        return lookup.check_canbe_contained(objecttype)
    
    def find_object(self, objecttype: str):
        """Look up one seen entity matching ``objecttype``.

        Args:
            objecttype: A type or a concrete name, e.g. ``'apple'`` or ``'apple 2'``.

        Returns:
            ``None`` when ``check_entity`` finds nothing; the entity itself
            when a single match comes back; otherwise a random element of the
            returned list (drawn with ``np.random.choice``).
        """
        match = self.seen_entitys.check_entity(objecttype)
        if match is None:
            return None
        if not isinstance(match, list):
            return match
        # Random pick among several matches (used by the two-object task).
        return np.random.choice(match)
    
    def find_objects(self, objecttype: str):
        """Look up all seen entities matching ``objecttype``.

        Args:
            objecttype: A type or a concrete name, e.g. ``'apple'`` or ``'apple 2'``.

        Returns:
            ``None`` when ``check_entity`` finds nothing, otherwise a list:
            the list returned by ``check_entity`` as-is, or a one-element
            list wrapping a single match.
        """
        found = self.seen_entitys.check_entity(objecttype)
        if found is None:
            return None
        return found if isinstance(found, list) else [found]
    
    def find_receptacle(self, receptacletype: str):
        """Look up one seen receptacle matching ``receptacletype``.

        Args:
            receptacletype: A type or a concrete name, e.g. ``'fridge'`` or ``'fridge 1'``.

        Returns:
            ``None`` when ``check_entity`` finds nothing; the entity itself
            for a single match; the first element when a list is returned.
        """
        match = self.seen_entitys.check_entity(receptacletype)
        if match is None:
            return None
        return match[0] if isinstance(match, list) else match
    
    def find_receptacles(self, receptacletype: str):
        """Look up all seen receptacles matching ``receptacletype``.

        Args:
            receptacletype: A type or a concrete name, e.g. ``'fridge'`` or ``'fridge 1'``.

        Returns:
            ``None`` when ``check_entity`` finds nothing, otherwise a list:
            the list returned by ``check_entity`` as-is, or a one-element
            list wrapping a single match.
        """
        found = self.seen_entitys.check_entity(receptacletype)
        if found is None:
            return None
        if isinstance(found, list):
            return found
        return [found]
    
    
    def add_method(self, text_list, learn):
        """Compile planner source snippets and register them as this task's planners.

        Each snippet's first line (expected to be the ``def`` line) gets an
        ``_<idx>`` suffix inserted before its first ``(`` so that several
        variants of the same planner can coexist. The snippet is then
        ``exec``-ed with ``self.__dict__`` as its globals, which stores the
        resulting function as a plain (unbound) attribute on the instance.
        Finally ``self.planner_name`` is set to the three suffixed planners
        that correspond to ``self.task_id``.

        Args:
            text_list: Iterable of planner source strings.
            learn: When truthy, a failing snippet triggers
                ``self.llm2planner.regenerate_planner`` and the whole list is
                retried. When falsy, the failure is re-raised, because nothing
                would change between retries and the loop could never finish.

        Raises:
            Exception: If ``self.task_id`` is not one of 1..6, or (when
                ``learn`` is falsy) whatever error the failing snippet raised.
            AttributeError: If an expected suffixed planner was not defined.
        """
        add_success_flag = False
        while not add_success_flag:
            try:
                for idx, text in enumerate(text_list):
                    lines = text.strip().split('\n')
                    # Rename only the function itself: limit the replacement to
                    # the first '(' so parenthesised defaults or annotations
                    # later on the def line are left intact.
                    lines[0] = lines[0].strip().replace('(', f'_{idx}(', 1)
                    # SECURITY NOTE(review): exec runs generated source with full
                    # access to this instance's attributes; the text must come
                    # from a trusted pipeline.
                    exec('\n'.join(lines), self.__dict__)
                add_success_flag = True
            except Exception as e:
                print(RED + f"Error in adding method: {e}" + RESET)
                if not learn:
                    # Without regeneration the same text would fail forever.
                    raise
                # NOTE(review): the retry re-reads the same ``text_list`` object;
                # this only helps if regenerate_planner updates it in place.
                self.llm2planner.regenerate_planner(error_planner=None, error_idx=idx)

        planner_bases = {
            1: 'pick_and_place',
            2: 'look_at_obj',
            3: 'pick_clean_then_place',
            4: 'pick_heat_then_place',
            5: 'pick_cool_then_place',
            6: 'pick_two_obj_then_place',
        }
        if self.task_id not in planner_bases:
            raise Exception(f"Invalid task_id: {self.task_id}")
        base = planner_bases[self.task_id]
        self.planner_name = [getattr(self, f'{base}_{variant}') for variant in range(3)]
                
    
    def get_planner_text(self):
        """Return the planner source texts currently held by ``self.llm2planner.core_text``."""
        return self.llm2planner.core_text
    
    def evolve_planner(self, success_rate):
        """Build the evolution summary for ``success_rate`` and hand it to the planner generator.

        Args:
            success_rate: Success rate passed through to ``self.evolve_text``.
        """
        evolve_summary = self.evolve_text(success_rate)
        generator = self.llm2planner
        generator.evolve_planner(results=evolve_summary)
        
    def regenerate_planner(self):
        """Ask the planner generator to regenerate, then reset the per-planner error counts.

        The current program text is saved to ``self.old_program`` first.
        After the call, ``self.error_planner`` is reset to zero for planner
        indices 0, 1 and 2.

        Returns:
            Whatever ``self.llm2planner.regenerate_planner`` returned.
        """
        generator = self.llm2planner
        self.old_program = generator.core_text
        was_regenerated = generator.regenerate_planner(
            error_planner=self.error_planner,
            error_idx=None,
        )
        self.error_planner = dict.fromkeys(range(3), 0)
        return was_regenerated
        
    def _solve_goal(self, num_episodes, threshold, learn=True):
        """Run one evolve round of up to ``num_episodes`` episodes.

        For every episode the planners are (re)loaded with ``add_method``,
        the environment is reset, and up to 50 planning rounds are executed
        while ``self.is_success`` is still ``None``. In each round every
        planner in ``self.planner_name`` is called to fill its command list,
        the critic picks one planner via ``select_skill``, and that planner's
        queued commands are executed through ``self._observation``.

        Args:
            num_episodes: Number of episodes to attempt in this round.
            threshold: If the final success rate is below this value (and
                ``learn`` is truthy) ``self.evolve_planner`` is called.
            learn: When truthy, the critic is updated after each planning
                round and ``self.regenerate_planner`` is consulted after
                each episode.

        Returns:
            The string ``"regenerate"`` if ``self.regenerate_planner()``
            returned a truthy value after an episode (the round stops early),
            otherwise the success rate ``success / eps`` after the last
            episode.

        NOTE(review): ``success_rate`` is only assigned inside the episode
        loop, so ``num_episodes == 0`` raises ``UnboundLocalError`` at the
        threshold check.
        """
        if learn == False:
            # Initialised here, but the line that would fill it is commented out below.
            self.test_interaction_steps = {} # {eps: steps}
            
        success = 0
        eps = 0
        self.reset_evolve()
        # Fresh record entry for this evolve round.
        self.record[self.record_idx] = {
            "Success Rate": 0,
            "Eps History": {},
            "Program": []
        }
        
        for eps_idx in range(num_episodes):
            # Reload the planner functions from the current planner text every episode.
            self.add_method(self.get_planner_text(), learn)
            plan_index = 0
            self.step_evolve()
            obs, infos, task_name, specific_task_name = self.reset()
            print(f"Episode: {eps + 1}, TaskName: {task_name}, SpecificTaskName: {specific_task_name}")
            o, r = extract_object_and_receptacle(specific_task_name, self.task_id)
            print(f"Object: {o}, Receptacle: {r}")
            # task_id 2 planners are called with the object type only; all others also get the receptacle type.
            arguments = {"objecttype": o, "receptacletype": r}
            args = {"objecttype": o}
            # Plan until the episode outcome is known, with a hard cap of 50 planning rounds.
            while self.is_success is None and plan_index < 50:
                plan_index += 1
                # if learn == False:
                #     self.test_interaction_steps[eps_idx] = plan_index
                self.reset_commands()
                for idx_sc, selfcommand in enumerate(self.planner_name):
                    try:
                        # The planner entries are called with self passed explicitly.
                        if self.task_id == 2:
                            # look at task
                            selfcommand(self, **args)
                        else:
                            selfcommand(self, **arguments)
                        self.planner_idx += 1
                    except Exception as e:
                        # A failing planner still advances planner_idx, and its
                        # error count is incremented for regenerate_planner().
                        self.planner_idx += 1
                        self.error_planner[idx_sc] += 1
                        print(YELLOW + f"Error in planner {idx_sc}: {e}" + RESET)
                        print(YELLOW + traceback.format_exc() + RESET)
                
                # Fetch the interaction history between the previous marker and now.
                self.inter_idx = len(self.interaction_history['actions'])
                action, observation, reward, done = self.get_history(self.last_inter_idx, self.inter_idx)
                # action, observation, reward, done is all a list
                # Let the critic choose which planner's queued commands to execute.
                select_plan_idx = self.critic.select_skill(
                    observation, 
                    done, 
                    reward, 
                    self.commands_str, 
                    task=self.specific_task_name
                )
                # print(BLUE + f"Select planner {select_plan_idx}" + RESET)
                for command in self.commands[select_plan_idx]:
                    try:
                        self._observation(command)
                    except Exception as e:
                        # Best effort: a failing command is reported and the remaining ones still run.
                        print(RED + f"Error in command {command}: {e}" + RESET)
                        # print(RED + traceback.format_exc() + RESET)
                        continue
                self.last_inter_idx = self.inter_idx
                if learn:
                    self.critic.update()
            
            self.evolve_eps += 1
            eps += 1
            # is_success may still be None after 50 rounds; that counts as a failure here.
            if self.is_success:
                success += 1
            
            success_rate = success / eps
            print(GREEN + f"Total Eps in current Evolve: {eps}, Success: {success}, Success Rate: {success_rate}" + RESET)

            # regenerate the planner if there is too much error in the planner
            if learn:
                regenerate_flag = self.regenerate_planner()
                if regenerate_flag:
                    # Record this episode, persist, and stop the round early
                    # so the caller can restart it.
                    self._record(specific_task_name, self.is_success, success_rate)
                    self.record_idx += 1
                    self._store_record()
                    return "regenerate"
            self._record(specific_task_name, self.is_success, success_rate)
        
        self.record_idx += 1
        self._store_record()
        
        # Only evolve the planner while learning and when the round underperformed.
        if success_rate < threshold and learn:
            self.evolve_planner(success_rate)
        return success_rate
        
        
    def solve_goal(self, num_episodes, threshold, learn=True):
        """Run ``_solve_goal`` repeatedly until it stops asking for a restart.

        Args:
            num_episodes: Forwarded to ``_solve_goal``.
            threshold: Forwarded to ``_solve_goal``.
            learn: Forwarded to ``_solve_goal``.

        Returns:
            The first ``_solve_goal`` result that is not the string ``"regenerate"``.
        """
        outcome = self._solve_goal(num_episodes, threshold, learn)
        while outcome == "regenerate":
            outcome = self._solve_goal(num_episodes, threshold, learn)
        return outcome
            
    def get_name(self):
        """Reset five episodes and print each task's name plus the parsed object/receptacle.

        No planning is performed. Each iteration calls ``step_evolve`` and
        ``reset`` and prints what ``extract_object_and_receptacle`` parses
        from the specific task name. A fresh record entry is created for the
        current ``record_idx`` first.
        """
        self.reset_evolve()
        fresh_record = {
            "Success Rate": 0,
            "Eps History": {},
            "Program": []
        }
        self.record[self.record_idx] = fresh_record

        for episode in range(5):
            self.step_evolve()
            _obs, _infos, task_name, specific_task_name = self.reset()
            print(f"Episode: {episode + 1}, TaskName: {task_name}, SpecificTaskName: {specific_task_name}")
            parsed_object, parsed_receptacle = extract_object_and_receptacle(specific_task_name, self.task_id)
            print(RED + f"Object: {parsed_object}, Receptacle: {parsed_receptacle}" + RESET)
        
        
    def test_func(self, num_episodes):
        """Run ``num_episodes`` episodes with ``self.pick_two_obj_then_place``.

        Each episode resets the environment, parses the object and receptacle
        types from the task description, and calls the planner repeatedly
        until ``self.is_success`` is no longer ``None`` or 50 attempts have
        been made.

        NOTE(review): this expects an un-suffixed ``pick_two_obj_then_place``
        attribute to exist on the instance; confirm where it is defined.

        Args:
            num_episodes: Number of episodes to run.

        Returns:
            The success rate ``success / eps`` after the last episode, or
            ``0.0`` when ``num_episodes`` is 0.
        """
        success = 0
        eps = 0
        # Defined up front so a zero-episode run returns a value instead of
        # raising UnboundLocalError.
        success_rate = 0.0
        self.reset_evolve()
        for _ in range(num_episodes):
            plan_index = 0
            self.step_evolve()
            obs, infos, task_name, specific_task_name = self.reset()
            print(f"Episode: {eps + 1}, TaskName: {task_name}, SpecificTaskName: {specific_task_name}")
            o, r = extract_object_and_receptacle(specific_task_name, self.task_id)
            print(f"Object: {o}, Receptacle: {r}")
            while self.is_success is None and plan_index < 50:
                # Count attempts so the 50-attempt cap is enforced; without
                # this the loop never ends if the outcome stays undecided.
                plan_index += 1
                self.pick_two_obj_then_place(o, r)

            eps += 1
            if self.is_success:
                success += 1

            success_rate = success / eps
            print(GREEN + f"Total Eps in current Evolve: {eps}, Success: {success}, Success Rate: {success_rate}" + RESET)
        return success_rate