# Copyright (c) 2024-present, Royal Bank of Canada.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#

import logging
from dataclasses import dataclass
from typing import Union

from ml_collections import config_dict
from tqdm import tqdm

from domains import PDDLEnv
import error_messages
from pddl_utils import PDDLObj
from utils import extract_code, get_function_from_code, harmonic_mean
import pdb

import os
import sys
sys.path.append(os.getcwd())
from PIL import Image
import torch
try:
    import torch_npu
except: 
    pass
import argparse
import pdb
import json
import numpy as np
import prompts
import time

from LLaMA_Factory.src.llamafactory.chat.chat_model import ChatModel#, run_response, run_response_image
from LLaMA_Factory.src.llamafactory.model import load_model, load_tokenizer
import yaml

# Pick the compute device once, at import time: an NPU when this torch build
# exposes an `npu` namespace and reports one available, otherwise CUDA, and
# CPU as the last resort in either branch.
# NOTE(review): the `torch_npu` import earlier in this file is wrapped in a
# bare try/except; if that import failed while `torch.npu` still exists, the
# reference to `torch_npu` below raises NameError.
if hasattr(torch, "npu"):
    DEVICE = torch.device("npu:0" if torch_npu.npu.is_available() else "cpu")
else:
    DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
import random
import re
import gc
import sys

def message_construct_llama_func(user_prompt_list, response_total_list):
    """Build a chat transcript that alternates user prompts and assistant replies.

    Every prompt becomes a ``user`` message. After each prompt except the last
    one, the reply at the same index of ``response_total_list`` is inserted as
    an ``assistant`` message, so the transcript always ends on a user turn.

    Args:
        user_prompt_list: User prompts in conversation order.
        response_total_list: Assistant replies; must hold at least
            ``len(user_prompt_list) - 1`` entries.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dictionaries.
    """
    final_turn = len(user_prompt_list) - 1
    conversation = []
    for turn, prompt in enumerate(user_prompt_list):
        conversation.append({"role": "user", "content": prompt})
        if turn != final_turn:
            # Only completed turns have a recorded assistant reply.
            conversation.append({"role": "assistant", "content": response_total_list[turn]})
    return conversation

class PlanRatings:
    """Sentinel rating values attached to a ``PlanningEvaluation``.

    Negative values mark the stage at which evaluating a candidate domain
    failed; values in ``[0.0, 1.0]`` are random-walk ratios; ``SOLUTION_FOUND``
    sits above that range.
    """
    # The model response contained no extractable modification code.
    EMPTY_CODE = -6.0
    # Applying the extracted modification to the domain returned an error.
    INVALID_MODIFICATION = -5.0
    PDDL_SANITY_ERROR = -4.0  # empty effect actions
    INVALID_DOMAIN = -3.0  # e.g., undefined predicates
    NO_PLAN = -1.0  # e.g., disconnected initial state and goal state
    # 0.0 <= rating <= 1.0: ratio of random walks that are executable
    # Used when the generated plan validates and the random-walk rating is 1.0.
    SOLUTION_FOUND = 2.0


@dataclass
class PlanningEvaluation:
    """Result of rating one candidate PDDL domain."""
    # Either a PlanRatings sentinel or a random-walk rating.
    rating: float
    # Feedback text describing what went wrong; None when there is nothing to report.
    error_msg: Union[str, None]
    # The domain object that was evaluated (a modified copy in the
    # rate_domain_modification* entry points).
    new_pddl_obj: PDDLObj
    # True only when the evaluation was rated PlanRatings.SOLUTION_FOUND.
    solution_found: bool = False


class PlanningEvaluator:
    def __init__(
            self,
            env: PDDLEnv,
            target_domain_pddl: str = None,
            target_problem_pddl: str = None,
            target_gen_problem_pddl: str = None,
            target_problem_nl: str = None,
            rw_feedback: bool = None,
            vlm_path = None,
            predicate_descriptor_py: str = None,
            exp_flags: config_dict.ConfigDict = None,
            bi_rw_feedback: bool = True,
            multimodal = False,
            sequence = False,
            gpt_client=None
    ):
        """Store the evaluation targets and load the chat model.

        Args:
            env: Planning environment used to search for and validate plans.
            target_domain_pddl: Reference domain that generated plans are
                validated against.
            target_problem_pddl: Reference problem paired with the reference domain.
            target_gen_problem_pddl: Problem handed to the planner together
                with each generated domain.
            target_problem_nl: Natural-language description of the problem.
            rw_feedback: Whether random-walk feedback is requested when
                planning fails.
            vlm_path: Path to a YAML file holding the chat-model arguments;
                it is opened unconditionally, so it must be a valid path.
            predicate_descriptor_py: Accepted but currently unused.
            exp_flags: Experiment flags, stored as-is.
            bi_rw_feedback: Stored as-is.
            multimodal: Stored as-is.
            sequence: Stored as-is.
            gpt_client: Client used by ``run_response_api``.
        """
        # Planning environment and the reference/generated task descriptions.
        self.env = env
        self.target_domain_pddl = target_domain_pddl
        self.target_problem_pddl = target_problem_pddl
        self.target_gen_problem_pddl = target_gen_problem_pddl
        self.target_problem_nl = target_problem_nl

        # Feedback switches and experiment configuration.
        self.rw_feedback = rw_feedback
        self.bi_rw_feedback = bi_rw_feedback
        self.exp_flags = exp_flags

        # Chat model, configured from the YAML arguments file.
        with open(vlm_path, 'r') as config_file:
            self.vlm_args = yaml.safe_load(config_file)
        self.chat_model = ChatModel(self.vlm_args)

        self.multimodal = multimodal
        self.sequence = sequence

        # Counters updated by run_response_image.
        self.vlm_runtime = 0
        self.vlm_numcall = 0

        self.gpt_client = gpt_client

    def rate_domain_valid(self, cur_pddl_obj: PDDLObj, gpt_output: str, img_path: str):
        """Check whether the modification in ``gpt_output`` yields a plannable domain.

        The modification is applied to a copy of ``cur_pddl_obj``; the copy is
        then sanity-checked and handed to the planner together with
        ``self.target_gen_problem_pddl``. Intermediate results are printed.

        Args:
            cur_pddl_obj: Domain object to copy and modify.
            gpt_output: Model response expected to contain python code.
            img_path: Unused by this method.

        Returns:
            ``(True, modified_copy)`` when the planner reports the domain as
            valid, otherwise ``(False, None)``.
        """
        rejected = (False, None)
        candidate = cur_pddl_obj.copy_object()

        modification_code, extract_err = self._try_extracting_python_code(gpt_output)
        if extract_err is not None:
            return rejected
        print(extract_err)

        modify_err = candidate.modify_domain(modification_code)
        print(modify_err)
        if modify_err is not None:
            return rejected

        sanity_err = candidate.sanity_check_domain()
        print(sanity_err)
        # The domain is serialised before the sanity verdict is inspected.
        domain_text = candidate.to_str()
        if sanity_err is not None:
            return rejected

        plan, domain_ok, search_err = self.env.search_plan(domain_text, self.target_gen_problem_pddl)
        print(plan, domain_ok, search_err)
        return (True, candidate) if domain_ok else rejected
        
    def rate_domain_modification_feedback(self, cur_pddl_obj: PDDLObj, target_gen_problem_pddl:str, gpt_output: str, img_path: str, turn: int, best_rating=None) -> PlanningEvaluation:
        """Apply the modification in ``gpt_output`` to a copy of the domain and rate it.

        Args:
            cur_pddl_obj: Domain object to copy and modify.
            target_gen_problem_pddl: Not read here; the instance attribute of
                the same name is what the downstream planner call uses.
            gpt_output: Model response expected to contain python code.
            img_path: Forwarded to ``rate_domain_feedback``.
            turn: Forwarded to ``rate_domain_feedback``.
            best_rating: Forwarded to ``rate_domain_feedback``.

        Returns:
            An ``EMPTY_CODE`` or ``INVALID_MODIFICATION`` evaluation when the
            modification cannot be extracted or applied, otherwise whatever
            ``rate_domain_feedback`` returns for the modified copy.
        """
        print('rate_domain_modification_feedback')
        candidate = cur_pddl_obj.copy_object()

        modification_code, extract_err = self._try_extracting_python_code(gpt_output)
        if extract_err is not None:
            print('error extracting_python_code')
            return PlanningEvaluation(rating=PlanRatings.EMPTY_CODE, error_msg=extract_err, new_pddl_obj=candidate)

        modify_err = candidate.modify_domain(modification_code)
        if modify_err is None:
            return self.rate_domain_feedback(candidate, img_path, turn, best_rating)

        print('error modify_domain')
        return PlanningEvaluation(
            rating=PlanRatings.INVALID_MODIFICATION, error_msg=modify_err, new_pddl_obj=candidate
        )

    def rate_domain_feedback(self, pddl_obj, img_path, turn, best_rating) -> PlanningEvaluation:
        """Rate a domain through the planner check plus the random-walk evaluation.

        Args:
            pddl_obj: Domain object to evaluate.
            img_path: Forwarded to the planner test and the random-walk evaluation.
            turn: Forwarded to the planner test and the random-walk evaluation.
            best_rating: Forwarded to the random-walk evaluation.

        Returns:
            A ``PDDL_SANITY_ERROR`` evaluation when the sanity check fails, a
            ``SOLUTION_FOUND`` evaluation when the plan validates and the
            random-walk rating is 1.0 (within 1e-6), otherwise an evaluation
            carrying the random-walk rating and the planner feedback.
        """
        sanity_err = pddl_obj.sanity_check_domain()
        # The domain is serialised before the sanity verdict is inspected.
        domain_text = pddl_obj.to_str()
        if sanity_err is not None:
            print('error sanity_check_domain')
            return PlanningEvaluation(rating=PlanRatings.PDDL_SANITY_ERROR, error_msg=sanity_err, new_pddl_obj=pddl_obj)

        plan_ok, feedback, _ = self._test_generated_pddl_feedback(
            domain_text, img_path, turn, rw_feedback=self.rw_feedback
        )
        rw_rating, _, _ = self.evaluate_generated_domain_with_random_walks(
            domain_text, img_path, turn, best_rating=best_rating
        )
        print('!!!!!!!!!!!!rw_rating!!!!!!!!!!!!', rw_rating)

        # Anything short of "valid plan and perfect random walks" is reported
        # with the raw random-walk rating.
        if not plan_ok or not (abs(rw_rating - 1.0) < 1e-6):
            return PlanningEvaluation(rw_rating, feedback, pddl_obj)

        return PlanningEvaluation(
            rating=PlanRatings.SOLUTION_FOUND, error_msg=None, new_pddl_obj=pddl_obj, solution_found=True
        )

    def rate_domain_modification_nofeedback(self, cur_pddl_obj: PDDLObj, target_gen_problem_pddl:str, gpt_output: str, img_path: str, turn: int, best_rating=None) -> PlanningEvaluation:
        """Apply the modification in ``gpt_output`` and rate it without random-walk feedback.

        The modified copy is sanity-checked, a plan is searched for it, the plan
        is renamed into the vocabulary of the reference domain and validated
        against ``self.target_domain_pddl`` / ``self.target_problem_pddl``.

        Args:
            cur_pddl_obj: Domain object to copy and modify.
            target_gen_problem_pddl: Not read here; the planner is called with
                ``self.target_gen_problem_pddl``.
                NOTE(review): confirm the argument is meant to be ignored.
            gpt_output: Model response expected to contain python code.
            img_path: Unused by this method.
            turn: Unused by this method.
            best_rating: Unused by this method.

        Returns:
            A ``PlanningEvaluation`` rated ``EMPTY_CODE``,
            ``INVALID_MODIFICATION`` or ``PDDL_SANITY_ERROR`` for early
            failures, ``NO_PLAN`` / ``INVALID_DOMAIN`` when no plan is found
            (domain valid / invalid respectively), and otherwise ``1.0`` when
            the converted plan validates or ``0.0`` when it does not.
        """
        print('rate_domain_modification_nofeedback')
        new_pddl_obj = cur_pddl_obj.copy_object()

        func_modification, err_msg = self._try_extracting_python_code(gpt_output)
        if err_msg is not None:
            print('error extracting_python_code')
            return PlanningEvaluation(rating=PlanRatings.EMPTY_CODE, error_msg=err_msg, new_pddl_obj=new_pddl_obj)

        error_msg = new_pddl_obj.modify_domain(func_modification)
        if error_msg is not None:
            print('error modify_domain')
            return PlanningEvaluation(
                rating=PlanRatings.INVALID_MODIFICATION, error_msg=error_msg, new_pddl_obj=new_pddl_obj
            )

        err_msg = new_pddl_obj.sanity_check_domain()
        # The domain is serialised before the sanity verdict is inspected.
        gen_pddl_str = new_pddl_obj.to_str()
        if err_msg is not None:
            print('error sanity_check_domain')
            return PlanningEvaluation(rating=PlanRatings.PDDL_SANITY_ERROR, error_msg=err_msg, new_pddl_obj=new_pddl_obj)

        gen_plan, is_domain_valid, error_msg = self.env.search_plan(gen_pddl_str, self.target_gen_problem_pddl)

        if gen_plan is None:
            # Use the named sentinels instead of bare -1 / -3.
            rating = PlanRatings.NO_PLAN if is_domain_valid else PlanRatings.INVALID_DOMAIN
            return PlanningEvaluation(rating, error_msg, new_pddl_obj)

        # Rename objects/actions of the generated plan into the vocabulary of
        # the reference domain. Replacements run strictly in the listed order.
        if 'maze' in self.target_domain_pddl:
            renames = (('pos', 'loc'),)
        elif 'sokoban' in self.target_domain_pddl:
            renames = (
                ('box-1', 'stone-01'), ('box-2', 'stone-02'),
                (' left', ' dir-left'), (' right', ' dir-right'),
                (' up', ' dir-up'), (' down', ' dir-down'),
                ('move ', 'move player-01 '),
                ('push-to-goal ', 'push-to-goal player-01 '),
                ('push-to-nongoal ', 'push-to-nongoal player-01 '),
                ('east', 'right'), ('west', 'left'), ('south', 'down'), ('north', 'up'),
            )
        elif 'package' in self.target_domain_pddl:
            renames = (('pkg1', 'pkg-1'), ('pkg2', 'pkg-2'), ('package1', 'pkg-1'), ('package2', 'pkg-2'))
        elif 'overcooked' in self.target_domain_pddl:
            renames = (('tomato', 'tomato1'), ('lettuce', 'lettuce1'), ('board1', 'chopping-board1'), ('onion', 'onion1'))
        else:
            renames = ()

        gen_plan_convert = gen_plan
        for old, new in renames:
            gen_plan_convert = gen_plan_convert.replace(old, new)

        if 'sokoban' in self.target_domain_pddl and 'maze' not in self.target_domain_pddl:
            # The two coordinates of every position are swapped for sokoban.
            gen_plan_convert = re.sub(r'pos-(\d+)-(\d+)', r'pos-\2-\1', gen_plan_convert)
            print('converted plan', gen_plan_convert)

        is_plan_valid, _ = self.env.validate_plan(self.target_domain_pddl, self.target_problem_pddl, gen_plan_convert)

        # Binary rating: the plan either validates against the reference task or not.
        rw_rating = 1.0 if is_plan_valid else 0.0
        return PlanningEvaluation(rw_rating, error_msg, new_pddl_obj)
    
    def rate_domain_modification(self, cur_pddl_obj: PDDLObj, gpt_output: str, img_path: str, turn: int) -> PlanningEvaluation:
        """Apply the modification in ``gpt_output`` to a copy of the domain and rate it.

        Args:
            cur_pddl_obj: Domain object to copy and modify.
            gpt_output: Model response expected to contain python code.
            img_path: Forwarded to ``rate_domain``.
            turn: Forwarded to ``rate_domain``.

        Returns:
            An ``EMPTY_CODE`` or ``INVALID_MODIFICATION`` evaluation when the
            modification cannot be extracted or applied, otherwise the result
            of ``rate_domain`` on the modified copy.
        """
        candidate = cur_pddl_obj.copy_object()

        modification_code, extract_err = self._try_extracting_python_code(gpt_output)
        if extract_err is not None:
            return PlanningEvaluation(rating=PlanRatings.EMPTY_CODE, error_msg=extract_err, new_pddl_obj=candidate)

        modify_err = candidate.modify_domain(modification_code)
        if modify_err is None:
            return self.rate_domain(candidate, img_path, turn)

        return PlanningEvaluation(
            rating=PlanRatings.INVALID_MODIFICATION, error_msg=modify_err, new_pddl_obj=candidate
        )

    def rate_domain(self, pddl_obj, img_path, turn) -> PlanningEvaluation:
        """Rate a domain through ``_test_generated_pddl`` plus the random-walk evaluation.

        Args:
            pddl_obj: Domain object to evaluate.
            img_path: Forwarded to the planner test and the random-walk evaluation.
            turn: Forwarded to the planner test and the random-walk evaluation.

        Returns:
            A ``PDDL_SANITY_ERROR`` evaluation when the sanity check fails, a
            ``SOLUTION_FOUND`` evaluation when the plan validates and the
            random-walk rating is 1.0 (within 1e-6), otherwise an evaluation
            carrying the random-walk rating and the planner message.
        """
        sanity_err = pddl_obj.sanity_check_domain()
        # The domain is serialised before the sanity verdict is inspected.
        domain_text = pddl_obj.to_str()
        if sanity_err is not None:
            return PlanningEvaluation(rating=PlanRatings.PDDL_SANITY_ERROR, error_msg=sanity_err, new_pddl_obj=pddl_obj)

        plan_ok, feedback, _ = self._test_generated_pddl(
            domain_text, img_path, turn, rw_feedback=self.rw_feedback
        )
        rw_rating, _, _ = self.evaluate_generated_domain_with_random_walks(domain_text, img_path, turn)

        # Anything short of "valid plan and perfect random walks" is reported
        # with the raw random-walk rating.
        if not plan_ok or not (abs(rw_rating - 1.0) < 1e-6):
            return PlanningEvaluation(rw_rating, feedback, pddl_obj)

        return PlanningEvaluation(
            rating=PlanRatings.SOLUTION_FOUND, error_msg=None, new_pddl_obj=pddl_obj, solution_found=True
        )

    def _try_extracting_python_code(self, gpt_output: str):
        """Extract the python code from a model response.

        Args:
            gpt_output: Raw model response.

        Returns:
            ``(code, None)`` on success, or ``("", message)`` when
            ``extract_code`` raises ``ValueError``; the failure is also logged
            as a warning.
        """
        language = 'python'
        try:
            code = extract_code(gpt_output, lang=language)
        except ValueError:
            logging.warning(f"Could not extract {language} code from the GPT response:\n{gpt_output}")
            return "", "Your response does not contain any modification code."
        return code, None
        
    def validate_plan_vlm(self, img_path, gen_plan):
        """Ask the vision-language model whether ``gen_plan`` reaches the goal.

        The prompt template is read from the ``*_seq.json`` file next to the
        image; everything after its ``Action Sequence:`` marker is replaced by
        the numbered moves derived from ``gen_plan``.

        Args:
            img_path: Path of the scenario image (``.png``). Only images whose
                path contains ``frozenlake`` are supported.
            gen_plan: Newline-separated plan; each line contributes one move
                when it mentions ``left``, ``right``, ``up`` or ``down``
                (checked in that order).

        Returns:
            ``False`` when the text after ``Goal reaching`` in the model
            response contains ``Unsuccessful`` or when the response has no
            ``Goal reaching`` section at all; ``True`` otherwise.

        Raises:
            NotImplementedError: If ``img_path`` is not a frozenlake image
                (previously this surfaced as an unbound-variable error).
        """
        json_path = img_path.replace('.png', '_seq.json')
        with open(json_path, 'r') as file:
            data = json.load(file)

        if 'frozenlake' not in img_path:
            raise NotImplementedError(
                f"validate_plan_vlm only supports frozenlake images, got: {img_path}"
            )

        # Translate each plan step into the natural-language move it mentions.
        action_list = []
        for plan in gen_plan.split('\n'):
            for direction in ('left', 'right', 'up', 'down'):
                if direction in plan:
                    action_list.append(f'move {direction}')
                    break

        action_list = [f"{i+1}: {item}" for i, item in enumerate(action_list)]

        prompt = data['conversations'][0]['value']
        data['conversations'][0]['value'] = (
            prompt.split('Action Sequence:\n')[0] + "Action Sequence:\n" + "\n".join(action_list)
        )

        messages = message_construct_llama_func([data['conversations'][0]['value']], [])

        response = self.run_response_image(messages, [img_path])

        sections = response.split('Goal reaching')
        if len(sections) < 2:
            # Without a verdict section the plan cannot be confirmed as successful.
            logging.warning("VLM response has no 'Goal reaching' section; treating the plan as invalid.")
            return False
        return 'Unsuccessful' not in sections[1]

    def _test_generated_pddl_feedback(
            self, domain_gen_pddl, img_path, turn, rw_feedback
    ):
        # generate a plan from generated pddl
        aux = {'all_plans': []}
        
        gen_plan, is_domain_valid, error_msg = self.env.search_plan(domain_gen_pddl, self.target_gen_problem_pddl)
        # pdb.set_trace()
        aux['all_plans'].append(self.env.plan_to_str(gen_plan))
        
        if gen_plan is None:
            if is_domain_valid and rw_feedback:
                return False, self._get_random_walk_feedback(domain_gen_pddl, img_path, turn, None), aux
            else:
                logging.info("Issue with generating a plan." + error_msg)
                return False, error_msg, aux

        if 'maze' in self.target_domain_pddl:
            gen_plan_convert = gen_plan.replace('pos', 'loc')
        elif 'sokoban' in self.target_domain_pddl:
            # gen_plan_convert = gen_plan.replace('box-1', 'stone-01').replace('box-2', 'stone-02').replace(' left', ' dir-up').replace(' right', ' dir-down').replace(' up', ' dir-left').replace(' down', ' dir-right')
            gen_plan_convert = gen_plan.replace('box-1', 'stone-01').replace('box-2', 'stone-02').replace(' left', ' dir-left').replace(' right', ' dir-right').replace(' up', ' dir-up').replace(' down', ' dir-down').replace('move ', 'move player-01 ').replace('push-to-goal ', 'push-to-goal player-01 ').replace('push-to-nongoal ', 'push-to-nongoal player-01 ')
            gen_plan_convert = gen_plan_convert.replace('east', 'right').replace('west', 'left').replace('south', 'down').replace('north', 'up')
            gen_plan_convert = re.sub(r'pos-(\d+)-(\d+)', r'pos-\2-\1', gen_plan_convert)
            print('converted plan', gen_plan_convert)
        elif 'package' in self.target_domain_pddl:
            gen_plan_convert = gen_plan.replace('pkg1', 'pkg-1').replace('pkg2', 'pkg-2').replace('package1', 'pkg-1').replace('package2', 'pkg-2')
        elif 'overcooked' in self.target_domain_pddl:
            gen_plan_convert = gen_plan.replace('tomato', 'tomato1').replace('lettuce', 'lettuce1').replace('board1', 'chopping-board1').replace('onion', 'onion1')
        else:
            gen_plan_convert = gen_plan
        # pdb.set_trace()
        is_plan_valid, _ = self.env.validate_plan(self.target_domain_pddl, self.target_problem_pddl, gen_plan_convert)
        # is_plan_valid = self.validate_plan_vlm(img_path, gen_plan)
        
        if not is_plan_valid:
            # pdb.set_trace()
            logging.info("Plan generated, but it is not valid.")
            if is_domain_valid and rw_feedback:
                error_msg = self._get_random_walk_feedback(domain_gen_pddl, img_path, turn, gen_plan)
        else:
            logging.info("Plan generated and it is valid.")
            aux['plan'] = self.env.plan_to_str(gen_plan)
            aux['gen_domain_pddl'] = domain_gen_pddl
        # if is_domain_valid and rw_feedback:
        #     error_msg = self._get_random_walk_feedback(domain_gen_pddl, img_path, turn, )
        return is_plan_valid, error_msg, aux
    
    
    def _test_generated_pddl(
            self, domain_gen_pddl, img_path, turn, rw_feedback
    ):
        # generate a plan from generated pddl
        aux = {'all_plans': []}
        gen_plan, is_domain_valid, error_msg = self.env.search_plan(domain_gen_pddl, self.target_gen_problem_pddl)
        aux['all_plans'].append(self.env.plan_to_str(gen_plan))
        
        if gen_plan is None:
            if is_domain_valid and rw_feedback:
                return False, self._get_random_walk_feedback(domain_gen_pddl, img_path, turn), aux
            else:
                logging.info("Issue with generating a plan." + error_msg)
                return False, error_msg, aux

        # is_plan_valid, _ = self.env.validate_plan(self.target_domain_pddl, self.target_problem_pddl, gen_plan)
        is_plan_valid = self.validate_plan_vlm(img_path, gen_plan)
        
        if not is_plan_valid:
            logging.info("Plan generated, but it is not valid.")
        else:
            logging.info("Plan generated and it is valid.")
            aux['plan'] = self.env.plan_to_str(gen_plan)
            aux['gen_domain_pddl'] = domain_gen_pddl
        return is_plan_valid, None, aux

    
    def run_response(self, messages):
        response = ""
        for new_text in self.chat_model.stream_chat(messages, skip_special_tokens = True):
            response += new_text
        # print(response)
        # print('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
        return response

    def run_response_image(self, messages, images):
        print(messages)
        response_parts = []
        self.vlm_numcall += 1
        
        try:
            start_time = time.time()
            with torch.no_grad():
                for new_text in self.chat_model.stream_chat(messages,images=images, skip_special_tokens = True):
                    response_parts.append(new_text)
            end_time = time.time()
            
            response = "".join(response_parts)
            self.vlm_runtime += (end_time-start_time)
            print(response)
            return response
        
        finally:
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            gc.collect()
    
    def run_response_api(self, messages, image):
        """Query the GPT client about an image until the reply contains ``Step``.

        The few-shot example is read from ``example_full.nl``, located by
        dropping the last eight characters of ``image``. Only the part of
        ``messages`` after ``'goal. \\n\\n'`` is sent, appended to the example.

        Args:
            messages: Prompt text; must contain ``'goal. \\n\\n'``.
            image: Path of the image to encode and send.

        Returns:
            The first reply that contains ``Step``.
        """
        example_path = image[:-8] + "example_full.nl"
        instruction = "Given a sequence of actions and an image observation of the initial setup of a scenario, your goal is to decide the execution result after executing given actions. To achieve this task, you will first describe the setup you observe from the image, give the reasoning that explains how the player moves and the success or failure reason for each single action, then rate whether the overall execution is successful, and eventually describe whether the actions achieves the task's goal."
        with open(example_path, 'r') as example_file:
            example = example_file.read()
        messages = messages.split('goal. \n\n')[1]

        # One initial attempt plus ten retries, each in a fresh conversation.
        for _attempt in range(11):
            conv_id, _ = self.gpt_client.make_new_chat(system_message=instruction)
            conv_id, gpt_output, _ = self.gpt_client.complete_one_chat(
                conv_id, (example+messages, prompts.encode_image(image))
            )
            print(gpt_output)
            if 'Step' in gpt_output:
                break
        else:
            # NOTE(review): terminates the whole process with a success status
            # when no usable reply was obtained — confirm this is intended.
            exit(0)
        return gpt_output

    def get_next_obs(self, i, step, img_path = str, prev_tokens = None, prev_feedback=None, position = None, turn=0):
        """Sample a random move, query the chat model for the next image tokens, and save the results.

        Args:
            i: Index used in the saved file names.
            step: Step number used in the saved file names.
            img_path: Path of the scenario image; the prompt JSON is found by
                replacing ``png`` with ``json``. NOTE(review): the default is
                the ``str`` type itself, so the argument is effectively required.
            prev_tokens: Previous observation text; ``None`` on the first step.
            prev_feedback: Feedback of the previous step; read only when
                ``prev_tokens`` is given.
            position: ``[start, target]`` pair of the previous step; the agent
                stays at ``start`` when the previous feedback contains
                ``unsuccessful`` and moves to ``target`` otherwise.
            turn: Turn number used in the saved file names.

        Returns:
            ``(img, feedback, observation, action_pddl, positions, terminate)``
            where ``feedback`` and ``observation`` are substrings cut out of the
            model response, ``positions`` is ``[current, attempted_target]`` and
            ``terminate`` is True when the response contains ``unsuccessful``.
        """
        json_path = img_path.replace('png', 'json')
        with open(json_path, 'r') as file:
            data = json.load(file)

        seed = np.random.randint(2 ** 32 - 1)
        rng = np.random.default_rng(seed)
        action = rng.choice(['left', 'right', 'up', 'down'])

        if prev_tokens is not None:
            if "unsuccessful" in prev_feedback:
                new_position = position[0]
            else:
                new_position = position[1]
            data['input'] = "Current observation is: " + prev_tokens + f". Action: Standing at {new_position}, move " + action
        else:
            data['input'] = data['input'].split('move')[0] + "move " + action
            new_position = (1,1)

        messages = message_construct_llama_func([data['instruction'] + data['input']], [])
        # The tokens between the 8197 and 8196 markers of the prompt are parsed
        # up front; this fails early when the prompt lacks those markers.
        # NOTE(review): eval() on file/model text — ast.literal_eval would be safer.
        seg_tokens = eval(('[' + data['input'].replace('][', ', ').split('8197, ')[1].split(', 8196')[0] + ']').replace(']]', ']'))
        seg_tokens = torch.tensor([seg_tokens]).to(DEVICE)

        # Re-query until the response carries exactly 1024 tokens; after more
        # than three attempts an over-long sequence is truncated instead.
        # NOTE(review): nothing bounds this loop when responses keep failing to
        # parse or stay shorter than 1024 tokens.
        count = 0
        while True:
            response = self.run_response(messages)

            try:
                string = '[' + response.replace('][', ', ').split('8197, ')[1].split(', 8196')[0] + ']'
                seg_tokens = eval(string.replace(']]', ']'))
                seg_tokens = torch.tensor([seg_tokens]).to(DEVICE)
                count += 1
                if seg_tokens.shape[1] == 1024:
                    break
                elif count > 3:
                    if seg_tokens.shape[1] > 1024:
                        seg_tokens = seg_tokens[:,:1024]
                        break
            except Exception:
                # Malformed response: count the attempt and ask again.
                # KeyboardInterrupt/SystemExit are deliberately not swallowed.
                count += 1
        # NOTE(review): `token_manager` is not defined in the visible part of
        # this module — confirm where it comes from.
        img = token_manager.decode_image(seg_tokens)[0]

        action_pddl = "move-" + action + f" pos-{new_position[0]}-{new_position[1]} "
        all_directions = {
            'left': (new_position[0]-1, new_position[1]),
            'right': (new_position[0]+1, new_position[1]),
            'up': (new_position[0], new_position[1]-1),
            'down': (new_position[0], new_position[1]+1),
        }
        positions = [new_position, all_directions[action]]
        action_pddl += f"pos-{all_directions[action][0]}-{all_directions[action][1]}"
        terminate = 'unsuccessful' in response

        save_path = img_path.replace('data_my', 'saves_my').split('.png')[0]
        # Create every output folder even when the parent already exists.
        for subdir in ('prediction', 'instruction', 'full_response'):
            os.makedirs(os.path.join(save_path, subdir), exist_ok=True)
        img.save(os.path.join(save_path, f'prediction/{turn}_{i}_{step}_exec.png'))
        with open(os.path.join(save_path, f'instruction/{turn}_{i}_{step}_exec.txt'), "w") as file:
            file.write(data["input"])
        with open(os.path.join(save_path, f'full_response/{turn}_{i}_{step}_exec.txt'), "w") as file:
            file.write(response+'\n'+action_pddl+'\n'+ str(positions))

        return img, response.split(':')[1].split(' The')[0], response.split(']')[-1].split('<')[0], action_pddl, positions, terminate
    
    
    def get_next_obs_text(self, i, step, json_path = str, prev_obs = None, prev_feedback=None, position = None, turn=0):
        """Sample a random move, query the chat model for the next textual state, and save the results.

        Args:
            i: Index used in the saved file names.
            step: Step number used in the saved file names.
            json_path: Path of the prompt JSON. NOTE(review): the default is
                the ``str`` type itself, so the argument is effectively required.
            prev_obs: Previous state description; ``None`` on the first step.
            prev_feedback: Feedback of the previous step; read only when
                ``prev_obs`` is given.
            position: ``[start, target]`` pair of the previous step; the agent
                stays at ``start`` when the previous feedback contains
                ``Unsuccessful`` and moves to ``target`` otherwise.
            turn: Turn number used in the saved file names.

        Returns:
            ``(next_obs, reasoning, action_pddl, positions, terminate)`` where
            ``next_obs`` and ``reasoning`` are cut out of the model response,
            ``positions`` is ``[current, attempted_target]`` and ``terminate``
            is True when the response contains ``Unsuccessful``.
        """
        with open(json_path, 'r') as file:
            data = json.load(file)

        seed = np.random.randint(2 ** 32 - 1)
        rng = np.random.default_rng(seed)
        action = rng.choice(['left', 'right', 'up', 'down'])

        if prev_obs is not None:
            if "Unsuccessful" in prev_feedback:
                new_position = position[0]
            else:
                new_position = position[1]
            data['input'] = f"Current state Description: {prev_obs}\nAction: Standing at {new_position}, move " + action
        else:
            data['input'] = data['input'].split('move')[0] + "move " + action
            new_position = (1,1)

        messages = message_construct_llama_func([data['instruction'] + '\n' + data['input']], [])

        response = self.run_response(messages)

        # Both markers must be present in the response; a missing one raises IndexError.
        reasoning = response.split('Execution Reasoning: ')[1].split('Execution result')[0]
        next_obs = response.split('Next State Description: ')[1]

        action_pddl = "move-" + action + f" pos-{new_position[0]}-{new_position[1]} "
        all_directions = {
            'left': (new_position[0]-1, new_position[1]),
            'right': (new_position[0]+1, new_position[1]),
            'up': (new_position[0], new_position[1]-1),
            'down': (new_position[0], new_position[1]+1),
        }
        positions = [new_position, all_directions[action]]
        action_pddl += f"pos-{all_directions[action][0]}-{all_directions[action][1]}"
        terminate = 'Unsuccessful' in response

        save_path = json_path.replace('data_my', 'saves_my').split('.json')[0]
        # Create every output folder even when the parent already exists.
        for subdir in ('prediction', 'instruction', 'full_response'):
            os.makedirs(os.path.join(save_path, subdir), exist_ok=True)
        with open(os.path.join(save_path, f'instruction/{turn}_{i}_{step}_exec.txt'), "w") as file:
            file.write(data["input"])
        with open(os.path.join(save_path, f'full_response/{turn}_{i}_{step}_exec.txt'), "w") as file:
            file.write(response+'\n'+action_pddl+'\n'+ str(positions))

        return next_obs, reasoning, action_pddl, positions, terminate
    
    
    def get_next_obs_seq(self, i, img_path = str, max_steps = int, turn=0):
        json_path = img_path.replace('.png', '_seq.json')
        with open(json_path, 'r') as file:  
            data = json.load(file)
        nl = self.target_problem_nl
        
        seed = np.random.randint(2 ** 32 - 1)
        rng = np.random.default_rng(seed)
        if 'frozenlake' in img_path or "maze_wall" in img_path:
            actions = ['move left', 'move right', 'move up', 'move down']
        elif 'sokoban' in img_path:
            actions = ['move', 'push-to-goal', 'push-to-nongoal']
        elif 'package' in img_path:
            actions = ['turn-left', 'turn-right', 'move', 'open', 'close']
        elif 'printer' in img_path:
            actions = ['turn-left', 'turn-right', 'move', 'pick-up', 'drop-down', 'toggle-on', 'toggle-off']
        elif 'overcooked' in img_path:
            actions = ['move', 'pick-ingredient', 'drop-ingredient', 'pick-plate', 'drop-plate', 'chop', 'merge-ingredient', 'put-plate', 'deliver']
        valid = False
        while not valid:
            # action_list = rng.choice(actions, size=max_steps, replace=True)
            if 'frozenlake' in img_path:
                new_position = 'pos-1-1'
                iceholes = nl.split('ice holes are at ')[1].split('.')[0]
                adjacent_positions = [(2, 1), (0, 1), (1, 2), (1, 0)]
                while True:
                    action_list = rng.choice(actions, size=max_steps, replace=True)
                    action = action_list[0]
                    if action == 'move down':
                        adjacent_position = adjacent_positions[0]
                    elif action == 'move up':
                        adjacent_position = adjacent_positions[1]
                    elif action == 'move right':
                        adjacent_position = adjacent_positions[2]
                    else:
                        adjacent_position = adjacent_positions[3]
                    
                    if f'pos-{adjacent_position[0]}-{adjacent_position[1]}' not in iceholes and action not in ['move up', 'move left']:
                        break
                action_list = [f"{i+1}: {item}" for i, item in enumerate(action_list)]
            elif 'sokoban' in img_path:
                clear = nl.split(' are clear')[0].split('positions ')[-1]
                action_list = rng.choice(actions, size=max_steps, replace=True)
                action_list[0] = 'move'
                new_position = nl.split('agent is at ')[1].split('.')[0]
                dir_list = []
                action_nl_list = list(action_list)
                for action_idx, action in enumerate(action_list):
                    try:
                        coor = (int(new_position.split('-')[1]), int(new_position.split('-')[2]))
                    except:
                        pdb.set_trace()
                    adjacent_positions = [(coor[0]+1, coor[1]), (coor[0]-1, coor[1]), (coor[0], coor[1]+1), (coor[0], coor[1]-1)]
                    adjacent_position = rng.choice(adjacent_positions, size=1, replace=True)[0]
                    if action_idx == 0:
                        
                        while (f'pos-{adjacent_position[0]}-{adjacent_position[1]}' not in clear) or adjacent_position[0] < 1 or adjacent_position[1] < 1:
                            adjacent_position = rng.choice(adjacent_positions, size=1, replace=True)[0]
                    else:
                        while adjacent_position[0] < 1 or adjacent_position[1] < 1:
                            adjacent_position = rng.choice(adjacent_positions, size=1, replace=True)[0]
                    adjacent_double_positions = [(adjacent_position[0]+1, adjacent_position[1]), (adjacent_position[0]-1, adjacent_position[1]), (adjacent_position[0], adjacent_position[1]+1), (adjacent_position[0], adjacent_position[1]-1)]
                    adjacent_double_position = rng.choice(adjacent_double_positions, size=1, replace=True)[0]
                    
                    if list(adjacent_position) == [coor[0]+1, coor[1]]:
                        direction = 'down'
                    elif list(adjacent_position) == [coor[0]-1, coor[1]]:
                        direction = 'up'
                    elif list(adjacent_position) == [coor[0], coor[1]+1]:
                        direction = 'right'
                    else:
                        direction = 'left'
                    dir_list.append(direction)
                    if action == 'move':
                        action_nl_list[action_idx] = f'move from {new_position} to pos-{adjacent_position[0]}-{adjacent_position[1]}'
                    elif action == 'push-to-goal':
                        action_nl_list[action_idx] = f'push the box from pos-{adjacent_position[0]}-{adjacent_position[1]} to goal pos-{adjacent_double_position[0]}-{adjacent_double_position[1]}'
                    else:
                        action_nl_list[action_idx] = f'push the box from pos-{adjacent_position[0]}-{adjacent_position[1]} to pos-{adjacent_double_position[0]}-{adjacent_double_position[1]}'
                    new_position = f'pos-{adjacent_position[0]}-{adjacent_position[1]}'
                action_list = [f"{i+1}: {item}" for i, item in enumerate(action_nl_list)]
            elif 'maze_wall' in img_path:
                clear = nl.split(' are clear')[0].split('positions ')[-1]
                new_position = nl.split('agent is at ')[1].split('.')[0].replace('(', '').replace(')', '')
                coor = (int(new_position.split('-')[1]), int(new_position.split('-')[2]))
                adjacent_positions = [(coor[0]+1, coor[1]), (coor[0]-1, coor[1]), (coor[0], coor[1]+1), (coor[0], coor[1]-1)]
                # make sure first action is executable to save runtime
                while True:
                    action_list = rng.choice(actions, size=max_steps, replace=True)
                    action = action_list[0]
                    if action == 'move down':
                        adjacent_position = adjacent_positions[0]
                    elif action == 'move up':
                        adjacent_position = adjacent_positions[1]
                    elif action == 'move right':
                        adjacent_position = adjacent_positions[2]
                    else:
                        adjacent_position = adjacent_positions[3]
                    if f'pos-{adjacent_position[0]}-{adjacent_position[1]}' in clear:
                        break
                    
                # coor = (int(new_position.split('-')[1]), int(new_position.split('-')[2]))
                action_nl_list = list(action_list)
                for action_idx, action in enumerate(action_list):
                    coor = (int(new_position.split('-')[1]), int(new_position.split('-')[2]))
                    adjacent_positions = [(coor[0]+1, coor[1]), (coor[0]-1, coor[1]), (coor[0], coor[1]+1), (coor[0], coor[1]-1)]
                    if action == 'move down':
                        adjacent_position = adjacent_positions[0]
                        action_nl_list[action_idx] = f'{action} from {new_position} to pos-{adjacent_position[0]}-{adjacent_position[1]}'
                    elif action == 'move up':
                        adjacent_position = adjacent_positions[1]
                        action_nl_list[action_idx] = f'{action} from {new_position} to pos-{adjacent_position[0]}-{adjacent_position[1]}'
                    elif action == 'move right':
                        adjacent_position = adjacent_positions[2]
                        action_nl_list[action_idx] = f'{action} from {new_position} to pos-{adjacent_position[0]}-{adjacent_position[1]}'
                    else:
                        adjacent_position = adjacent_positions[3]
                        action_nl_list[action_idx] = f'{action} from {new_position} to pos-{adjacent_position[0]}-{adjacent_position[1]}'
                    new_position = f'pos-{adjacent_position[0]}-{adjacent_position[1]}'
                action_list = [f"{i+1}: {item}" for i, item in enumerate(action_nl_list)]
            
            elif 'package' in img_path:
                new_position = nl.split('agent is at ')[1].split('.')[0].replace('(', '').replace(')', '')
                orientation = nl.split('orientation is ')[1].split('.')[0]
                packages = nl.split('The packages are at ')[1].split('.')[0]
                turn_left = {'left': 'down', 'down': 'right', 'right': 'up', 'up': 'left'}
                turn_right = {'left': 'up', 'up': 'right', 'right': 'down', 'down': 'left'}
                coor = (int(new_position.split('-')[1]), int(new_position.split('-')[2]))
                adjacent_positions = [(coor[0]+1, coor[1]), (coor[0]-1, coor[1]), (coor[0], coor[1]+1), (coor[0], coor[1]-1)]
                dir_list = []
                orientation_list = []
                while True:
                    action_list = rng.choice(actions, size=max_steps, replace=True)
                    action = action_list[0]
                    # if action in ['turn-left', 'turn-right']:
                    #     break
                    # el
                    if action == 'move':
                        if orientation == 'down':
                            adjacent_position = adjacent_positions[0]
                        elif orientation == 'up':
                            adjacent_position = adjacent_positions[1]
                        elif orientation == 'right':
                            adjacent_position = adjacent_positions[2]
                        else:
                            adjacent_position = adjacent_positions[3]
                        if f'pos-{adjacent_position[0]}-{adjacent_position[1]}' not in packages:
                            break
                action_nl_list = list(action_list)
                for action_idx, action in enumerate(action_list):
                    coor = (int(new_position.split('-')[1]), int(new_position.split('-')[2]))
                    adjacent_positions = [(coor[0]+1, coor[1]), (coor[0]-1, coor[1]), (coor[0], coor[1]+1), (coor[0], coor[1]-1)]
                    if action_idx != 0:
                        adjacent_position = rng.choice(adjacent_positions, size=1, replace=True)[0]
                    if list(adjacent_position) == [coor[0]+1, coor[1]]:
                        direction = 'down'
                    elif list(adjacent_position) == [coor[0]-1, coor[1]]:
                        direction = 'up'
                    elif list(adjacent_position) == [coor[0], coor[1]+1]:
                        direction = 'right'
                    else:
                        direction = 'left'
                    dir_list.append(direction)
                    if action in ['turn-left', 'turn-right']:
                        orientation_list.append(orientation)
                        action_nl_list[action_idx] = f'{action} at {new_position}'
                        if 'left' in action:
                            orientation = turn_left[orientation]
                        else:
                            orientation = turn_right[orientation]
                    elif action == 'move':
                        action_nl_list[action_idx] = f'{action} from {new_position} to pos-{adjacent_position[0]}-{adjacent_position[1]}'
                        new_position = f'pos-{adjacent_position[0]}-{adjacent_position[1]}'
                        orientation_list.append(None)
                    elif action in ['open', 'close']:
                        action_nl_list[action_idx] = f'{action} the object at pos-{adjacent_position[0]}-{adjacent_position[1]}'
                        orientation_list.append(new_position)
                action_list = [f"{i+1}: {item}" for i, item in enumerate(action_nl_list)]
                
            elif 'printer' in img_path:
                new_position = nl.split('agent is at ')[1].split('.')[0].replace('(', '').replace(')', '')
                orientation = nl.split('orientation is ')[1].split('.')[0]
                printer = nl.split('The printer is at ')[1].split('.')[0]
                desks = nl.split(' are the desk region')[0].split('positions ')[-1]
                turn_left = {'left': 'down', 'down': 'right', 'right': 'up', 'up': 'left'}
                turn_right = {'left': 'up', 'up': 'right', 'right': 'down', 'down': 'left'}
                coor = (int(new_position.split('-')[1]), int(new_position.split('-')[2]))
                adjacent_positions = [(coor[0]+1, coor[1]), (coor[0]-1, coor[1]), (coor[0], coor[1]+1), (coor[0], coor[1]-1)]
                dir_list = []
                orientation_list = []
                while True:
                    action_list = rng.choice(actions, size=max_steps, replace=True)
                    action = action_list[0]
                    if action == 'move':
                        if orientation == 'down':
                            adjacent_position = adjacent_positions[0]
                        elif orientation == 'up':
                            adjacent_position = adjacent_positions[1]
                        elif orientation == 'right':
                            adjacent_position = adjacent_positions[2]
                        else:
                            adjacent_position = adjacent_positions[3]
                        if f'pos-{adjacent_position[0]}-{adjacent_position[1]}' not in desks:
                            print(f'pos-{adjacent_position[0]}-{adjacent_position[1]}', desks)
                            break
                action_nl_list = list(action_list)
                for action_idx, action in enumerate(action_list):
                    coor = (int(new_position.split('-')[1]), int(new_position.split('-')[2]))
                    adjacent_positions = [(coor[0]+1, coor[1]), (coor[0]-1, coor[1]), (coor[0], coor[1]+1), (coor[0], coor[1]-1)]
                    if action_idx != 0:
                        adjacent_position = rng.choice(adjacent_positions, size=1, replace=True)[0]
                    if list(adjacent_position) == [coor[0]+1, coor[1]]:
                        direction = 'down'
                    elif list(adjacent_position) == [coor[0]-1, coor[1]]:
                        direction = 'up'
                    elif list(adjacent_position) == [coor[0], coor[1]+1]:
                        direction = 'right'
                    else:
                        direction = 'left'
                    dir_list.append(direction)
                    if action in ['turn-left', 'turn-right']:
                        orientation_list.append(orientation)
                        action_nl_list[action_idx] = f'{action} at {new_position}'
                        if 'left' in action:
                            orientation = turn_left[orientation]
                        else:
                            orientation = turn_right[orientation]
                    elif action == 'move':
                        action_nl_list[action_idx] = f'{action} from {new_position} to pos-{adjacent_position[0]}-{adjacent_position[1]}'
                        new_position = f'pos-{adjacent_position[0]}-{adjacent_position[1]}'
                        orientation_list.append(None)
                    elif action in ['toggle-on', 'toggle-off', "pick-up", "drop-down"]:
                        action_nl_list[action_idx] = f'{action} the object at pos-{adjacent_position[0]}-{adjacent_position[1]}'
                        orientation_list.append(new_position)
                action_list = [f"{i+1}: {item}" for i, item in enumerate(action_nl_list)]
            elif 'overcooked' in img_path:
                clear = nl.split(' are clear')[0].split('positions ')[-1]
                action_list = rng.choice(actions, size=max_steps, replace=True)
                action_list[0] = 'move'
                new_position = nl.split('agents are at (')[1].split(')')[0]
                player = 'agent-1'
                mutiple_agent = False
                if ',' in new_position:
                    mutiple_agent = True
                    players = ['agent-1', 'agent-2']
                    new_positions = new_position.split(', ')
                    player_list = rng.choice([0,1], size=max_steps, replace=True)
                dir_list = []
                position_list = []
                action_nl_list = list(action_list)
                for action_idx, action in enumerate(action_list):
                    if mutiple_agent:
                        new_position = new_positions[player_list[action_idx]]
                        player = players[player_list[action_idx]]
                    try:
                        coor = (int(new_position.split('-')[1]), int(new_position.split('-')[2]))
                    except:
                        pdb.set_trace()
                    adjacent_positions = [(coor[0]+1, coor[1]), (coor[0]-1, coor[1]), (coor[0], coor[1]+1), (coor[0], coor[1]-1)]
                    adjacent_position = rng.choice(adjacent_positions, size=1, replace=True)[0]
                    if action_idx == 0:
                        while (f'pos-{adjacent_position[0]}-{adjacent_position[1]}' not in clear) or adjacent_position[0] < 1 or adjacent_position[1] < 1:
                            adjacent_position = rng.choice(adjacent_positions, size=1, replace=True)[0]
                    else:
                        while adjacent_position[0] < 1 or adjacent_position[1] < 1:
                            adjacent_position = rng.choice(adjacent_positions, size=1, replace=True)[0]
                    
                    if list(adjacent_position) == [coor[0]+1, coor[1]]:
                        direction = 'down'
                    elif list(adjacent_position) == [coor[0]-1, coor[1]]:
                        direction = 'up'
                    elif list(adjacent_position) == [coor[0], coor[1]+1]:
                        direction = 'right'
                    else:
                        direction = 'left'
                    dir_list.append(direction)
                    position_list.append(new_position)
                    # actions = ['move', 'pick-ingredient', 'drop-ingredient', 'pick-plate', 'drop-plate', 'chop', 'merge-ingredient', 'put-plate', 'deliver']
                    if action == 'move':
                        action_nl_list[action_idx] = f'{player} move {direction} from {new_position} to pos-{adjacent_position[0]}-{adjacent_position[1]}'
                        if mutiple_agent:
                            new_positions[player_list[action_idx]] = f'pos-{adjacent_position[0]}-{adjacent_position[1]}'
                        else:
                            new_position = f'pos-{adjacent_position[0]}-{adjacent_position[1]}'
                    elif action == 'pick-ingredient':
                        ingredient = rng.choice(['FreshTomato', 'FreshLettuce', 'ChoppedTomato', 'ChoppedLettuce'])
                        action_nl_list[action_idx] = f'{player} pick {ingredient} at pos-{adjacent_position[0]}-{adjacent_position[1]}'
                    elif action == 'pick-plate':
                        action_nl_list[action_idx] = f'{player} pick Plate at pos-{adjacent_position[0]}-{adjacent_position[1]}'
                    elif action == 'drop-ingredient':
                        ingredient = rng.choice(['FreshTomato', 'FreshLettuce', 'ChoppedTomato', 'ChoppedLettuce'])
                        action_nl_list[action_idx] = f'{player} drop {ingredient} at pos-{adjacent_position[0]}-{adjacent_position[1]}'
                    elif action == 'drop-plate':
                            action_nl_list[action_idx] = f'{player} drop Plate at pos-{adjacent_position[0]}-{adjacent_position[1]}'
                    elif action == 'chop':
                        ingredient = rng.choice(['FreshTomato', 'FreshLettuce'])
                        action_nl_list[action_idx] = f'{player} chop {ingredient} at pos-{adjacent_position[0]}-{adjacent_position[1]}'
                    elif action == 'merge-ingredient':
                        ingredient1 = rng.choice(['FreshTomato', 'FreshLettuce', 'ChoppedTomato', 'ChoppedLettuce'])
                        ingredient2 = rng.choice(['FreshTomato', 'FreshLettuce', 'ChoppedTomato', 'ChoppedLettuce'])
                        action_nl_list[action_idx] = f'{player} merge-ingredient {ingredient1} and {ingredient2} at pos-{adjacent_position[0]}-{adjacent_position[1]}'
                    elif action == 'put-plate':
                        ingredient1 = rng.choice(['FreshTomato', 'FreshLettuce', 'ChoppedTomato', 'ChoppedLettuce'])
                        action_nl_list[action_idx] = f'{player} put-plate {ingredient1} into Plate at pos-{adjacent_position[0]}-{adjacent_position[1]}'
                    else:
                        ingredient1 = rng.choice(['FreshTomato', 'FreshLettuce', 'ChoppedTomato', 'ChoppedLettuce'])
                        action_nl_list[action_idx] = f'{player} deliver {ingredient1} at pos-{adjacent_position[0]}-{adjacent_position[1]}'
                action_list = [f"{i+1}: {item}" for i, item in enumerate(action_nl_list)]
                
            data['conversations'][0]['value'] = data['conversations'][0]['value'].split('Action Sequence:\n')[0] + "Action Sequence:\n" + "\n".join(action_list)
            if self.gpt_client is not None and self.gpt_client != 'vila-u':
                response = self.run_response_api(data['conversations'][0]['value'], img_path)
            elif self.gpt_client == 'vila-u':
                response = self.run_response_vila_u(data['conversations'][0]['value'], img_path)
            else:
                messages = message_construct_llama_func([data['conversations'][0]['value']], [])                    
                response = self.run_response_image(messages, [img_path])
            
            reasonings = response.split('Goal reaching')[0]
            if 'Unsuccessful' in reasonings:
                terminate = True
            else:
                terminate = False

            action_pddls = []
            # reasoning_list = reasonings.split('----------------------------------------------\n')
            reasoning_list = re.split(r'(?=Step \d+ -)', reasonings)[1:]
            
            reasoning_list = [s.strip() for s in reasoning_list if s.strip()]
            
            # # this is to try if adding clear description is useful
            # for reason_idx, reasoning in enumerate(reasoning_list):
            #     if 'the agent successfully moves to ' in reasoning:
            #         
            #         ori_pos = action_list[reason_idx].split('from ')[1].split(' to')[0]
            #         clear_sta = f' and the agent original standing position {ori_pos} is now clear'
            #         reasoning_list[reason_idx] = reasoning.split('.\nExecution result:')[0] + clear_sta + '.\nExecution result:' + reasoning.split('.\nExecution result:')[1]
                
            
            if 'package' in img_path or 'printer' in img_path:
                orientation = nl.split('orientation is ')[1].split('.')[0]
            for i, action in enumerate(action_list):
                try:
                    reasoning = reasoning_list[i]
                except:
                    pdb.set_trace()
                if 'Unsuccessful' in reasoning or 'Invalid' in reasoning:
                    break
                if 'frozenlake' in img_path:
                    action_pddl = "move-" + action.split(' ')[2] + f" {new_position} "
                    new_position = reasoning.split(' to ')[-1].split('.')[0]
                    action_pddl += f"{new_position}"     
                elif 'sokoban' in img_path: 
                    boxes = (nl.split(' have boxes')[0].split('positions ')[1]).split(', ')
                    for box_idx, box in enumerate(boxes):
                        if box in action:
                            box_name = f'box-{box_idx+1}'
                    if 'move' in action:
                        action_pddl = action.split(': ')[1].replace('from ', '').replace('to ', '') + ' ' + dir_list[i]
                    elif 'goal' in action:
                        action_pddl = action.split(': ')[1].replace('push the box from  ', f'push-to-goal {box_name}').replace('to goal ', '') + ' ' + dir_list[i]
                    elif 'push' in action:
                        action_pddl = action.split(': ')[1].replace('push the box from  ', f'push-to-nongoal {box_name}').replace('to ', '') + ' ' + dir_list[i]
                elif 'maze_wall' in img_path: 
                    action_pddl = action.split(': ')[1].replace('from ', '').replace('to ', '').replace('move ', 'move-')
                elif 'package' in img_path:
                    boxes = (nl.split('packages are at ')[1].split('.')[1]).split(', ')
                    box_name = 'pkg-1'
                    for box_idx, box in enumerate(boxes):
                        if box in action:
                            box_name = f'pkg-{box_idx+1}'
                    if 'left' in action:
                        orientation = orientation_list[i]
                        action_pddl = f'turn-left {orientation} {turn_left[orientation]}'
                    elif 'right' in action:
                        orientation = orientation_list[i]
                        action_pddl = f'turn-right {orientation} {turn_right[orientation]}'
                    elif 'move' in action:
                        action_pddl = action.split(': ')[1].replace('from ', '').replace('to ', '') + ' ' + dir_list[i]
                    elif 'open' in action:
                        pos = orientation_list[i]
                        action_pddl = action.split(': ')[1].replace('open the object at ', f'open {box_name} {pos} ')+ ' ' + dir_list[i]
                    elif 'close' in action:
                        pos = orientation_list[i]
                        action_pddl = action.split(': ')[1].replace('close the object at ', f'close {box_name} {pos} ')+ ' ' + dir_list[i]
                elif 'printer' in img_path:
                    if 'left' in action:
                        orientation = orientation_list[i]
                        action_pddl = f'turn-left {orientation} {turn_left[orientation]}'
                    elif 'right' in action:
                        orientation = orientation_list[i]
                        action_pddl = f'turn-right {orientation} {turn_right[orientation]}'
                    elif 'move' in action:
                        action_pddl = action.split(': ')[1].replace('from ', '').replace('to ', '') + ' ' + dir_list[i]
                    elif 'pick-up' in action:
                        pos = orientation_list[i]
                        action_pddl = action.split(': ')[1].replace('pick-up the object at ', f'pick-up {box_name} {pos} ')+ ' ' + dir_list[i]
                    elif 'toggle-on' in action:
                        pos = orientation_list[i]
                        action_pddl = action.split(': ')[1].replace('toggle-on the object at ', f'toggle-on {box_name} {pos} ')+ ' ' + dir_list[i]
                    elif 'toggle-off' in action:
                        pos = orientation_list[i]
                        action_pddl = action.split(': ')[1].replace('toggle-off the object at ', f'toggle-off {box_name} {pos} ')+ ' ' + dir_list[i]
                    elif 'drop-down' in action:
                        pos = orientation_list[i]
                        if pos in desks:
                            action_pddl = action.split(': ')[1].replace('drop-down the object at ', f'drop-down-desk {box_name} {pos} ')+ ' ' + dir_list[i]
                        else:
                            action_pddl = action.split(': ')[1].replace('drop-down the object at ', f'drop-down-nondesk {box_name} {pos} ')+ ' ' + dir_list[i]
                elif 'overcooked' in img_path: 
                    if 'put-plate' in reasoning or 'merge-ingredient' in reasoning:
                        break
                    if 'move' in action:
                        action_pddl = 'move '+ action.split(': ')[1].split('move')[0] + action.split(': ')[1].split('move ')[1].replace('from ', '').replace('to ', '').replace('down ', '') .replace('right ', '') .replace('left ', '') .replace('up ', '')  + ' ' + dir_list[i]
                    elif 'pick' in action and 'Plate' in action:
                        pos = position_list[i]
                        action_pddl = 'pick-plate '+ action.split(': ')[1].split('pick')[0] + action.split(': ')[1].split('pick ')[1].replace('at', pos).replace('Plate', 'plate1') + ' ' + dir_list[i]
                    elif 'pick' in action and 'Plate' not in action:
                        pos = position_list[i]
                        action_pddl = 'pick-ingredient '+ action.split(': ')[1].split('pick')[0] + action.split(': ')[1].split('pick ')[1].replace('at', pos).replace('FreshTomato', 'tomato').replace('ChoppedTomato', 'tomato').replace('FreshLettuce', 'lettuce').replace('ChoppedLettuce', 'lettuce') + ' ' + dir_list[i]
                    elif 'drop' in action and 'Plate' in action:
                        pos = position_list[i]
                        action_pddl = 'drop-plate '+ action.split(': ')[1].split('drop')[0] + action.split(': ')[1].split('drop ')[1].replace('at', pos).replace('Plate', 'plate1') + ' ' + dir_list[i]
                    elif 'drop' in action and 'Plate' not in action:
                        pos = position_list[i]
                        action_pddl = 'drop-ingredient '+ action.split(': ')[1].split('drop')[0] + action.split(': ')[1].split('drop ')[1].replace('at', pos).replace('FreshTomato', 'tomato').replace('ChoppedTomato', 'tomato').replace('FreshLettuce', 'lettuce').replace('ChoppedLettuce', 'lettuce') + ' ' + dir_list[i]
                    elif 'chop' in action:
                        pos = position_list[i]
                        action_pddl = 'chop '+ action.split(': ')[1].split('chop')[0] + action.split(': ')[1].split('chop ')[1].replace('at', pos).replace('FreshTomato', 'tomato').replace('ChoppedTomato', 'tomato').replace('FreshLettuce', 'lettuce').replace('ChoppedLettuce', 'lettuce') + ' board1 ' + dir_list[i]
                    else:
                        pdb.set_trace()
                    action_pddl = action_pddl.replace('agent-1', 'player1').replace('agent-2', 'player2')
                # reasoning_list_return.append(reasoning)
                action_pddls.append(action_pddl)
                valid = True
                print('action_pddl', action_pddl)
        # if valid:
        #     terminate = False
        # else:
        #     terminate = True
        save_path = json_path.replace('data_my', 'saves_my').split('.json')[0]
        if not os.path.exists(save_path):
            os.makedirs(save_path)
            os.makedirs(os.path.join(save_path, 'instruction'))
            os.makedirs(os.path.join(save_path, 'full_response'))
        with open(os.path.join(save_path, f'instruction/{turn}_{i}_exec.txt'), "w") as file:
            file.write(data["conversations"][0]['value'])
        with open(os.path.join(save_path, f'full_response/{turn}_{i}_exec.txt'), "w") as file:
            file.write(response+'\n'+str(action_pddls)+'\n')
        
        torch.cuda.empty_cache()
        del response
        gc.collect()
        
        return reasoning_list, action_pddls, terminate
    
    
    def check_executable(self, i, step, img_path = str, plan = str, prev_tokens = None, turn=0):
        """Query the model for the observation after one move and save the result.

        The prompt is built from the JSON file derived from ``img_path`` and
        the direction parsed out of ``plan``. The model is queried repeatedly
        until its response yields a usable run of image tokens; those tokens
        are decoded to an image, and the image, the prompt and the raw
        response are written below a directory derived from ``img_path``
        (``data_my`` replaced by ``saves_my``, ``.png`` suffix removed).

        Args:
            i: Index used in the saved file names.
            step: Step index used in the saved file names.
            img_path: Path of the observation image. NOTE(review): the
                ``= str`` default looks like a misplaced type annotation; it
                is kept so the signature is unchanged, but a real path must
                always be supplied.
            plan: Action string. The direction is the text between the first
                ``-`` and the following space; the position is read from the
                first ``pos-<a>-<b>`` token.
            prev_tokens: Optional string with the previous observation
                tokens. When given it replaces the observation stored in the
                JSON file.
            turn: Turn index used in the saved file names.

        Returns:
            A 4-tuple ``(img, head, tail, terminate)`` where ``img`` is the
            decoded image, ``head`` is the response text between its first
            and second ``:`` cut at ``' The'``, ``tail`` is the text after
            the last ``]`` cut at the next ``<``, and ``terminate`` is True
            when the response contains ``'unsuccessful'``.

        Raises:
            IndexError: If ``plan``, the prompt or the final response lack
                the separators the parsing relies on.

        NOTE(review): the retry loop has no upper bound. A model that never
        returns at least 1024 parseable tokens makes this method loop forever.
        """
        import ast

        expected_tokens = 1024

        def parse_image_tokens(text):
            # The image tokens are the ids between the 8197 and 8196 marker
            # ids; back-to-back bracketed lists are merged into one first.
            span = text.replace('][', ', ').split('8197, ')[1].split(', 8196')[0]
            # SECURITY: this text comes from the prompt file or the model, so
            # it is parsed as a literal instead of being executed with eval().
            return ast.literal_eval(('[' + span + ']').replace(']]', ']'))

        # NOTE: replaces every 'png' substring in the path, not only the
        # extension; left as-is because existing data layouts may rely on it.
        json_path = img_path.replace('png', 'json')
        with open(json_path, 'r') as file:
            data = json.load(file)

        action = plan.split('-')[1].split(' ')[0]
        pos_fields = plan.split('pos-')[1].split('-')
        new_position = (int(pos_fields[0]), int(pos_fields[1].split(' ')[0]))

        if prev_tokens is not None:
            data['input'] = "Current observation is: " + prev_tokens + f". Action: Standing at {new_position}, move " + action
        else:
            data['input'] = data['input'].split('move')[0] + "move " + action

        messages = message_construct_llama_func([data['instruction'] + data['input']], [])

        # The parsed value is not used (the loop below always replaces it);
        # the call is kept so that a prompt without the token markers still
        # fails here, before the model is queried.
        parse_image_tokens(data['input'])

        count = 0
        while True:
            response = self.run_response(messages)
            count += 1
            try:
                seg_tokens = torch.tensor([parse_image_tokens(response)]).to(DEVICE)
            except Exception:
                # Best effort: a malformed response just triggers another
                # sample. Unlike a bare except, Ctrl-C still interrupts.
                continue
            n_tokens = seg_tokens.shape[1]
            if n_tokens == expected_tokens:
                break
            # After more than three attempts an over-long sequence is accepted
            # and truncated; a too-short one is never accepted.
            if count > 3 and n_tokens > expected_tokens:
                seg_tokens = seg_tokens[:, :expected_tokens]
                break

        # token_manager is not defined in this method; presumably a
        # module-level object set up elsewhere in the file - verify.
        img = token_manager.decode_image(seg_tokens)[0]
        terminate = 'unsuccessful' in response

        save_path = img_path.replace('data_my', 'saves_my').split('.png')[0]
        # Create every sub-directory unconditionally: the root may already
        # exist without 'prediction' when another method created it first.
        for sub_dir in ('prediction', 'instruction', 'full_response'):
            os.makedirs(os.path.join(save_path, sub_dir), exist_ok=True)

        # This method works on the 'instruction'/'input' schema; fall back to
        # the input text when the file has no 'conversations' entry instead
        # of failing after the model call has already been paid for.
        try:
            prompt_text = data["conversations"][0]['value']
        except (KeyError, IndexError, TypeError):
            prompt_text = data['input']

        img.save(os.path.join(save_path, f'prediction/{turn}_{i}_{step}_check.png'))
        with open(os.path.join(save_path, f'instruction/{turn}_{i}_{step}_check.txt'), "w") as file:
            file.write(prompt_text)
        with open(os.path.join(save_path, f'full_response/{turn}_{i}_{step}_check.txt'), "w") as file:
            file.write(response)

        return img, response.split(':')[1].split(' The')[0], response.split(']')[-1].split('<')[0], terminate


    def check_executable_text(self, i, step, json_path = str, plan = str, prev_obs = None, turn=0):
        """Query the model for the textual outcome of one move and log the exchange.

        The prompt is the ``instruction`` field of the JSON file followed by
        an input line built from ``plan`` (and from ``prev_obs`` when given).
        The prompt input and the raw response are written below a directory
        derived from ``json_path`` (``data_my`` replaced by ``saves_my``,
        ``.json`` suffix removed).

        Args:
            i: Index used in the saved file names.
            step: Step index used in the saved file names.
            json_path: Path of the JSON prompt file. NOTE(review): the
                ``= str`` default looks like a misplaced type annotation; it
                is kept so the signature is unchanged, but a real path must
                always be supplied.
            plan: Action string. The direction is the text between the first
                ``-`` and the following space; the position is read from the
                first ``pos-<a>-<b>`` token.
            prev_obs: Optional description of the current state. When given
                it replaces the state description stored in the JSON file.
            turn: Turn index used in the saved file names.

        Returns:
            A 3-tuple ``(next_obs, reasoning, terminate)``: the text after
            ``'Next State Description: '``, the text between
            ``'Execution Reasoning: '`` and ``'Execution result'``, and
            whether the response contains ``'Unsuccessful'``.

        Raises:
            IndexError: If ``plan`` or the response lack the markers the
                parsing relies on.
        """
        with open(json_path, 'r') as file:
            data = json.load(file)

        action = plan.split('-')[1].split(' ')[0]
        pos_fields = plan.split('pos-')[1].split('-')
        new_position = (int(pos_fields[0]), int(pos_fields[1].split(' ')[0]))

        if prev_obs is not None:
            data['input'] = f"Current state Description: {prev_obs}\nAction: Standing at {new_position}, move " + action
        else:
            # Keep the stored text up to the first 'move' and append the new direction.
            data['input'] = data['input'].split('move')[0] + "move " + action

        messages = message_construct_llama_func([data['instruction'] + '\n' + data['input']], [])
        response = self.run_response(messages)

        reasoning = response.split('Execution Reasoning: ')[1].split('Execution result')[0]
        next_obs = response.split('Next State Description: ')[1]
        terminate = 'Unsuccessful' in response

        save_path = json_path.replace('data_my', 'saves_my').split('.json')[0]
        # Create every sub-directory unconditionally: the root may already
        # exist (created by another method or a concurrent run) without all
        # of them, and exist_ok avoids the exists()/makedirs() race.
        for sub_dir in ('prediction', 'instruction', 'full_response'):
            os.makedirs(os.path.join(save_path, sub_dir), exist_ok=True)

        with open(os.path.join(save_path, f'instruction/{turn}_{i}_{step}_check.txt'), "w") as file:
            file.write(data["input"])
        with open(os.path.join(save_path, f'full_response/{turn}_{i}_{step}_check.txt'), "w") as file:
            file.write(response)

        return next_obs, reasoning, terminate
    
    def check_executable_seq(self, i, img_path = str, plans = str, turn=0):
        json_path = img_path.replace('.png', '_seq.json')
        with open(json_path, 'r') as file:  
            data = json.load(file)
            
        action_list = []
        if 'frozenlake' in img_path:
            for plan in plans:
                action = plan.split('-')[1].split(' ')[0]
                action_list.append(f'move {action}')
            action_list = [f"{i+1}: {item}" for i, item in enumerate(action_list)]
        elif 'sokoban' in img_path:
            for plan in plans:
                if 'move' in plan:
                    from_pos = plan.split(' ')[1]
                    to_pos = plan.split(' ')[2]
                    action = f'move from {from_pos} to {to_pos}'
                elif 'nongoal' in plan:
                    from_pos = plan.split(' ')[2]
                    to_pos = plan.split(' ')[3]
                    action = f'push the box from {from_pos} to {to_pos}'
                else:
                    from_pos = plan.split(' ')[2]
                    to_pos = plan.split(' ')[3]
                    action = f'push the box from {from_pos} to goal {to_pos}'                    
                action_list.append(action)
            action_list = [f"{i+1}: {item}" for i, item in enumerate(action_list)]
        elif 'maze_wall' in img_path:
            for plan in plans:
                move_dir = plan.split('-')[1].split(' ')[0]
                from_pos = plan.split(' ')[1]
                to_pos = plan.split(' ')[2]
                action = f'move {move_dir} from {from_pos} to {to_pos}'
                action_list.append(action)
            action_list = [f"{i+1}: {item}" for i, item in enumerate(action_list)]
        elif 'package' in img_path:
            nl = self.target_problem_nl
            # print(nl)
            try:
                new_position = nl.split('agent is at ')[1].split('.')[0].replace('(', '').replace(')', '')
            except:
                pdb.set_trace()
            turn_left = {'left': 'down', 'down': 'right', 'right': 'up', 'up': 'left'}
            turn_right = {'left': 'up', 'up': 'right', 'right': 'down', 'down': 'left'}
            for plan in plans:
                if 'turn-left' in plan:
                    action = f'turn-left at {new_position}'
                    dir1 = plan.split(' ')[1]
                    dir2 = plan.split(' ')[2]
                    if turn_left[dir1] != dir2:
                        return f'turn-left from {dir1} to {dir2} is not valid, turn-left can only turn 90 degrees and {dir2} is not a valid turn-left direction from {dir1}', True
                elif 'turn-right' in plan:
                    action = f'turn-right at {new_position}'
                    dir1 = plan.split(' ')[1]
                    dir2 = plan.split(' ')[2]
                    if turn_right[dir1] != dir2:
                        return f'turn-right from {dir1} to {dir2} is not valid, turn-right can only turn 90 degrees and {dir2} is not a valid turn-right direction from {dir1}', True
                elif 'move' in plan:
                    from_pos = plan.split(' ')[1]
                    to_pos = plan.split(' ')[2]
                    action = f'move from {from_pos} to {to_pos}'
                    new_position = to_pos
                elif 'open' in plan:
                    to_pos = plan.split(' ')[3]
                    action = f'open the object at {to_pos}'
                else:
                    to_pos = plan.split(' ')[3]
                    action = f'close the object at {to_pos}'                   
                action_list.append(action)
            action_list = [f"{i+1}: {item}" for i, item in enumerate(action_list)]
        elif 'printer' in img_path:
            nl = self.target_problem_nl
            new_position = nl.split('agent is at ')[1].split('.')[0].replace('(', '').replace(')', '')
            turn_left = {'left': 'down', 'down': 'right', 'right': 'up', 'up': 'left'}
            turn_right = {'left': 'up', 'up': 'right', 'right': 'down', 'down': 'left'}
            for plan in plans:
                if 'turn-left' in plan:
                    action = f'turn-left at {new_position}'
                    dir1 = plan.split(' ')[1]
                    dir2 = plan.split(' ')[2]
                    if turn_left[dir1] != dir2:
                        return f'turn-left from [{dir1}] to [{dir2}] is not valid. Agent can only turn-left from up to left, from left to down, from down to right, and from right to up. Since the agent current orientation is [{dir1}], the only valid orientation for turn-left action is [{turn_left[dir1]}]', True
                elif 'turn-right' in plan:
                    action = f'turn-right at {new_position}'
                    dir1 = plan.split(' ')[1]
                    dir2 = plan.split(' ')[2]
                    if turn_right[dir1] != dir2:
                        return f'turn-right from [{dir1}] to [{dir2}] is not valid. Agent can only turn-right from up to right, from right to down, from down to left, and from left to up. Since the agent current orientation is [{dir1}], the only valid orientation for turn-right action is [{turn_right[dir1]}]', True
                elif 'move' in plan:
                    from_pos = plan.split(' ')[1]
                    to_pos = plan.split(' ')[2]
                    action = f'move from {from_pos} to {to_pos}'
                    new_position = to_pos
                elif 'pick' in plan:
                    to_pos = plan.split(' ')[2]
                    action = f'pick-up the object at {to_pos}'
                elif 'drop' in plan:
                    to_pos = plan.split(' ')[2]
                    action = f'drop-down the object at {to_pos}'
                elif 'toggle-on' in plan:
                    to_pos = plan.split(' ')[2]
                    action = f'toggle-on the object at {to_pos}'
                else:
                    to_pos = plan.split(' ')[2]
                    action = f'toggle-off the object at {to_pos}'                   
                action_list.append(action)
            action_list = [f"{i+1}: {item}" for i, item in enumerate(action_list)]
        elif 'overcooked' in img_path:
            for plan in plans:
                if 'move' in plan:
                    agent = plan.split(' ')[1]
                    from_pos = plan.split(' ')[2]
                    to_pos = plan.split(' ')[3]
                    direction = plan.split(' ')[4]
                    action = f'{agent} move {direction} from {from_pos} to {to_pos}'
                elif 'pick-ingredient' in plan:
                    agent = plan.split(' ')[1]
                    ingredient = plan.split(' ')[2] #fresh chopped ???
                    to_pos = plan.split(' ')[4]
                    action = f'{agent} pick {ingredient} at {to_pos}'
                elif 'pick-plate' in plan:
                    agent = plan.split(' ')[1]
                    to_pos = plan.split(' ')[4]
                    action = f'{agent} pick Plate at {to_pos}'
                elif 'drop-ingredient' in plan:
                    agent = plan.split(' ')[1]
                    ingredient = plan.split(' ')[2] #fresh chopped ???
                    to_pos = plan.split(' ')[4]
                    action = f'{agent} drop {ingredient} at {to_pos}'
                elif 'drop-plate' in plan:
                    agent = plan.split(' ')[1]
                    to_pos = plan.split(' ')[4]
                    action = f'{agent} drop Plate at {to_pos}'
                elif 'chop' in plan:
                    agent = plan.split(' ')[1]
                    ingredient = plan.split(' ')[2] #fresh chopped ???
                    to_pos = plan.split(' ')[4]
                    action = f'{agent} chop {ingredient} at {to_pos}'
                elif 'merge-ingredient' in plan:
                    agent = plan.split(' ')[1]
                    ingredient1 = plan.split(' ')[2] #fresh chopped ???
                    ingredient2 = plan.split(' ')[3] #fresh chopped ???
                    to_pos = plan.split(' ')[5]
                    action = f'{agent} merge-ingredient {ingredient1} and {ingredient2} at {to_pos}'
                elif 'put-plate' in plan:
                    agent = plan.split(' ')[1]
                    ingredient1 = plan.split(' ')[2] #fresh chopped ???
                    ingredient2 = plan.split(' ')[3] #fresh chopped ???
                    to_pos = plan.split(' ')[5]
                    action = f'{agent} put-plate {ingredient1} and {ingredient2} into Plate at {to_pos}'
                else:
                    agent = plan.split(' ')[1]
                    to_pos = plan.split(' ')[6]
                    action = f'{agent} deliver {ingredient} at {to_pos}'            
                action_list.append(action)
            action_list = [f"{i+1}: {item}" for i, item in enumerate(action_list)]
        
        data['conversations'][0]['value'] = data['conversations'][0]['value'].split('Action Sequence:\n')[0] + "Action Sequence:\n" + "\n".join(action_list)        
        if self.gpt_client is not None and self.gpt_client != 'vila-u':
            response = self.run_response_api(data['conversations'][0]['value'], img_path)
        elif self.gpt_client == 'vila-u':
            response = self.run_response_vila_u(data['conversations'][0]['value'], img_path)
        else:
            messages = message_construct_llama_func([data['conversations'][0]['value']], [])                    
            response = self.run_response_image(messages, [img_path])
        reasonings = response.split('Goal reaching')[0]
        # reasoning_list = reasonings.split('----------------------------------------------\n')
        reasoning_list = re.split(r'(?=Step \d+ -)', reasonings)[1:]
        
        reasoning_list = [s.strip() for s in reasoning_list if s.strip()]
        if 'Unsuccessful' in reasonings:
            terminate = True
        else:
            terminate = False
        # pdb.set_trace()
             
        save_path = img_path.replace('data_my', 'saves_my').split('.png')[0]
        if not os.path.exists(save_path):
            os.makedirs(save_path)
            os.makedirs(os.path.join(save_path, 'instruction'))
            os.makedirs(os.path.join(save_path, 'full_response'))
        with open(os.path.join(save_path, f'instruction/{turn}_{i}_check.txt'), "w") as file:
            file.write(data["conversations"][0]['value'])
        with open(os.path.join(save_path, f'full_response/{turn}_{i}_check.txt'), "w") as file:
            file.write(response)
        
        torch.cuda.empty_cache()
        del response
        gc.collect()
        
        return reasoning_list, terminate


    def _get_random_walk_feedback(self, domain_gen_pddl: str, img_path: str, turn=int, first_action=None):
        """Look for a random walk that is not executable and describe the failure.

        Up to 10 walks are tried. When ``_is_target_to_gen_turn(i)`` is true the
        walk is produced by the observation helpers (``get_next_obs_seq``,
        ``get_next_obs`` or ``get_next_obs_text``) and replayed on the generated
        PDDL through ``self.env.get_plan_execution_feedback``. Otherwise the walk
        is taken from ``first_action`` or sampled from the generated PDDL and is
        checked with the ``check_executable*`` helpers. The search stops at the
        first walk that is not executable; ``max_steps`` starts at 5 and grows
        by 2 after every fifth try.

        Args:
            domain_gen_pddl: Generated domain PDDL under test.
            img_path: Path of the problem image; the text-mode JSON path is
                derived from it by replacing ``.png`` with ``_text.json``.
            turn: Value forwarded to the observation/check helpers.
                NOTE(review): the default is the ``int`` type itself, which
                looks like a typo for an annotation; it is kept unchanged so
                existing callers are unaffected.
            first_action: Optional plan text. When given, parentheses are
                stripped, everything from the first ``"\\n;"`` on is dropped
                and the remaining lines are used as the walk instead of
                sampling one.

        Returns:
            The direction-specific error prefix followed by the feedback of the
            first non-executable walk; ``error_messages.NO_EXECUTABLE_INITIAL_ACTION``
            when the walk from the generated PDDL is empty; ``None`` when every
            tried walk was executable.
        """
        self.chat_model = ChatModel(self.vlm_args)
        max_steps = 5
        max_random_walk_tries = 10
        for i in range(1, max_random_walk_tries + 1):
            print('_get_random_walk_feedback', i)
            target_to_gen_turn = self._is_target_to_gen_turn(i)
            # Always bound, so the failure path below can never hit a NameError.
            exec_feedback = None
            if target_to_gen_turn:
                # Walk comes from the observation helpers; replay it on the generated PDDL.
                cur_step = 0
                random_walk_plan = []
                state_descs = []
                if self.multimodal:
                    if self.sequence:
                        state_descs, random_walk_plan, terminate = self.get_next_obs_seq(i, img_path, max_steps=max_steps, turn=turn)
                    else:
                        prev_tokens = None
                        feedback = None
                        position = None
                        while cur_step < max_steps:
                            _img, prev_tokens, feedback, action, position, terminate = self.get_next_obs(i, cur_step, img_path, prev_tokens=prev_tokens, prev_feedback = feedback, position = position, turn=turn)
                            if terminate:
                                break
                            random_walk_plan.append(action)
                            state_descs.append(feedback)
                            cur_step += 1
                else:
                    json_path = img_path.replace('.png', '_text.json')
                    prev_obs = None
                    feedback = None
                    position = None
                    while cur_step < max_steps:
                        prev_obs, feedback, action, position, terminate = self.get_next_obs_text(i, cur_step, json_path, prev_obs=prev_obs, prev_feedback = feedback, position = position, turn=turn)
                        if terminate:
                            break
                        random_walk_plan.append(action)
                        state_descs.append(feedback)
                        cur_step += 1

                is_executable, exec_feedback = self.env.get_plan_execution_feedback(
                    domain_gen_pddl, self.target_gen_problem_pddl, random_walk_plan,
                    state_descs, predicate_descriptor_fn=None
                )
                error_prefix = error_messages.RANDOM_WALK_TARGET_TO_GEN_DESC
            else:
                # Walk comes from the generated PDDL (or the caller); check it with the helpers.
                if first_action is not None:
                    random_walk_plan = first_action.replace('(', '').replace(')', '').split('\n;')[0].split('\n')
                else:
                    random_walk_plan, _ = self.env.get_random_walk_plan(
                        domain_gen_pddl, self.target_gen_problem_pddl,
                        predicate_descriptor_fn=None, max_steps=max_steps
                    )
                if len(random_walk_plan) == 0:
                    exec_feedback = error_messages.NO_EXECUTABLE_INITIAL_ACTION
                    logging.info(exec_feedback)
                    return exec_feedback
                is_executable = True
                prev_tokens = None
                prev_obs = None
                if self.sequence:
                    state_descs, terminate = self.check_executable_seq(i, img_path, plans=random_walk_plan, turn=turn)
                    if terminate:
                        is_executable = False
                        if isinstance(state_descs, str):
                            # The helper may hand back one message string instead of a
                            # list (the method ending just above this one does so for
                            # invalid turn actions; presumably that is
                            # check_executable_seq). Looping over a str would scan
                            # single characters and never match.
                            exec_feedback = state_descs
                        else:
                            exec_feedback = next(
                                (desc for desc in state_descs if 'Unsuccessful' in desc),
                                None,
                            )
                            if exec_feedback is None:
                                # No single step carries the marker: report everything we got.
                                exec_feedback = "\n".join(state_descs)
                else:
                    for step, plan in enumerate(random_walk_plan):
                        if self.multimodal:
                            _img, prev_tokens, feedback, terminate = self.check_executable(i, step, img_path, plan, prev_tokens=prev_tokens, turn=turn)
                        else:
                            json_path = img_path.replace('.png', '_text.json')
                            prev_obs, feedback, terminate = self.check_executable_text(i, step, json_path, plan, prev_obs=prev_obs, turn=turn)
                        if not terminate:
                            continue
                        # Build the message only for the failing step, so feedback
                        # without "because" on a successful step cannot raise.
                        is_executable = False
                        if self.multimodal:
                            pieces = feedback.split('because')
                            reason = pieces[1] if len(pieces) > 1 else feedback
                            exec_feedback = (f"Error when executing the action ({plan}).\n"
                                    f"This action is not executable on the environment because {reason}.")
                        else:
                            exec_feedback = (f"Error when executing the action ({plan}).\n"
                                    f"{feedback}.")
                        break
                error_prefix = error_messages.RANDOM_WALK_GEN_TO_TARGET_DESC
            if is_executable:
                logging.info(
                    f"Found a random walk (target to gen turn {target_to_gen_turn}) plan with length {len(random_walk_plan)} and is executable. Skipping this plan."
                )
            else:
                logging.info(
                    f"Found a random walk (target to gen turn {target_to_gen_turn}) plan with length {len(random_walk_plan)} and is not executable. Using this plan."
                )
                return error_prefix + str(exec_feedback)
            if i % 5 == 0:
                max_steps += 2
                logging.info(
                    f"Could not find an invalid random walk plan with {max_steps - 2} steps. Increasing the steps to {max_steps}."
                )
        logging.warning("All random walks are executable (probably a dead loop).")
        return None

    def evaluate_generated_domain_with_random_walks(
            self, domain_gen_pddl: str, img_path: str, turn: int, best_rating=None
    ):
        """Rate the generated domain by how many random walks are executable.

        Two walks are run. When ``_is_target_to_gen_turn(i)`` is true the walk is
        produced by the observation helpers and replayed on the generated PDDL
        via ``self.env.get_plan_execution_feedback``; otherwise it is sampled
        from the generated PDDL and checked with the ``check_executable*``
        helpers.

        Args:
            domain_gen_pddl: Generated domain PDDL under test.
            img_path: Path of the problem image; the text-mode JSON path is
                derived from it by replacing ``.png`` with ``_text.json``.
            turn: Value forwarded to the observation/check helpers.
            best_rating: Optional score to beat. After each walk an optimistic
                bound on the final score is computed and, if it is below this
                value, evaluation stops early.

        Returns:
            A 3-tuple ``(score, target_to_gen_ratio, gen_to_target_ratio)``:
            * ``(PlanRatings.INVALID_DOMAIN, 0, 0)`` if ``search_plan`` reports
              the domain as invalid;
            * ``(PlanRatings.NO_PLAN, 0, 0)`` if a walk sampled from the
              generated PDDL is empty;
            * three ``-inf`` values on early stop;
            * otherwise the harmonic mean of the two executable ratios followed
              by the ratios themselves. A direction that ran no walks reports
              0.0, and the score is then the ratio of the other direction.
        """
        self.chat_model = ChatModel(self.vlm_args)
        _gen_plan, is_domain_valid, _error_msg = self.env.search_plan(domain_gen_pddl, self.target_gen_problem_pddl)
        if not is_domain_valid:
            return PlanRatings.INVALID_DOMAIN, 0, 0
        t_to_gen_exec, gen_to_t_exec = 0, 0
        t_to_gen_all, gen_to_t_all = 0, 0
        for i in tqdm(range(2)):  # 2 walks in total
            print('evaluate_generated_domain_with_random_walks', i)
            if self._is_target_to_gen_turn(i):
                max_steps = (t_to_gen_all % 10) + 1
                cur_step = 0
                random_walk_plan = []
                state_descs = []
                if self.multimodal:
                    if self.sequence:
                        state_descs, random_walk_plan, terminate = self.get_next_obs_seq(i, img_path, max_steps=max_steps, turn=turn)
                    else:
                        prev_tokens = None
                        feedback = None
                        position = None
                        while cur_step < max_steps:
                            _img, prev_tokens, feedback, action, position, terminate = self.get_next_obs(i, cur_step, img_path, prev_tokens=prev_tokens, prev_feedback = feedback, position = position, turn=turn)
                            if terminate:
                                break
                            random_walk_plan.append(action)
                            state_descs.append(feedback)
                            cur_step += 1
                else:
                    json_path = img_path.replace('.png', '_text.json')
                    prev_obs = None
                    feedback = None
                    position = None
                    while cur_step < max_steps:
                        prev_obs, feedback, action, position, terminate = self.get_next_obs_text(i, cur_step, json_path, prev_obs=prev_obs, prev_feedback = feedback, position = position, turn=turn)
                        if terminate:
                            break
                        random_walk_plan.append(action)
                        state_descs.append(feedback)
                        cur_step += 1
                # Empty plan should not exist!
                is_executable, _ = self.env.get_plan_execution_feedback(
                    domain_gen_pddl, self.target_gen_problem_pddl, random_walk_plan,
                    state_descs, predicate_descriptor_fn=None
                )
                print('1', terminate, is_executable)
                t_to_gen_all += 1
                t_to_gen_exec += (is_executable is True)
            else:
                max_steps = (gen_to_t_all % 10) + 1
                random_walk_plan, _ = self.env.get_random_walk_plan(
                    domain_gen_pddl, self.target_gen_problem_pddl,
                    predicate_descriptor_fn=None, max_steps=max_steps
                )
                if len(random_walk_plan) == 0:
                    return PlanRatings.NO_PLAN, 0, 0
                prev_tokens = None
                prev_obs = None
                is_executable = True
                if self.sequence:
                    _state_descs, terminate = self.check_executable_seq(i, img_path, plans=random_walk_plan, turn=turn)
                    # A terminated sequence check means the walk failed; without
                    # this every sequence-mode walk would count as executable.
                    is_executable = not terminate
                else:
                    for step, plan in enumerate(random_walk_plan):
                        if self.multimodal:
                            _img, prev_tokens, feedback, terminate = self.check_executable(i, step, img_path, plan, prev_tokens=prev_tokens, turn=turn)
                        else:
                            json_path = img_path.replace('.png', '_text.json')
                            prev_obs, feedback, terminate = self.check_executable_text(i, step, json_path, plan, prev_obs=prev_obs, turn=turn)
                        if terminate:
                            # The walk already failed; checking later steps cannot
                            # change the verdict.
                            is_executable = False
                            break
                print('2', terminate, is_executable)
                gen_to_t_all += 1
                gen_to_t_exec += (is_executable is True)
            if best_rating is not None:
                # Optimistic bound: assume every remaining walk succeeds.
                # NOTE(review): the constant 5 assumes 5 walks per direction while
                # the loop above runs 2 walks in total, so the bound is looser
                # than it could be. Confirm the intended walk count.
                torun2 = 5 - (i // 2)
                torun1 = torun2 - 1 if i % 2 == 0 else torun2
                total_avg_2 = harmonic_mean((t_to_gen_exec + torun1) / 5, (gen_to_t_exec + torun2) / 5)
                if total_avg_2 < best_rating:
                    print('stopping')
                    return float('-inf'), float('-inf'), float('-inf')
        # A direction is never exercised when bi_rw_feedback is off; guard the
        # divisions instead of raising ZeroDivisionError.
        t_to_gen_ratio = t_to_gen_exec / t_to_gen_all if t_to_gen_all else 0.0
        gen_to_t_ratio = gen_to_t_exec / gen_to_t_all if gen_to_t_all else 0.0
        if t_to_gen_all and gen_to_t_all:
            total_avg = harmonic_mean(t_to_gen_ratio, gen_to_t_ratio)
        else:
            total_avg = gen_to_t_ratio if gen_to_t_all else t_to_gen_ratio
        return total_avg, t_to_gen_ratio, gen_to_t_ratio

    def _is_target_to_gen_turn(self, idx):  # Generate random walk based on target, test on generated pddl
        if not self.bi_rw_feedback:
            return False
        return idx % 2 == 0
