import copy
import pickle
from typing import Union
import json
import os
import random
from collections import defaultdict
import openai
from tqdm import tqdm
from jinja2 import Template
from appworld.task import Task
from openai import OpenAI
import time
from appworld import AppWorld, load_task_ids
import time
from typing import Dict, Any, List
import freezegun
import numpy as np
from sentence_transformers import SentenceTransformer
from scipy.cluster.hierarchy import linkage, dendrogram
import matplotlib.pyplot as plt
from datetime import datetime
freezegun.config.configure(extend_ignore_list=["transformers"])
import re
import ast
import textwrap
import subprocess
import signal
import random
import psutil
import shutil
import threading
from prompts import *
import socket
import logging
from typing import Optional
from concurrent.futures import ProcessPoolExecutor, as_completed


# Endpoint and (empty) key for the LLM server. They are exported at import time,
# before any OpenAI client is built further down in this module.
# NOTE(review): presumably these are read by the OpenAI SDK when a client is
# constructed without explicit arguments -- confirm.
os.environ["OPENAI_BASE_URL"] = "http://localhost:6000/v1"
os.environ["OPENAI_API_KEY"] = ""

# NOTE(review): presumably the root directory AppWorld uses for its data and
# experiment outputs -- confirm against the AppWorld documentation.
os.environ['APPWORLD_ROOT'] = 'metaflow_neurips'



# Only warnings and errors are shown by default.
logging.basicConfig(level=logging.WARNING)  # switch to logging.INFO for debugging
logger = logging.getLogger(__name__)


def get_free_port() -> int:
    """Return a TCP port number that the OS reports as currently unused.

    A throw-away socket is bound to port 0 so the operating system picks the
    port; the socket is closed again before the number is returned, so the
    port is not reserved for the caller.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    with probe:
        probe.bind(('', 0))
        _host, chosen_port = probe.getsockname()
        return chosen_port


def is_port_available_v2(port: int, timeout: float = 30.0, check_used: bool = False) -> bool:
    """Poll ``127.0.0.1:port`` until a probe succeeds or ``timeout`` elapses.

    NOTE(review): despite the name, the default mode reports whether something
    is *listening* on the port, not whether the port is free.

    Args:
        port: TCP port on the loopback interface to probe.
        timeout: Maximum number of seconds to keep retrying. A value <= 0
            returns ``False`` without probing.
        check_used: When ``False`` (default) a probe succeeds once a TCP
            connection to the port is established. When ``True`` a probe
            succeeds once the port can be bound and listened on.

    Returns:
        ``True`` as soon as one probe succeeds; ``False`` when the timeout
        expires or an unexpected error occurs while probing.
    """
    began_at = time.time()
    pause = 0.1
    pause_cap = 2.0

    while time.time() - began_at < timeout:
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
                probe.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
                probe.settimeout(1)

                if not check_used:
                    try:
                        probe.connect(('127.0.0.1', port))
                    except (socket.timeout, ConnectionRefusedError):
                        # Nothing is accepting connections yet: retry below.
                        pass
                    else:
                        return True
                else:
                    try:
                        probe.bind(('127.0.0.1', port))
                        probe.listen(1)
                    except Exception:
                        # Bind/listen failed this round: retry below.
                        pass
                    else:
                        return True

        except Exception:
            # Any other failure (socket creation, unexpected connect error)
            # ends the polling immediately.
            return False

        # Back off between probes, growing the pause by 1.5x up to the cap.
        time.sleep(pause)
        pause = min(pause * 1.5, pause_cap)

    return False

def start_appworld_service(port):
    """Launch an AppWorld environment server on ``port`` and wait for it to accept connections.

    The server is started through ``/bin/bash -c`` (so ``~`` is expanded) in
    its own session, which lets ``stop_appworld_service`` signal the whole
    process group later.

    Args:
        port: TCP port passed to ``appworld serve environment --port``.

    Returns:
        The running ``subprocess.Popen`` object once a TCP connection to the
        port succeeds, or ``None`` if the process exits early, the port does
        not come up within the startup timeout, or launching fails.
    """
    command = f"~/miniconda3/envs/appworld-server/bin/appworld serve environment --port {port}"
    startup_timeout = 60  # total seconds allowed for the server to come up
    probe_timeout = 1.0   # seconds spent on one connectivity probe round

    process = None
    try:
        process = subprocess.Popen(
            ["/bin/bash", "-c", command],
            # Same effect as preexec_fn=os.setsid, without running Python code
            # between fork and exec.
            start_new_session=True,
            stdout=subprocess.DEVNULL,
            # NOTE(review): stderr is only read if the process exits early; a
            # very chatty server could fill the pipe -- consider a log file.
            stderr=subprocess.PIPE,
            text=True,
        )

        start_time = time.time()
        while time.time() - start_time < startup_timeout:
            if process.poll() is not None:  # Process has exited
                stderr_output = process.stderr.read()
                logger.error(f"Process exited early with return code: {process.returncode}, error message: {stderr_output}")
                stop_appworld_service(process)
                return None

            # Keep each probe short so an early exit of the server is noticed
            # promptly and the overall startup timeout is respected.
            if is_port_available_v2(port, timeout=probe_timeout):
                logger.info(f"Successfully started appworld on port {port}")
                return process
            time.sleep(0.1)

        logger.error(f"Port {port} startup timed out")
        stop_appworld_service(process)
        return None

    except Exception as e:
        logger.error(f"Startup failed: {str(e)}")
        # Do not leave a half-started server behind.
        stop_appworld_service(process)
        return None


def stop_appworld_service(process):
    """Terminate a server started by ``start_appworld_service``.

    The whole process group is sent ``SIGTERM``; if the process has not exited
    after 5 seconds the group is sent ``SIGKILL``. Nothing happens when
    ``process`` is ``None`` or has already exited.

    Args:
        process: ``subprocess.Popen`` object to stop, or ``None``.

    Errors other than the process having vanished are logged, not raised.
    """
    if process is None:
        return
    try:
        if process.poll() is not None:
            return
        pgid = os.getpgid(process.pid)
        os.killpg(pgid, signal.SIGTERM)
        try:
            process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            # Graceful shutdown took too long: force-kill the group. This sits
            # inside the outer try so a process that disappears in the
            # meantime is handled like any other vanished process.
            os.killpg(pgid, signal.SIGKILL)
            process.wait()
    except ProcessLookupError:
        # The process (group) is already gone.
        pass
    except Exception as e:
        logger.error(f"Error in stop process: {e}")



# Shared client used when call_llm is invoked without an explicit client.
_DEFAULT_LLM_CLIENT = None


def _get_default_llm_client():
    """Create the shared OpenAI client on first use and reuse it afterwards."""
    global _DEFAULT_LLM_CLIENT
    if _DEFAULT_LLM_CLIENT is None:
        _DEFAULT_LLM_CLIENT = OpenAI()
    return _DEFAULT_LLM_CLIENT


def call_llm(messages: list[dict], client=None, model='gpt-4o-mini', temperature=0.0, max_tokens=14192) -> str:
    """
    Call an LLM with a history of messages and return the response.

    Args:
        messages: Chat messages (dicts with ``role`` and ``content``).
        client: Object exposing ``chat.completions.create``. When ``None`` a
            module-wide ``OpenAI()`` client is created lazily, so importing
            this module no longer constructs a client as a side effect.
        model: Model name forwarded to the API.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.

    Returns:
        The text of the first choice, or ``""`` when the response has no
        choices or the first choice carries no text content.
    """
    if client is None:
        client = _get_default_llm_client()

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
        top_p=1.0,
        n=1,
        frequency_penalty=0.0,
        presence_penalty=0.0,
        logit_bias={},
        max_tokens=max_tokens,
        # seed=123
    )
    if not response.choices:
        return ""
    # The content field may be None; callers expect a string.
    return response.choices[0].message.content or ""

class MinimalReactAgent:
    """A minimal ReAct Agent for AppWorld tasks.

    On construction the agent renders the instruction prompt for one sub-task
    and stores the resulting chat messages in ``self.history``. Every call to
    ``next_code_block`` sends that transcript to the LLM and returns the code
    it proposes. Each exchange is also appended, as ``[role, text]`` pairs, to
    the ``interaction_history`` list supplied by the caller (mutated in place).
    """

    # Role markers that separate the messages inside a rendered prompt.
    _ROLE_MARKER = re.compile("(USER|ASSISTANT|SYSTEM):\n")
    # First ```python fenced block inside an LLM reply.
    _CODE_FENCE = re.compile(r'```python\s*(.*?)```', re.DOTALL)

    def __init__(self, sub_task: Dict, task: Task, interaction_history: List, future:List, model_name: str, is_final:bool, exec_temperature:float,  ICL_examples:str=""):
        """Store the configuration and build the initial prompt.

        Args:
            sub_task: Sub-task description rendered into the prompt as
                ``subtask_instruction``.
            task: AppWorld task; its instruction, API docs and ground truth
                are used to build the prompt.
            interaction_history: List of ``[role, text]`` pairs rendered into
                the prompt and extended by ``next_code_block``.
            future: Remaining workflow steps; stored but not used by this class.
            model_name: Model name passed to ``call_llm``.
            is_final: Whether this is the last sub-task of the workflow.
            exec_temperature: Sampling temperature passed to ``call_llm``.
            ICL_examples: Optional example text appended to the last message.
        """
        self.ICL_examples = ICL_examples
        self.task = task
        self.future = future
        self.is_final = is_final
        self.sub_task = sub_task
        self.interaction_history = interaction_history
        self.history: list[dict] = []
        self.model_name = model_name
        self.exec_temperature = exec_temperature
        self.generate_initial_prompt()

    @classmethod
    def _split_prompt_into_messages(cls, prompt: str) -> list[dict]:
        """Split a rendered prompt into OpenAI-style ``{"role", "content"}`` messages.

        Raises:
            ValueError: If the prompt does not start with a role marker or
                contains no role marker at all.
        """
        messages: list[dict] = []
        last_start = 0
        for match in cls._ROLE_MARKER.finditer(prompt):
            last_end = match.span()[0]
            if not messages:
                if last_end != 0:
                    raise ValueError(
                        f"Start of the prompt has no assigned role: {prompt[:last_end]}"
                    )
            else:
                messages[-1]["content"] = prompt[last_start:last_end]
            messages.append({"role": match.group(1).lower(), "content": None})
            last_start = match.span()[1]
        if not messages:
            # Without this check the failure would be an opaque IndexError.
            raise ValueError("Prompt contains no USER/ASSISTANT/SYSTEM role marker.")
        messages[-1]["content"] = prompt[last_start:]
        return messages

    def generate_initial_prompt(self) -> list[dict]:
        """Render the instruction prompt and store it as the chat history.

        Returns:
            The list of chat messages, which is also assigned to ``self.history``.
        """
        api_documentation_string = build_api_docs_for_a_task(self.task, is_final=self.is_final)
        required_apis = self.task.ground_truth.required_apis
        # `== True` (not plain truthiness) is kept so the template choice stays
        # in lockstep with build_api_docs_for_a_task, which uses the same test.
        if self.is_final == True:
            prompt_template = INSTRUCTION_PROMPT_TEMPLATE_FINAL
        else:
            # Only the final sub-task is shown the task-completion API.
            required_apis = required_apis - {'supervisor.complete_task'}
            prompt_template = INSTRUCTION_PROMPT_TEMPLATE_NoFINAL

        prompt = render_template(
            prompt_template,
            api_documentation_string=api_documentation_string,
            required_apis=required_apis,
            available_imports=SAID_AVAILABLE_IMPORTS,
            skip_fields=["instruction", "solution_code_body"],
            history=self.interaction_history,
            task_instruction=self.task.instruction,
            subtask_instruction=self.sub_task,
        )

        # Extract the OpenAI JSON formatted messages from the prompt
        messages = self._split_prompt_into_messages(prompt)

        if self.ICL_examples:
            # NOTE(review): the trailing ``` has no matching opening fence in
            # this string -- confirm whether ICL_examples is expected to open one.
            messages[-1]['content'] += f"\n\nHere are some similar tasks and their solution codes for your reference.\n{self.ICL_examples}\n```"

        self.history = messages
        return messages

    def next_code_block(self, last_execution_output: str | None = None) -> str:
        """
        Asks Agent to generate next code block given last_execution_output and history.

        Args:
            last_execution_output: Output of the previously executed code; when
                not ``None`` it is recorded as a user message first.

        Returns:
            The contents of the first ```python block of the reply, or the
            whole reply when it contains no such block.

        Raises:
            Exception: If the LLM call fails five times in a row; the last
                underlying error is attached as the cause.
        """
        # Add the last execution output as the user response to the history
        if last_execution_output is not None:
            self.interaction_history.append(["user",  last_execution_output])
            self.history.append({"role": "user", "content": last_execution_output})

        # Get the next code block based on the history, retrying failed calls.
        last_error = None
        for _ in range(5):
            try:
                vanilla_code = call_llm(self.history, model=self.model_name, temperature=self.exec_temperature)
                break
            except Exception as e:
                last_error = e
                print(f'ReAct Agent Error in {self.history}', e)
        else:
            raise Exception('React Agent Error for 5 times.') from last_error

        # Guard against a None reply (call_llm passes message.content through).
        code = vanilla_code or ""
        matched = self._CODE_FENCE.search(code)
        if matched:
            code = matched.group(1).strip()

        self.interaction_history.append(["assistant", code])
        self.history.append({"role": "assistant", "content": code})
        return code




def render_template(
    template: str,
    allow_missing: bool = False,
    skip_fields: list[str] | None = None,
    **kwargs: dict,
) -> str:
    from jinja2 import Template

    # .format and f-strings are quite different. .format doesn't support computation
    # in the curly braces. f-strings do, but f-string's evaluation cannot be delayed.
    # so for that i use Template(...). Template uses {{}} and format uses {}.
    skip_fields = skip_fields or []
    if skip_fields and allow_missing:
        raise ValueError("skip_fields and allow_missing should be used together.")
    for field in skip_fields:
        if field in kwargs:
            raise ValueError(
                f"skip_fields cannot contain fields that are in kwargs. Found: {field}"
            )
        kwargs[field] = ""
    result = Template(template).render(**kwargs)
    if not allow_missing:
        result = result.format(**kwargs)
        result = result.replace("__CURLY_OPEN__", "{").replace("__CURLY_CLOSE__", "}")
        return result
    kwargs_ = defaultdict(lambda: "", kwargs)
    result = result.format_map(kwargs_)
    result = result.replace("__CURLY_OPEN__", "{").replace("__CURLY_CLOSE__", "}")
    return result




def build_api_docs_for_a_task(task, is_final):
    """Build the API documentation string for the apps and APIs a task requires.

    The ``path`` and ``method`` fields are removed from the task's API docs.
    Unless ``is_final`` equals ``True``, ``supervisor.complete_task`` is
    dropped from the required APIs before the docs are built.

    Args:
        task: Task object providing ``api_docs`` and ``ground_truth``.
        is_final: Whether the docs are for the last sub-task of a workflow.

    Returns:
        Whatever ``build_api_docs`` returns for the selected apps and APIs.
    """
    # Response schemas are intentionally left uncompressed (no
    # compress_parameters call): failure schemas can be useful to the agent.
    trimmed_docs = task.api_docs.remove_fields(["path", "method"])

    required_apps = task.ground_truth.required_apps
    all_required_apis = task.ground_truth.required_apis
    required_apis = (
        all_required_apis
        if is_final == True
        else all_required_apis - {'supervisor.complete_task'}
    )

    return build_api_docs(required_apps, required_apis, trimmed_docs)


class FullCodeReflexionAgent:
    """Agent that writes complete code for a sub-task and revises it from execution feedback.

    The first call to ``next_code_block`` (``step_index == 0``) builds the
    initial prompt; later calls append the executor output plus a reflection
    instruction and ask the LLM for a revised solution. Depending on ``mode``
    the initial prompt is extended with the ground-truth solution
    (``"privileged_FullCodeRefl"``) or with an LLM-written plan derived from
    it (``"semi_privileged_FullCodeRefl"``).
    """

    # Role markers that separate the messages inside a rendered prompt.
    _ROLE_MARKER = re.compile("(USER|ASSISTANT|SYSTEM):\n")
    # First ```python fenced block inside an LLM reply.
    _CODE_FENCE = re.compile(r'```python\s*(.*?)```', re.DOTALL)

    def __init__(self, sub_task: Dict, task: Task, interaction_history: List, future:List, model_name: str, is_final:bool, mode:str, max_num_retrials:int=5, exec_temperature:float=0, ICL_examples:str=""):
        """Store the configuration; the prompt is built lazily by ``next_code_block``.

        Args:
            sub_task: Sub-task description rendered into the prompt.
            task: AppWorld task providing instruction, API docs and ground truth.
            interaction_history: History rendered into the prompt.
            future: Remaining workflow steps; stored but not used by this class.
            model_name: Model name passed to ``call_llm``.
            is_final: Whether this is the last sub-task of the workflow.
            mode: Agent mode; selects which extra information is appended to
                the initial prompt.
            max_num_retrials: Retry budget for the sub-task; stored for callers.
            exec_temperature: Sampling temperature passed to ``call_llm``.
            ICL_examples: Optional example text appended to the last message.
        """
        self.task = task
        self.future = future
        self.is_final = is_final
        self.sub_task = sub_task
        self.interaction_history = interaction_history
        self.model_name = model_name
        self.history = []

        # Ground-truth solution without the task-completion call.
        self.golden_solution = self.remove_line(self.task.ground_truth.compiled_solution_code_body)
        self.mode = mode

        # max_interactions for sub-goal
        self.max_num_retrials = max_num_retrials
        self.exec_temperature = exec_temperature
        self.ICL_examples = ICL_examples

    def remove_line(self, code: str) -> str:
        """Return ``code`` without the lines that mention ``apis.supervisor.complete_task``."""
        return '\n'.join(
            line for line in code.split('\n')
            if 'apis.supervisor.complete_task' not in line
        )

    @classmethod
    def _split_prompt_into_messages(cls, prompt: str) -> list[dict]:
        """Split a rendered prompt into OpenAI-style ``{"role", "content"}`` messages.

        Raises:
            ValueError: If the prompt does not start with a role marker or
                contains no role marker at all.
        """
        messages: list[dict] = []
        last_start = 0
        for match in cls._ROLE_MARKER.finditer(prompt):
            last_end = match.span()[0]
            if not messages:
                if last_end != 0:
                    raise ValueError(
                        f"Start of the prompt has no assigned role: {prompt[:last_end]}"
                    )
            else:
                messages[-1]["content"] = prompt[last_start:last_end]
            messages.append({"role": match.group(1).lower(), "content": None})
            last_start = match.span()[1]
        if not messages:
            # Without this check the failure would be an opaque IndexError.
            raise ValueError("Prompt contains no USER/ASSISTANT/SYSTEM role marker.")
        messages[-1]["content"] = prompt[last_start:]
        return messages

    def _call_llm_with_retries(self, messages: list[dict], label: str, context) -> str:
        """Call the LLM up to five times and return the first successful reply.

        Args:
            messages: Chat messages to send.
            label: Prefix used in the printed and raised error messages.
            context: Object printed alongside each failure.

        Raises:
            Exception: After five failed attempts; the last underlying error
                is attached as the cause.
        """
        last_error = None
        for _ in range(5):
            try:
                return call_llm(messages, model=self.model_name, temperature=self.exec_temperature)
            except Exception as e:
                last_error = e
                print(f'{label} Error in {context}', e)
        raise Exception(f'{label} Error for 5 times.') from last_error

    def generate_initial_prompt(self) -> list[dict]:
        """Render the initial prompt and store it as the chat history.

        Returns:
            The list of chat messages, which is also assigned to ``self.history``.
        """
        api_documentation_string = build_api_docs_for_a_task(self.task, is_final=self.is_final)
        required_apis = self.task.ground_truth.required_apis
        # `== True` (not plain truthiness) is kept so the template choice stays
        # in lockstep with build_api_docs_for_a_task, which uses the same test.
        if self.is_final == True:
            prompt_template = FullCodePrompt_FINAL
        else:
            # Only the final sub-task is shown the task-completion API.
            required_apis = required_apis - {'supervisor.complete_task'}
            prompt_template = FullCodePrompt_NoFINAL

        prompt = render_template(
            prompt_template,
            api_documentation_string=api_documentation_string,
            required_apis=required_apis,
            available_imports=SAID_AVAILABLE_IMPORTS,
            skip_fields=["instruction", "solution_code_body"],
            history=self.interaction_history,
            task_instruction=self.task.instruction,
            subtask_instruction=self.sub_task,
        )

        # Extract the OpenAI JSON formatted messages from the prompt
        messages = self._split_prompt_into_messages(prompt)

        if self.mode == "privileged_FullCodeRefl":
            # NOTE(review): there is no newline between ```python and the
            # solution text -- confirm the solution starts with one.
            messages[-1]['content'] += f"\n\nThe code below is designed for the overall task, for your reference to complete the subtask.\n```python{self.golden_solution}\n```"
        elif self.mode == "semi_privileged_FullCodeRefl":
            # Ask the LLM to turn the ground-truth solution into a plan for
            # this sub-task and append that plan instead of the raw code.
            disclose_messages = [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user",
                 "content": Template(DISCLOSE_PROMPT_TEMPLATE.lstrip()).render({"total_task": self.task,
                                                                                  'solution': f"""```python{self.golden_solution}\n```""",
                                                                                  'subtask': self.sub_task})}
            ]
            priviledged_information = self._call_llm_with_retries(disclose_messages, 'Disclose', self.sub_task)
            messages[-1]['content'] += f"\n\nThe subtask plan is for your reference.\n{priviledged_information}\n"

        if self.ICL_examples:
            # NOTE(review): the trailing ``` has no matching opening fence in
            # this string -- confirm whether ICL_examples is expected to open one.
            messages[-1]['content'] += f"\n\nHere are some similar tasks and their solution codes for your reference.\n{self.ICL_examples}\n```"

        self.history = messages

        return messages

    def generate_reflexion_prompt(self, stacktrace:str) -> list[dict]:
        """Append the execution feedback plus the reflection instruction as a user message.

        Args:
            stacktrace: Output of the failed execution; ``None`` is treated as
                an empty string.

        Returns:
            ``self.history`` after the new message has been appended.
        """
        content = (stacktrace or "") + ReflextionPrompt
        self.history.append({"role": "user", "content": content})
        return self.history

    def next_code_block(self, step_index: int, executor_output: str | None = None) -> str:
        """Ask the LLM for the next (or revised) code for the sub-task.

        Args:
            step_index: ``0`` rebuilds the initial prompt; any other value
                appends a reflection message built from ``executor_output``.
            executor_output: Output of the previous execution.

        Returns:
            The contents of the first ```python block of the reply, or the
            whole reply when it contains no such block.

        Raises:
            Exception: If the LLM call fails five times in a row.
        """
        if step_index == 0:
            self.generate_initial_prompt()
        else:
            self.generate_reflexion_prompt(executor_output)

        vanilla_code = self._call_llm_with_retries(self.history, 'Reflexion Agent', self.history)

        # Guard against a None reply (call_llm passes message.content through).
        code = vanilla_code or ""
        matched = self._CODE_FENCE.search(code)
        if matched:
            code = matched.group(1).strip()

        self.history.append({"role": "assistant", "content": code})

        return code


def extract_function_body(code_str: str, function_name: str) -> str:
    """Return the dedented source of the body of a top-level function.

    Only functions defined at module level of ``code_str`` are searched
    (both ``def`` and ``async def``). The result spans from the first
    statement of the body to the end of the function; comments placed before
    the first statement are not included.

    Args:
        code_str: Python source code.
        function_name: Name of the function whose body is wanted.

    Returns:
        The body text with common leading indentation removed. For a function
        written on one line (``def f(): return 1``) only the part after the
        signature is returned.

    Raises:
        SyntaxError: If ``code_str`` cannot be parsed.
        ValueError: If no top-level function with that name exists (the
            message text is kept in its original language because callers may
            display or match it).
    """
    parsed_ast = ast.parse(code_str)

    for node in parsed_ast.body:
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) and node.name == function_name:
            break
    else:
        raise ValueError(f"函数 '{function_name}' 未找到。")

    first_stmt = node.body[0]
    body_lines = code_str.splitlines()[first_stmt.lineno - 1:node.end_lineno]

    # When the body starts on the same line as (the end of) the signature,
    # drop the signature part. col_offset counts UTF-8 bytes, hence the encode.
    first_line_bytes = body_lines[0].encode("utf-8")
    if first_line_bytes[:first_stmt.col_offset].strip():
        body_lines[0] = first_line_bytes[first_stmt.col_offset:].decode("utf-8")

    return textwrap.dedent("\n".join(body_lines))


def count_python_lines_in_markdown(text: str) -> int:
    """Count code lines inside the ```python fenced blocks of ``text``.

    A line is counted when it is inside a block opened by a line starting
    with ```python, is not blank, and contains no ``#`` character anywhere.
    If no line of ``text`` contains ```python at all, every line is treated
    as code and counted by ``_count_python_lines_no_comments``.

    NOTE(review): lines with a trailing inline comment are skipped as well,
    because any ``#`` excludes the line -- confirm this is intended.
    """
    all_lines = text.splitlines()

    if not any("```python" in raw for raw in all_lines):
        return _count_python_lines_no_comments(all_lines)

    counted = 0
    in_python_block = False
    for raw in all_lines:
        content = raw.strip()
        if content.startswith("```"):
            # A ```python fence opens a block; any other fence closes it.
            in_python_block = content.startswith("```python")
            continue
        if in_python_block and content and '#' not in content:
            counted += 1

    return counted


def _count_python_lines_no_comments(lines: list[str]) -> int:
    count = 0
    for line in lines:
        stripped = line.strip()
        if not stripped:
            continue
        if '#' in stripped:
            continue
        count += 1
    return count


def extract_substring(input_str: str, str1: str, str2: str) -> str:
    """Return the text between the first ``str1`` and the next ``str2`` after it.

    Args:
        input_str: Text to search.
        str1: Start delimiter; extraction begins right after its first occurrence.
        str2: End delimiter; extraction stops at its first occurrence that
            follows the start delimiter.

    Returns:
        The text between the delimiters, or ``""`` when either delimiter is
        not found.
    """
    try:
        start_index = input_str.index(str1) + len(str1)
        # Search for the end delimiter only after the start delimiter, so an
        # earlier occurrence (or identical delimiters) cannot cut the result.
        end_index = input_str.index(str2, start_index)
    except ValueError:
        return ""
    return input_str[start_index:end_index]

def run_meta_workflow(meta_id:int, meta_query: str, meta_workflow_raw: Union[str, list], task_id: str, experiment_name: str, model_name:str, agent_type:str='privileged_FullCodeRefl', repe_index=0, exec_temperature=0, static_code_max_reflection=5, ICL_examples=""):
    assert agent_type in ['privileged_FullCodeRefl', 'FullCodeRefl', 'react', "semi_privileged_FullCodeRefl"]
    process = None

    evaluation = {
        'passes' : [],
        'num_tests': 1,
        'success':False
    }
    path = f"{experiment_name}_{int(time.time())}_pid{os.getpid()}"

    meta_workflow, instantiated_meta_workflow, history = [], [], []
    try:
        port = get_free_port()

        process = start_appworld_service(port)

        if isinstance(meta_workflow_raw, str):
            pattern_code_blocks = r'```(json|python)\s*([\s\S]*?)```'

            meta_workflow_raw = re.findall(pattern_code_blocks, meta_workflow_raw, re.DOTALL)

            meta_workflow = []
            for block_type, block_content in meta_workflow_raw:
                if block_type == 'json':
                    meta_workflow.append(json.loads(block_content))
                else:
                    meta_workflow.append(block_content)
        else:
            meta_workflow = meta_workflow_raw

        instantiated_meta_workflow = []
        with AppWorld(
                task_id=task_id,
                experiment_name=path,
                remote_environment_url=f"http://0.0.0.0:{port}",
                load_ground_truth=True,
                ground_truth_mode='full'
        ) as world:
            evaluation = world.evaluate().to_dict()

            sub_task_define = """# 
def complete_subtask(*args, **kwargs):
    print("# Sub-task completed successfully.")"""
            world.execute(sub_task_define)

            instruction = world.task.instruction

            if agent_type == 'react':
                # =============
                ALL_TASK_COMPLETED = False
                for index, code_block in enumerate(meta_workflow):
                    is_final = index == len(meta_workflow) - 1
                    if isinstance(code_block, str):

                        output = world.execute(code_block)
                        if 'Execution failed' in output:
                            interaction_history = copy.deepcopy(history)
                            is_success = False
                            for _ in range(static_code_max_reflection):
                                if is_final:
                                    CODE_REFLECTION_PROMPT = Template(
                                        CODE_REFLECTION_PROMPT_TEMPLATE_Final.lstrip()).render(
                                        {"static_code": code_block,
                                         "error_msg": output,
                                         "task_description": instruction,
                                         "history_code": interaction_history,
                                         "api_docs":  build_api_docs_for_a_task(task=world.task, is_final=is_final)})
                                else:
                                    CODE_REFLECTION_PROMPT = Template(
                                        CODE_REFLECTION_PROMPT_TEMPLATE_NoFinal.lstrip()).render(
                                        {"static_code": code_block,
                                         "error_msg": output,
                                         "task_description": instruction,
                                         "history_code": interaction_history,
                                         "api_docs":  build_api_docs_for_a_task(task=world.task, is_final=is_final)})

                                reflection_messages = [
                                    {"role": "system", "content": "You are a master of code correction."},
                                    {"role": "user",
                                     "content": CODE_REFLECTION_PROMPT}
                                ]

                                is_rewrite_success = False
                                for _ in range(5):
                                    try:
                                        rewritten_code = call_llm(reflection_messages, model=model_name,
                                                                  temperature=exec_temperature)
                                        sub_goal_pattern = r'```python\s*([\s\S]*?)```'
                                        matches = re.findall(sub_goal_pattern, rewritten_code, re.DOTALL)
                                        rewritten_code = matches[0]
                                        is_rewrite_success = True
                                        break
                                    except Exception as e:
                                        print(f'ReWrite Error in {code_block}', e)
                                if is_rewrite_success == False:
                                    evaluation['success'] = False
                                    raise Exception('Rewrite Sub-goal Error for 5 times.')

                                output = world.execute(rewritten_code)

                                if 'Execution failed' not in output:
                                    history.append(['assistant', rewritten_code])
                                    history.append(['user', output])
                                    instantiated_meta_workflow.append(rewritten_code)
                                    is_success = True
                                    break
                                else:
                                    interaction_history.append(['assistant', rewritten_code])
                                    interaction_history.append(['user', output])

                            if is_success == False:
                                evaluation['success'] = False
                                raise Exception('Run static code Error for 5 times.')
                        else:
                            history.append(['assistant', code_block])
                            history.append(['user', output])
                            instantiated_meta_workflow.append(code_block)

                    else:
                        future = meta_workflow[index + 1:]
                        adapt_messages = [
                            {"role": "system", "content": "You are a helpful assistant."},
                            {"role": "user",
                             "content": Template(ADAPTATION_PROMPT_TEMPLATE.lstrip()).render({"total_task": instruction,
                                                                                              'current_sub_task': code_block,
                                                                                              'history': history,
                                                                                              'future': future,
                                                                                              "api_docs": build_api_docs_for_a_task(task=world.task,is_final=is_final)})}
                        ]
                        sub_task = str(code_block)
                        is_adapt_successful = False
                        for _ in range(5):
                            try:
                                sub_task = call_llm(adapt_messages, model =model_name, temperature=exec_temperature)
                                sub_goal_pattern = r'```json\s*([\s\S]*?)```'
                                matches = re.findall(sub_goal_pattern, sub_task, re.DOTALL)
                                sub_task = matches[0]
                                is_adapt_successful = True
                                break
                            except Exception as e:
                                print(f'Adapt Sub-goal Error in {code_block}', e)
                        if is_adapt_successful == False:
                            evaluation['success'] = False
                            raise Exception('Adapt Sub-goal Error for 5 times.')
                        instantiated_meta_workflow.append(sub_task)

                        #     def __init__(self, sub_task: Dict, task: Task, interaction_history: List, future:List, model_name: str, is_final:bool, exec_temperature:float,  ICL_examples:str=""):
                        agent = MinimalReactAgent(sub_task, world.task, history, future, model_name=model_name, is_final=index == len(meta_workflow) -1,  exec_temperature=exec_temperature , ICL_examples=ICL_examples)
                        output: str | None = None

                        # max_interactions for sub-goal
                        max_interactions = 20
                        # Until the task is completed or max_interactions is reached
                        for _ in range(max_interactions):
                            code = agent.next_code_block(output)

                            output = world.execute(code)

                            if world.task_completed():
                                ALL_TASK_COMPLETED = True
                                break
                            if '# Sub-task completed successfully.' in output:
                                break
                    if ALL_TASK_COMPLETED:
                        break

            elif agent_type == 'privileged_FullCodeRefl' or agent_type == 'FullCodeRefl' or agent_type == 'semi_privileged_FullCodeRefl':
                history = []

                for index, code_block in enumerate(meta_workflow):
                    is_final = index == len(meta_workflow) - 1
                    if isinstance(code_block, str):
                        output = world.execute(code_block)

                        interaction_history = copy.deepcopy(history)
                        if 'Execution failed' in output:
                            is_success = False
                            for _ in range(static_code_max_reflection):
                                if is_final:
                                    CODE_REFLECTION_PROMPT = Template(CODE_REFLECTION_PROMPT_TEMPLATE_Final.lstrip()).render(
                                         {"static_code": code_block,
                                          "error_msg": output,
                                          "task_description":instruction,
                                          "history_code": interaction_history,
                                          "api_docs":  build_api_docs_for_a_task(task=world.task, is_final=is_final)})
                                else:
                                    CODE_REFLECTION_PROMPT = Template(CODE_REFLECTION_PROMPT_TEMPLATE_NoFinal.lstrip()).render(
                                        {"static_code": code_block,
                                         "error_msg": output,
                                         "task_description": instruction,
                                         "history_code": interaction_history,
                                         "api_docs": build_api_docs_for_a_task(task=world.task, is_final=is_final)})

                                reflection_messages = [
                                    {"role": "system", "content": "You are a master of code correction."},
                                    {"role": "user",
                                     "content": CODE_REFLECTION_PROMPT }
                                ]

                                is_rewrite_success = False
                                for _ in range(5):
                                    try:
                                        rewritten_code = call_llm(reflection_messages, model=model_name,
                                                            temperature=exec_temperature)
                                        sub_goal_pattern = r'```python\s*([\s\S]*?)```'
                                        matches = re.findall(sub_goal_pattern, rewritten_code, re.DOTALL)
                                        rewritten_code = matches[0]
                                        is_rewrite_success = True
                                        break
                                    except Exception as e:
                                        print(f'ReWrite Error in {code_block}', e)
                                if is_rewrite_success == False:
                                    evaluation['success'] = False
                                    raise Exception('Rewrite Sub-goal Error for 5 times.')

                                output = world.execute(rewritten_code)

                                if 'Execution failed' not in output:
                                    history.append(['assistant', rewritten_code])
                                    history.append(['user', output])
                                    instantiated_meta_workflow.append(rewritten_code)
                                    is_success = True
                                    break
                                else:
                                    interaction_history.append(['assistant', rewritten_code])
                                    interaction_history.append(['user', output])

                            if is_success == False:
                                evaluation['success'] = False
                                raise Exception('Run static code Error for 5 times.')
                        else:
                            history.append(['assistant', code_block])
                            history.append(['user', output])
                            instantiated_meta_workflow.append(code_block)
                    else:
                        SUB_TASK_COMPLETED = False
                        future = meta_workflow[index + 1:]
                        adapt_messages = [
                            {"role": "system", "content": "You are an expert at breaking down subtasks."},
                            {"role": "user",
                             "content": Template(ADAPTATION_PROMPT_TEMPLATE.lstrip()).render({"total_task": instruction,
                                                                                              'current_sub_task': code_block,
                                                                                              'history': history,
                                                                                              'future': future,
                                                                                              'api_docs':build_api_docs_for_a_task(task=world.task, is_final=is_final) } )}
                        ]
                        sub_task = str(code_block)
                        is_adapt_successful = False
                        for _ in range(5):
                            try:
                                sub_task = call_llm(adapt_messages, model =model_name, temperature=exec_temperature)
                                sub_goal_pattern = r'```json\s*([\s\S]*?)```'
                                matches = re.findall(sub_goal_pattern, sub_task, re.DOTALL)
                                sub_task = matches[0]
                                is_adapt_successful = True
                                break
                            except Exception as e:
                                print(f'Adapt Sub-goal Error in {code_block}', e)
                        if is_adapt_successful == False:
                            evaluation['success'] = False
                            raise Exception('Adapt Sub-goal Error for 5 times.')
                        instantiated_meta_workflow.append(sub_task)

                        agent = FullCodeReflexionAgent(sub_task, world.task, history, future, model_name=model_name, is_final=is_final, mode=agent_type, exec_temperature=exec_temperature, ICL_examples=ICL_examples)

                        output: str | None = None

                        for try_index in range(agent.max_num_retrials):
                            code = agent.next_code_block(try_index, output)

                            output = world.execute(code)

                            if 'Execution failed' in output:
                                continue
                            else:
                                history.append(['assistant', code])
                                history.append(['user', output])
                                SUB_TASK_COMPLETED =True
                                break
                        if SUB_TASK_COMPLETED == False:
                            evaluation = world.evaluate().to_dict()
                            evaluation['success'] = False
                            raise Exception(f'Dynamic Agent completed Sub-task Error for {agent.max_num_retrials} times. sub_task: {sub_task} code:{code} output:{output}')

            else:
                evaluation['success'] = False
                raise Exception(f'Agent Type {agent_type} is not supported.')

            evaluation = world.evaluate().to_dict()
            result = {
                'task_id': task_id,
                'meta_id': meta_id,
                'meta_query': meta_query,
                'meta_workflow': meta_workflow,
                'instantiated_meta_workflow': instantiated_meta_workflow,
                'history': history,
                'eval_result': evaluation,
                'repe_index': repe_index
            }
            return result

    except Exception as e:
        evaluation['success'] = False
        print('Instantiation Error:', e)
        return {
                'task_id': task_id,
                'meta_id' : meta_id,
                'meta_query' : meta_query,
                'meta_workflow' : meta_workflow,
                'instantiated_meta_workflow' : instantiated_meta_workflow,
                'history': history,
                'eval_result' : evaluation,
                'repe_index': repe_index
            }
    finally:
        if process is not None:
            stop_appworld_service(process)
        else:
            logger.error('Start service error.')
        folder_path = os.path.join(os.environ.get('APPWORLD_ROOT', ''), "experiments", "outputs", path)

        if os.path.exists(folder_path):
            shutil.rmtree(folder_path)
        else:
            print(f"Folder {folder_path} does not exist.")

def process_leaf(args):
    """Unpack one work item and run its meta workflow.

    Args:
        args: A 9-item sequence holding, in order: meta_id, meta_query,
            meta_workflow_raw, leaf_id, experiment_name, model_name,
            agent_type, repe_index, exec_temperature.

    Returns:
        Whatever ``run_meta_workflow`` returns for this work item.

    Raises:
        ValueError: If ``args`` does not have exactly 9 items, if
            ``meta_query`` is None, or if ``meta_workflow_raw`` is None or
            empty.
    """
    if len(args) != 9:
        raise ValueError(f'args {args} is invalid.')
    (
        meta_id,
        meta_query,
        meta_workflow_raw,
        leaf_id,
        experiment_name,
        model_name,
        agent_type,
        repe_index,
        exec_temperature,
    ) = args
    # A falsiness check also rejects a None workflow, which would otherwise
    # crash in len() with an unrelated TypeError.
    if meta_query is None or not meta_workflow_raw:
        raise ValueError('Meta flow is invalid.')
    return run_meta_workflow(
        meta_id,
        meta_query,
        meta_workflow_raw,
        leaf_id,
        experiment_name,
        model_name=model_name,
        agent_type=agent_type,
        repe_index=repe_index,
        exec_temperature=exec_temperature,
    )


def is_fully_commented(code: str) -> bool:
    """Tell whether *code* holds nothing but comments and blank lines.

    Each line is stripped of surrounding whitespace; the result is True only
    when every stripped line is either empty or begins with ``#``. An empty
    string therefore yields True.
    """
    stripped_lines = (raw.strip() for raw in code.splitlines())
    return all(not text or text.startswith('#') for text in stripped_lines)

def is_valid_metaflow(completion) -> bool:
    """Check whether a meta workflow satisfies the structural rules.

    Args:
        completion: Either the raw text of a completion containing fenced
            ``json`` / ``python`` blocks, or an already-parsed list of blocks
            (strings for code blocks, other objects for decoded json blocks).

    Returns:
        True only when all of the following hold:
          * at least one block is present;
          * no code (string) block consists solely of comments/blank lines;
          * every fenced json block in a text completion decodes as JSON;
          * if the last block is a string, it contains
            ``apis.supervisor.complete_task``;
          * ``apis.supervisor.complete_task`` occurs at most once in the whole
            completion (its text, or ``str()`` of the list);
          * not every block is a string.
        False otherwise, including for inputs that are neither str nor list.
    """
    complete_marker = 'apis.supervisor.complete_task'

    if isinstance(completion, str):
        pattern_code_blocks = r'```(json|python)\s*([\s\S]*?)```'
        meta_workflow = []
        for block_type, block_content in re.findall(pattern_code_blocks, completion, re.DOTALL):
            if block_type == 'json':
                try:
                    meta_workflow.append(json.loads(block_content))
                except json.JSONDecodeError:
                    # A malformed json block makes the workflow invalid; a
                    # validator should report that rather than raise.
                    return False
            else:
                if is_fully_commented(block_content):
                    return False
                meta_workflow.append(block_content)
        # Counted over the full text, not only the fenced blocks.
        marker_count = completion.count(complete_marker)
    elif isinstance(completion, list):
        meta_workflow = completion
        if any(isinstance(block, str) and is_fully_commented(block) for block in meta_workflow):
            return False
        marker_count = str(completion).count(complete_marker)
    else:
        return False

    # No blocks at all: nothing to run, and indexing [-1] below would fail.
    if not meta_workflow:
        return False

    last_block = meta_workflow[-1]
    if isinstance(last_block, str) and complete_marker not in last_block:
        return False

    if marker_count > 1:
        return False

    # A workflow made only of string blocks is rejected.
    if all(isinstance(item, str) for item in meta_workflow):
        return False
    return True





if __name__ == "__main__":
    # Manual debug entry point: runs one hand-written meta workflow against
    # each of the task ids below and prints the returned result.
    ids = ['229360a_2', '229360a_1']
    # Fixture in the completion text format: fenced ```python blocks and fenced
    # ```json blocks (with "task_description" / "expected_final_state" keys),
    # separated by '---' lines, ending with a block that calls
    # apis.supervisor.complete_task.
    # NOTE(review): the first block spells `apis.Supervisor.show_profile` and
    # `apis.spotify.LOGIN` with unusual capitalisation -- presumably deliberate,
    # to exercise the failure/rewrite path; confirm before "fixing" the fixture.
    test_meta_workflow = """```python
# Static Code: Initialization and Authentication
supervisor_profile = apis.Supervisor.show_profile()
# Retrieve supervisor's account passwords.
supervisor_passwords = {
account_password["account_name"]: account_password["password"]
for account_password in apis.supervisor.show_account_passwords()
}
# Obtain Spotify access token for supervisor.
access_token = apis.spotify.LOGIN(
username=supervisor_profile["email"], password=supervisor_passwords["spotify"]
)["access_token"]

# Static Code: Fetch Libraries
# Retrieve all songs in the user's library with pagination.
library_songs = [
item
for page_index in range(0, 10)
for item in apis.spotify.show_song_library(page_index=page_index, access_token=access_token)
]
# Retrieve all albums in the user's library with pagination.
library_albums = [
item
for page_index in range(0, 10)
for item in apis.spotify.show_album_library(page_index=page_index, access_token=access_token)
]

# Static Code: Fetch Downloaded and Liked Songs
# Retrieve all downloaded songs and extract their IDs.
downloaded_songs = [
item
for page_index in range(0, 10)
for item in apis.spotify.show_downloaded_songs(page_index=page_index, access_token=access_token)
]
downloaded_song_ids = [downloaded_song_["song_id"] for downloaded_song_ in downloaded_songs]
# Retrieve all liked songs and extract their IDs.
liked_songs = [
item
for page_index in range(0, 10)
for item in apis.spotify.show_liked_songs(page_index=page_index, access_token=access_token)
]
liked_song_ids = [liked_song_["song_id"] for liked_song_ in liked_songs]
```
---
```json
{
"task_description": "Loop over `library_songs` and determine whether to keep each song based on the following criteria: {criteria}. If a song does not meet the criteria for retention, use the `apis.spotify.remove_song_from_library` function to remove that song.",
"expected_final_state": "Complete the song cleaning process."
}
```
---
```python
# get liked albums using apis.spotify.show_liked_albums, and get their IDs.
liked_albums = [
item
for page_index in range(0, 10)
for item in apis.spotify.show_liked_albums(page_index=page_index, access_token=access_token)
]
liked_album_ids = [liked_album_["album_id"] for liked_album_ in liked_albums]
```
---
```json
{
"task_description": "Loop over `library_albums` and determine whether to keep each album based on the following criteria: {criteria}. If an album does not meet the criteria for retention, use the `apis.spotify.remove_album_from_library` function to remove that album.",
"expected_final_state": "Complete the album cleaning process."
}
```
---
```python
# commit task completion
apis.supervisor.complete_task(status="success")
``` 
 """

    # The first five positional arguments follow the order process_leaf uses
    # when calling run_meta_workflow: meta_id, meta_query, meta_workflow_raw,
    # leaf_id, experiment_name. The sixth ('gpt-4o') is presumably model_name
    # -- verify against the run_meta_workflow signature.
    for task_id in ids:
        print(run_meta_workflow(0, '', test_meta_workflow, task_id, 'debug', 'gpt-4o', agent_type='FullCodeRefl'))