import os
import random
import logging
import json
from tqdm import tqdm
import tempfile
import subprocess

class LLMCG():
    def __init__(self, task_description, LLM_model):
        """Remember the default task description and the LLM client.

        Args:
            task_description: Text used as the task description whenever a
                method is called without an explicit one.
            LLM_model: Object whose ``LLM_response`` / ``LLM_response_async``
                methods are called with ``(prompt, gen_kwargs)``.
        """
        self.LLM_model = LLM_model
        self.task_description = task_description

    def test_case_error_prompt(self, dict_1, dict_2, current_test_weights, n=3):
        # 筛选共同存在且失败的测试用例
        filtered_tests = [
            test_id for test_id, test_detail in dict_1.items()
            if test_id in dict_2 and not dict_2[test_id].get('success', True)
        ]
        
        if not filtered_tests or n <= 0:
            return "No errors founds"
        
        # 选择权重最高的n个测试用例
        filtered_tests.sort(key=lambda x: current_test_weights.get(x, 0), reverse=True)
        selected = filtered_tests[:n]

        # # 分离test_failed和其他错误类型
        # others = []
        # test_failed = []
        # for test_id in filtered_tests:
        #     reason = dict_2[test_id]['reason']
        #     if reason == 'test_failed':
        #         test_failed.append(test_id)
        #     else:
        #         others.append(test_id)
        
        # # 随机选择其他错误类型（每个类型最多一个）
        # random.shuffle(others)
        # seen_reasons = set()
        # selected_others = []
        # for test_id in others:
        #     reason = dict_2[test_id]['reason']
        #     if reason not in seen_reasons:
        #         seen_reasons.add(reason)
        #         selected_others.append(test_id)
        #         if len(selected_others) == n:
        #             break
        
        # # 补充test_failed用例
        # remaining = max(n - len(selected_others), 0)
        # selected_test_failed = []
        # if remaining > 0 and test_failed:
        #     random.shuffle(test_failed)
        #     selected_test_failed = test_failed[:min(remaining, len(test_failed))]
        
        # selected = selected_others + selected_test_failed
        # selected = selected[:n]  # 确保不超过n个
        
        # 错误类型描述映射
        error_descriptions = {
            'main_func_compile_error': 'Main function compilation failed',
            'test_func_compile_error': 'Test function compilation failed',
            'test_failed': 'Test assertion failed',
            'exception': 'An exception was raised during test execution',
            'timeout': 'Test execution timed out',
            'worker_process_error': 'Worker process encountered an error'
        }
        
        # 构建提示内容
        prompt_lines = []
        if selected != []:
            prompt_lines.append("The code failed to pass following tests:")
        for test_id in selected:
            test_result = dict_2[test_id]
            reason = test_result['reason']
            message = test_result.get('message') or error_descriptions.get(reason, 'Unknown error')
            
            parts = []
            
            if reason == 'test_failed':
                test_info = dict_1[test_id]
                parts.append(f"Test Function:\n{test_info['test_function']}")
                parts.append(f"Test Type: {test_info['test_type']}")
            
            parts.append(f"Error Type: {reason}")
            if message:
                parts.append(f"Error Message:\n{message}")

            prompt_lines.append("\n".join(parts))
        
        # 拼接最终提示
        final_prompt = "\n\n".join(prompt_lines)
        # final_prompt += "\n\nPlease carefully review the above errors and modify the code to address each issue. Ensure that all test cases pass and handle any edge cases or runtime errors appropriately."
        
        return final_prompt

    def create_code_generation_prompt(
        self,
        extracted_plan,
        user_feedback=None,
        task_description=None,
        test_cases=None,
        history=None,
        best_code=None,
        next_code_line=False,
        output_planning=False,
        use_example=False,
        use_task_description=False,
        use_system_prompt=True,
        more_comments=False,
        best_only=False,
        error_test_num=3,
        ):
        """Assemble the text prompt that asks the LLM to generate or refine code.

        Sections are appended in this order, each only when enabled/available:
        role, task description, components, overall plan, example test cases,
        user feedback, previous best code (with test statistics and error
        summaries), refinement requirements, and the final instructions.
        The components, overall plan and example sections are skipped when
        ``user_feedback`` is given.

        Args:
            extracted_plan: Dict with the keys ``"components"`` and
                ``"overall_plan"``.
            user_feedback: Optional feedback text; switches the prompt into
                refinement mode.
            task_description: Overrides ``self.task_description`` when truthy.
            test_cases: Optional mapping of test name to test details; used
                for examples and for describing failures of ``best_code``.
            history: Accepted for interface compatibility; not used here.
            best_code: Optional mapping of id to a dict with ``'code'``,
                ``'pass_rate_score'``, ``'test_case_results'`` and
                ``'test_weights'``.
            next_code_line: Ask for only the next snippet instead of the
                complete code.
            output_planning: Request the structured
                Code/Planning/Main Function Name response layout.
            use_example: Include up to three test cases as examples.
            use_task_description: Include the task description section.
            use_system_prompt: Include the role section.
            more_comments: Ask the model to comment its code heavily.
            best_only: Only include the top-scoring entry of ``best_code``.
            error_test_num: Maximum failing tests described per best-code
                entry.

        Returns:
            The prompt as a single newline-joined string.

        Raises:
            ValueError: If ``overall_plan["plan"]`` is neither ``str`` nor
                ``list``.
        """

        def format_io(title, label, fmt):
            # Render one "Input Format"/"Output Format" section; a shape of
            # None means the value has no fixed shape.
            lines = []
            for idx, (dtype, shape) in enumerate(fmt, 1):
                shape_str = f"shape={shape}" if shape is not None else "no fixed shape"
                lines.append(f"- {label} {idx}: {dtype} with {shape_str}")
            return f"{title}:\n" + "\n".join(lines)

        components = extracted_plan["components"]
        overall_plan = extracted_plan["overall_plan"]

        prompt_parts = []

        if user_feedback:
            system_prompt = "You are a code refinement specialist designed to improve existing implementations based on specific feedback. Analyze the provided feedback, identify areas for improvement, and modify the code while strictly maintaining the required input/output formats and component specifications."
        else:
            system_prompt = "You are a highly skilled coding assistant designed to generate clear, efficient, and correct code based on structured task descriptions and detailed plans provided by the user. Your responses must precisely follow the instructions, formats, and constraints given by the user, and you must strictly adhere to input-output formats, workflows, and specific guidelines outlined."

        # Role section
        if use_system_prompt:
            prompt_parts.append(f"=== Role ===\n{system_prompt}\n")

        # Task description section (falls back to the instance default)
        if not task_description:
            task_description = self.task_description

        if use_task_description:
            prompt_parts.append(f"=== Task Description ===\n{task_description}\n")

        # Components section
        if components and not user_feedback:
            prompt_parts.append("=== Components ===")
            for comp_name, comp_details in components.items():
                input_section = format_io("Input Format", "Argument", comp_details["input_format"])
                output_section = format_io("Output Format", "Output", comp_details["output_format"])
                prompt_parts.extend([
                    f"\n**Component: {comp_name}**",
                    f"Step Task Description: {comp_details['step_task_description']}",
                    input_section,
                    output_section,
                    "Workflow Steps:",
                    *[f"- {step}" for step in comp_details["work_flow"]],
                ])

        # Overall plan section
        if overall_plan and not user_feedback:
            prompt_parts.append("\n=== Overall Plan ===")
            input_section = format_io("Input Format", "Argument", overall_plan["input_format"])
            output_section = format_io("Output Format", "Output", overall_plan["output_format"])

            # A list plan is rendered as bullets; a string plan is used verbatim.
            plan = overall_plan["plan"]
            if isinstance(plan, list):
                plan_steps = [f"- {step}" for step in plan]
            elif isinstance(plan, str):
                plan_steps = [plan]
            else:
                raise ValueError("Invalid plan format. Expected str or list.")

            prompt_parts.extend([
                input_section,
                output_section,
                f"Components Order: {', '.join(overall_plan['components'])}",
                "Plan Steps:",
                *plan_steps,
            ])

        # Example test cases (at most three)
        if use_example and test_cases and not user_feedback:
            prompt_parts.append("\n=== Test Cases ===")
            for case_name, case_details in list(test_cases.items())[:3]:
                prompt_parts.extend([
                    f"\n**Test Case: {case_name}**",
                    f"Purpose: {case_details['purpose']}",
                    f"Type: {case_details['test_type']}",
                    f"Test Function:\n{case_details['test_function']}",
                    "\n",
                ])

        if user_feedback:
            prompt_parts.append("\n=== User Feedback ===")
            prompt_parts.append(user_feedback)

        # Previous best generations, best pass rate first
        if best_code:
            best_code_parts = [
                "\n=== Previous Best Code Generation ===",
                "Here are the best code generation attempts from previous generations:",
            ]
            ranked = sorted(
                best_code.values(),
                key=lambda info: info['pass_rate_score'],
                reverse=True,
            )
            for code_index, code_info in enumerate(ranked, 1):
                results = code_info['test_case_results']
                # test_cases defaults to None; hand over an empty mapping so
                # the error summary degrades gracefully instead of crashing.
                error_prompt = self.test_case_error_prompt(
                    test_cases or {},
                    results,
                    code_info['test_weights'],
                    n=error_test_num,
                )

                best_code_parts.append(f"--- Generation {code_index} ---")
                best_code_parts.append(f"Code:\n{code_info['code']}")

                passed = sum(1 for res in results.values() if res['success'])
                best_code_parts.append(f"Test Results: Passed {passed}/{len(results)} test cases")
                best_code_parts.append(f"Error Information:\n{error_prompt}")

                if best_only:
                    break

            prompt_parts.append("\n".join(best_code_parts))

        # Refinement requirements (feedback mode only)
        if user_feedback:
            refine_instructions = [
                "\n=== Refinement Requirements ===",
                "Before refining the code, tell me the reason why the last code failed to pass the test function, and how would you improve the code. Ingore it if the last code passed all test functions.",
            ]
            prompt_parts.append("\n".join(refine_instructions))

        # Final instructions and response layout
        instructions = ["\n=== Instructions ==="]
        if next_code_line:
            instructions.append("Generate ONLY the next line or a small code snippet required to proceed.")
        else:
            instructions.append("Generate the COMPLETE code based on the components and plan above.")
        instructions.append("DO MAKE SURE the complete code is a runnable function, all components are correctly integrated with in this function.")
        if not user_feedback:
            instructions.append("The complete function should take the input arguments as specified in the overall plan and return the output as specified.")
        else:
            instructions.append("Make sure the input and output formats is correct. Do not fully rely on the plan.")

        if more_comments:
            instructions.append("Please add as much comments as possible to your code to explain the logic and any critical steps.")

        instructions.append("Structure your response as follows:")
        if output_planning:
            if user_feedback:
                instructions.append("<Think>")
                instructions.append("Explain why does last code failed to pass the Test Function, and how to fix it. If the last code passed all test functions, please ignore this part.")
                instructions.append("</Think>")
            instructions.append("<Code>")
            instructions.append("Your code here. DO make sure the output is a single function that integrates all components.")
            instructions.append("</Code>")
            instructions.append("<Planning>")
            if next_code_line:
                instructions.append("A concise summary of what this specific code part accomplishes.")
            else:
                instructions.append("A detailed step-by-step explanation of the code's workflow.")
            instructions.append("</Planning>")
            instructions.append("<Main Function Name>")
            instructions.append("The name of the main function that integrates all components.")
            instructions.append("</Main Function Name>")
            instructions.append("Provide the reasoning, code, planning, function name with the SAME indicator and structure as shown in Instructions. DO NOT return any test cases or example usages in your code!")
        else:
            instructions.append("<Code>")
            instructions.append("Your code here")
            instructions.append("</Code>")
            instructions.append("Provide the code WITHOUT any additional explanations, and DO use the same indicator and structure as shown in Instructions.")

        prompt_parts.append("\n".join(instructions))

        return "\n".join(prompt_parts)
    
    # def extract_code(self, llm_output):
    #     """Extracts code and planning sections from LLM output."""
    #     result = {"code": None, "plan": None, "main_function_name": None}
        
    #     # Extract code section
    #     code_match = re.search(r'<Code>(.*?)(?:</Code>|<End>)', llm_output, re.DOTALL)
    #     if code_match:
    #         result["code"] = code_match.group(1).strip()
    #     else:
    #         # If not found, try to extract from ```python ... ```
    #         code_block_match = re.search(r'```(?:python)?\s*(.*?)```', llm_output, re.DOTALL)
    #         if code_block_match:
    #             result["code"] = code_block_match.group(1).strip()
        
    #     # Extract planning section
    #     plan_match = re.search(r'<Planning>(.*?)(?:</Planning>|<End>)', llm_output, re.DOTALL)
    #     if plan_match:
    #         result["plan"] = plan_match.group(1).strip()

    #     # Extract main function name
    #     main_func_match = re.search(r'<Main Function Name>(.*?)(?:</Main Function Name>|<End>)', llm_output, re.DOTALL)
    #     if main_func_match:
    #         result["main_function_name"] = main_func_match.group(1).strip()
        
    #     return result

    def extract_code(self, llm_output):
        """Parse an LLM response into code, plan, function name and reasoning.

        Both paired tags (``<Code>...</Code>``) and unterminated single tags
        are supported; paired tags take priority. Code is looked up in this
        order: paired ``<Code>`` tag, first fenced ``python`` block, any
        ``def`` definitions found in the text.

        Args:
            llm_output: Raw response text from the model.

        Returns:
            Dict with the keys ``"code"``, ``"plan"``,
            ``"main_function_name"`` and ``"think"``; a value is ``None``
            when the corresponding part could not be found.
        """
        import re

        result = {"code": None, "plan": None, "main_function_name": None}

        # NOTE(review): literal backslash-n sequences are turned into real
        # newlines, presumably because some model outputs arrive escaped.
        # This also rewrites "\n" inside string literals of the generated
        # code - confirm that this is intended.
        llm_output = llm_output.replace("\\n", "\n")

        placeholder = "###NL###"
        fenced_block = r'```python\s*(.*?)\s*```'

        def preprocess(text):
            """Protect newlines inside fenced code blocks with a placeholder."""
            protected = re.sub(
                r'(```python.*?```)',
                lambda m: m.group(0).replace("\n", placeholder),
                text,
                flags=re.IGNORECASE | re.DOTALL
            )
            return protected

        modified_text = preprocess(llm_output)

        def extract_paired_tag(tag, text):
            """Return the content of ``<tag>content</tag>`` or None."""
            pattern = re.compile(
                r'<\s*{tag}\s*>(.*?)<\s*/\s*{tag}\s*>'.format(tag=tag),
                re.IGNORECASE | re.DOTALL
            )
            match = pattern.search(text)
            return match.group(1).replace(placeholder, "\n").strip() if match else None

        def extract_single_tag(tag, text):
            """Return the content following an unterminated ``<tag>`` or None."""
            pattern = re.compile(
                r'<\s*{tag}\s*>(?!</)(.*?)(?=(<\s*\w|```|$))'.format(tag=tag),
                re.IGNORECASE | re.DOTALL
            )
            match = pattern.search(text)
            return match.group(1).replace(placeholder, "\n").strip() if match else None

        result["think"] = extract_paired_tag('think', modified_text)

        # Code extraction (priority: paired tag > fenced block > function defs)
        code_string = extract_paired_tag('code', modified_text)
        if code_string:
            if "```python" in code_string:
                # Search only inside the <Code> section: a fenced snippet in
                # another section (e.g. <Think>) must not shadow the answer.
                inner_blocks = re.findall(fenced_block, code_string, re.DOTALL)
                if inner_blocks:
                    result["code"] = inner_blocks[0].strip()
                else:
                    result["code"] = code_string.replace("```python", "").strip()
            else:
                result["code"] = code_string
        else:
            code_blocks = re.findall(fenced_block, modified_text, re.DOTALL)
            if code_blocks:
                result["code"] = code_blocks[0].replace(placeholder, "\n").strip()
            else:
                # Last resort: collect everything that looks like a function definition.
                functions = re.findall(r'(def\s+.+?:\n(?:\s*.+\n)+)', modified_text, re.DOTALL)
                if functions:
                    result["code"] = '\n\n'.join([f.replace(placeholder, "\n").strip() for f in functions])

        # Plan extraction (paired tags first, single tags as fallback)
        result["plan"] = extract_paired_tag('planning', modified_text) or \
                        extract_paired_tag('reasoning', modified_text)
        if not result["plan"]:
            result["plan"] = extract_single_tag('planning', modified_text) or \
                            extract_single_tag('reasoning', modified_text)

        # Main function name extraction (paired tag first). The tag is a raw
        # string so that "\s" reaches the regex engine without triggering an
        # invalid-escape warning.
        name_tag = r'main\s*function\s*name'
        main_func = extract_paired_tag(name_tag, modified_text)
        if main_func:
            # The last word is taken as the name; content without any word
            # characters (e.g. "()") yields None instead of an IndexError.
            words = re.findall(r'\b\w+\b', main_func)
            result["main_function_name"] = words[-1] if words else None
        else:
            single_tag_content = extract_single_tag(name_tag, modified_text)
            if single_tag_content:
                candidates = re.findall(r'\b([a-zA-Z_]\w*)\s*\(?', single_tag_content)
                result["main_function_name"] = candidates[-1] if candidates else None

        return result
    
    # def get_code(self, extracted_plan, task_description=None, test_cases=None, history=None, next_code_line=False, output_planning=True, use_example=True, use_task_description=True, use_system_prompt=True, more_comments=True, gen_kwargs={}, max_retry=3):
    #     retry_num=0
    #     if task_description is None:
    #         task_description = self.task_description
    #     prompt = self.create_code_generation_prompt(extracted_plan, extracted_plan.get('user_feedback'), task_description, test_cases, history, next_code_line, output_planning, use_example, use_task_description, use_system_prompt, more_comments)
    #     while retry_num <= max_retry:
    #         llm_output = self.LLM_model.LLM_response(prompt, gen_kwargs)
    #         code_output = self.extract_code(llm_output)
    #         if code_output["code"] is None:
    #             retry_num += 1
    #             # print(f"Failed to extract code, retrying ({retry_num})...")
    #             # print(f"Current llm_output:\n{llm_output}")
    #             logging.warning(f"Failed to extract code, retrying ({retry_num})...")
    #             logging.warning(f"Current llm_output:\n{llm_output}")
    #         else:
    #             break
    #     if code_output["code"] is None:
    #         logging.error(f"Failed to extract code, current llm_output:\n{llm_output}")
    #         raise ValueError("Failed to extract code, current llm_output:\n", llm_output)
    #     return code_output
    
    def code_runnable_check(self, code_str):
        """Report whether ``code_str`` compiles as Python source.

        The text is only passed to ``compile(..., "exec")``; it is never
        executed. Any exception raised while compiling yields ``False``.
        """
        try:
            compile(code_str, "<string>", "exec")
        except Exception:
            return False
        else:
            return True

    def get_code(self, extracted_plan, task_description=None, test_cases=None, history=None, best_codes=None, next_code_line=False, output_planning=True, use_example=True, use_task_description=True, use_system_prompt=True, more_comments=True, gen_kwargs={}, max_retry=3, best_only=False, error_test_num=3, prompt_only=False):
        retry_num=0
        if task_description is None:
            task_description = self.task_description
        prompt = self.create_code_generation_prompt(extracted_plan, extracted_plan.get('user_feedback'), task_description, test_cases, history, best_codes, next_code_line, output_planning, use_example, use_task_description, use_system_prompt, more_comments, best_only, error_test_num)

        if prompt_only:
            return prompt
        # debug
        # print("###############################################################")
        # print(prompt)
        while retry_num <= max_retry:
            llm_output = self.LLM_model.LLM_response(prompt, gen_kwargs)
            code_output = self.extract_code(llm_output)
            if code_output["code"] is None or code_output["plan"] is None or code_output["main_function_name"] is None:
                retry_num += 1
                # print(f"Failed to extract code, retrying ({retry_num})...")
                # print(f"Current llm_output:\n{llm_output}")
                logging.warning(f"Failed to extract code, retrying ({retry_num})...")
                logging.warning(f"Current llm_output:\n{llm_output}")
            else:
                code_str = code_output["code"]
                code_check = self.code_runnable_check(code_str)
                if not code_check:
                    retry_num += 1
                    # print(f"Code is not runnable, retrying ({retry_num})...")
                    logging.warning(f"Code is not runnable, retrying ({retry_num})...")
                    logging.warning(f"Current code_output:\n{code_str}")
                    code_output["code"] = None
                else:
                    break
        if code_output["code"] is None:
            logging.error(f"Failed to extract code, current llm_output:\n{llm_output}")
            logging.error(f"Current prompt:\n{prompt}")
            raise ValueError("Failed to extract code, current llm_output:\n", llm_output)
        # debug
        elif code_output["plan"] is None:
            logging.error(f"Failed to extract plan for code generation, current llm_output:\n{llm_output}")
            raise ValueError("Failed to extract plan for code generation, current llm_output:\n", llm_output)
        elif code_output["main_function_name"] is None:
            logging.error(f"Failed to extract main_function_name for code generation, current llm_output:\n{llm_output}")
            raise ValueError("Failed to extract main_function_name for code generation, current llm_output:\n", llm_output)
        # debug
        # print("###############################################################")
        # print("llm output:")
        # print(llm_output)
        return code_output

    async def get_code_async(self, extracted_plan, task_description=None, test_cases=None, history=None, best_codes=None, next_code_line=False, output_planning=True, use_example=True, use_task_description=True, use_system_prompt=True, more_comments=True, gen_kwargs={}, max_retry=3, best_only=False, error_test_num=3):
        retry_num=0
        if task_description is None:
            task_description = self.task_description
        prompt = self.create_code_generation_prompt(extracted_plan, extracted_plan.get('user_feedback'), task_description, test_cases, history, best_codes, next_code_line, output_planning, use_example, use_task_description, use_system_prompt, more_comments, best_only, error_test_num)
        # debug
        # print("###############################################################")
        # print(prompt)
        while retry_num <= max_retry:
            llm_output = await self.LLM_model.LLM_response_async(prompt, gen_kwargs)
            code_output = self.extract_code(llm_output)
            if code_output["code"] is None or code_output["plan"] is None or code_output["main_function_name"] is None:
                retry_num += 1
                # print(f"Failed to extract code, retrying ({retry_num})...")
                # print(f"Current llm_output:\n{llm_output}")
                logging.warning(f"Failed to extract code, retrying ({retry_num})...")
                logging.warning(f"Current llm_output:\n{llm_output}")
            else:
                code_str = code_output["code"]
                code_check = self.code_runnable_check(code_str)
                if not code_check:
                    retry_num += 1
                    # print(f"Code is not runnable, retrying ({retry_num})...")
                    logging.warning(f"Code is not runnable, retrying ({retry_num})...")
                    logging.warning(f"Current code_output:\n{code_str}")
                    code_output["code"] = None
                else:
                    break
        if code_output["code"] is None:
            logging.error(f"Failed to extract code, current llm_output:\n{llm_output}")
            raise ValueError("Failed to extract code, current llm_output:\n", llm_output)
        # debug
        elif code_output["plan"] is None:
            logging.error(f"Failed to extract plan for code generation, current llm_output:\n{llm_output}")
            raise ValueError("Failed to extract plan for code generation, current llm_output:\n", llm_output)
        elif code_output["main_function_name"] is None:
            logging.error(f"Failed to extract main_function_name for code generation, current llm_output:\n{llm_output}")
            raise ValueError("Failed to extract main_function_name for code generation, current llm_output:\n", llm_output)
        return code_output
    
class CodeRunner:
    """Run functions against test cases through an external runner script.

    The runner script is started as a child Python process. It receives the
    functions and test cases as JSON files and reports back on its standard
    output using line prefixes: ``PROGRESS_TASK: <current>/<total>`` for
    progress, ``FUNCTION_RESULTS:<json>`` and ``TEST_RESULTS:<json>`` for the
    final results.
    """

    def __init__(self, max_workers=5):
        """Store the default worker count passed to the runner script."""
        self.max_workers = max_workers

    def run_code_runner_in_subprocess(self, functions, test_cases, max_workers=None, timeout=30, script_path=r"E:\python_project_new\AI4SLCDP\src\Test_runner.py"):
        """Execute the runner script and collect its reported results.

        Args:
            functions: JSON-serializable object written to the functions file.
            test_cases: JSON-serializable object written to the test cases file.
            max_workers: Value for ``--max_workers``; defaults to
                ``self.max_workers``.
            timeout: Value passed to the script as ``--timeout``. It is not
                applied to the child process itself here.
            script_path: Path of the runner script. NOTE(review): the default
                is a machine-specific absolute path; pass an explicit path on
                other machines.

        Returns:
            Tuple ``(func_results, test_results, output)``: the two decoded
            JSON results (empty dicts if the script never printed them) and
            the child's complete output (stdout and stderr merged) as text.
        """
        # Function-scope import: ``sys`` is not among the file's top-level imports.
        import sys

        if max_workers is None:
            max_workers = self.max_workers

        temp_paths = []
        pbar = None
        total_tasks = None
        output = []
        try:
            # Hand the payloads to the child through temporary JSON files.
            # Each path is recorded before writing so that it is removed
            # even when serialization fails.
            for payload in (functions, test_cases):
                with tempfile.NamedTemporaryFile(mode='w', delete=False) as handle:
                    temp_paths.append(handle.name)
                    json.dump(payload, handle)
            func_file_path, test_file_path = temp_paths

            cmd = [
                # Use the interpreter running this process so the child sees
                # the same environment; "python" may be absent from PATH.
                sys.executable or "python", script_path,
                "--functions_file", func_file_path,
                "--test_cases_file", test_file_path,
                "--max_workers", str(max_workers),
                "--timeout", str(timeout)
            ]

            # The context manager closes the pipe and waits for the child.
            with subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                bufsize=1
            ) as process:
                for line in iter(process.stdout.readline, ""):
                    line = line.strip()
                    output.append(line)

                    if not line.startswith("PROGRESS_TASK: "):
                        continue

                    # Parse "<current>/<total>"; malformed progress is ignored.
                    progress_part = line.split("PROGRESS_TASK: ")[1]
                    try:
                        current, total = map(int, progress_part.split('/'))
                    except ValueError:
                        continue

                    # Create the bar on the first valid progress line. An
                    # identity check is used because a bar with total == 0
                    # is falsy.
                    if pbar is None:
                        total_tasks = total
                        pbar = tqdm(
                            total=total,
                            desc="Testing Progress",
                            bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt}",
                            ascii=True
                        )

                    # Clamp to the bar's range before updating.
                    current = max(0, min(current, total_tasks))
                    if pbar.n != current:
                        pbar.n = current
                        pbar.refresh()

            # Show the bar as complete once the child has finished.
            if pbar is not None:
                pbar.n = total_tasks
                pbar.refresh()

            # Decode the final results from the collected output.
            func_results = {}
            test_results = {}
            for line in output:
                if line.startswith("FUNCTION_RESULTS:"):
                    func_results = json.loads(line[len("FUNCTION_RESULTS:"):])
                elif line.startswith("TEST_RESULTS:"):
                    test_results = json.loads(line[len("TEST_RESULTS:"):])

            return func_results, test_results, "\n".join(output)
        finally:
            if pbar is not None:
                pbar.close()
            # Best-effort cleanup; one failing removal must not skip the other.
            for path in temp_paths:
                try:
                    os.unlink(path)
                except OSError as e:
                    print(f"Error cleaning temp files: {e}")

    def run_all_tests(self, functions, test_cases, max_workers=None, timeout=5, script_path=None):
        """Run the test runner and return ``(func_results, test_results)``.

        Args:
            functions: Forwarded to ``run_code_runner_in_subprocess``.
            test_cases: Forwarded to ``run_code_runner_in_subprocess``.
            max_workers: Worker count; defaults to ``self.max_workers``.
            timeout: Value passed to the runner script as ``--timeout``.
            script_path: Optional runner script path; when omitted the
                default of ``run_code_runner_in_subprocess`` is used.
        """
        if max_workers is None:
            max_workers = self.max_workers
        extra = {} if script_path is None else {"script_path": script_path}
        fr, tr, _ = self.run_code_runner_in_subprocess(functions, test_cases, max_workers, timeout, **extra)
        return fr, tr