
import contextlib
from io import StringIO
import re
import subprocess
import json
from tqdm import tqdm
import os
from contextlib import contextmanager
import signal
import builtins
import threading
# Path of the input JSON file read by the driver at the bottom of this file;
# left empty here.
input_path = ""
# Output path: the input path with its extension replaced by "-parsed.json".
output_path = f"{os.path.splitext(input_path)[0]}-parsed.json"

# Outcome counters, updated by extract_content() for each processed item.
success = 0
template_generation_fault_count = 0 # no template could be extracted from the generation
template_generation_mistake_count = 0 # the generated template does not match the original query
python_generation_fault_count = 0 # no Python code could be extracted from the generation
python_run_fault_count = 0 # the Python code could not be run
python_run_mistake_count = 0 # the Python code ran but produced a wrong result
template_python_not_algined_count = 0 # template variables are not aligned with the variables in the Python code

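# Disabled subprocess-based variant of runcode (the live definition is
# further down in this file):
# def runcode(code):
#     """Execute Python code and return its output."""
#     try:
#         result = subprocess.run(['python', '-c', code], capture_output=True, text=True)
#         output = float(result.stdout.strip())
#     except Exception as e:
#         return None
#     return output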

def mock_input(prompt=""):
    """Stand-in for the built-in ``input``: ignore *prompt*, return ``""``."""
    return ""


# Replace the interactive built-in process-wide, so that any call to
# input() gets an empty string immediately instead of waiting on stdin.
builtins.input = mock_input

@contextmanager
def time_limit(seconds):
    """Abort the body of the ``with`` block after *seconds* seconds.

    A SIGALRM handler that raises ``Exception("Timed out!")`` is installed
    for the duration of the block, and an alarm is armed with
    ``signal.alarm``.  On exit (normal or exceptional) the alarm is
    cancelled and the handler that was active before entering is restored,
    so the timeout does not leak into the rest of the process.

    Relies on ``signal.SIGALRM`` / ``signal.alarm``, and ``signal.signal``
    may only be called from the main thread.

    Args:
        seconds: Whole number of seconds passed to ``signal.alarm``.

    Raises:
        Exception: Raised inside the block when the alarm fires.
    """
    def signal_handler(signum, frame):
        raise Exception("Timed out!")

    # signal.signal() returns the handler it replaces; remember it so it can
    # be put back afterwards.
    previous_handler = signal.signal(signal.SIGALRM, signal_handler)
    try:
        signal.alarm(seconds)
        yield
    finally:
        # Cancel any pending alarm first so it cannot fire while the
        # previous handler is being reinstated.
        signal.alarm(0)
        # None means the previous handler was not installed from Python and
        # therefore cannot be passed back to signal.signal().
        if previous_handler is not None:
            signal.signal(signal.SIGALRM, previous_handler)

def runcode(code):
    """Execute Python source and return what it printed.

    The code runs in-process under a 10 second ``time_limit`` with stdout
    captured.

    SECURITY NOTE: ``exec`` runs *code* with the full privileges of this
    process.  Only feed it code you are prepared to run unsandboxed.

    Args:
        code: Python source text to execute.

    Returns:
        * ``float`` - the stripped stdout parsed as a number;
        * ``str``   - the stripped stdout when it is not a number and
          contains no digit at all (this includes empty output);
        * ``None``  - when execution raised, timed out or called
          ``exit()``, or when the output is non-numeric but contains digits.
    """
    # A single dict serves as both globals and locals.  With two separate
    # dicts, exec() treats the code like a class body: functions defined in
    # the snippet cannot see the snippet's own top-level variables, imports
    # or other functions.  __name__ is set so that
    # `if __name__ == "__main__":` guards run, as they would under
    # `python -c`.
    namespace = {"__name__": "__main__"}
    try:
        with time_limit(10):
            # Capture everything the snippet prints.
            with StringIO() as buf, contextlib.redirect_stdout(buf):
                exec(code, namespace)
                output = buf.getvalue().strip()
    except (Exception, SystemExit):
        # SystemExit is not an Exception subclass; without catching it a
        # snippet calling exit()/sys.exit() would terminate the whole run.
        return None

    try:
        return float(output)
    except ValueError:
        # Textual answers are accepted only when they contain no digits; a
        # digit-bearing string that is not a plain number is rejected.
        if re.search(r'\d', output):
            return None
        return output

def extract_last_num(text: str) -> float:
    """Return the last unsigned decimal number found in *text*.

    Commas between two digits are removed first (thousands separators such
    as ``123,456``).  Numbers that directly follow ``\\boxed{`` take
    priority; if there are none, every number in the text is considered.
    Returns ``0.0`` when the text contains no number.  Signs are ignored.
    """
    # Drop digit-separating commas, e.g. 123,456 -> 123456.
    cleaned = re.sub(r"(\d),(\d)", r"\g<1>\g<2>", text)

    # Prefer numbers inside \boxed{...}.
    matches = re.findall(r"\\boxed\{(\d+(\.\d+)?)", cleaned)
    if not matches:
        # Otherwise take any integer or decimal, e.g. 123456.789.
        matches = re.findall(r"(\d+(\.\d+)?)", cleaned)

    if not matches:
        return 0.0

    # Each match is a (whole_number, fractional_part) tuple; use the whole.
    last_number, _fraction = matches[-1]
    return float(last_number)

# Scratch counter for ad-hoc debugging; no live code in this file reads or
# increments it.
c = 0

def extract_content(item):
    """Validate one generation and, on success, attach its template and code.

    Reads ``item["generated_texts"][0]`` and extracts the text after the
    first ``### Query:`` / ``### Query Template:`` / ``### Template:``
    heading and the fenced code after ``### Python Code:``.  The item is
    accepted only if every check below passes; otherwise the matching
    module-level counter is incremented and ``False`` is returned.

    Checks, in order:
      1. Python code was found.
      2. A template was found.
      3. The template's space count differs from the query's by no more
         than the query's own space count.
      4. The code runs (``runcode`` does not return ``None``).
      5. The numeric result is within 1e-2 of ``item["answer"]``.
      6. Every ``<name>`` placeholder in the template is assigned
         (``name =``) somewhere in the code.

    Args:
        item: Dict with at least ``"generated_texts"`` and ``"query"``;
            ``"answer"`` is read when the code yields a number.  On success
            ``"template"`` and ``"python"`` are added in place.

    Returns:
        ``True`` if the item passed all checks, ``False`` otherwise.
    """
    global success
    global python_generation_fault_count, python_run_fault_count
    global python_run_mistake_count
    global template_generation_fault_count, template_generation_mistake_count
    global template_python_not_algined_count

    generation = item["generated_texts"][0]

    # Text following the first template heading, up to the next "###" or
    # the end of the generation.
    template_match = re.search(r'### (?:Query|Query Template|Template):(.*?)(?=###|$)', generation, re.DOTALL | re.IGNORECASE)
    template_content = template_match.group(1).strip() if template_match else None

    # Body of the fenced code block under the "### Python Code:" heading.
    python_code_match = re.search(r'### Python Code:\s*```(?:python)?\s*(.*?)\s*```', generation, re.DOTALL | re.IGNORECASE)
    python_code = python_code_match.group(1).strip() if python_code_match else None

    if python_code is None:
        python_generation_fault_count += 1
        return False
    if template_content is None:
        template_generation_fault_count += 1
        return False

    # Length sanity check: relative difference in space count must not
    # exceed 100%.  Written as |t - q| > q instead of |t - q| / q > 1 so a
    # query without any spaces no longer raises ZeroDivisionError; for such
    # a query any template containing spaces is treated as a mismatch.
    query_spaces = item["query"].count(' ')
    if abs(template_content.count(' ') - query_spaces) > query_spaces:
        template_generation_mistake_count += 1
        return False

    python_result = runcode(python_code)
    if python_result is None:
        python_run_fault_count += 1
        return False
    if isinstance(python_result, str):
        # NOTE(review): a textual result overwrites item["answer"], but the
        # item is still rejected and no counter is updated.  This mirrors
        # the previous behaviour; confirm whether string answers are meant
        # to be dropped.
        item["answer"] = python_result
        return False
    # Negated "<=" rather than ">" so that a NaN result, for which every
    # comparison is False, is counted as a wrong answer instead of a match.
    if not abs(python_result - item["answer"]) <= 1e-2:
        python_run_mistake_count += 1
        return False

    # Every <placeholder> in the template must be assigned in the code.
    for var in re.findall(r'<([^>]+?)>', template_content):
        pattern = r'\b' + re.escape(var) + r'\s*?='
        if re.search(pattern, python_code) is None:
            template_python_not_algined_count += 1
            return False

    success += 1
    item["template"] = template_content
    item["python"] = python_code
    return True

# Driver: load the items, keep those accepted by extract_content(), and
# write them to output_path.
# Both files are opened explicitly as UTF-8: the output is written with
# ensure_ascii=False, so relying on the locale's default encoding could fail
# (or produce a non-UTF-8 JSON file) on systems whose default is not UTF-8.
with open(input_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# The input file is closed before processing starts; only the parsed data
# is needed from here on.
results = []
for item in tqdm(data):
    if extract_content(item):
        results.append(item)

with open(output_path, "w", encoding="utf-8") as out_file:
    json.dump(results, out_file, ensure_ascii=False, indent=4)


