import json
import re

def add_period_if_missing(s: str) -> str:
    """Return *s* with surrounding whitespace removed and a trailing period.

    A "." is appended only when the stripped text does not already end
    with one.
    """
    trimmed = s.strip()
    return trimmed if trimmed.endswith(".") else f"{trimmed}."

def _append_periods(value):
    """Apply ``add_period_if_missing`` to a value, or to each element of a list."""
    if isinstance(value, list):
        return [add_period_if_missing(entry) for entry in value]
    return add_period_if_missing(value)


def process_dataset(input_file: str, output_file: str) -> None:
    """Ensure the answer fields of every record end with a period.

    Reads a JSON array of objects from ``input_file``, normalizes the
    following fields with ``add_period_if_missing`` and writes the whole
    array to ``output_file`` (which may be the same path as the input):

    * ``target_new`` -- a string or a list of strings.
    * ``ground_truth`` -- processed only when it is a list.
    * ``Instance.target_new`` -- a string or a list of strings; skipped when
      the record has no ``Instance`` object.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write.
    """
    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)  # expected to be a list of dicts

    for item in data:
        if "target_new" in item:
            item["target_new"] = _append_periods(item["target_new"])

        # A non-list ground_truth is deliberately left untouched.
        if isinstance(item.get("ground_truth"), list):
            item["ground_truth"] = _append_periods(item["ground_truth"])

        # "Instance" is optional: records without it (or with a non-object
        # value) are skipped instead of raising KeyError/TypeError.
        instance = item.get("Instance")
        if isinstance(instance, dict) and "target_new" in instance:
            instance["target_new"] = _append_periods(instance["target_new"])

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

def add_rad_to_theta(input_file: str, output_file: str) -> None:
    """Append the unit "rad" to every bare θ in the dataset's prompt fields.

    Reads a JSON array of objects from ``input_file``, rewrites the fields
    listed below and writes the whole array to ``output_file`` (which may be
    the same path as the input). A θ that is already followed by optional
    whitespace and "rad" is left alone, so running the function again does
    not stack units.

    Fields rewritten, each only when present and holding a string:

    * ``prompt`` and ``rephrase_prompt`` of the record itself.
    * ``prompt`` and ``rephrase_prompt`` of the record's ``Formula`` object.
    * ``prompt`` of every entry in every list under ``locality`` and
      ``portability``.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write.
    """
    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)  # expected to be a list of dicts

    # The negative lookahead skips a θ that already carries its unit.
    bare_theta = re.compile(r"θ(?!\s*rad)")

    def replace_theta(text):
        # Non-string values (numbers, null, ...) pass through unchanged.
        if not isinstance(text, str):
            return text
        return bare_theta.sub("θ rad", text)

    def rewrite_fields(record, keys):
        # Only keys that exist are rewritten, so no new keys are created and
        # a partially filled object does not raise KeyError.
        if not isinstance(record, dict):
            return
        for key in keys:
            if key in record:
                record[key] = replace_theta(record[key])

    for item in data:
        rewrite_fields(item, ("prompt", "rephrase_prompt"))
        rewrite_fields(item.get("Formula"), ("prompt", "rephrase_prompt"))

        # locality and portability share one shape: a mapping from a group
        # name to a list of entries that may each carry a prompt.
        for section in ("locality", "portability"):
            groups = item.get(section)
            if not isinstance(groups, dict):
                continue
            for entries in groups.values():
                for entry in entries:
                    rewrite_fields(entry, ("prompt",))

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

def transform_rephrase_prompt(input_file: str, output_file: str) -> None:
    """Rewrite "We can describe X as" prompts to mention "the calculation of".

    Reads a JSON array of objects from ``input_file``. For every record that
    has a ``Description.rephrase_prompt`` of the form
    ``"We can describe <X> as"``, the prompt becomes
    ``"We can describe the calculation of <X> as"``. Prompts that already
    contain "the calculation of" right after the prefix are left unchanged,
    so the function can be re-run on its own output. The ``case_id`` of any
    record whose prompt does not match the pattern is printed (``None`` when
    the record has no ``case_id``). The whole array is then written to
    ``output_file`` (which may be the same path as the input).

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write.
    """
    prefix = "We can describe "
    suffix = " as"
    inserted = "the calculation of "

    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)  # expected to be a list of dicts

    for item in data:
        description = item.get("Description")
        if not isinstance(description, dict) or "rephrase_prompt" not in description:
            continue

        rp = description["rephrase_prompt"]
        # The check uses the same prefix/suffix (spaces included) as the
        # slice below; the length test rejects text where they would overlap.
        matches = (
            isinstance(rp, str)
            and len(rp) >= len(prefix) + len(suffix)
            and rp.startswith(prefix)
            and rp.endswith(suffix)
        )
        if not matches:
            # Report records that need manual attention.
            print(item.get("case_id"))
            continue

        middle = rp[len(prefix):-len(suffix)]
        if middle.startswith(inserted):
            # Already rewritten by an earlier run; do not insert it twice.
            continue
        description["rephrase_prompt"] = f"{prefix}{inserted}{middle}{suffix}"

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)


# One-off entry points. Each call reads the named JSON file and writes the
# result back to the same path, so the file is overwritten in place.
# NOTE(review): these statements sit at module level without an
# `if __name__ == "__main__":` guard, so the active call also runs whenever
# this file is imported.

# Disabled: append "rad" to bare θ symbols in math_rule.json.
# add_rad_to_theta("math_rule.json", "math_rule.json")

# Active: make the answer fields in instance_editing.json end with a period.
process_dataset("instance_editing.json", "instance_editing.json")

# Disabled: insert "the calculation of" into the Description rephrase
# prompts in math_rule.json.
# transform_rephrase_prompt("math_rule.json", "math_rule.json")
