import json

# Input and output file paths (opened by the script entry point below)
INPUT_FILE = "math_rule_instance.json"
OUTPUT_FILE = "instance_editing.json"

def process_dataset(data, get_key):
    """Flatten the ``get_key`` section of every record and fold the rest.

    For each record, the entries of ``record[get_key]`` are merged into the
    record's top level (overwriting same-named keys) and the ``get_key``
    entry itself is removed.  The remaining rule sections are then folded
    into a ``"Rule_Understanding"`` entry:

    * ``get_key == "Description"``: ``"Formula"`` (if present) is renamed
      to ``"Rule_Understanding"``.
    * ``get_key == "Formula"``: ``"Description"`` (if present) is renamed
      to ``"Rule_Understanding"``.
    * ``get_key == "Instance"``: the ``"prompt"``, ``"target_new"`` and the
      first ``"ground_truth"`` element of ``"Formula"`` and
      ``"Description"`` (in that order, skipping absent ones) are collected
      into parallel lists and both sections are removed.
    * any other ``get_key``: only the flattening step is applied.

    The input records are left untouched: each output record is a shallow
    copy, so nested values are shared with the input but the input dicts
    themselves are never modified.

    Args:
        data: Iterable of dict records.
        get_key: Name of the nested section to flatten into each record.

    Returns:
        A new list with one transformed record per input record.

    Raises:
        KeyError: In ``"Instance"`` mode, if a present ``"Formula"`` or
            ``"Description"`` section lacks ``"prompt"``, ``"target_new"``
            or ``"ground_truth"``.
        IndexError: In ``"Instance"`` mode, if a ``"ground_truth"`` list
            is empty.
    """
    # In the two single-section modes the *other* section is the one that
    # gets renamed to "Rule_Understanding".
    rename_source = {"Description": "Formula", "Formula": "Description"}

    new_data = []
    for record in data:
        # Work on a shallow copy so the caller's records are not mutated.
        item = dict(record)

        # Merge the selected section into the top level, then drop it.
        item.update(item.get(get_key, {}))
        item.pop(get_key, None)

        if get_key in rename_source:
            other = rename_source[get_key]
            if other in item:
                item["Rule_Understanding"] = item.pop(other)
        elif get_key == "Instance":
            rule_prompts = []
            rule_target_new = []
            rule_ground_truth = []
            for section_name in ("Formula", "Description"):
                if section_name not in item:
                    continue
                section = item.pop(section_name)
                rule_prompts.append(section["prompt"])
                rule_target_new.append(section["target_new"])
                # Only the first ground-truth entry is kept per section.
                rule_ground_truth.append(section["ground_truth"][0])
            item["Rule_Understanding"] = {
                "prompt": rule_prompts,
                "target_new": rule_target_new,
                "ground_truth": rule_ground_truth,
            }
        new_data.append(item)
    return new_data

def reverse_process_dataset(data):
    """Undo the "Formula" flattening performed on each record.

    For every record, the top-level ``"prompt"``, ``"target_new"``,
    ``"ground_truth"`` and ``"rephrase_prompt"`` entries (whichever are
    present) are moved into a nested ``"Formula"`` dict, and a
    ``"Rule_Understanding"`` entry, if present, is renamed to
    ``"Description"``.  Records are shallow-copied, so the input dicts are
    not modified.

    Args:
        data: Iterable of dict records.

    Returns:
        A new list with one restructured record per input record.
    """
    formula_keys = ("prompt", "target_new", "ground_truth", "rephrase_prompt")

    def _restore(record):
        restored = dict(record)  # shallow copy; the input stays untouched

        # Pull the flattened fields back out and nest them under "Formula".
        restored["Formula"] = {
            field: restored.pop(field)
            for field in formula_keys
            if field in restored
        }

        # Rename Rule_Understanding back to Description.
        if "Rule_Understanding" in restored:
            restored["Description"] = restored.pop("Rule_Understanding")
        return restored

    return [_restore(record) for record in data]

if __name__ == "__main__":
    # Load the source records; the input is assumed to be a JSON array.
    with open(INPUT_FILE, mode="r", encoding="utf-8") as src:
        records = json.load(src)

    # Flatten each record's "Instance" section.  To run the inverse
    # transformation instead, call reverse_process_dataset(records) here.
    converted = process_dataset(records, "Instance")

    # Write the result as indented JSON, keeping non-ASCII text unescaped.
    with open(OUTPUT_FILE, mode="w", encoding="utf-8") as dst:
        json.dump(converted, dst, ensure_ascii=False, indent=4)

    print(f"处理完成，结果保存到 {OUTPUT_FILE}")
