import json
import random
from collections import defaultdict

def _alternating_prompts(entries):
    """Collect one prompt/ground-truth pair from each entry.

    Each pair is read from ``entry["Rule_Understanding"]``. Entries at even
    positions contribute index 0 and entries at odd positions contribute
    index 1, so an entry landing on an odd position must provide at least
    two prompts and two ground truths.

    Returns:
        A ``(prompts, ground_truths)`` tuple of two parallel lists.
    """
    prompts = []
    truths = []
    for position, entry in enumerate(entries):
        rule = entry["Rule_Understanding"]
        index = position % 2
        prompts.append(rule["prompt"][index])
        truths.append(rule["ground_truth"][index])
    return prompts, truths


def build_locality(input_file: str, output_file: str, n_neigh: int = 2, n_dist: int = 2,
                   seed: "int | None" = None):
    """Attach a ``locality`` field to every entry of a JSON dataset.

    The input file is expected to hold a JSON list of dicts, each with the
    keys ``"subject"``, ``"case_id"`` and ``"Rule_Understanding"`` (the latter
    containing ``"prompt"`` and ``"ground_truth"`` sequences). For every entry
    two groups of prompts are sampled at random:

    * ``neighborhood``: from other entries with the same subject
      (different ``case_id``), at most ``n_neigh`` of them;
    * ``distracting``: from entries with a different subject, at most
      ``n_dist`` of them.

    Fewer entries are used when not enough candidates exist. The augmented
    entries are written to ``output_file`` in their original order.

    Args:
        input_file: Path of the JSON file to read (UTF-8).
        output_file: Path of the JSON file to write (UTF-8, indent of 4).
        n_neigh: Maximum number of same-subject entries to sample.
        n_dist: Maximum number of other-subject entries to sample.
        seed: When given, sampling uses a private ``random.Random(seed)`` so
            the output is reproducible. When ``None`` (default) the
            module-level ``random`` generator is used, as before.

    Raises:
        KeyError: If an entry lacks one of the required keys.
        IndexError: If a sampled entry does not provide the prompt or ground
            truth index selected for its position.
        ValueError: If ``n_neigh`` or ``n_dist`` is negative.
    """
    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)  # expected to be a list of dicts

    # The ``random`` module exposes the same ``sample`` API as a ``Random``
    # instance, so either can serve as the generator.
    rng = random if seed is None else random.Random(seed)

    # Group entries by subject.
    subject_to_entries = defaultdict(list)
    for item in data:
        subject_to_entries[item["subject"]].append(item)

    # (subject, entries with a different subject) for the most recently seen
    # subject. Consecutive entries sharing a subject reuse the list instead of
    # rescanning the whole dataset; only one list is kept alive at a time.
    other_cache = None

    new_data = []
    for item in data:
        subject = item["subject"]

        # ---- neighborhood: same subject, different case ----
        candidates = [
            other for other in subject_to_entries[subject]
            if other["case_id"] != item["case_id"]
        ]
        chosen_neigh = rng.sample(candidates, min(len(candidates), n_neigh))
        neigh_prompts, neigh_truths = _alternating_prompts(chosen_neigh)

        # ---- distracting: any entry with a different subject ----
        if other_cache is None or other_cache[0] != subject:
            other_cache = (
                subject,
                [other for other in data if other["subject"] != subject],
            )
        other_subjects = other_cache[1]
        chosen_dist = rng.sample(other_subjects, min(len(other_subjects), n_dist))
        dist_prompts, dist_truths = _alternating_prompts(chosen_dist)

        # Attach the locality field to the entry.
        item["locality"] = {
            "neighborhood": {
                "prompt": neigh_prompts,
                "ground_truth": neigh_truths
            },
            "distracting": {
                "prompt": dist_prompts,
                "ground_truth": dist_truths
            }
        }
        new_data.append(item)

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(new_data, f, ensure_ascii=False, indent=4)

    print(f"处理完成，结果保存到 {output_file}")


# Example usage. Guarded so the conversion only runs when this file is
# executed as a script; importing the module no longer reads or writes files.
if __name__ == "__main__":
    build_locality("instance_editing.json", "instance_editing_final.json")
