#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
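compare_thinking_and_response.py

Purpose:
1. Read ground-truth file A (only id and label are used) and file B (id, judge_thinking, thinking_result, judge_response, judge result).
2. For every id present in both files, compare file B's thinking_result and judge result against file A's label.
3. Save the mismatches to two new JSON files: file a (id and judge_thinking of records whose thinking_result differs from the label) and file b (id and judge_response of records whose judge result differs from the label).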
"""

import json
from pathlib import Path

# ========== Configuration: edit as needed ==========
# NOTE(review): the two input paths are empty placeholders. Path() with no
# arguments is the current directory, so they must be pointed at real JSON
# files before the script is run.
GROUND_TRUTH_PATH = Path()   # file A: ground truth (id, label)
FILE_B_PATH       = Path()   # file B: judge output to be checked
# Replace Path() with the desired output directory; as written the files are
# created in the current working directory.
OUTPUT_A_PATH     = Path() / "writing-thinking-wrong.json"   # records whose thinking_result mismatches
OUTPUT_B_PATH     = Path() / "writing-response-wrong.json"   # records whose judge result mismatches
# ====================================================


def _collect_mismatches(ground_truth, file_b):
    """Return the file-B records whose verdicts disagree with the ground truth.

    Args:
        ground_truth: Iterable of dicts, each providing "id" and "label".
        file_b: Iterable of dicts, each providing "id" and optionally
            "thinking_result", "judge_thinking", "judge result" and
            "judge_response".

    Returns:
        A ``(thinking_wrong, response_wrong)`` tuple of lists.
        ``thinking_wrong`` holds ``{"id", "judge_thinking"}`` dicts for records
        whose "thinking_result" differs from the label; ``response_wrong``
        holds ``{"id", "judge_response"}`` dicts for records whose
        "judge result" differs from the label.

    Raises:
        KeyError: If a ground-truth item lacks "id" or "label", or a file-B
            item lacks "id".
    """
    # Ids are normalised to str so that 1 and "1" refer to the same record.
    label_by_id = {str(item["id"]): item["label"] for item in ground_truth}

    thinking_wrong, response_wrong = [], []

    for item in file_b:
        key = str(item["id"])
        if key not in label_by_id:
            continue  # no ground truth for this id, nothing to compare

        label = label_by_id[key]

        # A record is only checked for a field it actually carries.
        if "thinking_result" in item and item["thinking_result"] != label:
            thinking_wrong.append({
                "id": item["id"],
                "judge_thinking": item.get("judge_thinking", ""),
            })

        if "judge result" in item and item["judge result"] != label:
            response_wrong.append({
                "id": item["id"],
                "judge_response": item.get("judge_response", ""),
            })

    return thinking_wrong, response_wrong


def main(ground_truth_path=None, file_b_path=None,
         output_a_path=None, output_b_path=None):
    """Compare file B against the ground truth and write the mismatches.

    Every path argument is optional; when omitted (``None``) the matching
    module-level constant is used, so a bare ``main()`` call behaves as before.

    Args:
        ground_truth_path: JSON file A with "id"/"label" records.
            Defaults to ``GROUND_TRUTH_PATH``.
        file_b_path: JSON file B with the judge output.
            Defaults to ``FILE_B_PATH``.
        output_a_path: Destination for records whose "thinking_result"
            mismatches the label. Defaults to ``OUTPUT_A_PATH``.
        output_b_path: Destination for records whose "judge result"
            mismatches the label. Defaults to ``OUTPUT_B_PATH``.
    """
    # Resolve defaults lazily so explicit callers never touch the globals.
    if ground_truth_path is None:
        ground_truth_path = GROUND_TRUTH_PATH
    if file_b_path is None:
        file_b_path = FILE_B_PATH
    if output_a_path is None:
        output_a_path = OUTPUT_A_PATH
    if output_b_path is None:
        output_b_path = OUTPUT_B_PATH

    # Load both inputs.
    with open(ground_truth_path, "r", encoding="utf-8") as f:
        ground_truth = json.load(f)

    with open(file_b_path, "r", encoding="utf-8") as f:
        file_b = json.load(f)

    output_a, output_b = _collect_mismatches(ground_truth, file_b)

    # Write results; ensure_ascii=False keeps non-ASCII text readable.
    with open(output_a_path, "w", encoding="utf-8") as f:
        json.dump(output_a, f, ensure_ascii=False, indent=2)

    with open(output_b_path, "w", encoding="utf-8") as f:
        json.dump(output_b, f, ensure_ascii=False, indent=2)

    print(f"处理完成：a.json 共 {len(output_a)} 条，b.json 共 {len(output_b)} 条")


# Run the comparison only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
