import os
import json
from collections import defaultdict

def _load_judgments(directory_path, jsonl_files):
    """Aggregate the judgment records of every file in *jsonl_files* by uid.

    Args:
        directory_path (str): Folder that contains the files.
        jsonl_files (list[str]): File names (not paths) to read, in the order
            they should be processed.

    Returns:
        tuple: ``(records, model_names)``. ``records`` maps each uid to a dict
        with the keys ``'prompt'``, ``'gpt4o_response'``, ``'responses'`` and
        ``'verdicts'`` (the last two keyed by model name). ``model_names``
        holds the model name derived from each file name, in processing order.
    """
    records = defaultdict(lambda: {
        'prompt': None,
        'gpt4o_response': None,
        'responses': {},
        'verdicts': {},
    })
    model_names = []

    for filename in jsonl_files:
        # The model name is whatever precedes the '_vs_GPT4o.jsonl' suffix.
        model_name = filename.split('_vs_GPT4o.jsonl')[0]
        model_names.append(model_name)

        file_path = os.path.join(directory_path, filename)
        with open(file_path, 'r', encoding='utf-8') as f:
            for i, line in enumerate(f, 1):
                # Blank lines (e.g. a trailing newline) are not malformed data.
                if not line.strip():
                    continue
                try:
                    data = json.loads(line)
                    # TypeError covers valid JSON that is not an object
                    # (list, string, number, null) and unhashable uids.
                    uid = data['uid']
                    record = records[uid]
                except (json.JSONDecodeError, KeyError, TypeError) as e:
                    print(f"警告: 跳过文件 {filename} 中的格式错误行 {i}: {e}")
                    continue

                # Prompt and GPT-4o answer are taken from the first record
                # that provides a non-empty value for this uid.
                if not record['prompt']:
                    record['prompt'] = data.get('prompt', '')
                if not record['gpt4o_response']:
                    record['gpt4o_response'] = data.get('response_b', '')

                record['responses'][model_name] = data.get('response_a', '')
                record['verdicts'][model_name] = data.get('verdict', '')

    return records, model_names


def _is_strong_case(verdicts, other_models):
    """Return True when 'Ours' is 'win_a' and every other model is 'win_b' or 'tie'."""
    if verdicts.get("Ours") != "win_a":
        return False
    # A model without a verdict for this uid yields None and disqualifies the case.
    return all(verdicts.get(model) in ("win_b", "tie") for model in other_models)


def extract_strong_cases(directory_path, output_filename="strong_cases_analysis.json"):
    """
    Find the cases that 'Ours' wins while every other model loses or ties,
    and dump their complete data to a JSON file.

    Every ``*.jsonl`` file in ``directory_path`` is read; the part of the file
    name before ``_vs_GPT4o.jsonl`` is used as the model name. Each line is
    expected to be a JSON object with a ``uid`` key and, optionally,
    ``prompt``, ``response_a``, ``response_b`` and ``verdict``. Lines that
    cannot be used are reported and skipped. Progress and errors are printed
    to stdout; nothing is raised for a missing directory or an unwritable
    output file.

    Args:
        directory_path (str): Folder that contains the .jsonl data files.
        output_filename (str): Path of the JSON file to write.

    Returns:
        None. The output file is only written when at least one case matches.
    """
    # 1. --- Aggregate the data ---
    print(f"开始扫描并聚合数据，目录: '{directory_path}'...")
    try:
        # os.listdir returns entries in arbitrary order; sort them so that the
        # model order and the resulting JSON are identical on every run.
        jsonl_files = sorted(
            name for name in os.listdir(directory_path)
            if name.endswith('.jsonl')
            and os.path.isfile(os.path.join(directory_path, name))
        )
    except (FileNotFoundError, NotADirectoryError):
        print(f"错误: 找不到目录 '{directory_path}'。请检查路径是否正确。")
        return

    if not jsonl_files:
        print(f"错误: 在目录 '{directory_path}' 中未找到 .jsonl 文件。")
        return

    full_data_by_uid, all_model_names = _load_judgments(directory_path, jsonl_files)

    # 2. --- Select the uids that satisfy the conditions ---
    if "Ours" not in all_model_names:
        print("错误: 未找到 'Ours_vs_GPT4o.jsonl' 文件。无法执行分析。")
        return

    other_models = [name for name in all_model_names if name != "Ours"]
    print(f"\n分析的模型包括: {all_model_names}")
    print(f"将 'Ours' 与其他模型进行比较: {other_models}\n")

    strong_cases = [
        {
            "uid": uid,
            "prompt": data['prompt'],
            "gpt4o_response": data['gpt4o_response'],
            "model_responses": data['responses'],
            "model_verdicts": data['verdicts'],
        }
        for uid, data in full_data_by_uid.items()
        if _is_strong_case(data['verdicts'], other_models)
    ]

    # Sort by uid so the output order is stable across runs.
    try:
        strong_cases.sort(key=lambda case: case['uid'])
    except TypeError:
        # Mixed uid types (e.g. int and str) cannot be compared directly.
        strong_cases.sort(key=lambda case: str(case['uid']))

    # 3. --- Write the JSON file ---
    if not strong_cases:
        print("分析完成：没有找到符合所有条件的案例。")
        return

    try:
        with open(output_filename, 'w', encoding='utf-8') as f:
            # indent=2 keeps the file readable; ensure_ascii=False writes
            # non-ASCII text (e.g. Chinese) verbatim instead of \u escapes.
            json.dump(strong_cases, f, indent=2, ensure_ascii=False)

        print("\n--- 分析完成 ---")
        print(f"成功找到 {len(strong_cases)} 个符合条件的案例。")
        print(f"详细数据已保存到文件: '{output_filename}'")
    except OSError as e:
        print(f"错误: 无法写入文件 '{output_filename}': {e}")


if __name__ == '__main__':
    # IMPORTANT: before running, change the path below to the folder that
    # holds your .jsonl judgment files.
    extract_strong_cases(
        directory_path='ROOT/APLOT/rm_eval/arean_hard/vs_gpt4o/Qwen3-235B-A22B_judgments/group_meta_llama',
    )