#!/usr/bin/env python3
"""
脚本用于分析候选者选择结果与判断结果的一致性
"""

import json
import os
import glob
import re
from pathlib import Path

def extract_instance_id_from_filename(filename):
    """
    Derive the instance_id from a selection-result file name.

    Every occurrence of the literal text ``_selected_similarbug.json`` is
    removed from ``filename`` and the remainder is returned, e.g.
    ``astropy__astropy-14995_selected_similarbug.json`` ->
    ``astropy__astropy-14995``. A name that does not contain that text is
    returned unchanged.
    """
    marker = '_selected_similarbug.json'
    return filename.replace(marker, '')

def candidate_letter_to_idx(letter):
    """
    Map a candidate letter to its 1-based index.

    'A' -> 1, 'B' -> 2, 'C' -> 3, 'D' -> 4, 'E' -> 5. ``None`` and any value
    outside that set (lowercase letters included) yield ``None``.
    """
    if letter is None:
        return None
    letter_to_position = {
        symbol: position for position, symbol in enumerate('ABCDE', start=1)
    }
    return letter_to_position.get(letter)

def find_judgment_file(instance_id, judgments_dir):
    """
    Locate the judgment file that belongs to ``instance_id``.

    The expected file name is ``instance_id`` with its first ``__`` replaced
    by a single ``_``, followed by ``_similarbugs_v1_judged.json``; for
    ``astropy__astropy-14995`` that is
    ``astropy_astropy-14995_similarbugs_v1_judged.json``.

    Returns the joined path when it exists inside ``judgments_dir``,
    otherwise ``None``.
    """
    # NOTE(review): an earlier comment on this conversion gave the target as
    # astropy_astropy__astropy-14995_similarbugs_v1_judged.json, which this
    # expression does not produce -- confirm the naming scheme against the
    # files actually present in the judgments directory.
    judged_name = instance_id.replace('__', '_', 1) + '_similarbugs_v1_judged.json'
    candidate_path = os.path.join(judgments_dir, judged_name)
    return candidate_path if os.path.exists(candidate_path) else None

def load_judgment_decisions(judgment_file):
    """
    Read the per-candidate decisions stored in a judgment file.

    The file is parsed as JSON, and every entry of
    ``data['judgment_result']['candidates']`` contributes one
    ``entry['idx'] -> entry['decision']`` pair.

    Returns the resulting dict. Any exception raised while opening, parsing
    or traversing the file is reported on stdout and an empty dict is
    returned instead.
    """
    try:
        with open(judgment_file, 'r', encoding='utf-8') as handle:
            payload = json.load(handle)
        return {
            entry['idx']: entry['decision']
            for entry in payload['judgment_result']['candidates']
        }
    except Exception as e:
        # Best-effort loader: the caller treats {} as "no decisions available".
        print(f"Error loading {judgment_file}: {e}")
        return {}

def _parse_candidate_selections(lines):
    """Extract ``{instance_id: candidate}`` from the lines of the results report."""
    selections = {}
    in_listing = False
    for raw_line in lines:
        line = raw_line.strip()
        # Rows are only meaningful after the "detailed file list" header.
        if line.startswith('详细文件列表:'):
            in_listing = True
            continue
        # Skip everything before the header, '--' separator rows, and rows
        # that are not of the form "<filename> | <candidate> ...".
        if not in_listing or line.startswith('--') or '|' not in line:
            continue
        filename, candidate = (part.strip() for part in line.split('|')[:2])
        candidate = candidate.replace('Candidate ', '').strip()
        if candidate != 'None':
            selections[extract_instance_id_from_filename(filename)] = candidate
    return selections


def _percentage(part, whole):
    """Return ``part`` as a percentage of ``whole``; 0.0 when ``whole`` is 0."""
    return part / whole * 100 if whole else 0.0


def analyze_consistency(results_file=None, judgments_dir=None, output_file=None):
    """
    Compare each selected candidate with the judgment recorded for it.

    Candidate selections are read from ``results_file``. For every selection
    the matching judgment file is looked up in ``judgments_dir``, and the
    selection counts as consistent when the decision recorded for the selected
    candidate is ``useful`` (case-insensitive). A summary is printed to stdout
    and the full result set is written to ``output_file`` as JSON.

    Args:
        results_file: Path of the candidate-selection report. Defaults to the
            project's ``candidate_selection_results.txt``.
        judgments_dir: Directory holding the judgment JSON files. Defaults to
            the project's ``judgments`` directory.
        output_file: Path of the JSON report to write. Defaults to the
            project's ``consistency_analysis.json``.

    Returns:
        A list with one dict per analysed instance, with the keys
        ``instance_id``, ``selected_candidate``, ``selected_idx``,
        ``judgment_decision`` and ``is_consistent``.

    Raises:
        OSError: If ``results_file`` cannot be read or ``output_file`` cannot
            be written.
    """
    if results_file is None:
        results_file = '/Users/lichenglin/research/Tree/result_llm_judge/candidate_selection_results.txt'
    if judgments_dir is None:
        judgments_dir = '/Users/lichenglin/research/Tree/result_llm_judge/judgments'
    if output_file is None:
        output_file = '/Users/lichenglin/research/Tree/result_llm_judge/consistency_analysis.json'

    # Parse the candidate selections out of the report.
    with open(results_file, 'r', encoding='utf-8') as f:
        candidate_selections = _parse_candidate_selections(f)

    print(f"Found {len(candidate_selections)} candidate selections")

    # Match every selection against its judgment.
    consistency_results = []
    missing_judgments = []

    for instance_id, selected_candidate in candidate_selections.items():
        judgment_file = find_judgment_file(instance_id, judgments_dir)
        if not judgment_file:
            missing_judgments.append(instance_id)
            continue

        decisions = load_judgment_decisions(judgment_file)
        selected_idx = candidate_letter_to_idx(selected_candidate)

        if not selected_idx or selected_idx not in decisions:
            print(f"Warning: Could not find judgment for {instance_id}, candidate {selected_candidate}")
            continue

        decision = decisions[selected_idx]
        # A non-string decision (e.g. JSON null) is simply "not useful"
        # rather than a reason to abort the whole analysis.
        is_consistent = isinstance(decision, str) and decision.lower() == 'useful'

        consistency_results.append({
            'instance_id': instance_id,
            'selected_candidate': selected_candidate,
            'selected_idx': selected_idx,
            'judgment_decision': decision,
            'is_consistent': is_consistent
        })

    # Overall statistics.
    total_analyzed = len(consistency_results)
    consistent_count = sum(1 for r in consistency_results if r['is_consistent'])
    inconsistent_count = total_analyzed - consistent_count

    print("\n=== 一致性分析结果 ===")
    print(f"总分析案例数: {total_analyzed}")
    print(f"一致案例数 (选择Useful): {consistent_count} ({_percentage(consistent_count, total_analyzed):.1f}%)")
    print(f"不一致案例数 (选择Not useful): {inconsistent_count} ({_percentage(inconsistent_count, total_analyzed):.1f}%)")
    print(f"缺少judgment文件: {len(missing_judgments)}")

    # Per-candidate statistics.
    candidate_stats = {}
    for result in consistency_results:
        stats = candidate_stats.setdefault(
            result['selected_candidate'], {'total': 0, 'useful': 0}
        )
        stats['total'] += 1
        if result['is_consistent']:
            stats['useful'] += 1

    print("\n=== 按候选者分析 ===")
    for candidate in sorted(candidate_stats):
        stats = candidate_stats[candidate]
        useful_rate = _percentage(stats['useful'], stats['total'])
        print(f"Candidate {candidate}: {stats['useful']}/{stats['total']} useful ({useful_rate:.1f}%)")

    # Persist the detailed results.
    output_data = {
        'summary': {
            'total_analyzed': total_analyzed,
            'consistent_count': consistent_count,
            'inconsistent_count': inconsistent_count,
            'missing_judgments': len(missing_judgments)
        },
        'candidate_stats': candidate_stats,
        'detailed_results': consistency_results,
        'missing_judgments': missing_judgments
    }

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(output_data, f, ensure_ascii=False, indent=2)

    print(f"\n详细结果已保存到: {output_file}")

    # Show up to ten inconsistent cases as examples.
    print("\n=== 不一致案例示例 ===")
    inconsistent_examples = [r for r in consistency_results if not r['is_consistent']][:10]
    for example in inconsistent_examples:
        print(f"{example['instance_id']}: Selected {example['selected_candidate']} but judged as {example['judgment_decision']}")

    return consistency_results


if __name__ == "__main__":
    results = analyze_consistency()