#!/usr/bin/env python3
"""
脚本用于分析候选者选择结果与判断结果的一致性
"""

import json
import os
import glob
import re
from pathlib import Path

def extract_instance_id_from_filename(filename):
    """
    Derive the instance_id from a selection-result file name.

    Example: astropy__astropy-14995_selected_similarbug.json -> astropy__astropy-14995

    Every occurrence of the ``_selected_similarbug.json`` marker is removed
    (not only a trailing one); a name without the marker is returned as is.
    """
    selection_marker = '_selected_similarbug.json'
    return filename.replace(selection_marker, '')

def candidate_letter_to_idx(letter):
    """
    Translate a candidate letter into its 1-based index.

    A=1, B=2, C=3, D=4, E=5. ``None`` and any value that is not one of
    these five upper-case letters yield ``None``.
    """
    letter_positions = {ch: pos for pos, ch in enumerate('ABCDE', start=1)}
    return None if letter is None else letter_positions.get(letter)

def find_judgment_file(instance_id, judgments_dir):
    """
    Locate the judgment file that belongs to ``instance_id``.

    Judgment files are looked up under the name
    ``{library}_{instance_id}_similarbugs_v1_judged.json`` where ``library``
    is derived from the organisation part of the instance id (the text before
    the first ``__``). An instance id without ``__`` is looked up as
    ``{instance_id}_similarbugs_v1_judged.json``.

    Args:
        instance_id: Identifier such as ``astropy__astropy-14995``.
        judgments_dir: Directory that holds the judgment JSON files.

    Returns:
        The joined path of the judgment file if it exists, otherwise ``None``.
    """
    # Organisations whose name differs from the library prefix used in the
    # judgment file names. Any other organisation (astropy, django,
    # matplotlib, sympy, ...) is used as the library name unchanged.
    org_to_library = {
        'mwaskom': 'seaborn',
        'pallets': 'flask',
        'psf': 'requests',
        'pydata': 'xarray',
        'pylint-dev': 'pylint',
        'pytest-dev': 'pytest',
        'sphinx-doc': 'sphinx',
        'scikit-learn': 'scikit-learn',
    }

    organisation, separator, _ = instance_id.partition('__')
    if separator:
        lib_name = org_to_library.get(organisation, organisation)
        converted_id = f"{lib_name}_{instance_id}_similarbugs_v1_judged.json"
    else:
        converted_id = f"{instance_id}_similarbugs_v1_judged.json"

    judgment_file = os.path.join(judgments_dir, converted_id)
    return judgment_file if os.path.exists(judgment_file) else None

def load_judgment_decisions(judgment_file):
    """
    Read the per-candidate decisions stored in a judgment file.

    The file is parsed as JSON and the list found at
    ``judgment_result -> candidates`` is turned into a mapping from each
    entry's ``idx`` to its ``decision``. When an ``idx`` repeats, the last
    entry wins.

    This is a best-effort loader: any failure (unreadable file, invalid JSON,
    missing keys, ...) is reported on stdout and an empty dict is returned.
    """
    try:
        with open(judgment_file, 'r', encoding='utf-8') as handle:
            payload = json.load(handle)

        return {
            entry['idx']: entry['decision']
            for entry in payload['judgment_result']['candidates']
        }
    except Exception as e:
        print(f"Error loading {judgment_file}: {e}")
        return {}

def _parse_candidate_selections(results_file):
    """Parse the detailed file table of the selection report into {instance_id: candidate}."""
    selections = {}
    in_listing = False
    with open(results_file, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()
            # The table starts right after this marker line.
            if line.startswith('详细文件列表:'):
                in_listing = True
                continue
            # Skip everything before the table, separator rows and rows
            # without a column delimiter.
            if not in_listing or line.startswith('--') or '|' not in line:
                continue
            filename, candidate = (part.strip() for part in line.split('|')[:2])
            candidate = candidate.replace('Candidate ', '').strip()
            # Rows whose selection is the literal text 'None' carry no choice.
            if candidate != 'None':
                selections[extract_instance_id_from_filename(filename)] = candidate
    return selections


def _evaluate_selections(candidate_selections, judgments_dir):
    """Compare every selection with its judgment; return (result records, ids lacking a judgment file)."""
    consistency_results = []
    missing_judgments = []

    for instance_id, selected_candidate in candidate_selections.items():
        judgment_file = find_judgment_file(instance_id, judgments_dir)
        if not judgment_file:
            missing_judgments.append(instance_id)
            continue

        decisions = load_judgment_decisions(judgment_file)
        selected_idx = candidate_letter_to_idx(selected_candidate)
        if not selected_idx or selected_idx not in decisions:
            print(f"Warning: Could not find judgment for {instance_id}, candidate {selected_candidate}")
            continue

        decision = decisions[selected_idx]
        consistency_results.append({
            'instance_id': instance_id,
            'selected_candidate': selected_candidate,
            'selected_idx': selected_idx,
            'judgment_decision': decision,
            # A non-string decision (e.g. JSON null) counts as "not useful"
            # instead of aborting the whole analysis.
            'is_consistent': isinstance(decision, str) and decision.lower() == 'useful',
        })

    return consistency_results, missing_judgments


def _per_candidate_stats(consistency_results):
    """Count, per selected candidate letter, how many selections exist and how many were judged useful."""
    stats = {}
    for result in consistency_results:
        entry = stats.setdefault(result['selected_candidate'], {'total': 0, 'useful': 0})
        entry['total'] += 1
        if result['is_consistent']:
            entry['useful'] += 1
    return stats


def analyze_consistency(
    results_file='/Users/lichenglin/research/Tree/result_llm_judge/candidate_selection_results.txt',
    judgments_dir='/Users/lichenglin/research/Tree/result_llm_judge/judgments',
    output_file='/Users/lichenglin/research/Tree/result_llm_judge/consistency_analysis.json',
):
    """
    Analyze how well the candidate selections agree with the judgment results.

    A selection is "consistent" when the judgment decision recorded for the
    selected candidate equals ``useful`` (case-insensitive). A report is
    printed to stdout and the full result is written as JSON.

    Args:
        results_file: Text report containing the candidate selections; only
            the table after the ``详细文件列表:`` line is read.
        judgments_dir: Directory holding the judgment JSON files.
        output_file: Path of the JSON file the detailed analysis is written to.
        The defaults are the paths originally hard-coded in this script.

    Returns:
        A list with one dict per analyzed instance (keys ``instance_id``,
        ``selected_candidate``, ``selected_idx``, ``judgment_decision``,
        ``is_consistent``).

    Raises:
        OSError: If ``results_file`` cannot be read or ``output_file`` cannot
            be written.
    """
    candidate_selections = _parse_candidate_selections(results_file)
    print(f"Found {len(candidate_selections)} candidate selections")

    consistency_results, missing_judgments = _evaluate_selections(
        candidate_selections, judgments_dir
    )

    # Overall summary.
    total_analyzed = len(consistency_results)
    consistent_count = sum(1 for r in consistency_results if r['is_consistent'])
    inconsistent_count = total_analyzed - consistent_count
    # Guard the division so an empty analysis reports 0.0%.
    consistent_pct = consistent_count / total_analyzed * 100 if total_analyzed else 0.0
    inconsistent_pct = inconsistent_count / total_analyzed * 100 if total_analyzed else 0.0

    print("\n=== 一致性分析结果 ===")
    print(f"总分析案例数: {total_analyzed}")
    print(f"一致案例数 (选择Useful): {consistent_count} ({consistent_pct:.1f}%)")
    print(f"不一致案例数 (选择Not useful): {inconsistent_count} ({inconsistent_pct:.1f}%)")
    print(f"缺少judgment文件: {len(missing_judgments)}")

    # Breakdown per candidate letter.
    candidate_stats = _per_candidate_stats(consistency_results)

    print("\n=== 按候选者分析 ===")
    for candidate in sorted(candidate_stats):
        stats = candidate_stats[candidate]
        # 'total' is at least 1 for every key, so the division is safe.
        useful_rate = stats['useful'] / stats['total'] * 100
        print(f"Candidate {candidate}: {stats['useful']}/{stats['total']} useful ({useful_rate:.1f}%)")

    # Persist the detailed result.
    output_data = {
        'summary': {
            'total_analyzed': total_analyzed,
            'consistent_count': consistent_count,
            'inconsistent_count': inconsistent_count,
            'missing_judgments': len(missing_judgments)
        },
        'candidate_stats': candidate_stats,
        'detailed_results': consistency_results,
        'missing_judgments': missing_judgments
    }

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(output_data, f, ensure_ascii=False, indent=2)

    print(f"\n详细结果已保存到: {output_file}")

    # Show up to ten inconsistent cases as examples.
    print("\n=== 不一致案例示例 ===")
    inconsistent_examples = [r for r in consistency_results if not r['is_consistent']][:10]
    for example in inconsistent_examples:
        print(f"{example['instance_id']}: Selected {example['selected_candidate']} but judged as {example['judgment_decision']}")

    return consistency_results

if __name__ == "__main__":
    # Run the analysis only when executed as a script (not on import);
    # the returned per-instance records are kept in `results`.
    results = analyze_consistency()