'''
Author: swchen
Date: 2025-09-01 11:19:07
LastEditors: swchen
LastEditTime: 2025-09-02 20:48:14
FilePath: /Agent-KB/eval.py
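Description: Evaluate agent predictions stored in a JSONL file (overall accuracy, token usage, per-level breakdown).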

Copyright (c) 2025 by Shaowen Chen, All Rights Reserved. 
'''

import json

def _load_jsonl(file_path):
    """Return the JSON records stored one-per-line in *file_path*.

    Blank or whitespace-only lines are skipped so that stray empty lines
    do not abort the evaluation with a ``JSONDecodeError``.
    """
    records = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if line:
                records.append(json.loads(line))
    return records


def evaluate_jsonl(file_path):
    """Evaluate the results stored in a JSONL file and print a report.

    Each record is read with ``dict.get`` and may provide these keys:

    * ``prediction`` / ``true_answer`` -- compared after ``str()``
      conversion; a record counts as correct only on exact equality.
    * ``token_counts`` -- mapping whose ``input`` and ``output`` values are
      summed; a missing or null mapping/value counts as 0 tokens.
    * ``level`` -- grouping key for the per-level statistics
      (``'unknown'`` when absent).
    * ``task_id`` -- used only in the per-task listing.

    Args:
        file_path: Path to the JSONL file (read as UTF-8).

    Returns:
        None. The report is printed to standard output.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = _load_jsonl(file_path)
    print(f"📊 加载了 {len(data)} 条数据")

    if not data:
        # Every average below divides by the record count, so stop here
        # instead of raising ZeroDivisionError on an empty file.
        print("⚠️ 没有可评估的数据")
        return

    # Running totals.
    total_correct = 0
    total_count = len(data)
    total_tokens = 0
    level_stats = {}

    for item in data:
        # 1. Accuracy: 1 when the stringified values are equal, else 0.
        prediction = str(item.get('prediction', ''))
        true_answer = str(item.get('true_answer', ''))
        is_correct = 1 if prediction == true_answer else 0
        total_correct += is_correct

        # 2. Token usage. `or` also covers keys that are present but null,
        #    which `.get(key, default)` would pass through as None.
        token_counts = item.get('token_counts') or {}
        tokens = (token_counts.get('input') or 0) + (token_counts.get('output') or 0)
        total_tokens += tokens

        # 3. Per-level statistics.
        level = item.get('level', 'unknown')
        stats = level_stats.setdefault(level, {'correct': 0, 'total': 0, 'tokens': 0})
        stats['correct'] += is_correct
        stats['total'] += 1
        stats['tokens'] += tokens

    # Overall results.
    print(f"\n🎯 整体准确率: {total_correct/total_count:.2%} ({total_correct}/{total_count})")
    print(f"💰 总Token消耗: {total_tokens:,}")
    print(f"📈 平均每任务Token: {total_tokens/total_count:.1f}")

    # Levels can mix types (e.g. int levels plus the 'unknown' default),
    # which plain sorting rejects; fall back to ordering by string form.
    try:
        ordered_levels = sorted(level_stats)
    except TypeError:
        ordered_levels = sorted(level_stats, key=str)

    print(f"\n📊 各级别准确率:")
    for level in ordered_levels:
        stats = level_stats[level]
        acc = stats['correct'] / stats['total']
        print(f"   Level {level}: {acc:.2%} ({stats['correct']}/{stats['total']}) - 平均Token: {stats['tokens']/stats['total']:.1f}")

    # Per-task details.
    print(f"\n📋 每个任务的详细信息:")
    for item in data:
        prediction = str(item.get('prediction', ''))
        true_answer = str(item.get('true_answer', ''))
        print(f"   任务: {item.get('task_id', 'unknown')} - 预测: {prediction} - 真实: {true_answer}")

# Usage: python eval.py [path/to/results.jsonl]
if __name__ == "__main__":
    import sys

    # Use the path given on the command line when there is one; otherwise
    # fall back to the original hard-coded path (replace it with your own).
    default_path = "./Agent-KB-GAIA/examples/open_deep_research/output/validation/gpt-4.1-gaia-test2.jsonl"
    file_path = sys.argv[1] if len(sys.argv) > 1 else default_path

    evaluate_jsonl(file_path)
