import json 

import argparse
import json
import os
import sys
import asyncio
from pathlib import Path
import re
from typing import Any, Dict, List, Optional, Tuple, Union
import numpy as np
from tqdm import tqdm

import sys
sys.path.append(str(Path(__file__).parent.parent))

from rag_factory.evaluations.exact_match import exact_match, f1_score


# Root directory of the fast-graphrag benchmark outputs. Results of the other
# runs live in its per-method sub-directories (vdb/, nano/, minirag/,
# lightrag/).
results_path = "/finance_ML/wuxiaojun/RAG/fast-graphrag/benchmarks/results/"

# Result file that is scored when no --result-file option is given.
DEFAULT_RESULT_FILE = results_path + "minirag/musique_1000_mini_shuffle_results.json"

# Let the caller pick the file on the command line instead of editing this
# script. parse_known_args() is used so that arguments meant for another
# runner (when this module is executed through one) are ignored, not fatal.
_arg_parser = argparse.ArgumentParser(
    description="Compute exact-match / F1 metrics for a benchmark result file.",
    allow_abbrev=False,
)
_arg_parser.add_argument(
    "--result-file",
    default=DEFAULT_RESULT_FILE,
    help="JSON result file to evaluate (default: %(default)s)",
)
result_file = _arg_parser.parse_known_args()[0].result_file

print("Evaluating results...")

# Load the predictions to score. Each record is read through its
# "ground_truth_answer" and "answer" entries below.
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default.
with open(result_file, "r", encoding="utf-8") as f:
    results = json.load(f)

# Per-record scores, in the same order as `results`.
em_scores: List[float] = []
f1_scores: List[Tuple[float, float, float]] = []
for result in tqdm(results):
    # str() so that non-string answers (e.g. numbers) are compared as text.
    ground_truth_answer = str(result["ground_truth_answer"])
    predicted_answer = str(result["answer"])

    em_score = exact_match(ground_truth_answer, predicted_answer)
    f1 = f1_score(ground_truth_answer, predicted_answer)
    em_scores.append(em_score)
    f1_scores.append(f1)

# Averaging zero scores would produce NaN, which json.dump writes as a
# non-standard token; stop with a clear message instead.
if not em_scores or not f1_scores:
    raise ValueError(f"No results to evaluate in {result_file}")

# Aggregate the per-record scores. Each f1_scores entry is read positionally:
# index 0 -> F1, index 1 -> precision, index 2 -> recall.
# float() turns the NumPy scalars into plain Python floats for JSON output.
save_metrics = {
    "exact_match": float(np.mean(em_scores)),
    "f1_score": float(np.mean([scores[0] for scores in f1_scores])),
    "precision": float(np.mean([scores[1] for scores in f1_scores])),
    "recall": float(np.mean([scores[2] for scores in f1_scores])),
}

print("Evaluation metrics:")
print(f"Exact Match: {save_metrics['exact_match']:.4f}")
print(f"F1 Score: {save_metrics['f1_score']:.4f}")
print(f"Precision: {save_metrics['precision']:.4f}")
print(f"Recall: {save_metrics['recall']:.4f}")

# Write "<name>_metrics.json" next to the input file. Only the final
# extension is swapped, so a ".json" elsewhere in the path is left alone and
# an input without a ".json" suffix is never overwritten by its own metrics.
result_root, _ = os.path.splitext(result_file)
save_metrics_file = f"{result_root}_metrics.json"

# dirname is empty for a bare file name; os.makedirs("") would raise.
metrics_dir = os.path.dirname(save_metrics_file)
if metrics_dir:
    os.makedirs(metrics_dir, exist_ok=True)

with open(save_metrics_file, "w", encoding="utf-8") as f:
    json.dump(save_metrics, f, indent=4)
print(f"Evaluation metrics saved to {save_metrics_file}")