import pandas as pd
import torch, os
from sklearn.metrics import r2_score
from loguru import logger

# Directory that holds the per-dataset "<dataset>_all_LLM_results.csv" files.
csv_dirs = "/mnt/shared-storage-user/caipengxiang/workspace/ChemBOMAS/Rag-Cluster/LLM_y_pred"

# Log file path (relative, so it is created in the current working directory).
log_file = "evaluate_llm_predict.log"

# Sink 1: write every log message to the file, encoded as UTF-8.
logger.add(log_file, encoding="utf-8")
# Sink 2: echo every formatted message to stdout via print(); end="" because
# the formatted message is expected to carry its own trailing newline.
# NOTE(review): loguru normally ships with a default stderr handler; if it is
# still active, each message will show up twice on the console -- confirm
# whether this extra stdout sink is intended.
logger.add(lambda formatted: print(formatted, end=""))

def metrics(y_true, y_pred):
    """Compute regression metrics for predictions against reference values.

    Defined once at module level (instead of inside the dataset loop) because
    it does not depend on any per-dataset state.

    Args:
        y_true: torch tensor of reference values.
        y_pred: torch tensor of predicted values; assumed to have the same
            shape as ``y_true`` (TODO confirm against the CSV schema).

    Returns:
        dict with keys ``'MSE'`` and ``'MAE'`` (Python floats obtained via
        ``.item()``) and ``'R2'`` (the value returned by sklearn's
        ``r2_score``).
    """
    return {
        'MSE': torch.nn.functional.mse_loss(y_pred, y_true).item(),
        'MAE': torch.nn.functional.l1_loss(y_pred, y_true).item(),
        'R2': r2_score(y_true.numpy(), y_pred.numpy()),
    }


# Label used in the log output -> CSV column holding that model's predictions.
# Adding a model to the evaluation only requires a new entry here.
_MODEL_COLUMNS = {
    "GPT-5": "GPT-5_output",
    "GPT-4o": "GPT-4o_output",
}

# Datasets to evaluate; other dataset names that have been used with this
# script: "arylation", "suzuki".
for dataset in ["buchwald"]:
    # 1. Load the results table and convert the reference column to float32.
    df = pd.read_csv(os.path.join(csv_dirs, f"{dataset}_all_LLM_results.csv"))
    y_true = torch.tensor(df['output'].values, dtype=torch.float32)

    # 2. Compute the metrics for every model before logging anything, so a
    #    failure on any model does not leave a partial report in the log.
    results = {}
    for label, column in _MODEL_COLUMNS.items():
        y_pred = torch.tensor(df[column].values, dtype=torch.float32)
        results[label] = metrics(y_true, y_pred)

    # 3. Report. Labels are left-padded to width 6 so the arrows line up
    #    ("GPT-5  -> ..." / "GPT-4o -> ...").
    logger.info(f"Dataset: {dataset}")
    for label, res in results.items():
        logger.info(f"{label:<6} -> {res}")