
import os
import logging
import pandas as pd
from typing import List

from data.data_loader import load_score_data

# Module-level logger shared by the functions below. It is looked up under the
# fixed name "rich" rather than this module's own name.
# NOTE(review): presumably a handler for "rich" is configured elsewhere — confirm.
logger = logging.getLogger("rich")

def create_ranking(
    dataset_name: str = "wildbench",
    judge: str = "gpt-4o",
    model_names: List[str] = ['Meta-Llama-3-8B-Instruct'],
    data_dir: str = "data/",
    task_id: str = "",
) -> pd.DataFrame:
    """
    Create ranking file for the given dataset.

    Loads the scores through ``load_score_data``, averages the ``score``
    column per ``model_test`` value, orders the models from the highest to
    the lowest mean and writes the resulting table, tab-separated, to
    ``<data_dir>/ranking/<task_id>.tsv``.

    Args:
        dataset_name: Forwarded to ``load_score_data``.
        judge: Forwarded to ``load_score_data``; also used as the name of the
            mean-score column in the output.
        model_names: Forwarded unchanged to ``load_score_data``.
        data_dir: Forwarded to ``load_score_data``; the ``ranking`` output
            directory is created beneath it when it does not exist yet.
        task_id: Stem of the output file name (``<task_id>.tsv``).

    Returns:
        A DataFrame indexed by ``Rank`` (1 is the highest mean score) with
        the columns ``model_test`` and ``<judge>``.
    """
    scores = load_score_data(
        dataset_name=dataset_name,
        judge=judge,
        model_names=model_names,
        data_dir=data_dir,
    )
    ranking = (
        scores.groupby("model_test")["score"]
        .mean()
        .sort_values(ascending=False)
        .to_frame(name=judge)
    )

    output_dir = os.path.join(data_dir, "ranking")
    # exist_ok=True already tolerates an existing directory, so a separate
    # os.path.exists() check would only add a race window.
    os.makedirs(output_dir, exist_ok=True)
    # Build the path once so the file written and the path logged always agree.
    output_path = os.path.join(output_dir, f"{task_id}.tsv")

    # Ranks are consecutive positions in the sorted order; models with equal
    # means still receive distinct ranks.
    ranking["Rank"] = range(1, len(ranking) + 1)
    ranking = ranking.reset_index().set_index("Rank")
    ranking.to_csv(output_path, sep="\t")
    logger.info('Ranking output to "%s" ', output_path)
    return ranking

def compare_ranking(
    dataset_name: str = "wildbench",
    judges: List[str] = ['gpt-4o'],
    model_names: List[str] = ['Meta-Llama-3-8B-Instruct'],
    data_dir: str = "data/",
) -> pd.DataFrame:
    """
    Compare different rankings from different judges.

    For every judge the scores are loaded through ``load_score_data`` and
    averaged per ``model_test`` value; the per-judge columns are then combined
    with an outer join on ``model_test``, so a model missing for one judge
    gets NaN in that judge's column.

    Args:
        dataset_name: Forwarded to ``load_score_data``.
        judges: Judges to compare; each one contributes a column named after
            the judge.
        model_names: Forwarded unchanged to ``load_score_data``.
        data_dir: Forwarded to ``load_score_data``.

    Returns:
        A DataFrame indexed by ``model_test`` with one mean-score column per
        judge. When ``judges`` is empty, an empty DataFrame (no rows, no
        columns) with the same index name is returned instead of ``None``.
    """
    combined = None
    for judge in judges:
        scores = load_score_data(
            dataset_name=dataset_name,
            judge=judge,
            model_names=model_names,
            data_dir=data_dir,
        )
        per_judge = (
            scores.groupby("model_test")["score"]
            .mean()
            .sort_values(ascending=False)
            .to_frame(name=judge)
        )
        if combined is None:
            combined = per_judge
        else:
            combined = combined.join(per_judge, how="outer")

    if combined is None:
        # No judge was processed: honour the declared return type rather than
        # handing callers a None.
        return pd.DataFrame(index=pd.Index([], name="model_test"))
    return combined