


import pandas as pd
import os
from pathlib import Path


def _resolve_paths():
    """Locate the ``final_results`` tree and return the directories used below.

    The search starts at the directory containing this file (or the current
    working directory when ``__file__`` is not defined) and walks up through
    its ancestors, taking the nearest directory that contains a
    ``final_results`` sub-directory.

    If no such directory exists anywhere up to the filesystem root, the
    starting directory is used instead, so output is created next to the
    script rather than at the filesystem root.

    Side effect: ``final_results/tables`` is created if it does not exist.

    Returns:
        A ``(results_dir, rank_dir, table_dir)`` tuple of ``Path`` objects:
        ``<root>/final_results``, ``<root>/final_results/ranking/nlp_cv`` and
        ``<root>/final_results/tables``.
    """
    base_dir = Path(__file__).resolve().parent if "__file__" in globals() else Path.cwd()

    # Nearest directory (base_dir first, then each ancestor) holding
    # final_results/; default to base_dir so a failed search never ends up
    # creating directories at the filesystem root.
    root_dir = next(
        (
            candidate
            for candidate in (base_dir, *base_dir.parents)
            if (candidate / "final_results").is_dir()
        ),
        base_dir,
    )

    results_dir = root_dir / "final_results"
    rank_dir = results_dir / "ranking" / "nlp_cv"
    table_dir = results_dir / "tables"
    table_dir.mkdir(parents=True, exist_ok=True)
    return results_dir, rank_dir, table_dir


RESULTS_DIR, RANK_DIR, TABLE_DIR = _resolve_paths()

# Maps an output label to the ranking CSV it is computed from. The label
# becomes part of the output filename: avg_time_<label>.csv.
# NOTE(review): the dictionary keys were missing in the source (the literal
# was a SyntaxError); the labels below are derived from the filenames —
# confirm they match the intended output names.
# NOTE(review): both files are looked up under RANK_DIR (ranking/nlp_cv);
# confirm the tabular ranking file really lives there.
RANK_FILES = {
    "nlp_cv": "dvmad_result_nlp_cv_10_datasets_rank.csv",
    "tabular": "dvmad_result_tabular_47_datasets_rank.csv",
}

# For every ranking file: average the train/test times per model and write
# the result to TABLE_DIR as a CSV, echoing the table to stdout.
for label, filename in RANK_FILES.items():
    file_rank = RANK_DIR / filename
    if not file_rank.exists():
        print(f"Missing file: {file_rank}")
        continue

    df_rank = pd.read_csv(file_rank)

    # The timing columns appear with either capitalisation; prefer the
    # title-cased name and fall back to the lower-cased second word.
    time_train_col = "Time Train" if "Time Train" in df_rank.columns else "Time train"
    time_test_col = "Time Test" if "Time Test" in df_rank.columns else "Time test"

    # Report a schema problem the same way a missing file is reported
    # instead of aborting the whole run with a bare KeyError.
    missing_cols = [
        col
        for col in ("Model", time_train_col, time_test_col)
        if col not in df_rank.columns
    ]
    if missing_cols:
        print(f"Skipping {file_rank}: missing column(s) {missing_cols}")
        continue

    # One groupby over both timing columns replaces two groupbys + a merge;
    # output columns are normalised to "Time Train" / "Time Test".
    avg_times = (
        df_rank.groupby("Model")[[time_train_col, time_test_col]]
        .mean()
        .reset_index()
        .rename(columns={time_train_col: "Time Train", time_test_col: "Time Test"})
        .sort_values(by="Model", ascending=True)
    )

    output_path = TABLE_DIR / f"avg_time_{label}.csv"
    avg_times.to_csv(output_path, index=False)

    print(avg_times)
