


import pandas as pd
from pathlib import Path
from collections import defaultdict

# Directory that holds this script; falls back to the current working
# directory when __file__ is not defined.
if "__file__" in globals():
    BASE_DIR = Path(__file__).resolve().parent
else:
    BASE_DIR = Path.cwd()

# Climb from BASE_DIR to the nearest ancestor (BASE_DIR included) that contains
# a "process_tabular" directory. If none does, the climb stops at the
# filesystem root, which is the only path equal to its own parent.
ROOT_DIR = BASE_DIR
while True:
    if ROOT_DIR == ROOT_DIR.parent:
        break
    if (ROOT_DIR / "process_tabular").is_dir():
        break
    ROOT_DIR = ROOT_DIR.parent




# Folder the input result CSVs are read from.
RAW_DIR = ROOT_DIR.joinpath("process_tabular", "raw_data", "test_each_scaler")

# Folder the rank table is written to; created (with parents) if absent.
OUT_DIR = ROOT_DIR.joinpath(
    "process_tabular",
    "processed_results",
    "test_each_scaler",
    "Ranking_MultiMetric_Detail",
)
OUT_DIR.mkdir(parents=True, exist_ok=True)


# Metrics for which a DVM-AD rank is computed. Each name is used as a column
# key on the loaded result frames and as a prefix of the output column names.
RANK_METRICS = [
    "AUCROC",
    "AUCPR",
    "MCC",
    "Recall",
    "F1 Score",
]

# One entry per experiment:
#   (tag used in output column names, DVM-AD result file, baseline result file)
# File names are resolved relative to RAW_DIR.
EXPERIMENTS = [
    (
        "real",
        "dvmad_result_realworld_data.csv",
        "Baseline_result_realworld_data.csv",
    ),
    (
        "cluster",
        "dvmad_result_cluster_synthetic_data.csv",
        "Baseline_synthentic_cluster.csv",
    ),
    (
        "global",
        "dvmad_result_global_synthetic_data.csv",
        "Baseline_synthentic_global.csv",
    ),
    (
        "local",
        "dvmad_result_local_synthetic_data.csv",
        "Baseline_synthentic_local.csv",
    ),
    (
        "dependency",
        "dvmad_result_dependency_synthetic_data.csv",
        "Baseline_synthentic_dependency.csv",
    ),
]

# Echo the active configuration before any processing starts.
for label, value in (
    ("📊 Metrics:", RANK_METRICS),
    ("📂 Output:", OUT_DIR),
):
    print(label, value)




# results[scaler][f"{metric}_{exp_name}"] holds the 1-based position of DVM-AD
# (run with that scaler) among all models, ordered by mean per-dataset rank.
results = defaultdict(dict)

# Column names assigned positionally to both result files; the files' own
# header row is skipped on read. "Dataset" is the grouping key for ranking and
# every entry of RANK_METRICS must appear here.
# NOTE(review): the positions of "Dataset" and "F1 Score" are inferred --
# confirm the order against the headers of the CSV files.
RESULT_COLUMNS = [
    "Dataset", "Model", "AUCROC", "AUCPR", "Accuracy", "MCC",
    "F1 Score", "Precision", "Recall", "Time Train", "Time Test",
]

# Model label given to the DVM-AD rows of the scaler currently being ranked.
DVMAD_LABEL = "DVM-AD"


def _read_experiment(ours_path, baseline_path):
    """Load one experiment's DVM-AD and baseline results with shared column names.

    Both files are read without their header row. When the DVM-AD file has
    more columns than the baseline file, its last column is dropped. Columns
    are then named positionally from RESULT_COLUMNS.

    Returns:
        (df_dvmad, df_baseline); df_dvmad["Model"] holds the scaler name as str.
    """
    df_dvmad = pd.read_csv(ours_path, skiprows=1, header=None)
    df_baseline = pd.read_csv(baseline_path, skiprows=1, header=None)

    if df_dvmad.shape[1] > df_baseline.shape[1]:
        # Copy so later column assignment does not write into a slice view.
        df_dvmad = df_dvmad.iloc[:, :-1].copy()

    df_dvmad.columns = RESULT_COLUMNS[:df_dvmad.shape[1]]
    df_baseline.columns = RESULT_COLUMNS[:df_baseline.shape[1]]
    df_dvmad["Model"] = df_dvmad["Model"].astype(str)
    return df_dvmad, df_baseline


def _dvmad_position(df_scaler, df_baseline, metric):
    """Return the 1-based position of DVM-AD among all models for `metric`.

    Args:
        df_scaler: DVM-AD rows for a single scaler.
        df_baseline: baseline rows for the same experiment.
        metric: name of the column to rank on (higher is better).

    Models are dense-ranked within each dataset (missing scores count as 0),
    the ranks are averaged per model, and models are ordered by that average.
    """
    candidate = df_scaler.assign(Model=DVMAD_LABEL)
    merged = pd.concat([candidate, df_baseline], ignore_index=True)
    merged[metric] = merged[metric].fillna(0)
    merged["Rank"] = merged.groupby("Dataset")[metric].rank(
        ascending=False, method="dense"
    )
    # Stable sort: models with equal average rank keep the alphabetical order
    # produced by groupby, so the reported position is deterministic.
    avg_rank = merged.groupby("Model")["Rank"].mean().sort_values(kind="mergesort")
    return avg_rank.index.get_loc(DVMAD_LABEL) + 1


# Read every experiment once up front instead of once per metric.
loaded = {}
for exp_name, ours_file, baseline_file in EXPERIMENTS:
    ours_path = RAW_DIR / ours_file
    baseline_path = RAW_DIR / baseline_file

    missing = [str(p) for p in (ours_path, baseline_path) if not p.exists()]
    if missing:
        # Experiments with absent inputs are skipped; their cells stay empty.
        print(f"⚠️ Skipping '{exp_name}': missing {', '.join(missing)}")
        continue

    loaded[exp_name] = _read_experiment(ours_path, baseline_path)

for metric in RANK_METRICS:
    print(f"\n=== Metric: {metric} ===")

    for exp_name, (df_dvmad, df_baseline) in loaded.items():
        if metric not in df_dvmad.columns or metric not in df_baseline.columns:
            # A file with too few columns cannot be ranked on this metric.
            print(f"⚠️ Skipping '{exp_name}': no '{metric}' column in the result files")
            continue

        col_name = f"{metric}_{exp_name}"

        # Each scaler variant of DVM-AD is ranked on its own against the baselines.
        for scaler, df_scaler in df_dvmad.groupby("Model", sort=True):
            results[scaler][col_name] = _dvmad_position(df_scaler, df_baseline, metric)




# Output columns in a fixed order: metric-major, experiments in EXPERIMENTS order.
all_columns = [
    f"{metric}_{exp_name}"
    for metric in RANK_METRICS
    for exp_name, _, _ in EXPERIMENTS
]

# One row per scaler, sorted by name; combinations that were never computed
# (e.g. skipped experiments) are left as None.
rows = [
    {"Scaler": scaler, **{col: results[scaler].get(col) for col in all_columns}}
    for scaler in sorted(results)
]

# Passing the columns explicitly keeps the header intact even when `rows` is
# empty, so the saved CSV always has the same schema.
final_table = pd.DataFrame(rows, columns=["Scaler", *all_columns])




# Persist the rank table (without the index column) into the output folder.
out_csv = OUT_DIR.joinpath("DVMAD_Scaler_Detail_Rank_Table.csv")
final_table.to_csv(out_csv, index=False)

# Console report: framed title, the table itself, then where it was saved.
for line in (
    "\n==============================",
    "🏆 FINAL DETAIL RANK TABLE",
    "==============================",
):
    print(line)
print(final_table)
print(f"\n📄 Saved to: {out_csv}")
print("\n🎉 DONE.")
