


import pandas as pd
from natsort import natsorted
from pathlib import Path
import os

# Directory containing this script; fall back to the current working directory
# when __file__ is not defined (e.g. interactive sessions).
BASE_DIR = Path(__file__).resolve().parent if "__file__" in globals() else Path.cwd()

# Project root: the nearest directory at or above BASE_DIR that contains a
# "process_tabular" folder.
ROOT_DIR = next(
    (
        candidate
        for candidate in (BASE_DIR, *BASE_DIR.parents)
        if (candidate / "process_tabular").is_dir()
    ),
    None,
)
if ROOT_DIR is None:
    # Without this fallback the search would end at the filesystem root and
    # the mkdir below would try to create "/final_results/...".
    print("⚠️ 'process_tabular' not found at or above the base directory; "
          "using the base directory as project root.")
    ROOT_DIR = BASE_DIR

# Input and output locations, all relative to the project root.
RAW_DIR = ROOT_DIR / "process_tabular" / "raw_data"
PROC_DIR = ROOT_DIR / "process_tabular" / "processed_results"
RANK_DIR = ROOT_DIR / "final_results" / "ranking" / "tabular"
RANK_DIR.mkdir(parents=True, exist_ok=True)

print("📂 Base directory:", BASE_DIR)
print("📂 Raw data:", RAW_DIR)
print("📂 Processed results:", PROC_DIR)
print("📂 Ranking directory:", RANK_DIR)




def process_files(input_file_ours, input_file_baseline,
                  output_file_rank, output_file_avg_ranks):
    """Rank all models per dataset and write the ranking tables to CSV.

    The two result files are stacked into one table, every model is
    dense-ranked within each dataset on AUCROC and on AUCPR (higher score
    gets the better rank, missing scores count as 0), and the per-model
    average ranks are computed. If a model named ``DVM-AD`` is present, its
    average ranks and overall positions are printed.

    Args:
        input_file_ours: Path (``str`` or ``Path``) to the CSV with our
            method's results. Its first line is skipped.
        input_file_baseline: Path to the CSV with the baseline results.
            Its first line is skipped.
        output_file_rank: Destination CSV for the per-dataset ranking table.
        output_file_avg_ranks: Destination CSV for the per-model average
            ranks.
    """
    # The original header row of each file is discarded; columns are
    # addressed by position and renamed uniformly below.
    ours_df = pd.read_csv(input_file_ours, skiprows=1, header=None)
    baseline_df = pd.read_csv(input_file_baseline, skiprows=1, header=None)

    # Drop the trailing column of our results when they are wider than the
    # baseline so both frames line up column by column.
    if ours_df.shape[1] > baseline_df.shape[1]:
        ours_df = ours_df.iloc[:, :-1]

    merged_df = pd.concat([ours_df, baseline_df], ignore_index=True)

    # NOTE(review): 'F1' is the presumed name of the column between 'MCC'
    # and 'Precision' -- confirm against the input CSV layout.
    new_header = [
        'Dataset', 'Model', 'AUCROC', 'AUCPR', 'Accuracy', 'MCC',
        'F1', 'Precision', 'Recall', 'Time Train', 'Time Test',
    ]
    merged_df.columns = new_header[:merged_df.shape[1]]

    # A missing score is treated as the worst possible score.
    merged_df['AUCROC'] = merged_df['AUCROC'].fillna(0)
    merged_df['AUCPR'] = merged_df['AUCPR'].fillna(0)

    # Dense ranks within each dataset: rank 1 is the highest score and tied
    # models share a rank without leaving gaps.
    by_dataset = merged_df.groupby('Dataset')
    merged_df['Rank_AUCROC'] = by_dataset['AUCROC'].rank(
        ascending=False, method='dense'
    )
    merged_df['Rank_AUCPR'] = by_dataset['AUCPR'].rank(
        ascending=False, method='dense'
    )

    # An ordered categorical makes sort_values use natural dataset order
    # instead of plain lexicographic order.
    merged_df['Dataset'] = pd.Categorical(
        merged_df['Dataset'],
        categories=natsorted(merged_df['Dataset'].unique()),
        ordered=True
    )
    merged_df = merged_df.sort_values(
        ['Dataset', 'Rank_AUCROC']
    ).reset_index(drop=True)

    merged_df.to_csv(output_file_rank, index=False)

    # Mean rank of every model across all datasets.
    avg_ranks = merged_df.groupby('Model').agg(
        Avg_Rank_AUCROC=('Rank_AUCROC', 'mean'),
        Avg_Rank_AUCPR=('Rank_AUCPR', 'mean')
    ).reset_index()

    # Leaderboards ordered best (lowest average rank) first; after the index
    # reset, a model's overall position is its row index + 1.
    avg_ranks_auc = avg_ranks.sort_values(
        'Avg_Rank_AUCROC'
    ).reset_index(drop=True)
    avg_ranks_aupr = avg_ranks.sort_values(
        'Avg_Rank_AUCPR'
    ).reset_index(drop=True)

    if 'DVM-AD' in avg_ranks['Model'].values:
        d = avg_ranks[avg_ranks['Model'] == 'DVM-AD']

        auc_rank = avg_ranks_auc[
            avg_ranks_auc['Model'] == 'DVM-AD'
        ].index[0] + 1
        aupr_rank = avg_ranks_aupr[
            avg_ranks_aupr['Model'] == 'DVM-AD'
        ].index[0] + 1

        # Path(...) lets callers pass either a str or a Path.
        print(f"📊 {Path(input_file_ours).name}")
        print(f"   DVM-AD Avg_Rank_AUCROC: {d['Avg_Rank_AUCROC'].values[0]:.3f} "
              f"(Rank {auc_rank})")
        print(f"   DVM-AD Avg_Rank_AUCPR:  {d['Avg_Rank_AUCPR'].values[0]:.3f} "
              f"(Rank {aupr_rank})\n")

    avg_ranks.to_csv(output_file_avg_ranks, index=False)





# (our results, baseline results) file-name pairs. The first name is looked
# up in PROC_DIR, the second in RAW_DIR.
file_pairs = [
    ('dvmad_result_realworld_data.csv', 'Baseline_result_realworld_data.csv'),
    ('dvmad_result_cluster_synthetic_data.csv', 'Baseline_synthentic_cluster.csv'),
    ('dvmad_result_global_synthetic_data.csv', 'Baseline_synthentic_global.csv'),
    ('dvmad_result_local_synthetic_data.csv', 'Baseline_synthentic_local.csv'),
    ('dvmad_result_dependency_synthetic_data.csv', 'Baseline_synthentic_dependency.csv'),
]

# Rank every pair whose two input files exist. A pair with a missing input is
# reported and skipped, and the final message reflects whether any were
# skipped.
skipped_pairs = 0
for ours_file, baseline_file in file_pairs:
    ours_path = PROC_DIR / ours_file
    baseline_path = RAW_DIR / baseline_file

    # Report only the first missing file of the pair, our results first.
    missing_path = next(
        (path for path in (ours_path, baseline_path) if not path.exists()),
        None,
    )
    if missing_path is not None:
        print(f"❌ Missing file: {missing_path}")
        skipped_pairs += 1
        continue

    # Output names are derived from our result file's stem.
    rank_file = RANK_DIR / f"{ours_path.stem}_rank.csv"
    avg_rank_file = RANK_DIR / f"{ours_path.stem}_avg_ranks.csv"

    process_files(
        ours_path,
        baseline_path,
        rank_file,
        avg_rank_file
    )

if skipped_pairs:
    print(f"⚠️ DONE: {len(file_pairs) - skipped_pairs}/{len(file_pairs)} "
          f"file pairs ranked; {skipped_pairs} skipped because of missing input files.")
else:
    print("🎉 DONE: All ranking files generated successfully.")
