









import os
os.environ.setdefault("MPLCONFIGDIR", "/tmp/matplotlib")

import numpy as np
import pandas as pd
import matplotlib

matplotlib.use('agg')
import matplotlib.pyplot as plt




import operator
import math
from scipy.stats import wilcoxon
from scipy.stats import friedmanchisquare
import networkx


def graph_ranks(avranks, names, avg_value, p_values, cd=None, cdmethod=None, lowv=None, highv=None,
                width=8, textspace=1, reverse=False, filename=None, labels=False, **kwargs):
    """Draw a rank diagram with grouping bars on a new matplotlib figure.

    A horizontal rank axis is drawn near the top of the figure.  Every entry
    of ``avranks`` is attached to the axis by an elbow line that ends in the
    entry's name: the first half of the entries is labelled on the left, the
    second half on the right.  Each group returned by
    ``form_cliques(p_values, names)`` with more than one member is marked by
    a thick horizontal bar spanning its members.

    The figure is created through ``plt.figure`` and is neither saved nor
    returned; callers reach it through pyplot's current figure.

    Args:
        avranks: Average ranks, one per entry, indexable by position.
        names: Entry names in the same order as ``avranks``; also forwarded
            to ``form_cliques``.
        avg_value: Per-entry values printed next to the rank when ``labels``
            is true; indexable by position.
        p_values: Pairwise test results, forwarded unchanged to
            ``form_cliques``.
        cd, cdmethod, filename, **kwargs: Accepted but not read by this
            function.
        lowv: Lowest rank on the axis; defaults to
            ``min(1, floor(min(avranks)))``.
        highv: Highest rank on the axis; defaults to
            ``max(len(avranks), ceil(max(avranks)))``.
        width: Figure width passed to ``figsize``.
        textspace: Horizontal space reserved on each side for the names, in
            the same units as ``width``.
        reverse: When true the axis is mirrored so that ``highv`` is on the
            left.
        labels: When true, print ``"<avg_value> / <rank>"`` beside each entry.

    Raises:
        ImportError: If matplotlib cannot be imported.
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError as err:
        raise ImportError("Function graph_ranks requires matplotlib.") from err

    width = float(width)
    textspace = float(textspace)

    ranks = avranks
    k = len(ranks)

    if lowv is None:
        lowv = min(1, int(math.floor(min(ranks))))
    if highv is None:
        highv = max(k, int(math.ceil(max(ranks))))

    scalewidth = width - 2 * textspace

    def rankpos(rank):
        """Map a rank to its horizontal position in layout units."""
        offset = highv - rank if reverse else rank - lowv
        return textspace + scalewidth / (highv - lowv) * offset

    # Vertical layout, measured downwards from the top of the figure.
    cline = 0.4 + 0.25             # y position of the rank axis
    min_not_significant = 2 * 0.2  # gap between the axis and the first name row
    fig_height = cline + ((k + 4) / 2) * 0.2 + min_not_significant

    fig = plt.figure(figsize=(width, fig_height))
    fig.set_facecolor('white')
    ax = fig.add_axes([0, 0, 1, 1])
    ax.set_axis_off()

    # Factors converting layout units into axes fractions.
    hf = 1. / fig_height
    wf = 1. / width

    # White diagonal across the axes, kept from the original drawing sequence.
    ax.plot([0, 1], [0, 1], c="w")
    ax.set_xlim(0, 1)
    ax.set_ylim(1, 0)  # inverted so that y grows downwards

    def line(points, color='k', **line_kwargs):
        """Plot a polyline given as (x, y) pairs in layout units."""
        ax.plot([x * wf for x, _ in points], [y * hf for _, y in points],
                color=color, **line_kwargs)

    def text(x, y, s, *args, **text_kwargs):
        """Place text at a position given in layout units."""
        ax.text(wf * x, hf * y, s, *args, **text_kwargs)

    # Rank axis.
    line([(textspace, cline), (width - textspace, cline)], linewidth=2)

    bigtick = 0.3
    smalltick = 0.15
    linewidth = 2.0
    linewidth_sign = 4.0

    # Ticks every half rank: long at integer ranks, short in between.
    for a in list(np.arange(lowv, highv, 0.5)) + [highv]:
        tick = bigtick if a == int(a) else smalltick
        line([(rankpos(a), cline - tick / 2),
              (rankpos(a), cline)],
             linewidth=2)

    # Integer labels sit just above the long ticks.
    for a in range(lowv, highv + 1):
        text(rankpos(a), cline - bigtick / 2 - 0.05, str(a),
             ha="center", va="bottom", size=16)

    space_between_names = 0.24
    half = math.ceil(k / 2)

    # First half of the entries: elbow lines running to the left margin.
    for i in range(half):
        chei = cline + min_not_significant + i * space_between_names
        line([(rankpos(ranks[i]), cline),
              (rankpos(ranks[i]), chei),
              (textspace - 0.1, chei)],
             linewidth=linewidth)
        if labels:
            text(textspace + 0.9, chei - 0.075,
                 "{0:.2f} / {1:.2f}".format(avg_value[i], ranks[i]),
                 ha="right", va="center", size=12)
        text(textspace - 0.2, chei, names[i], ha="right", va="center", size=16)

    # Second half: elbow lines running to the right margin, stacked upwards.
    for i in range(half, k):
        chei = cline + min_not_significant + (k - i - 1) * space_between_names
        line([(rankpos(ranks[i]), cline),
              (rankpos(ranks[i]), chei),
              (textspace + scalewidth + 0.1, chei)],
             linewidth=linewidth)
        if labels:
            text(textspace + scalewidth - 0.8, chei - 0.075,
                 "{0:.2f} / {1:.2f}".format(avg_value[i], ranks[i]),
                 ha="left", va="center", size=12)
        text(textspace + scalewidth + 0.2, chei, names[i],
             ha="left", va="center", size=16)

    # Grouping bars: one thick bar per clique with more than one member.
    start = cline + 0.2
    side = -0.02
    bar_step = 0.1
    achieved_half = False

    for clq in form_cliques(p_values, names):
        if len(clq) == 1:
            continue

        min_idx = np.array(clq).min()
        max_idx = np.array(clq).max()
        # The first clique lying entirely in the second half resets the bar
        # height once, so the right-hand bars start again near the axis.
        if min_idx >= len(names) / 2 and not achieved_half:
            start = cline + 0.25
            achieved_half = True
        line([(rankpos(ranks[min_idx]) - side, start),
              (rankpos(ranks[max_idx]) + side, start)],
             linewidth=linewidth_sign)
        start += bar_step
        


def form_cliques(p_values, nnames):
    """Group names whose pairwise comparisons were not flagged as significant.

    An undirected graph is built with one node per position in ``nnames`` and
    an edge for every entry of ``p_values`` whose fourth element is false.
    The cliques of that graph are returned.

    Args:
        p_values: Iterable of ``(name_1, name_2, p_value, significant)``
            tuples.
        nnames: Names of the compared entries.  Any sequence, array or pandas
            Index is accepted; it is converted with ``np.asarray`` so that the
            name lookup also works for plain lists.

    Returns:
        The iterator produced by ``networkx.find_cliques``.

    Raises:
        IndexError: If a name from ``p_values`` does not occur in ``nnames``.
    """
    names = np.asarray(nnames)
    m = len(names)
    g_data = np.zeros((m, m), dtype=np.int64)
    for p in p_values:
        if p[3]:
            # Significant difference: the two names must not be connected.
            continue
        i = np.where(names == p[0])[0][0]
        j = np.where(names == p[1])[0][0]
        # Only the upper triangle is filled; the graph is undirected.
        g_data[min(i, j), max(i, j)] = 1

    g = networkx.Graph(g_data)
    return networkx.find_cliques(g)

def draw_cd_diagram_AUC(df_perf=None, alpha=0.05, title=None, labels=False, save_path=r".\example.png"):
    """Draw the rank diagram for the 'AUCROC' metric and save it to disk.

    Runs ``wilcoxon_holm_AUC`` on ``df_perf``, renders the result with
    ``graph_ranks`` and writes the current pyplot figure to ``save_path``.

    Args:
        df_perf: Performance table forwarded to ``wilcoxon_holm_AUC``.
        alpha: Significance level forwarded to ``wilcoxon_holm_AUC``.
        title: Optional figure title; nothing is drawn when it is falsy.
        labels: Forwarded to ``graph_ranks``.
        save_path: Output image path.  The default is written as a raw string
            so the backslash is kept literally instead of forming the invalid
            escape sequence ``\\e``; the runtime value is unchanged.
    """
    p_values, average_ranks, _, average_value = wilcoxon_holm_AUC(df_perf=df_perf, alpha=alpha)

    graph_ranks(average_ranks.values, average_ranks.keys(), average_value['AUCROC'].to_list(), p_values,
                cd=None, reverse=True, width=10, textspace=1, labels=labels)

    if title:
        # NOTE(review): the fontdict keys were missing from the source as seen
        # here; they are restored to the color/weight/size settings used by
        # the title call in draw_cd_diagram_AUROC -- confirm.
        font = {
            'color': 'black',
            'weight': 'normal',
            'size': 22,
        }
        plt.title(title, fontdict=font, y=0.9, x=0.5)
    plt.savefig(save_path, bbox_inches='tight', dpi=300)




























































    




    






    






















def wilcoxon_holm_AUC(alpha=0.05, df_perf=None):
    """Compare models on 'AUCROC' with Friedman, Wilcoxon and Holm tests.

    Only models that have the maximum number of rows in ``df_perf`` take part
    in the tests.  A Friedman test is run over those models first; when its
    p-value is not below ``alpha`` a message is printed and ``SystemExit`` is
    raised.  Otherwise every pair of models is compared with a Wilcoxon
    signed-rank test (``zero_method='pratt'``) and the sorted p-values are
    flagged with Holm's step-down thresholds ``alpha / (k - i)``.

    NOTE(review): samples are paired by row order within each model, which
    assumes every model lists its datasets in the same order -- confirm
    against the input files.

    Args:
        alpha: Significance level.
        df_perf: DataFrame with the columns 'Model', 'AUCROC' and
            'Rank_AUCROC'.

    Returns:
        A tuple ``(p_values, average_ranks, max_nb_datasets, average_value)``:
        ``p_values`` is a list of ``(model_1, model_2, p_value, significant)``
        tuples sorted by p-value; ``average_ranks`` is the mean 'Rank_AUCROC'
        per tested model, sorted in descending order; ``max_nb_datasets`` is
        the largest row count of any model; ``average_value`` holds the
        per-model metric means, ordered like ``average_ranks``.

    Raises:
        SystemExit: If the Friedman test does not reject the null hypothesis.
        ValueError: If ``df_perf`` has no 'Rank_AUCROC' column.
    """
    print(pd.unique(df_perf['Model']))

    # Number of rows per model; models with fewer rows than the maximum are
    # excluded from the tests.
    counts = df_perf.groupby('Model').size()
    max_nb_datasets = counts.max()
    classifiers = list(counts.index[counts == max_nb_datasets])

    # Extract each model's scores once instead of re-filtering per pair.
    scores = {
        c: np.array(df_perf.loc[df_perf['Model'] == c, 'AUCROC'], dtype=np.float64)
        for c in classifiers
    }

    friedman_p_value = friedmanchisquare(*scores.values())[1]
    if friedman_p_value >= alpha:
        print('The null hypothesis over the entire classifiers cannot be rejected')
        # Same effect as the interactive exit() helper, without relying on
        # the site module having installed it.
        raise SystemExit

    # Pairwise Wilcoxon signed-rank tests; nothing is flagged yet.
    p_values = []
    for i, classifier_1 in enumerate(classifiers[:-1]):
        for classifier_2 in classifiers[i + 1:]:
            p_value = wilcoxon(scores[classifier_1], scores[classifier_2], zero_method='pratt')[1]
            p_values.append((classifier_1, classifier_2, p_value, False))

    # Holm step-down: walk the sorted p-values and stop at the first failure.
    p_values.sort(key=operator.itemgetter(2))
    k = len(p_values)
    for i in range(k):
        classifier_1, classifier_2, p_value, _ = p_values[i]
        if p_value <= float(alpha / (k - i)):
            p_values[i] = (classifier_1, classifier_2, p_value, True)
        else:
            break

    if 'Rank_AUCROC' not in df_perf.columns:
        raise ValueError("DataFrame must contain a 'Rank_AUCROC' column.")

    average_ranks = (
        df_perf[df_perf['Model'].isin(classifiers)]
        .groupby('Model')['Rank_AUCROC'].mean()
        .sort_values(ascending=False)
    )

    # NOTE(review): the aggregation keys were missing from the source as seen
    # here; the mean is taken over 'AUCROC' plus whichever of 'AUCPR' / 'MCC'
    # the frame provides -- confirm against the intended column set.
    metric_cols = ['AUCROC'] + [c for c in ('AUCPR', 'MCC') if c in df_perf.columns]
    average_value = df_perf.groupby('Model')[metric_cols].mean().reset_index()

    # Order the rows like average_ranks; models outside the tested set get a
    # missing category.
    average_value['Model'] = average_value['Model'].astype("category")
    average_value['Model'] = average_value['Model'].cat.set_categories(average_ranks.index)
    average_value = average_value.sort_values(["Model"])

    return p_values, average_ranks, max_nb_datasets, average_value

def draw_cd_diagram_MCC(df_perf=None, alpha=0.05, title=None, labels=False, save_path=r".\example.png"):
    """Draw the rank diagram for the 'MCC' metric and save it to disk.

    Runs ``wilcoxon_holm_MCC`` on ``df_perf``, renders the result with
    ``graph_ranks`` and writes the current pyplot figure to ``save_path``.

    NOTE: a later definition with the same name further down this file
    replaces this one once the module has been executed.

    Args:
        df_perf: Performance table forwarded to ``wilcoxon_holm_MCC``.
        alpha: Significance level forwarded to ``wilcoxon_holm_MCC``.
        title: Optional figure title; nothing is drawn when it is falsy.
        labels: Forwarded to ``graph_ranks``.
        save_path: Output image path.  The default is written as a raw string
            so the backslash is kept literally instead of forming the invalid
            escape sequence ``\\e``; the runtime value is unchanged.
    """
    p_values, average_ranks, _, average_value = wilcoxon_holm_MCC(df_perf=df_perf, alpha=alpha)

    graph_ranks(average_ranks.values, average_ranks.keys(), average_value['MCC'].to_list(), p_values,
                cd=None, reverse=True, width=10, textspace=1, labels=labels)

    if title:
        # NOTE(review): the fontdict keys were missing from the source as seen
        # here; they are restored to the color/weight/size settings used by
        # the title calls of the later draw_cd_diagram_* functions -- confirm.
        font = {
            'color': 'black',
            'weight': 'normal',
            'size': 22,
        }
        plt.title(title, fontdict=font, y=0.9, x=0.5)
    plt.savefig(save_path, bbox_inches='tight', dpi=300)




























































    




    






    






















def wilcoxon_holm_MCC(alpha=0.05, df_perf=None):
    """Compare models on 'MCC' with Friedman, Wilcoxon and Holm tests.

    Only models that have the maximum number of rows in ``df_perf`` take part
    in the tests.  A Friedman test is run over those models first; when its
    p-value is not below ``alpha`` a message is printed and ``SystemExit`` is
    raised.  Otherwise every pair of models is compared with a Wilcoxon
    signed-rank test (``zero_method='pratt'``) and the sorted p-values are
    flagged with Holm's step-down thresholds ``alpha / (k - i)``.

    NOTE: a later definition with the same name further down this file
    replaces this one once the module has been executed.

    NOTE(review): samples are paired by row order within each model, which
    assumes every model lists its datasets in the same order -- confirm
    against the input files.

    Args:
        alpha: Significance level.
        df_perf: DataFrame with the columns 'Model', 'MCC' and 'Rank_MCC'.

    Returns:
        A tuple ``(p_values, average_ranks, max_nb_datasets, average_value)``:
        ``p_values`` is a list of ``(model_1, model_2, p_value, significant)``
        tuples sorted by p-value; ``average_ranks`` is the mean 'Rank_MCC' per
        tested model, sorted in descending order; ``max_nb_datasets`` is the
        largest row count of any model; ``average_value`` holds the per-model
        metric means, ordered like ``average_ranks``.

    Raises:
        SystemExit: If the Friedman test does not reject the null hypothesis.
        ValueError: If ``df_perf`` has no 'Rank_MCC' column.
    """
    print(pd.unique(df_perf['Model']))

    # Number of rows per model; models with fewer rows than the maximum are
    # excluded from the tests.
    counts = df_perf.groupby('Model').size()
    max_nb_datasets = counts.max()
    classifiers = list(counts.index[counts == max_nb_datasets])

    # Extract each model's scores once instead of re-filtering per pair.
    scores = {
        c: np.array(df_perf.loc[df_perf['Model'] == c, 'MCC'], dtype=np.float64)
        for c in classifiers
    }

    friedman_p_value = friedmanchisquare(*scores.values())[1]
    if friedman_p_value >= alpha:
        print('The null hypothesis over the entire classifiers cannot be rejected')
        # Same effect as the interactive exit() helper, without relying on
        # the site module having installed it.
        raise SystemExit

    # Pairwise Wilcoxon signed-rank tests; nothing is flagged yet.
    p_values = []
    for i, classifier_1 in enumerate(classifiers[:-1]):
        for classifier_2 in classifiers[i + 1:]:
            p_value = wilcoxon(scores[classifier_1], scores[classifier_2], zero_method='pratt')[1]
            p_values.append((classifier_1, classifier_2, p_value, False))

    # Holm step-down: walk the sorted p-values and stop at the first failure.
    p_values.sort(key=operator.itemgetter(2))
    k = len(p_values)
    for i in range(k):
        classifier_1, classifier_2, p_value, _ = p_values[i]
        if p_value <= float(alpha / (k - i)):
            p_values[i] = (classifier_1, classifier_2, p_value, True)
        else:
            break

    if 'Rank_MCC' not in df_perf.columns:
        raise ValueError("DataFrame must contain a 'Rank_MCC' column.")

    average_ranks = (
        df_perf[df_perf['Model'].isin(classifiers)]
        .groupby('Model')['Rank_MCC'].mean()
        .sort_values(ascending=False)
    )

    # NOTE(review): the aggregation keys were missing from the source as seen
    # here; the mean is taken over 'MCC' plus whichever of 'AUCROC' / 'AUCPR'
    # the frame provides -- confirm against the intended column set.
    metric_cols = ['MCC'] + [c for c in ('AUCROC', 'AUCPR') if c in df_perf.columns]
    average_value = df_perf.groupby('Model')[metric_cols].mean().reset_index()

    # Order the rows like average_ranks; models outside the tested set get a
    # missing category.
    average_value['Model'] = average_value['Model'].astype("category")
    average_value['Model'] = average_value['Model'].cat.set_categories(average_ranks.index)
    average_value = average_value.sort_values(["Model"])

    return p_values, average_ranks, max_nb_datasets, average_value











from pathlib import Path


def _resolve_paths():
    base_dir = Path(__file__).resolve().parent if "__file__" in globals() else Path.cwd()
    root_dir = base_dir
    while root_dir != root_dir.parent and not (root_dir / "final_results").is_dir():
        root_dir = root_dir.parent
    results_dir = root_dir / "final_results"
    rank_dir_nlp_cv = results_dir / "ranking" / "nlp_cv"
    rank_dir_tabular = results_dir / "ranking" / "tabular"
    fig_dir = results_dir / "figures"
    fig_dir.mkdir(parents=True, exist_ok=True)
    return results_dir, rank_dir_nlp_cv, rank_dir_tabular, fig_dir


RESULTS_DIR, RANK_DIR_NLP_CV, RANK_DIR_TABULAR, FIG_DIR = _resolve_paths()


def ensure_rank_column(df_perf, metric_col, rank_col):
    """Return ``df_perf`` with a per-dataset rank column for ``metric_col``.

    When ``rank_col`` already exists the frame is returned unchanged.
    Otherwise a copy is returned in which ``rank_col`` holds the dense rank
    of ``metric_col`` within each 'Dataset' group, with the highest value
    ranked 1.

    Raises:
        ValueError: If ``rank_col`` is absent and ``metric_col`` is missing.
    """
    if rank_col in df_perf.columns:
        return df_perf
    df_perf = df_perf.copy()
    if metric_col not in df_perf.columns:
        raise ValueError(f"Missing column '{metric_col}' in data")
    df_perf[rank_col] = df_perf.groupby('Dataset')[metric_col].rank(ascending=False, method='dense')
    return df_perf


def _wilcoxon_holm(df_perf, alpha, metric_col, rank_col):
    """Shared Friedman / Wilcoxon / Holm analysis for one metric column.

    Only models that have the maximum number of rows take part in the tests.
    A Friedman test is run over those models first; when its p-value is not
    below ``alpha`` a message is printed and ``SystemExit`` is raised.
    Otherwise every pair of models is compared with a Wilcoxon signed-rank
    test (``zero_method='pratt'``) and the sorted p-values are flagged with
    Holm's step-down thresholds ``alpha / (k - i)``.

    NOTE(review): samples are paired by row order within each model, which
    assumes every model lists its datasets in the same order -- confirm
    against the input files.

    Returns:
        ``(p_values, average_ranks, max_nb_datasets, average_value)`` where
        ``p_values`` is a list of ``(model_1, model_2, p_value, significant)``
        tuples sorted by p-value, ``average_ranks`` is the mean ``rank_col``
        per tested model in descending order, ``max_nb_datasets`` is the
        largest row count of any model and ``average_value`` holds the
        per-model metric means ordered like ``average_ranks``.
    """
    df_perf = ensure_rank_column(df_perf, metric_col, rank_col)

    # Number of rows per model; models with fewer rows than the maximum are
    # excluded from the tests.
    counts = df_perf.groupby('Model').size()
    max_nb_datasets = counts.max()
    classifiers = list(counts.index[counts == max_nb_datasets])

    # Extract each model's scores once instead of re-filtering per pair.
    scores = {
        c: np.array(df_perf.loc[df_perf['Model'] == c, metric_col], dtype=np.float64)
        for c in classifiers
    }

    friedman_p_value = friedmanchisquare(*scores.values())[1]
    if friedman_p_value >= alpha:
        print('The null hypothesis over the entire classifiers cannot be rejected')
        # Same effect as the interactive exit() helper, without relying on
        # the site module having installed it.
        raise SystemExit

    # Pairwise Wilcoxon signed-rank tests; nothing is flagged yet.
    p_values = []
    for i, classifier_1 in enumerate(classifiers[:-1]):
        for classifier_2 in classifiers[i + 1:]:
            p_value = wilcoxon(scores[classifier_1], scores[classifier_2], zero_method='pratt')[1]
            p_values.append((classifier_1, classifier_2, p_value, False))

    # Holm step-down: walk the sorted p-values and stop at the first failure.
    p_values.sort(key=operator.itemgetter(2))
    k = len(p_values)
    for i in range(k):
        classifier_1, classifier_2, p_value, _ = p_values[i]
        if p_value <= float(alpha / (k - i)):
            p_values[i] = (classifier_1, classifier_2, p_value, True)
        else:
            break

    average_ranks = (
        df_perf[df_perf['Model'].isin(classifiers)]
        .groupby('Model')[rank_col].mean()
        .sort_values(ascending=False)
    )

    # NOTE(review): the aggregation keys were missing from the source as seen
    # here; the mean is taken over the tested metric plus whichever of
    # 'AUCROC' / 'AUCPR' / 'MCC' the frame provides -- confirm against the
    # intended column set.
    metric_cols = [metric_col] + [
        c for c in ('AUCROC', 'AUCPR', 'MCC')
        if c != metric_col and c in df_perf.columns
    ]
    average_value = df_perf.groupby('Model')[metric_cols].mean().reset_index()

    # Order the rows like average_ranks; models outside the tested set get a
    # missing category.
    average_value['Model'] = average_value['Model'].astype("category")
    average_value['Model'] = average_value['Model'].cat.set_categories(average_ranks.index)
    average_value = average_value.sort_values(["Model"])

    return p_values, average_ranks, max_nb_datasets, average_value


def wilcoxon_holm_AUROC(alpha=0.05, df_perf=None):
    """Run the Friedman / Wilcoxon / Holm analysis on the 'AUCROC' column.

    The 'Rank_AUCROC' column is computed per dataset when it is missing.
    Returns ``(p_values, average_ranks, max_nb_datasets, average_value)``;
    raises ``SystemExit`` when the Friedman test is not significant.
    """
    return _wilcoxon_holm(df_perf, alpha, 'AUCROC', 'Rank_AUCROC')


def wilcoxon_holm_AUPRC(alpha=0.05, df_perf=None):
    """Run the Friedman / Wilcoxon / Holm analysis on the 'AUCPR' column.

    The 'Rank_AUCPR' column is computed per dataset when it is missing.
    Returns ``(p_values, average_ranks, max_nb_datasets, average_value)``;
    raises ``SystemExit`` when the Friedman test is not significant.
    """
    return _wilcoxon_holm(df_perf, alpha, 'AUCPR', 'Rank_AUCPR')


def wilcoxon_holm_MCC(alpha=0.05, df_perf=None):
    """Run the Friedman / Wilcoxon / Holm analysis on the 'MCC' column.

    The 'Rank_MCC' column is computed per dataset when it is missing.
    Returns ``(p_values, average_ranks, max_nb_datasets, average_value)``;
    raises ``SystemExit`` when the Friedman test is not significant.
    """
    return _wilcoxon_holm(df_perf, alpha, 'MCC', 'Rank_MCC')


def draw_cd_diagram_AUROC(df_perf=None, alpha=0.05, title=None, labels=False, save_path="./example.png"):
    """Draw the rank diagram for the 'AUCROC' metric and save it to ``save_path``.

    The statistics come from ``wilcoxon_holm_AUROC``; the drawing is done by
    ``graph_ranks`` and the current pyplot figure is written to disk.  A title
    is added only when ``title`` is truthy.
    """
    pairwise, mean_ranks, _, mean_scores = wilcoxon_holm_AUROC(df_perf=df_perf, alpha=alpha)
    metric_means = mean_scores['AUCROC'].to_list()

    graph_ranks(mean_ranks.values, mean_ranks.keys(), metric_means, pairwise,
                cd=None, reverse=True, width=10, textspace=1, labels=labels)

    if title:
        title_style = dict(color='black', weight='normal', size=22, y=0.9, x=0.5)
        plt.title(title, **title_style)
    plt.savefig(save_path, bbox_inches='tight', dpi=300)


def draw_cd_diagram_AUPRC(df_perf=None, alpha=0.05, title=None, labels=False, save_path="./example.png"):
    """Draw the rank diagram for the 'AUCPR' metric and save it to ``save_path``.

    The statistics come from ``wilcoxon_holm_AUPRC``; the drawing is done by
    ``graph_ranks`` and the current pyplot figure is written to disk.  A title
    is added only when ``title`` is truthy.
    """
    pairwise, mean_ranks, _, mean_scores = wilcoxon_holm_AUPRC(df_perf=df_perf, alpha=alpha)
    metric_means = mean_scores['AUCPR'].to_list()

    graph_ranks(mean_ranks.values, mean_ranks.keys(), metric_means, pairwise,
                cd=None, reverse=True, width=10, textspace=1, labels=labels)

    if title:
        title_style = dict(color='black', weight='normal', size=22, y=0.9, x=0.5)
        plt.title(title, **title_style)
    plt.savefig(save_path, bbox_inches='tight', dpi=300)


def draw_cd_diagram_MCC(df_perf=None, alpha=0.05, title=None, labels=False, save_path="./example.png"):
    """Draw the rank diagram for the 'MCC' metric and save it to ``save_path``.

    The statistics come from ``wilcoxon_holm_MCC``; the drawing is done by
    ``graph_ranks`` and the current pyplot figure is written to disk.  A title
    is added only when ``title`` is truthy.
    """
    pairwise, mean_ranks, _, mean_scores = wilcoxon_holm_MCC(df_perf=df_perf, alpha=alpha)
    metric_means = mean_scores['MCC'].to_list()

    graph_ranks(mean_ranks.values, mean_ranks.keys(), metric_means, pairwise,
                cd=None, reverse=True, width=10, textspace=1, labels=labels)

    if title:
        title_style = dict(color='black', weight='normal', size=22, y=0.9, x=0.5)
        plt.title(title, **title_style)
    plt.savefig(save_path, bbox_inches='tight', dpi=300)


# Rank CSV files to plot: each entry pairs a directory with a file name, and
# its key is used as the label in the output figure names below.
# NOTE(review): the dictionary keys (labels) are missing from the source as
# seen here and cannot be recovered from this file; they must be restored
# before the module can be parsed.
RANK_FILES = {
    : (RANK_DIR_NLP_CV, "dvmad_result_nlp_cv_10_datasets_rank.csv"),
    # NOTE(review): a "tabular" file is looked up under the nlp_cv directory -- confirm.
    : (RANK_DIR_NLP_CV, "dvmad_result_tabular_47_datasets_rank.csv"),
    : (RANK_DIR_TABULAR, "dvmad_result_cluster_synthetic_data_rank.csv"),
    : (RANK_DIR_TABULAR, "dvmad_result_dependency_synthetic_data_rank.csv"),
    : (RANK_DIR_TABULAR, "dvmad_result_global_synthetic_data_rank.csv"),
    : (RANK_DIR_TABULAR, "dvmad_result_local_synthetic_data_rank.csv"),
}

# Runs at import time: for every listed file that exists, save one AUROC and
# one AUPRC diagram into FIG_DIR; missing files are reported and skipped.
for label, (rank_dir, filename) in RANK_FILES.items():
    file_path = rank_dir / filename
    if not file_path.exists():
        print(f"Missing file: {file_path}")
        continue

    df_perf = pd.read_csv(file_path, index_col=False)

    # Each drawing call receives its own copy of the frame.
    draw_cd_diagram_AUROC(df_perf=df_perf.copy(), labels=True, save_path=FIG_DIR / f"CDD_AUROC_{label}.png")
    draw_cd_diagram_AUPRC(df_perf=df_perf.copy(), labels=True, save_path=FIG_DIR / f"CDD_AUPRC_{label}.png")
