{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['TS-CHIEF' 'HIVE-COTE' 'ROCKET' 'InceptionTime' 'STC' 'ResNet' 'PF'\n",
      " 'OS-CNN']\n",
      "HIVE-COTE         9.0\n",
      "InceptionTime     7.0\n",
      "OS-CNN           17.0\n",
      "PF                1.0\n",
      "ROCKET            8.0\n",
      "ResNet            5.0\n",
      "STC              12.0\n",
      "TS-CHIEF         18.0\n",
      "dtype: float64\n",
      "Index(['PF', 'ResNet', 'STC', 'InceptionTime', 'HIVE-COTE', 'TS-CHIEF',\n",
      "       'ROCKET', 'OS-CNN'],\n",
      "      dtype='object')\n",
      "<class 'pandas.core.series.Series'> PF               6.576471\n",
      "ResNet           5.411765\n",
      "STC              5.052941\n",
      "InceptionTime    4.052941\n",
      "HIVE-COTE        3.988235\n",
      "TS-CHIEF         3.682353\n",
      "ROCKET           3.641176\n",
      "OS-CNN           3.594118\n",
      "dtype: float64\n",
      "('PF', 'ROCKET', 1.961472576790109e-12, True)\n",
      "('HIVE-COTE', 'PF', 2.069419735770822e-09, True)\n",
      "('PF', 'TS-CHIEF', 2.602511340228031e-09, True)\n",
      "('OS-CNN', 'PF', 5.917278684095931e-09, True)\n",
      "('InceptionTime', 'PF', 7.779543287271127e-09, True)\n",
      "('OS-CNN', 'ResNet', 3.2604999205491285e-05, True)\n",
      "('PF', 'STC', 5.600722918145825e-05, True)\n",
      "('ROCKET', 'ResNet', 0.0003078044896145065, True)\n",
      "('InceptionTime', 'ResNet', 0.0006058530468700131, True)\n",
      "('HIVE-COTE', 'ResNet', 0.0014327932826343204, True)\n",
      "('PF', 'ResNet', 0.004906248933834534, False)\n",
      "('ResNet', 'TS-CHIEF', 0.005428716653219579, False)\n",
      "('ROCKET', 'STC', 0.011179199965193459, False)\n",
      "('HIVE-COTE', 'STC', 0.047150772113166255, False)\n",
      "('OS-CNN', 'STC', 0.06571503275194601, False)\n",
      "('InceptionTime', 'STC', 0.12086626807596805, False)\n",
      "('ResNet', 'STC', 0.12839154016076187, False)\n",
      "('STC', 'TS-CHIEF', 0.15569661161932322, False)\n",
      "('InceptionTime', 'OS-CNN', 0.25361920765835466, False)\n",
      "('HIVE-COTE', 'ROCKET', 0.28287015073688926, False)\n",
      "('HIVE-COTE', 'OS-CNN', 0.31239016973015477, False)\n",
      "('OS-CNN', 'ROCKET', 0.32725731915465917, False)\n",
      "('InceptionTime', 'ROCKET', 0.5137399603048958, False)\n",
      "('HIVE-COTE', 'TS-CHIEF', 0.6867935526023554, False)\n",
      "('InceptionTime', 'TS-CHIEF', 0.7193317799043902, False)\n",
      "('OS-CNN', 'TS-CHIEF', 0.7909202974310037, False)\n",
      "('HIVE-COTE', 'InceptionTime', 0.8161368486332066, False)\n",
      "('ROCKET', 'TS-CHIEF', 0.9895095878703316, False)\n",
      "Index(['PF', 'ResNet', 'STC', 'InceptionTime', 'HIVE-COTE', 'TS-CHIEF',\n",
      "       'ROCKET', 'OS-CNN'],\n",
      "      dtype='object')\n",
      "[0, 1]\n",
      "[2, 5, 1]\n",
      "[2, 5, 3, 4, 6, 7]\n"
     ]
    }
   ],
   "source": [
    "# Author: Hassan Ismail Fawaz <hassan.ismail-fawaz@uha.fr>\n",
    "#         Germain Forestier <germain.forestier@uha.fr>\n",
    "#         Jonathan Weber <jonathan.weber@uha.fr>\n",
    "#         Lhassane Idoumghar <lhassane.idoumghar@uha.fr>\n",
    "#         Pierre-Alain Muller <pierre-alain.muller@uha.fr>\n",
    "# License: GPL3\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib\n",
    "\n",
    "matplotlib.use('agg')\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "matplotlib.rcParams['font.family'] = 'sans-serif'\n",
    "matplotlib.rcParams['font.sans-serif'] = 'Arial'\n",
    "\n",
    "import operator\n",
    "import math\n",
    "from scipy.stats import wilcoxon\n",
    "from scipy.stats import friedmanchisquare\n",
    "import networkx\n",
    "\n",
    "# inspired from orange3 https://docs.orange.biolab.si/3/data-mining-library/reference/evaluation.cd.html\n",
    "def graph_ranks(avranks, names, p_values, cd=None, cdmethod=None, lowv=None, highv=None,\n",
    "                width=6, textspace=1, reverse=False, filename=None, **kwargs):\n",
    "    \"\"\"\n",
    "    Draws a CD graph, which is used to display  the differences in methods'\n",
    "    performance. See Janez Demsar, Statistical Comparisons of Classifiers over\n",
    "    Multiple Data Sets, 7(Jan):1--30, 2006.\n",
    "\n",
    "    Needs matplotlib to work.\n",
    "\n",
    "    The image is ploted on `plt` imported using\n",
    "    `import matplotlib.pyplot as plt`.\n",
    "\n",
    "    Args:\n",
    "        avranks (list of float): average ranks of methods.\n",
    "        names (list of str): names of methods.\n",
    "        cd (float): Critical difference used for statistically significance of\n",
    "            difference between methods.\n",
    "        cdmethod (int, optional): the method that is compared with other methods\n",
    "            If omitted, show pairwise comparison of methods\n",
    "        lowv (int, optional): the lowest shown rank\n",
    "        highv (int, optional): the highest shown rank\n",
    "        width (int, optional): default width in inches (default: 6)\n",
    "        textspace (int, optional): space on figure sides (in inches) for the\n",
    "            method names (default: 1)\n",
    "        reverse (bool, optional):  if set to `True`, the lowest rank is on the\n",
    "            right (default: `False`)\n",
    "        filename (str, optional): output file name (with extension). If not\n",
    "            given, the function does not write a file.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        import matplotlib\n",
    "        import matplotlib.pyplot as plt\n",
    "        from matplotlib.backends.backend_agg import FigureCanvasAgg\n",
    "    except ImportError:\n",
    "        raise ImportError(\"Function graph_ranks requires matplotlib.\")\n",
    "\n",
    "    width = float(width)\n",
    "    textspace = float(textspace)\n",
    "\n",
    "    def nth(l, n):\n",
    "        \"\"\"\n",
    "        Returns only nth elemnt in a list.\n",
    "        \"\"\"\n",
    "        n = lloc(l, n)\n",
    "        return [a[n] for a in l]\n",
    "\n",
    "    def lloc(l, n):\n",
    "        \"\"\"\n",
    "        List location in list of list structure.\n",
    "        Enable the use of negative locations:\n",
    "        -1 is the last element, -2 second last...\n",
    "        \"\"\"\n",
    "        if n < 0:\n",
    "            return len(l[0]) + n\n",
    "        else:\n",
    "            return n\n",
    "\n",
    "    def mxrange(lr):\n",
    "        \"\"\"\n",
    "        Multiple xranges. Can be used to traverse matrices.\n",
    "        This function is very slow due to unknown number of\n",
    "        parameters.\n",
    "\n",
    "        >>> mxrange([3,5])\n",
    "        [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]\n",
    "\n",
    "        >>> mxrange([[3,5,1],[9,0,-3]])\n",
    "        [(3, 9), (3, 6), (3, 3), (4, 9), (4, 6), (4, 3)]\n",
    "\n",
    "        \"\"\"\n",
    "        if not len(lr):\n",
    "            yield ()\n",
    "        else:\n",
    "            # it can work with single numbers\n",
    "            index = lr[0]\n",
    "            if isinstance(index, int):\n",
    "                index = [index]\n",
    "            for a in range(*index):\n",
    "                for b in mxrange(lr[1:]):\n",
    "                    yield tuple([a] + list(b))\n",
    "\n",
    "    def print_figure(fig, *args, **kwargs):\n",
    "        canvas = FigureCanvasAgg(fig)\n",
    "        canvas.print_figure(*args, **kwargs)\n",
    "\n",
    "    sums = avranks\n",
    "\n",
    "    nnames = names\n",
    "    ssums = sums\n",
    "\n",
    "    if lowv is None:\n",
    "        lowv = min(1, int(math.floor(min(ssums))))\n",
    "    if highv is None:\n",
    "        highv = max(len(avranks), int(math.ceil(max(ssums))))\n",
    "\n",
    "    cline = 0.4\n",
    "\n",
    "    k = len(sums)\n",
    "\n",
    "    lines = None\n",
    "\n",
    "    linesblank = 0\n",
    "    scalewidth = width - 2 * textspace\n",
    "\n",
    "    def rankpos(rank):\n",
    "        if not reverse:\n",
    "            a = rank - lowv\n",
    "        else:\n",
    "            a = highv - rank\n",
    "        return textspace + scalewidth / (highv - lowv) * a\n",
    "\n",
    "    distanceh = 0.25\n",
    "\n",
    "    cline += distanceh\n",
    "\n",
    "    # calculate height needed height of an image\n",
    "    minnotsignificant = max(2 * 0.2, linesblank)\n",
    "    height = cline + ((k + 1) / 2) * 0.2 + minnotsignificant\n",
    "\n",
    "    fig = plt.figure(figsize=(width, height))\n",
    "    fig.set_facecolor('white')\n",
    "    ax = fig.add_axes([0, 0, 1, 1])  # reverse y axis\n",
    "    ax.set_axis_off()\n",
    "\n",
    "    hf = 1. / height  # height factor\n",
    "    wf = 1. / width\n",
    "\n",
    "    def hfl(l):\n",
    "        return [a * hf for a in l]\n",
    "\n",
    "    def wfl(l):\n",
    "        return [a * wf for a in l]\n",
    "\n",
    "    # Upper left corner is (0,0).\n",
    "    ax.plot([0, 1], [0, 1], c=\"w\")\n",
    "    ax.set_xlim(0, 1)\n",
    "    ax.set_ylim(1, 0)\n",
    "\n",
    "    def line(l, color='k', **kwargs):\n",
    "        \"\"\"\n",
    "        Input is a list of pairs of points.\n",
    "        \"\"\"\n",
    "        ax.plot(wfl(nth(l, 0)), hfl(nth(l, 1)), color=color, **kwargs)\n",
    "\n",
    "    def text(x, y, s, *args, **kwargs):\n",
    "        ax.text(wf * x, hf * y, s, *args, **kwargs)\n",
    "\n",
    "    line([(textspace, cline), (width - textspace, cline)], linewidth=0.7)\n",
    "\n",
    "    bigtick = 0.1\n",
    "    smalltick = 0.05\n",
    "    linewidth = 2.0\n",
    "    linewidth_sign = 4.0\n",
    "\n",
    "    tick = None\n",
    "    for a in list(np.arange(lowv, highv, 0.5)) + [highv]:\n",
    "        tick = smalltick\n",
    "        if a == int(a):\n",
    "            tick = bigtick\n",
    "        line([(rankpos(a), cline - tick / 2),\n",
    "              (rankpos(a), cline)],\n",
    "             linewidth=0.7)\n",
    "\n",
    "    for a in range(lowv, highv + 1):\n",
    "        text(rankpos(a), cline - tick / 2 - 0.05, str(a),\n",
    "             ha=\"center\", va=\"bottom\", size=16)\n",
    "\n",
    "    k = len(ssums)\n",
    "\n",
    "    def filter_names(name):\n",
    "        return name\n",
    "\n",
    "    space_between_names = 0.24\n",
    "\n",
    "    for i in range(math.ceil(k / 2)):\n",
    "        chei = cline + minnotsignificant + i * space_between_names\n",
    "        line([(rankpos(ssums[i]), cline),\n",
    "              (rankpos(ssums[i]), chei),\n",
    "              (textspace - 0.1, chei)],\n",
    "             linewidth=linewidth)\n",
    "        text(textspace - 0.2, chei, filter_names(nnames[i]), ha=\"right\", va=\"center\", size=16)\n",
    "        ### my design\n",
    "        text(textspace - 0.2 + 0.5, chei, round(avranks[i],2), ha=\"right\", va=\"bottom\", size=12)\n",
    "         ### my design\n",
    "\n",
    "    for i in range(math.ceil(k / 2), k):\n",
    "        chei = cline + minnotsignificant + (k - i - 1) * space_between_names\n",
    "        line([(rankpos(ssums[i]), cline),\n",
    "              (rankpos(ssums[i]), chei),\n",
    "              (textspace + scalewidth + 0.1, chei)],\n",
    "             linewidth=linewidth)\n",
    "        text(textspace + scalewidth + 0.2, chei, filter_names(nnames[i]), ha=\"left\", va=\"center\", size=16)\n",
    "        \n",
    "        ### my design\n",
    "        text(textspace + scalewidth + 0.2 - 0.5, chei, round(avranks[i],2), ha=\"left\", va=\"bottom\", size=12)\n",
    "        ### my design\n",
    "\n",
    "        \n",
    "    # no-significance lines\n",
    "    def draw_lines(lines, side=0.05, height=0.1):\n",
    "        start = cline + 0.2\n",
    "\n",
    "        for l, r in lines:\n",
    "            line([(rankpos(ssums[l]) - side, start),\n",
    "                  (rankpos(ssums[r]) + side, start)],\n",
    "                 linewidth=linewidth_sign)\n",
    "            start += height\n",
    "            print('drawing: ', l, r)\n",
    "\n",
    "    # draw_lines(lines)\n",
    "    start = cline + 0.2\n",
    "    side = -0.02\n",
    "    height = 0.1\n",
    "\n",
    "    # draw no significant lines\n",
    "    # get the cliques\n",
    "    cliques = form_cliques(p_values, nnames)\n",
    "    i = 1\n",
    "    achieved_half = False\n",
    "    print(nnames)\n",
    "    for clq in cliques:\n",
    "        if len(clq) == 1:\n",
    "            continue\n",
    "        print(clq)\n",
    "        min_idx = np.array(clq).min()\n",
    "        max_idx = np.array(clq).max()\n",
    "        if min_idx >= len(nnames) / 2 and achieved_half == False:\n",
    "            start = cline + 0.25\n",
    "            achieved_half = True\n",
    "        line([(rankpos(ssums[min_idx]) - side, start),\n",
    "              (rankpos(ssums[max_idx]) + side, start)],\n",
    "             linewidth=linewidth_sign)\n",
    "        start += height\n",
    "\n",
    "\n",
    "def form_cliques(p_values, nnames):\n",
    "    \"\"\"\n",
    "    This method forms the cliques\n",
    "    \"\"\"\n",
    "    # first form the numpy matrix data\n",
    "    m = len(nnames)\n",
    "    g_data = np.zeros((m, m), dtype=np.int64)\n",
    "    for p in p_values:\n",
    "        if p[3] == False:\n",
    "            i = np.where(nnames == p[0])[0][0]\n",
    "            j = np.where(nnames == p[1])[0][0]\n",
    "            min_i = min(i, j)\n",
    "            max_j = max(i, j)\n",
    "            g_data[min_i, max_j] = 1\n",
    "\n",
    "    g = networkx.Graph(g_data)\n",
    "    return networkx.find_cliques(g)\n",
    "\n",
    "\n",
    "def draw_cd_diagram(df_perf=None, alpha=0.05):\n",
    "    \"\"\"\n",
    "    Draws the critical difference diagram given the list of pairwise classifiers that are\n",
    "    significant or not\n",
    "    \"\"\"\n",
    "    p_values, average_ranks, _ = wilcoxon_holm(df_perf=df_perf, alpha=alpha)\n",
    "\n",
    "    for p in p_values:\n",
    "        print(p)\n",
    "\n",
    "    graph_ranks(average_ranks.values, average_ranks.keys(), p_values,\n",
    "                cd=None, reverse=True, width=9, textspace=1.5)\n",
    "\n",
    "    plt.savefig('cd-diagram.png',bbox_inches='tight')\n",
    "\n",
    "def wilcoxon_holm(alpha=0.05, df_perf=None):\n",
    "    \"\"\"\n",
    "    Applies the wilcoxon signed rank test between each pair of algorithm and then use Holm\n",
    "    to reject the null's hypothesis\n",
    "    \"\"\"\n",
    "    print(pd.unique(df_perf['classifier_name']))\n",
    "    # count the number of tested datasets per classifier\n",
    "    df_counts = pd.DataFrame({'count': df_perf.groupby(\n",
    "        ['classifier_name']).size()}).reset_index()\n",
    "    # get the maximum number of tested datasets\n",
    "    max_nb_datasets = df_counts['count'].max()\n",
    "    # get the list of classifiers who have been tested on nb_max_datasets\n",
    "    classifiers = list(df_counts.loc[df_counts['count'] == max_nb_datasets]\n",
    "                       ['classifier_name'])\n",
    "    # test the null hypothesis using friedman before doing a post-hoc analysis\n",
    "    friedman_p_value = friedmanchisquare(*(\n",
    "        np.array(df_perf.loc[df_perf['classifier_name'] == c]['accuracy'])\n",
    "        for c in classifiers))[1]\n",
    "    if friedman_p_value >= alpha:\n",
    "        # then the null hypothesis over the entire classifiers cannot be rejected\n",
    "        print('the null hypothesis over the entire classifiers cannot be rejected')\n",
    "        exit()\n",
    "    # get the number of classifiers\n",
    "    m = len(classifiers)\n",
    "    # init array that contains the p-values calculated by the Wilcoxon signed rank test\n",
    "    p_values = []\n",
    "    # loop through the algorithms to compare pairwise\n",
    "    for i in range(m - 1):\n",
    "        # get the name of classifier one\n",
    "        classifier_1 = classifiers[i]\n",
    "        # get the performance of classifier one\n",
    "        perf_1 = np.array(df_perf.loc[df_perf['classifier_name'] == classifier_1]['accuracy']\n",
    "                          , dtype=np.float64)\n",
    "        for j in range(i + 1, m):\n",
    "            # get the name of the second classifier\n",
    "            classifier_2 = classifiers[j]\n",
    "            # get the performance of classifier one\n",
    "            perf_2 = np.array(df_perf.loc[df_perf['classifier_name'] == classifier_2]\n",
    "                              ['accuracy'], dtype=np.float64)\n",
    "            # calculate the p_value\n",
    "            p_value = wilcoxon(perf_1, perf_2, zero_method='pratt')[1]\n",
    "            # appen to the list\n",
    "            p_values.append((classifier_1, classifier_2, p_value, False))\n",
    "    # get the number of hypothesis\n",
    "    k = len(p_values)\n",
    "    # sort the list in acsending manner of p-value\n",
    "    p_values.sort(key=operator.itemgetter(2))\n",
    "\n",
    "    # loop through the hypothesis\n",
    "    for i in range(k):\n",
    "        # correct alpha with holm\n",
    "        new_alpha = float(alpha / (k - i))\n",
    "        #new_alpha= alpha\n",
    "        # test if significant after holm's correction of alpha\n",
    "        if p_values[i][2] <= new_alpha:\n",
    "            p_values[i] = (p_values[i][0], p_values[i][1], p_values[i][2], True)\n",
    "        else:\n",
    "            # stop\n",
    "            break\n",
    "    # compute the average ranks to be returned (useful for drawing the cd diagram)\n",
    "    # sort the dataframe of performances\n",
    "    sorted_df_perf = df_perf.loc[df_perf['classifier_name'].isin(classifiers)]. \\\n",
    "        sort_values(['classifier_name', 'dataset_name'])\n",
    "    # get the rank data\n",
    "    \n",
    "    rank_data = np.array(sorted_df_perf['accuracy']).reshape(m, max_nb_datasets)\n",
    "    # create the data frame containg the accuracies\n",
    "    df_ranks = pd.DataFrame(data=rank_data, index=np.sort(classifiers), columns=\n",
    "    np.unique(sorted_df_perf['dataset_name']))\n",
    "    \n",
    "    \n",
    "    # number of wins\n",
    "    dfff = df_ranks.rank(ascending=False)\n",
    "    print(dfff[dfff == 1.0].sum(axis=1))\n",
    "\n",
    "    # average the ranks\n",
    "    #print(dfff.sort_values(ascending=False,by=[])\n",
    "    average_ranks = df_ranks.rank(ascending=False).mean(axis=1).sort_values(ascending=False)\n",
    "    \n",
    "    print(average_ranks.keys())\n",
    "    #name_dict = {name:name+'('+str(average_ranks(name))+')' for name in }\n",
    "    #average_ranks.rename()\n",
    "    print(type(average_ranks),average_ranks)\n",
    "    \n",
    "    # return the p-values and the average ranks\n",
    "    return p_values, average_ranks, max_nb_datasets\n",
    "\n",
    "df_perf = pd.read_csv('example.csv',index_col=False)\n",
    "\n",
    "draw_cd_diagram(df_perf=df_perf, alpha=0.05)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
