{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "8babb1f6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "import sys\n",
    "sys.path.append(\"..\")\n",
    "from data import all_datasets\n",
    "from IPython.display import display"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "02b9357d",
   "metadata": {},
   "outputs": [],
   "source": [
    "def official_method_name(method):\n",
    "    if method== \"douglas\":\n",
    "        return \"DOUGLAS numpy\"\n",
    "    elif method == \"torchdouglas\":\n",
    "        return \"DOUGLAS\"\n",
    "    elif \"ktree\" in method:\n",
    "        return \"KMeans+Tree\"\n",
    "    elif \"kauri\" in method:\n",
    "        return \"KAURI\"\n",
    "    else:\n",
    "        return method.upper()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6bed5f17",
   "metadata": {},
   "source": [
    "# First generate tables for methods with as many leaves as clusters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "16120459",
   "metadata": {},
   "outputs": [],
   "source": [
    "small_methods = [\"kauri_small\", \"ktree_small\", \"imm\", \"exshallow\", \"rdm\", \"torchdouglas\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "14c98a3c",
   "metadata": {},
   "outputs": [],
   "source": [
    "all_outputs_files = []\n",
    "for method in small_methods:\n",
    "    files = os.listdir(method)\n",
    "    all_outputs_files += [os.path.join(method,x) for x in files if \".csv\"==x[-4:]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "c69c261d",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Method: kauri_small\n",
      "Method: ktree_small\n",
      "Method: imm\n",
      "Method: exshallow\n",
      "Method: rdm\n",
      "Method: torchdouglas\n"
     ]
    }
   ],
   "source": [
    "new_table = []\n",
    "for method in small_methods:\n",
    "    print(f\"Method: {method}\")\n",
    "    for dataset in sorted(all_datasets):\n",
    "        if \"celeux\" in dataset:\n",
    "            continue\n",
    "        associated_files = [x for x in all_outputs_files if method in x and dataset in x]\n",
    "        if method==\"douglas\":\n",
    "            associated_files = [x for x in associated_files if \"torch\" not in x]\n",
    "            \n",
    "        if len(associated_files)==30:\n",
    "            df = pd.concat([pd.read_csv(x) for x in associated_files], ignore_index=True)\n",
    "            \n",
    "            results = {\"Dataset\":dataset, \"Method\":official_method_name(method)}\n",
    "            for score in [\"ARI\", \"WAD\",\"WAES\", \"KScore\"]:\n",
    "                if score in df.columns:\n",
    "                    results[score] = f\"{df[score].mean():.2f}\"+\"\\std{\"+f\"{df[score].std():.2f}\"+\"}\"\n",
    "            new_table += [results]\n",
    "new_table = pd.DataFrame(new_table)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "d1c32449",
   "metadata": {},
   "outputs": [],
   "source": [
    "all_names = [\"KAURI\", \"KMeans+Tree\", \"DOUGLAS\", \"EXSHALLOW\", \"RDM\", \"IMM\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "52561435",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>Method</th>\n",
       "      <th>KAURI</th>\n",
       "      <th>KMeans+Tree</th>\n",
       "      <th>DOUGLAS</th>\n",
       "      <th>EXSHALLOW</th>\n",
       "      <th>RDM</th>\n",
       "      <th>IMM</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Dataset</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>avila</th>\n",
       "      <td>0.02\\std{0.01}</td>\n",
       "      <td>0.04\\std{0.01}</td>\n",
       "      <td>0.02\\std{0.01}</td>\n",
       "      <td>0.06\\std{0.02}</td>\n",
       "      <td>0.05\\std{0.02}</td>\n",
       "      <td>0.06\\std{0.01}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>breast_cancer</th>\n",
       "      <td>0.74\\std{0.02}</td>\n",
       "      <td>0.73\\std{0.01}</td>\n",
       "      <td>0.84\\std{0.02}</td>\n",
       "      <td>0.74\\std{0.01}</td>\n",
       "      <td>0.68\\std{0.02}</td>\n",
       "      <td>0.73\\std{0.02}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>car_evaluation</th>\n",
       "      <td>0.06\\std{0.06}</td>\n",
       "      <td>0.08\\std{0.07}</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.05\\std{0.05}</td>\n",
       "      <td>0.07\\std{0.05}</td>\n",
       "      <td>0.05\\std{0.05}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>congressional_votes</th>\n",
       "      <td>0.49\\std{0.03}</td>\n",
       "      <td>0.46\\std{0.04}</td>\n",
       "      <td>0.56\\std{0.04}</td>\n",
       "      <td>0.49\\std{0.03}</td>\n",
       "      <td>0.39\\std{0.02}</td>\n",
       "      <td>0.48\\std{0.03}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>digits</th>\n",
       "      <td>0.26\\std{0.02}</td>\n",
       "      <td>0.36\\std{0.05}</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.31\\std{0.03}</td>\n",
       "      <td>0.16\\std{0.03}</td>\n",
       "      <td>0.27\\std{0.03}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>haberman_survival</th>\n",
       "      <td>0.00\\std{0.03}</td>\n",
       "      <td>-0.00\\std{0.00}</td>\n",
       "      <td>0.02\\std{0.04}</td>\n",
       "      <td>-0.00\\std{0.00}</td>\n",
       "      <td>0.00\\std{0.02}</td>\n",
       "      <td>-0.00\\std{0.00}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>iris</th>\n",
       "      <td>0.63\\std{0.07}</td>\n",
       "      <td>0.60\\std{0.06}</td>\n",
       "      <td>0.47\\std{0.12}</td>\n",
       "      <td>0.62\\std{0.06}</td>\n",
       "      <td>0.49\\std{0.04}</td>\n",
       "      <td>0.59\\std{0.05}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mice_protein</th>\n",
       "      <td>0.21\\std{0.03}</td>\n",
       "      <td>0.18\\std{0.04}</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.19\\std{0.03}</td>\n",
       "      <td>0.12\\std{0.04}</td>\n",
       "      <td>0.16\\std{0.03}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>poker_hand</th>\n",
       "      <td>0.00\\std{0.00}</td>\n",
       "      <td>0.00\\std{0.00}</td>\n",
       "      <td>0.00\\std{0.00}</td>\n",
       "      <td>0.00\\std{0.00}</td>\n",
       "      <td>0.00\\std{0.00}</td>\n",
       "      <td>0.00\\std{0.00}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>vowel</th>\n",
       "      <td>0.01\\std{0.01}</td>\n",
       "      <td>0.03\\std{0.03}</td>\n",
       "      <td>0.07\\std{0.05}</td>\n",
       "      <td>0.05\\std{0.04}</td>\n",
       "      <td>0.07\\std{0.03}</td>\n",
       "      <td>0.08\\std{0.04}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>wine</th>\n",
       "      <td>0.60\\std{0.10}</td>\n",
       "      <td>0.71\\std{0.05}</td>\n",
       "      <td>0.54\\std{0.13}</td>\n",
       "      <td>0.74\\std{0.04}</td>\n",
       "      <td>0.33\\std{0.05}</td>\n",
       "      <td>0.75\\std{0.04}</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Method                        KAURI      KMeans+Tree         DOUGLAS  \\\n",
       "Dataset                                                                \n",
       "avila                0.02\\std{0.01}   0.04\\std{0.01}  0.02\\std{0.01}   \n",
       "breast_cancer        0.74\\std{0.02}   0.73\\std{0.01}  0.84\\std{0.02}   \n",
       "car_evaluation       0.06\\std{0.06}   0.08\\std{0.07}             NaN   \n",
       "congressional_votes  0.49\\std{0.03}   0.46\\std{0.04}  0.56\\std{0.04}   \n",
       "digits               0.26\\std{0.02}   0.36\\std{0.05}             NaN   \n",
       "haberman_survival    0.00\\std{0.03}  -0.00\\std{0.00}  0.02\\std{0.04}   \n",
       "iris                 0.63\\std{0.07}   0.60\\std{0.06}  0.47\\std{0.12}   \n",
       "mice_protein         0.21\\std{0.03}   0.18\\std{0.04}             NaN   \n",
       "poker_hand           0.00\\std{0.00}   0.00\\std{0.00}  0.00\\std{0.00}   \n",
       "vowel                0.01\\std{0.01}   0.03\\std{0.03}  0.07\\std{0.05}   \n",
       "wine                 0.60\\std{0.10}   0.71\\std{0.05}  0.54\\std{0.13}   \n",
       "\n",
       "Method                     EXSHALLOW             RDM              IMM  \n",
       "Dataset                                                                \n",
       "avila                 0.06\\std{0.02}  0.05\\std{0.02}   0.06\\std{0.01}  \n",
       "breast_cancer         0.74\\std{0.01}  0.68\\std{0.02}   0.73\\std{0.02}  \n",
       "car_evaluation        0.05\\std{0.05}  0.07\\std{0.05}   0.05\\std{0.05}  \n",
       "congressional_votes   0.49\\std{0.03}  0.39\\std{0.02}   0.48\\std{0.03}  \n",
       "digits                0.31\\std{0.03}  0.16\\std{0.03}   0.27\\std{0.03}  \n",
       "haberman_survival    -0.00\\std{0.00}  0.00\\std{0.02}  -0.00\\std{0.00}  \n",
       "iris                  0.62\\std{0.06}  0.49\\std{0.04}   0.59\\std{0.05}  \n",
       "mice_protein          0.19\\std{0.03}  0.12\\std{0.04}   0.16\\std{0.03}  \n",
       "poker_hand            0.00\\std{0.00}  0.00\\std{0.00}   0.00\\std{0.00}  \n",
       "vowel                 0.05\\std{0.04}  0.07\\std{0.03}   0.08\\std{0.04}  \n",
       "wine                  0.74\\std{0.04}  0.33\\std{0.05}   0.75\\std{0.04}  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.pivot(new_table, index=\"Dataset\", columns=\"Method\", values=\"ARI\")[all_names]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "20a05e14",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\\begin{tabular}{lllllll}\n",
      "\\toprule\n",
      "Method & KAURI & KMeans+Tree & DOUGLAS & EXSHALLOW & RDM & IMM \\\\\n",
      "Dataset &  &  &  &  &  &  \\\\\n",
      "\\midrule\n",
      "avila & 0.02\\std{0.01} & 0.04\\std{0.01} & 0.02\\std{0.01} & 0.06\\std{0.02} & 0.05\\std{0.02} & 0.06\\std{0.01} \\\\\n",
      "breast_cancer & 0.74\\std{0.02} & 0.73\\std{0.01} & 0.84\\std{0.02} & 0.74\\std{0.01} & 0.68\\std{0.02} & 0.73\\std{0.02} \\\\\n",
      "car_evaluation & 0.06\\std{0.06} & 0.08\\std{0.07} & NaN & 0.05\\std{0.05} & 0.07\\std{0.05} & 0.05\\std{0.05} \\\\\n",
      "congressional_votes & 0.49\\std{0.03} & 0.46\\std{0.04} & 0.56\\std{0.04} & 0.49\\std{0.03} & 0.39\\std{0.02} & 0.48\\std{0.03} \\\\\n",
      "digits & 0.26\\std{0.02} & 0.36\\std{0.05} & NaN & 0.31\\std{0.03} & 0.16\\std{0.03} & 0.27\\std{0.03} \\\\\n",
      "haberman_survival & 0.00\\std{0.03} & -0.00\\std{0.00} & 0.02\\std{0.04} & -0.00\\std{0.00} & 0.00\\std{0.02} & -0.00\\std{0.00} \\\\\n",
      "iris & 0.63\\std{0.07} & 0.60\\std{0.06} & 0.47\\std{0.12} & 0.62\\std{0.06} & 0.49\\std{0.04} & 0.59\\std{0.05} \\\\\n",
      "mice_protein & 0.21\\std{0.03} & 0.18\\std{0.04} & NaN & 0.19\\std{0.03} & 0.12\\std{0.04} & 0.16\\std{0.03} \\\\\n",
      "poker_hand & 0.00\\std{0.00} & 0.00\\std{0.00} & 0.00\\std{0.00} & 0.00\\std{0.00} & 0.00\\std{0.00} & 0.00\\std{0.00} \\\\\n",
      "vowel & 0.01\\std{0.01} & 0.03\\std{0.03} & 0.07\\std{0.05} & 0.05\\std{0.04} & 0.07\\std{0.03} & 0.08\\std{0.04} \\\\\n",
      "wine & 0.60\\std{0.10} & 0.71\\std{0.05} & 0.54\\std{0.13} & 0.74\\std{0.04} & 0.33\\std{0.05} & 0.75\\std{0.04} \\\\\n",
      "\\bottomrule\n",
      "\\end{tabular}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(pd.pivot(new_table, index=\"Dataset\", columns=\"Method\", values=\"ARI\")[all_names].to_latex(escape=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "9ab33652",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>Method</th>\n",
       "      <th>KAURI</th>\n",
       "      <th>KMeans+Tree</th>\n",
       "      <th>EXSHALLOW</th>\n",
       "      <th>RDM</th>\n",
       "      <th>IMM</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Dataset</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>avila</th>\n",
       "      <td>5.47\\std{0.30}</td>\n",
       "      <td>4.00\\std{0.13}</td>\n",
       "      <td>6.43\\std{0.56}</td>\n",
       "      <td>7.81\\std{0.33}</td>\n",
       "      <td>9.19\\std{0.10}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>breast_cancer</th>\n",
       "      <td>1.00\\std{0.00}</td>\n",
       "      <td>1.00\\std{0.00}</td>\n",
       "      <td>1.00\\std{0.00}</td>\n",
       "      <td>1.00\\std{0.00}</td>\n",
       "      <td>1.00\\std{0.00}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>car_evaluation</th>\n",
       "      <td>2.00\\std{0.00}</td>\n",
       "      <td>2.04\\std{0.06}</td>\n",
       "      <td>2.05\\std{0.06}</td>\n",
       "      <td>2.03\\std{0.08}</td>\n",
       "      <td>2.04\\std{0.06}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>congressional_votes</th>\n",
       "      <td>1.00\\std{0.00}</td>\n",
       "      <td>1.00\\std{0.00}</td>\n",
       "      <td>1.00\\std{0.00}</td>\n",
       "      <td>1.00\\std{0.00}</td>\n",
       "      <td>1.00\\std{0.00}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>digits</th>\n",
       "      <td>3.45\\std{0.22}</td>\n",
       "      <td>3.48\\std{0.17}</td>\n",
       "      <td>3.98\\std{0.19}</td>\n",
       "      <td>5.21\\std{0.83}</td>\n",
       "      <td>6.79\\std{0.34}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>haberman_survival</th>\n",
       "      <td>1.00\\std{0.00}</td>\n",
       "      <td>1.00\\std{0.00}</td>\n",
       "      <td>1.00\\std{0.00}</td>\n",
       "      <td>1.00\\std{0.00}</td>\n",
       "      <td>1.00\\std{0.00}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>iris</th>\n",
       "      <td>1.67\\std{0.02}</td>\n",
       "      <td>1.67\\std{0.02}</td>\n",
       "      <td>1.67\\std{0.02}</td>\n",
       "      <td>1.62\\std{0.03}</td>\n",
       "      <td>1.67\\std{0.02}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mice_protein</th>\n",
       "      <td>3.04\\std{0.07}</td>\n",
       "      <td>3.16\\std{0.13}</td>\n",
       "      <td>3.23\\std{0.16}</td>\n",
       "      <td>3.47\\std{0.39}</td>\n",
       "      <td>4.85\\std{0.41}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>poker_hand</th>\n",
       "      <td>3.26\\std{0.00}</td>\n",
       "      <td>3.26\\std{0.01}</td>\n",
       "      <td>3.38\\std{0.05}</td>\n",
       "      <td>3.28\\std{0.11}</td>\n",
       "      <td>4.40\\std{0.45}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>vowel</th>\n",
       "      <td>1.00\\std{0.00}</td>\n",
       "      <td>1.00\\std{0.00}</td>\n",
       "      <td>1.00\\std{0.00}</td>\n",
       "      <td>1.00\\std{0.00}</td>\n",
       "      <td>1.00\\std{0.00}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>wine</th>\n",
       "      <td>1.58\\std{0.07}</td>\n",
       "      <td>1.65\\std{0.04}</td>\n",
       "      <td>1.69\\std{0.03}</td>\n",
       "      <td>1.75\\std{0.03}</td>\n",
       "      <td>1.71\\std{0.02}</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Method                        KAURI     KMeans+Tree       EXSHALLOW  \\\n",
       "Dataset                                                               \n",
       "avila                5.47\\std{0.30}  4.00\\std{0.13}  6.43\\std{0.56}   \n",
       "breast_cancer        1.00\\std{0.00}  1.00\\std{0.00}  1.00\\std{0.00}   \n",
       "car_evaluation       2.00\\std{0.00}  2.04\\std{0.06}  2.05\\std{0.06}   \n",
       "congressional_votes  1.00\\std{0.00}  1.00\\std{0.00}  1.00\\std{0.00}   \n",
       "digits               3.45\\std{0.22}  3.48\\std{0.17}  3.98\\std{0.19}   \n",
       "haberman_survival    1.00\\std{0.00}  1.00\\std{0.00}  1.00\\std{0.00}   \n",
       "iris                 1.67\\std{0.02}  1.67\\std{0.02}  1.67\\std{0.02}   \n",
       "mice_protein         3.04\\std{0.07}  3.16\\std{0.13}  3.23\\std{0.16}   \n",
       "poker_hand           3.26\\std{0.00}  3.26\\std{0.01}  3.38\\std{0.05}   \n",
       "vowel                1.00\\std{0.00}  1.00\\std{0.00}  1.00\\std{0.00}   \n",
       "wine                 1.58\\std{0.07}  1.65\\std{0.04}  1.69\\std{0.03}   \n",
       "\n",
       "Method                          RDM             IMM  \n",
       "Dataset                                              \n",
       "avila                7.81\\std{0.33}  9.19\\std{0.10}  \n",
       "breast_cancer        1.00\\std{0.00}  1.00\\std{0.00}  \n",
       "car_evaluation       2.03\\std{0.08}  2.04\\std{0.06}  \n",
       "congressional_votes  1.00\\std{0.00}  1.00\\std{0.00}  \n",
       "digits               5.21\\std{0.83}  6.79\\std{0.34}  \n",
       "haberman_survival    1.00\\std{0.00}  1.00\\std{0.00}  \n",
       "iris                 1.62\\std{0.03}  1.67\\std{0.02}  \n",
       "mice_protein         3.47\\std{0.39}  4.85\\std{0.41}  \n",
       "poker_hand           3.28\\std{0.11}  4.40\\std{0.45}  \n",
       "vowel                1.00\\std{0.00}  1.00\\std{0.00}  \n",
       "wine                 1.75\\std{0.03}  1.71\\std{0.02}  "
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.pivot(new_table, index=\"Dataset\", columns=\"Method\", values=\"WAD\")[[x for x in all_names if \"DOUGLAS\"!=x]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "57e7d09c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\\begin{tabular}{llllll}\n",
      "\\toprule\n",
      "Method & KAURI & KMeans+Tree & EXSHALLOW & RDM & IMM \\\\\n",
      "Dataset &  &  &  &  &  \\\\\n",
      "\\midrule\n",
      "avila & 5.47\\std{0.30} & 4.00\\std{0.13} & 6.43\\std{0.56} & 7.81\\std{0.33} & 9.19\\std{0.10} \\\\\n",
      "breast_cancer & 1.00\\std{0.00} & 1.00\\std{0.00} & 1.00\\std{0.00} & 1.00\\std{0.00} & 1.00\\std{0.00} \\\\\n",
      "car_evaluation & 2.00\\std{0.00} & 2.04\\std{0.06} & 2.05\\std{0.06} & 2.03\\std{0.08} & 2.04\\std{0.06} \\\\\n",
      "congressional_votes & 1.00\\std{0.00} & 1.00\\std{0.00} & 1.00\\std{0.00} & 1.00\\std{0.00} & 1.00\\std{0.00} \\\\\n",
      "digits & 3.45\\std{0.22} & 3.48\\std{0.17} & 3.98\\std{0.19} & 5.21\\std{0.83} & 6.79\\std{0.34} \\\\\n",
      "haberman_survival & 1.00\\std{0.00} & 1.00\\std{0.00} & 1.00\\std{0.00} & 1.00\\std{0.00} & 1.00\\std{0.00} \\\\\n",
      "iris & 1.67\\std{0.02} & 1.67\\std{0.02} & 1.67\\std{0.02} & 1.62\\std{0.03} & 1.67\\std{0.02} \\\\\n",
      "mice_protein & 3.04\\std{0.07} & 3.16\\std{0.13} & 3.23\\std{0.16} & 3.47\\std{0.39} & 4.85\\std{0.41} \\\\\n",
      "poker_hand & 3.26\\std{0.00} & 3.26\\std{0.01} & 3.38\\std{0.05} & 3.28\\std{0.11} & 4.40\\std{0.45} \\\\\n",
      "vowel & 1.00\\std{0.00} & 1.00\\std{0.00} & 1.00\\std{0.00} & 1.00\\std{0.00} & 1.00\\std{0.00} \\\\\n",
      "wine & 1.58\\std{0.07} & 1.65\\std{0.04} & 1.69\\std{0.03} & 1.75\\std{0.03} & 1.71\\std{0.02} \\\\\n",
      "\\bottomrule\n",
      "\\end{tabular}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(pd.pivot(new_table, index=\"Dataset\", columns=\"Method\", values=\"WAD\")[[x for x in all_names if \"DOUGLAS\"!=x]].to_latex(escape=False))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "32281744",
   "metadata": {},
   "source": [
    "# Same thing when we have more leaves than clusters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "eacf697d",
   "metadata": {},
   "outputs": [],
   "source": [
    "large_methods = [\"kauri_large\",\"ktree_large\",\"exkmc\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "2af524b2",
   "metadata": {},
   "outputs": [],
   "source": [
    "all_outputs_files = []\n",
    "for method in large_methods:\n",
    "    files = os.listdir(method)\n",
    "    all_outputs_files += [os.path.join(method,x) for x in files if \".csv\"==x[-4:]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "bc925ce2",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Method: kauri_large\n",
      "Method: ktree_large\n",
      "Method: exkmc\n"
     ]
    }
   ],
   "source": [
    "new_table = []\n",
    "for method in large_methods:\n",
    "    print(f\"Method: {method}\")\n",
    "    for dataset in sorted(all_datasets):\n",
    "        if \"celeux\" in dataset:\n",
    "            continue\n",
    "        associated_files = [x for x in all_outputs_files if method in x and dataset in x]\n",
    "        if method==\"douglas\":\n",
    "            associated_files = [x for x in associated_files if \"torch\" not in x]\n",
    "            \n",
    "        if len(associated_files)==30:\n",
    "            df = pd.concat([pd.read_csv(x) for x in associated_files], ignore_index=True)\n",
    "            \n",
    "            results = {\"Dataset\":dataset, \"Method\":official_method_name(method)}\n",
    "            for score in [\"ARI\", \"WAD\",\"WAES\", \"KScore\"]:\n",
    "                if score in df.columns:\n",
    "                    results[score] = f\"{df[score].mean():.2f}\"+\"\\std{\"+f\"{df[score].std():.2f}\"+\"}\"\n",
    "            new_table += [results]\n",
    "new_table = pd.DataFrame(new_table)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "532fa6c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "all_names = [\"KAURI\",\"KMeans+Tree\",\"EXKMC\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "cd1324f5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>Method</th>\n",
       "      <th>KAURI</th>\n",
       "      <th>KMeans+Tree</th>\n",
       "      <th>EXKMC</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Dataset</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>avila</th>\n",
       "      <td>0.04\\std{0.00}</td>\n",
       "      <td>0.06\\std{0.01}</td>\n",
       "      <td>0.06\\std{0.01}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>breast_cancer</th>\n",
       "      <td>0.86\\std{0.02}</td>\n",
       "      <td>0.84\\std{0.03}</td>\n",
       "      <td>0.86\\std{0.02}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>car_evaluation</th>\n",
       "      <td>0.05\\std{0.06}</td>\n",
       "      <td>0.07\\std{0.06}</td>\n",
       "      <td>0.06\\std{0.07}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>congressional_votes</th>\n",
       "      <td>0.50\\std{0.04}</td>\n",
       "      <td>0.56\\std{0.04}</td>\n",
       "      <td>0.53\\std{0.04}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>digits</th>\n",
       "      <td>0.41\\std{0.03}</td>\n",
       "      <td>0.45\\std{0.04}</td>\n",
       "      <td>0.44\\std{0.04}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>haberman_survival</th>\n",
       "      <td>0.01\\std{0.05}</td>\n",
       "      <td>0.00\\std{0.00}</td>\n",
       "      <td>0.00\\std{0.01}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>iris</th>\n",
       "      <td>0.61\\std{0.03}</td>\n",
       "      <td>0.62\\std{0.04}</td>\n",
       "      <td>0.61\\std{0.03}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mice_protein</th>\n",
       "      <td>0.19\\std{0.02}</td>\n",
       "      <td>0.18\\std{0.02}</td>\n",
       "      <td>0.18\\std{0.01}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>poker_hand</th>\n",
       "      <td>0.00\\std{0.00}</td>\n",
       "      <td>0.00\\std{0.00}</td>\n",
       "      <td>0.00\\std{0.00}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>vowel</th>\n",
       "      <td>0.13\\std{0.04}</td>\n",
       "      <td>0.15\\std{0.02}</td>\n",
       "      <td>0.17\\std{0.02}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>wine</th>\n",
       "      <td>0.89\\std{0.03}</td>\n",
       "      <td>0.91\\std{0.03}</td>\n",
       "      <td>0.90\\std{0.03}</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Method                        KAURI     KMeans+Tree           EXKMC\n",
       "Dataset                                                            \n",
       "avila                0.04\\std{0.00}  0.06\\std{0.01}  0.06\\std{0.01}\n",
       "breast_cancer        0.86\\std{0.02}  0.84\\std{0.03}  0.86\\std{0.02}\n",
       "car_evaluation       0.05\\std{0.06}  0.07\\std{0.06}  0.06\\std{0.07}\n",
       "congressional_votes  0.50\\std{0.04}  0.56\\std{0.04}  0.53\\std{0.04}\n",
       "digits               0.41\\std{0.03}  0.45\\std{0.04}  0.44\\std{0.04}\n",
       "haberman_survival    0.01\\std{0.05}  0.00\\std{0.00}  0.00\\std{0.01}\n",
       "iris                 0.61\\std{0.03}  0.62\\std{0.04}  0.61\\std{0.03}\n",
       "mice_protein         0.19\\std{0.02}  0.18\\std{0.02}  0.18\\std{0.01}\n",
       "poker_hand           0.00\\std{0.00}  0.00\\std{0.00}  0.00\\std{0.00}\n",
       "vowel                0.13\\std{0.04}  0.15\\std{0.02}  0.17\\std{0.02}\n",
       "wine                 0.89\\std{0.03}  0.91\\std{0.03}  0.90\\std{0.03}"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.pivot(new_table, index=\"Dataset\", columns=\"Method\", values=\"ARI\")[all_names]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "56515cf1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\\begin{tabular}{llll}\n",
      "\\toprule\n",
      "Method & KAURI & KMeans+Tree & EXKMC \\\\\n",
      "Dataset &  &  &  \\\\\n",
      "\\midrule\n",
      "avila & 0.04\\std{0.00} & 0.06\\std{0.01} & 0.06\\std{0.01} \\\\\n",
      "breast_cancer & 0.86\\std{0.02} & 0.84\\std{0.03} & 0.86\\std{0.02} \\\\\n",
      "car_evaluation & 0.05\\std{0.06} & 0.07\\std{0.06} & 0.06\\std{0.07} \\\\\n",
      "congressional_votes & 0.50\\std{0.04} & 0.56\\std{0.04} & 0.53\\std{0.04} \\\\\n",
      "digits & 0.41\\std{0.03} & 0.45\\std{0.04} & 0.44\\std{0.04} \\\\\n",
      "haberman_survival & 0.01\\std{0.05} & 0.00\\std{0.00} & 0.00\\std{0.01} \\\\\n",
      "iris & 0.61\\std{0.03} & 0.62\\std{0.04} & 0.61\\std{0.03} \\\\\n",
      "mice_protein & 0.19\\std{0.02} & 0.18\\std{0.02} & 0.18\\std{0.01} \\\\\n",
      "poker_hand & 0.00\\std{0.00} & 0.00\\std{0.00} & 0.00\\std{0.00} \\\\\n",
      "vowel & 0.13\\std{0.04} & 0.15\\std{0.02} & 0.17\\std{0.02} \\\\\n",
      "wine & 0.89\\std{0.03} & 0.91\\std{0.03} & 0.90\\std{0.03} \\\\\n",
      "\\bottomrule\n",
      "\\end{tabular}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(pd.pivot(new_table, index=\"Dataset\", columns=\"Method\", values=\"ARI\")[all_names].to_latex(escape=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "6e30c0ae",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>Method</th>\n",
       "      <th>KAURI</th>\n",
       "      <th>KMeans+Tree</th>\n",
       "      <th>EXKMC</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Dataset</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>avila</th>\n",
       "      <td>8.47\\std{0.37}</td>\n",
       "      <td>6.77\\std{0.17}</td>\n",
       "      <td>11.95\\std{0.20}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>breast_cancer</th>\n",
       "      <td>2.49\\std{0.18}</td>\n",
       "      <td>2.80\\std{0.24}</td>\n",
       "      <td>3.44\\std{0.44}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>car_evaluation</th>\n",
       "      <td>2.00\\std{0.00}</td>\n",
       "      <td>2.05\\std{0.11}</td>\n",
       "      <td>2.05\\std{0.06}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>congressional_votes</th>\n",
       "      <td>1.65\\std{0.33}</td>\n",
       "      <td>2.43\\std{0.24}</td>\n",
       "      <td>2.73\\std{0.27}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>digits</th>\n",
       "      <td>6.29\\std{0.19}</td>\n",
       "      <td>5.74\\std{0.25}</td>\n",
       "      <td>9.59\\std{0.49}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>haberman_survival</th>\n",
       "      <td>1.08\\std{0.18}</td>\n",
       "      <td>2.55\\std{0.33}</td>\n",
       "      <td>3.14\\std{0.39}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>iris</th>\n",
       "      <td>2.65\\std{0.21}</td>\n",
       "      <td>2.88\\std{0.34}</td>\n",
       "      <td>3.63\\std{0.38}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mice_protein</th>\n",
       "      <td>5.61\\std{0.24}</td>\n",
       "      <td>5.47\\std{0.29}</td>\n",
       "      <td>7.34\\std{0.43}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>poker_hand</th>\n",
       "      <td>3.69\\std{0.15}</td>\n",
       "      <td>5.28\\std{0.02}</td>\n",
       "      <td>6.33\\std{0.65}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>vowel</th>\n",
       "      <td>3.11\\std{0.20}</td>\n",
       "      <td>2.85\\std{0.42}</td>\n",
       "      <td>3.61\\std{0.49}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>wine</th>\n",
       "      <td>2.82\\std{0.34}</td>\n",
       "      <td>2.96\\std{0.28}</td>\n",
       "      <td>3.69\\std{0.61}</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Method                        KAURI     KMeans+Tree            EXKMC\n",
       "Dataset                                                             \n",
       "avila                8.47\\std{0.37}  6.77\\std{0.17}  11.95\\std{0.20}\n",
       "breast_cancer        2.49\\std{0.18}  2.80\\std{0.24}   3.44\\std{0.44}\n",
       "car_evaluation       2.00\\std{0.00}  2.05\\std{0.11}   2.05\\std{0.06}\n",
       "congressional_votes  1.65\\std{0.33}  2.43\\std{0.24}   2.73\\std{0.27}\n",
       "digits               6.29\\std{0.19}  5.74\\std{0.25}   9.59\\std{0.49}\n",
       "haberman_survival    1.08\\std{0.18}  2.55\\std{0.33}   3.14\\std{0.39}\n",
       "iris                 2.65\\std{0.21}  2.88\\std{0.34}   3.63\\std{0.38}\n",
       "mice_protein         5.61\\std{0.24}  5.47\\std{0.29}   7.34\\std{0.43}\n",
       "poker_hand           3.69\\std{0.15}  5.28\\std{0.02}   6.33\\std{0.65}\n",
       "vowel                3.11\\std{0.20}  2.85\\std{0.42}   3.61\\std{0.49}\n",
       "wine                 2.82\\std{0.34}  2.96\\std{0.28}   3.69\\std{0.61}"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.pivot(new_table, index=\"Dataset\", columns=\"Method\", values=\"WAD\")[all_names]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "05df6c0e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\\begin{tabular}{llll}\n",
      "\\toprule\n",
      "Method & KAURI & KMeans+Tree & EXKMC \\\\\n",
      "Dataset &  &  &  \\\\\n",
      "\\midrule\n",
      "avila & 8.29\\std{0.32} & 6.42\\std{0.16} & 10.77\\std{0.18} \\\\\n",
      "breast_cancer & 2.49\\std{0.18} & 2.77\\std{0.24} & 3.20\\std{0.33} \\\\\n",
      "car_evaluation & 2.00\\std{0.00} & 2.05\\std{0.11} & 2.05\\std{0.06} \\\\\n",
      "congressional_votes & 1.65\\std{0.33} & 2.42\\std{0.24} & 2.65\\std{0.29} \\\\\n",
      "digits & 6.26\\std{0.21} & 5.74\\std{0.25} & 9.40\\std{0.46} \\\\\n",
      "haberman_survival & 1.08\\std{0.18} & 2.03\\std{0.23} & 2.41\\std{0.47} \\\\\n",
      "iris & 2.49\\std{0.20} & 2.59\\std{0.25} & 3.01\\std{0.26} \\\\\n",
      "mice_protein & 5.53\\std{0.23} & 5.42\\std{0.31} & 7.22\\std{0.40} \\\\\n",
      "poker_hand & 3.64\\std{0.13} & 5.27\\std{0.03} & 4.78\\std{0.37} \\\\\n",
      "vowel & 3.10\\std{0.20} & 2.85\\std{0.42} & 3.61\\std{0.49} \\\\\n",
      "wine & 2.72\\std{0.38} & 2.87\\std{0.31} & 3.53\\std{0.50} \\\\\n",
      "\\bottomrule\n",
      "\\end{tabular}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(pd.pivot(new_table, index=\"Dataset\", columns=\"Method\", values=\"WAES\")[all_names].to_latex(escape=False))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.17"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
