{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "68cb67a9-98b3-45b2-9aa0-83f7e88b64bc",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "from plotnine import ggplot, aes, geom_line, geom_point, geom_errorbar, \\\n",
    "                     facet_wrap, position_dodge, theme, geom_violin, xlab, ylab, \\\n",
    "                     coord_flip, geom_bar, element_text, scale_x_discrete, scale_fill_manual, \\\n",
    "                     geom_hline, xlim, ylim\n",
    "\n",
    "import warnings\n",
    "import plotnine"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "29088a8a-8196-4356-b36d-a5f6a9fa2139",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>task</th>\n",
       "      <th>dataset</th>\n",
       "      <th>boot_iter</th>\n",
       "      <th>algorithm</th>\n",
       "      <th>scoring</th>\n",
       "      <th>n_leaves</th>\n",
       "      <th>max_leaves</th>\n",
       "      <th>regularization</th>\n",
       "      <th>train_score</th>\n",
       "      <th>test_score</th>\n",
       "      <th>train_wall_time</th>\n",
       "      <th>test_wall_time</th>\n",
       "      <th>train_cpu_time</th>\n",
       "      <th>test_cpu_time</th>\n",
       "      <th>tunning_wall_time</th>\n",
       "      <th>tunning_cpu_time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>regression</td>\n",
       "      <td>music</td>\n",
       "      <td>0</td>\n",
       "      <td>DT</td>\n",
       "      <td>R2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>None</td>\n",
       "      <td>0.048112</td>\n",
       "      <td>0.017939</td>\n",
       "      <td>0.009294</td>\n",
       "      <td>0.000177</td>\n",
       "      <td>0.009466</td>\n",
       "      <td>0.000177</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>regression</td>\n",
       "      <td>music</td>\n",
       "      <td>0</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>R2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.048112</td>\n",
       "      <td>0.017967</td>\n",
       "      <td>0.000100</td>\n",
       "      <td>0.000182</td>\n",
       "      <td>0.000101</td>\n",
       "      <td>0.000182</td>\n",
       "      <td>0.111042</td>\n",
       "      <td>0.111036</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>regression</td>\n",
       "      <td>music</td>\n",
       "      <td>0</td>\n",
       "      <td>DT</td>\n",
       "      <td>R2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>None</td>\n",
       "      <td>0.136489</td>\n",
       "      <td>0.067355</td>\n",
       "      <td>0.013186</td>\n",
       "      <td>0.000284</td>\n",
       "      <td>0.013187</td>\n",
       "      <td>0.000285</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>regression</td>\n",
       "      <td>music</td>\n",
       "      <td>0</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>R2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.136489</td>\n",
       "      <td>0.067366</td>\n",
       "      <td>0.000124</td>\n",
       "      <td>0.000194</td>\n",
       "      <td>0.000124</td>\n",
       "      <td>0.000194</td>\n",
       "      <td>0.158875</td>\n",
       "      <td>0.159254</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>regression</td>\n",
       "      <td>music</td>\n",
       "      <td>0</td>\n",
       "      <td>DT</td>\n",
       "      <td>R2</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>None</td>\n",
       "      <td>0.193190</td>\n",
       "      <td>0.063399</td>\n",
       "      <td>0.016016</td>\n",
       "      <td>0.000188</td>\n",
       "      <td>0.016016</td>\n",
       "      <td>0.000188</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15995</th>\n",
       "      <td>regression</td>\n",
       "      <td>red-wine</td>\n",
       "      <td>99</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>R2</td>\n",
       "      <td>27</td>\n",
       "      <td>28</td>\n",
       "      <td>50.0</td>\n",
       "      <td>0.492416</td>\n",
       "      <td>0.292801</td>\n",
       "      <td>0.000458</td>\n",
       "      <td>0.000149</td>\n",
       "      <td>0.000459</td>\n",
       "      <td>0.000150</td>\n",
       "      <td>0.043065</td>\n",
       "      <td>0.043061</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15996</th>\n",
       "      <td>regression</td>\n",
       "      <td>red-wine</td>\n",
       "      <td>99</td>\n",
       "      <td>DT</td>\n",
       "      <td>R2</td>\n",
       "      <td>29</td>\n",
       "      <td>30</td>\n",
       "      <td>None</td>\n",
       "      <td>0.526433</td>\n",
       "      <td>0.229395</td>\n",
       "      <td>0.002861</td>\n",
       "      <td>0.000165</td>\n",
       "      <td>0.002860</td>\n",
       "      <td>0.000165</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15997</th>\n",
       "      <td>regression</td>\n",
       "      <td>red-wine</td>\n",
       "      <td>99</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>R2</td>\n",
       "      <td>29</td>\n",
       "      <td>30</td>\n",
       "      <td>50.0</td>\n",
       "      <td>0.500966</td>\n",
       "      <td>0.287589</td>\n",
       "      <td>0.000485</td>\n",
       "      <td>0.000145</td>\n",
       "      <td>0.000485</td>\n",
       "      <td>0.000145</td>\n",
       "      <td>0.043738</td>\n",
       "      <td>0.043729</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15998</th>\n",
       "      <td>regression</td>\n",
       "      <td>red-wine</td>\n",
       "      <td>99</td>\n",
       "      <td>DT</td>\n",
       "      <td>R2</td>\n",
       "      <td>31</td>\n",
       "      <td>32</td>\n",
       "      <td>None</td>\n",
       "      <td>0.536494</td>\n",
       "      <td>0.230524</td>\n",
       "      <td>0.002884</td>\n",
       "      <td>0.000156</td>\n",
       "      <td>0.002882</td>\n",
       "      <td>0.000157</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15999</th>\n",
       "      <td>regression</td>\n",
       "      <td>red-wine</td>\n",
       "      <td>99</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>R2</td>\n",
       "      <td>31</td>\n",
       "      <td>32</td>\n",
       "      <td>50.0</td>\n",
       "      <td>0.506599</td>\n",
       "      <td>0.287917</td>\n",
       "      <td>0.000512</td>\n",
       "      <td>0.000154</td>\n",
       "      <td>0.000513</td>\n",
       "      <td>0.000155</td>\n",
       "      <td>0.045957</td>\n",
       "      <td>0.045959</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>16000 rows × 16 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             task   dataset  boot_iter  algorithm scoring  n_leaves  \\\n",
       "0      regression     music          0         DT      R2         1   \n",
       "1      regression     music          0  HS (CART)      R2         1   \n",
       "2      regression     music          0         DT      R2         3   \n",
       "3      regression     music          0  HS (CART)      R2         3   \n",
       "4      regression     music          0         DT      R2         7   \n",
       "...           ...       ...        ...        ...     ...       ...   \n",
       "15995  regression  red-wine         99  HS (CART)      R2        27   \n",
       "15996  regression  red-wine         99         DT      R2        29   \n",
       "15997  regression  red-wine         99  HS (CART)      R2        29   \n",
       "15998  regression  red-wine         99         DT      R2        31   \n",
       "15999  regression  red-wine         99  HS (CART)      R2        31   \n",
       "\n",
       "       max_leaves regularization  train_score  test_score  train_wall_time  \\\n",
       "0               2           None     0.048112    0.017939         0.009294   \n",
       "1               2            1.0     0.048112    0.017967         0.000100   \n",
       "2               4           None     0.136489    0.067355         0.013186   \n",
       "3               4            0.1     0.136489    0.067366         0.000124   \n",
       "4               8           None     0.193190    0.063399         0.016016   \n",
       "...           ...            ...          ...         ...              ...   \n",
       "15995          28           50.0     0.492416    0.292801         0.000458   \n",
       "15996          30           None     0.526433    0.229395         0.002861   \n",
       "15997          30           50.0     0.500966    0.287589         0.000485   \n",
       "15998          32           None     0.536494    0.230524         0.002884   \n",
       "15999          32           50.0     0.506599    0.287917         0.000512   \n",
       "\n",
       "       test_wall_time  train_cpu_time  test_cpu_time  tunning_wall_time  \\\n",
       "0            0.000177        0.009466       0.000177                NaN   \n",
       "1            0.000182        0.000101       0.000182           0.111042   \n",
       "2            0.000284        0.013187       0.000285                NaN   \n",
       "3            0.000194        0.000124       0.000194           0.158875   \n",
       "4            0.000188        0.016016       0.000188                NaN   \n",
       "...               ...             ...            ...                ...   \n",
       "15995        0.000149        0.000459       0.000150           0.043065   \n",
       "15996        0.000165        0.002860       0.000165                NaN   \n",
       "15997        0.000145        0.000485       0.000145           0.043738   \n",
       "15998        0.000156        0.002882       0.000157                NaN   \n",
       "15999        0.000154        0.000513       0.000155           0.045957   \n",
       "\n",
       "       tunning_cpu_time  \n",
       "0                   NaN  \n",
       "1              0.111036  \n",
       "2                   NaN  \n",
       "3              0.159254  \n",
       "4                   NaN  \n",
       "...                 ...  \n",
       "15995          0.043061  \n",
       "15996               NaN  \n",
       "15997          0.043729  \n",
       "15998               NaN  \n",
       "15999          0.045959  \n",
       "\n",
       "[16000 rows x 16 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "regression = pd.read_csv(\"results/claim_1_1_dt_comparison_regression.csv\")\n",
    "regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "7a99994a-775e-4193-b19d-0da888accb83",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_113260/548491370.py:1: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>dataset</th>\n",
       "      <th>algorithm</th>\n",
       "      <th>train_score</th>\n",
       "      <th>test_score</th>\n",
       "      <th>train_wall_time</th>\n",
       "      <th>test_wall_time</th>\n",
       "      <th>train_cpu_time</th>\n",
       "      <th>test_cpu_time</th>\n",
       "      <th>tunning_wall_time</th>\n",
       "      <th>tunning_cpu_time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>breast-cancer</td>\n",
       "      <td>DT</td>\n",
       "      <td>851.830484</td>\n",
       "      <td>618.070426</td>\n",
       "      <td>0.335582</td>\n",
       "      <td>0.122240</td>\n",
       "      <td>0.335352</td>\n",
       "      <td>0.122619</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>breast-cancer</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>813.595726</td>\n",
       "      <td>666.624299</td>\n",
       "      <td>0.282225</td>\n",
       "      <td>0.115824</td>\n",
       "      <td>0.282650</td>\n",
       "      <td>0.116438</td>\n",
       "      <td>16.829625</td>\n",
       "      <td>16.838201</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>diabetes</td>\n",
       "      <td>DT</td>\n",
       "      <td>875.186958</td>\n",
       "      <td>744.172624</td>\n",
       "      <td>1.025753</td>\n",
       "      <td>0.159827</td>\n",
       "      <td>1.024479</td>\n",
       "      <td>0.160137</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>diabetes</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>863.277905</td>\n",
       "      <td>773.470549</td>\n",
       "      <td>0.290464</td>\n",
       "      <td>0.144738</td>\n",
       "      <td>0.290812</td>\n",
       "      <td>0.145175</td>\n",
       "      <td>24.984394</td>\n",
       "      <td>24.983740</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>german-credit</td>\n",
       "      <td>DT</td>\n",
       "      <td>812.339909</td>\n",
       "      <td>695.125940</td>\n",
       "      <td>1.102294</td>\n",
       "      <td>0.172289</td>\n",
       "      <td>1.101392</td>\n",
       "      <td>0.172639</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>german-credit</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>799.265928</td>\n",
       "      <td>718.391474</td>\n",
       "      <td>0.285833</td>\n",
       "      <td>0.160829</td>\n",
       "      <td>0.286381</td>\n",
       "      <td>0.161631</td>\n",
       "      <td>27.032853</td>\n",
       "      <td>27.043223</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>haberman</td>\n",
       "      <td>DT</td>\n",
       "      <td>829.788765</td>\n",
       "      <td>575.471111</td>\n",
       "      <td>0.371763</td>\n",
       "      <td>0.125628</td>\n",
       "      <td>0.370851</td>\n",
       "      <td>0.125965</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>haberman</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>793.550123</td>\n",
       "      <td>637.396296</td>\n",
       "      <td>0.283939</td>\n",
       "      <td>0.114236</td>\n",
       "      <td>0.284378</td>\n",
       "      <td>0.114705</td>\n",
       "      <td>16.532525</td>\n",
       "      <td>16.538794</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>heart</td>\n",
       "      <td>DT</td>\n",
       "      <td>941.312312</td>\n",
       "      <td>748.148750</td>\n",
       "      <td>0.411391</td>\n",
       "      <td>0.123552</td>\n",
       "      <td>0.411302</td>\n",
       "      <td>0.123968</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>heart</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>930.275938</td>\n",
       "      <td>807.559000</td>\n",
       "      <td>0.286573</td>\n",
       "      <td>0.117987</td>\n",
       "      <td>0.287002</td>\n",
       "      <td>0.118509</td>\n",
       "      <td>17.570681</td>\n",
       "      <td>17.582094</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>ionosphere</td>\n",
       "      <td>DT</td>\n",
       "      <td>965.606190</td>\n",
       "      <td>854.442857</td>\n",
       "      <td>2.074230</td>\n",
       "      <td>0.148256</td>\n",
       "      <td>2.073828</td>\n",
       "      <td>0.148617</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>ionosphere</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>963.343175</td>\n",
       "      <td>902.620952</td>\n",
       "      <td>0.240100</td>\n",
       "      <td>0.131967</td>\n",
       "      <td>0.240447</td>\n",
       "      <td>0.132427</td>\n",
       "      <td>29.200948</td>\n",
       "      <td>29.201881</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>juvenile</td>\n",
       "      <td>DT</td>\n",
       "      <td>846.382474</td>\n",
       "      <td>806.573649</td>\n",
       "      <td>10.115685</td>\n",
       "      <td>0.582895</td>\n",
       "      <td>10.115326</td>\n",
       "      <td>0.583269</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>juvenile</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>843.808384</td>\n",
       "      <td>816.952741</td>\n",
       "      <td>0.298826</td>\n",
       "      <td>0.658518</td>\n",
       "      <td>0.299199</td>\n",
       "      <td>0.659014</td>\n",
       "      <td>153.543736</td>\n",
       "      <td>153.569026</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>recidivism</td>\n",
       "      <td>DT</td>\n",
       "      <td>737.757550</td>\n",
       "      <td>712.308505</td>\n",
       "      <td>3.124833</td>\n",
       "      <td>0.421708</td>\n",
       "      <td>3.123318</td>\n",
       "      <td>0.422029</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>recidivism</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>737.116193</td>\n",
       "      <td>713.988697</td>\n",
       "      <td>0.308159</td>\n",
       "      <td>0.417665</td>\n",
       "      <td>0.308507</td>\n",
       "      <td>0.418160</td>\n",
       "      <td>56.499663</td>\n",
       "      <td>56.497488</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          dataset  algorithm  train_score  test_score  train_wall_time  \\\n",
       "0   breast-cancer         DT   851.830484  618.070426         0.335582   \n",
       "1   breast-cancer  HS (CART)   813.595726  666.624299         0.282225   \n",
       "2        diabetes         DT   875.186958  744.172624         1.025753   \n",
       "3        diabetes  HS (CART)   863.277905  773.470549         0.290464   \n",
       "4   german-credit         DT   812.339909  695.125940         1.102294   \n",
       "5   german-credit  HS (CART)   799.265928  718.391474         0.285833   \n",
       "6        haberman         DT   829.788765  575.471111         0.371763   \n",
       "7        haberman  HS (CART)   793.550123  637.396296         0.283939   \n",
       "8           heart         DT   941.312312  748.148750         0.411391   \n",
       "9           heart  HS (CART)   930.275938  807.559000         0.286573   \n",
       "10     ionosphere         DT   965.606190  854.442857         2.074230   \n",
       "11     ionosphere  HS (CART)   963.343175  902.620952         0.240100   \n",
       "12       juvenile         DT   846.382474  806.573649        10.115685   \n",
       "13       juvenile  HS (CART)   843.808384  816.952741         0.298826   \n",
       "14     recidivism         DT   737.757550  712.308505         3.124833   \n",
       "15     recidivism  HS (CART)   737.116193  713.988697         0.308159   \n",
       "\n",
       "    test_wall_time  train_cpu_time  test_cpu_time  tunning_wall_time  \\\n",
       "0         0.122240        0.335352       0.122619           0.000000   \n",
       "1         0.115824        0.282650       0.116438          16.829625   \n",
       "2         0.159827        1.024479       0.160137           0.000000   \n",
       "3         0.144738        0.290812       0.145175          24.984394   \n",
       "4         0.172289        1.101392       0.172639           0.000000   \n",
       "5         0.160829        0.286381       0.161631          27.032853   \n",
       "6         0.125628        0.370851       0.125965           0.000000   \n",
       "7         0.114236        0.284378       0.114705          16.532525   \n",
       "8         0.123552        0.411302       0.123968           0.000000   \n",
       "9         0.117987        0.287002       0.118509          17.570681   \n",
       "10        0.148256        2.073828       0.148617           0.000000   \n",
       "11        0.131967        0.240447       0.132427          29.200948   \n",
       "12        0.582895       10.115326       0.583269           0.000000   \n",
       "13        0.658518        0.299199       0.659014         153.543736   \n",
       "14        0.421708        3.123318       0.422029           0.000000   \n",
       "15        0.417665        0.308507       0.418160          56.499663   \n",
       "\n",
       "    tunning_cpu_time  \n",
       "0           0.000000  \n",
       "1          16.838201  \n",
       "2           0.000000  \n",
       "3          24.983740  \n",
       "4           0.000000  \n",
       "5          27.043223  \n",
       "6           0.000000  \n",
       "7          16.538794  \n",
       "8           0.000000  \n",
       "9          17.582094  \n",
       "10          0.000000  \n",
       "11         29.201881  \n",
       "12          0.000000  \n",
       "13        153.569026  \n",
       "14          0.000000  \n",
       "15         56.497488  "
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "time = regression.drop([\"task\", \"boot_iter\", \"scoring\", \"n_leaves\", \"max_leaves\"], axis=1).groupby([\"dataset\", \"algorithm\"]).sum().reset_index()\n",
    "time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "b780d0c6-08b7-4383-9941-7cf4a721193f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>task</th>\n",
       "      <th>dataset</th>\n",
       "      <th>boot_iter</th>\n",
       "      <th>algorithm</th>\n",
       "      <th>scoring</th>\n",
       "      <th>n_leaves</th>\n",
       "      <th>max_leaves</th>\n",
       "      <th>regularization</th>\n",
       "      <th>train_score</th>\n",
       "      <th>test_score</th>\n",
       "      <th>train_wall_time</th>\n",
       "      <th>test_wall_time</th>\n",
       "      <th>train_cpu_time</th>\n",
       "      <th>test_cpu_time</th>\n",
       "      <th>tunning_wall_time</th>\n",
       "      <th>tunning_cpu_time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>classification</td>\n",
       "      <td>heart</td>\n",
       "      <td>0</td>\n",
       "      <td>DT</td>\n",
       "      <td>AUC</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>None</td>\n",
       "      <td>0.760000</td>\n",
       "      <td>0.680000</td>\n",
       "      <td>0.000346</td>\n",
       "      <td>0.000124</td>\n",
       "      <td>0.000344</td>\n",
       "      <td>0.000124</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>classification</td>\n",
       "      <td>heart</td>\n",
       "      <td>0</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>AUC</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>25.0</td>\n",
       "      <td>0.760000</td>\n",
       "      <td>0.680000</td>\n",
       "      <td>0.000106</td>\n",
       "      <td>0.000112</td>\n",
       "      <td>0.000107</td>\n",
       "      <td>0.000113</td>\n",
       "      <td>0.013177</td>\n",
       "      <td>0.013298</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>classification</td>\n",
       "      <td>heart</td>\n",
       "      <td>0</td>\n",
       "      <td>DT</td>\n",
       "      <td>AUC</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>None</td>\n",
       "      <td>0.864250</td>\n",
       "      <td>0.863000</td>\n",
       "      <td>0.000349</td>\n",
       "      <td>0.000119</td>\n",
       "      <td>0.000349</td>\n",
       "      <td>0.000120</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>classification</td>\n",
       "      <td>heart</td>\n",
       "      <td>0</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>AUC</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.864250</td>\n",
       "      <td>0.863000</td>\n",
       "      <td>0.000136</td>\n",
       "      <td>0.000116</td>\n",
       "      <td>0.000136</td>\n",
       "      <td>0.000117</td>\n",
       "      <td>0.015152</td>\n",
       "      <td>0.015154</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>classification</td>\n",
       "      <td>heart</td>\n",
       "      <td>0</td>\n",
       "      <td>DT</td>\n",
       "      <td>AUC</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>None</td>\n",
       "      <td>0.914000</td>\n",
       "      <td>0.883750</td>\n",
       "      <td>0.000360</td>\n",
       "      <td>0.000134</td>\n",
       "      <td>0.000360</td>\n",
       "      <td>0.000134</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15995</th>\n",
       "      <td>classification</td>\n",
       "      <td>recidivism</td>\n",
       "      <td>99</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>AUC</td>\n",
       "      <td>27</td>\n",
       "      <td>28</td>\n",
       "      <td>100.0</td>\n",
       "      <td>0.767420</td>\n",
       "      <td>0.723390</td>\n",
       "      <td>0.000662</td>\n",
       "      <td>0.000515</td>\n",
       "      <td>0.000662</td>\n",
       "      <td>0.000515</td>\n",
       "      <td>0.065993</td>\n",
       "      <td>0.065994</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15996</th>\n",
       "      <td>classification</td>\n",
       "      <td>recidivism</td>\n",
       "      <td>99</td>\n",
       "      <td>DT</td>\n",
       "      <td>AUC</td>\n",
       "      <td>29</td>\n",
       "      <td>30</td>\n",
       "      <td>None</td>\n",
       "      <td>0.770873</td>\n",
       "      <td>0.719165</td>\n",
       "      <td>0.003724</td>\n",
       "      <td>0.000447</td>\n",
       "      <td>0.003723</td>\n",
       "      <td>0.000448</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15997</th>\n",
       "      <td>classification</td>\n",
       "      <td>recidivism</td>\n",
       "      <td>99</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>AUC</td>\n",
       "      <td>29</td>\n",
       "      <td>30</td>\n",
       "      <td>100.0</td>\n",
       "      <td>0.768487</td>\n",
       "      <td>0.723435</td>\n",
       "      <td>0.000438</td>\n",
       "      <td>0.000435</td>\n",
       "      <td>0.000438</td>\n",
       "      <td>0.000436</td>\n",
       "      <td>0.066314</td>\n",
       "      <td>0.066305</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15998</th>\n",
       "      <td>classification</td>\n",
       "      <td>recidivism</td>\n",
       "      <td>99</td>\n",
       "      <td>DT</td>\n",
       "      <td>AUC</td>\n",
       "      <td>31</td>\n",
       "      <td>32</td>\n",
       "      <td>None</td>\n",
       "      <td>0.772336</td>\n",
       "      <td>0.714142</td>\n",
       "      <td>0.003777</td>\n",
       "      <td>0.000443</td>\n",
       "      <td>0.003776</td>\n",
       "      <td>0.000443</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15999</th>\n",
       "      <td>classification</td>\n",
       "      <td>recidivism</td>\n",
       "      <td>99</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>AUC</td>\n",
       "      <td>31</td>\n",
       "      <td>32</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.772307</td>\n",
       "      <td>0.714159</td>\n",
       "      <td>0.000474</td>\n",
       "      <td>0.000434</td>\n",
       "      <td>0.000474</td>\n",
       "      <td>0.000435</td>\n",
       "      <td>0.067369</td>\n",
       "      <td>0.067369</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>16000 rows × 16 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                 task     dataset  boot_iter  algorithm scoring  n_leaves  \\\n",
       "0      classification       heart          0         DT     AUC         1   \n",
       "1      classification       heart          0  HS (CART)     AUC         1   \n",
       "2      classification       heart          0         DT     AUC         3   \n",
       "3      classification       heart          0  HS (CART)     AUC         3   \n",
       "4      classification       heart          0         DT     AUC         7   \n",
       "...               ...         ...        ...        ...     ...       ...   \n",
       "15995  classification  recidivism         99  HS (CART)     AUC        27   \n",
       "15996  classification  recidivism         99         DT     AUC        29   \n",
       "15997  classification  recidivism         99  HS (CART)     AUC        29   \n",
       "15998  classification  recidivism         99         DT     AUC        31   \n",
       "15999  classification  recidivism         99  HS (CART)     AUC        31   \n",
       "\n",
       "       max_leaves regularization  train_score  test_score  train_wall_time  \\\n",
       "0               2           None     0.760000    0.680000         0.000346   \n",
       "1               2           25.0     0.760000    0.680000         0.000106   \n",
       "2               4           None     0.864250    0.863000         0.000349   \n",
       "3               4            1.0     0.864250    0.863000         0.000136   \n",
       "4               8           None     0.914000    0.883750         0.000360   \n",
       "...           ...            ...          ...         ...              ...   \n",
       "15995          28          100.0     0.767420    0.723390         0.000662   \n",
       "15996          30           None     0.770873    0.719165         0.003724   \n",
       "15997          30          100.0     0.768487    0.723435         0.000438   \n",
       "15998          32           None     0.772336    0.714142         0.003777   \n",
       "15999          32           10.0     0.772307    0.714159         0.000474   \n",
       "\n",
       "       test_wall_time  train_cpu_time  test_cpu_time  tunning_wall_time  \\\n",
       "0            0.000124        0.000344       0.000124                NaN   \n",
       "1            0.000112        0.000107       0.000113           0.013177   \n",
       "2            0.000119        0.000349       0.000120                NaN   \n",
       "3            0.000116        0.000136       0.000117           0.015152   \n",
       "4            0.000134        0.000360       0.000134                NaN   \n",
       "...               ...             ...            ...                ...   \n",
       "15995        0.000515        0.000662       0.000515           0.065993   \n",
       "15996        0.000447        0.003723       0.000448                NaN   \n",
       "15997        0.000435        0.000438       0.000436           0.066314   \n",
       "15998        0.000443        0.003776       0.000443                NaN   \n",
       "15999        0.000434        0.000474       0.000435           0.067369   \n",
       "\n",
       "       tunning_cpu_time  \n",
       "0                   NaN  \n",
       "1              0.013298  \n",
       "2                   NaN  \n",
       "3              0.015154  \n",
       "4                   NaN  \n",
       "...                 ...  \n",
       "15995          0.065994  \n",
       "15996               NaN  \n",
       "15997          0.066305  \n",
       "15998               NaN  \n",
       "15999          0.067369  \n",
       "\n",
       "[16000 rows x 16 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "classification = pd.read_csv(\"results/claim_1_1_dt_comparison_classification.csv\")\n",
    "classification"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "24651091-3890-41ed-877c-d6e557fd1322",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_113260/3084832542.py:1: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>dataset</th>\n",
       "      <th>algorithm</th>\n",
       "      <th>train_score</th>\n",
       "      <th>test_score</th>\n",
       "      <th>train_wall_time</th>\n",
       "      <th>test_wall_time</th>\n",
       "      <th>train_cpu_time</th>\n",
       "      <th>test_cpu_time</th>\n",
       "      <th>tunning_wall_time</th>\n",
       "      <th>tunning_cpu_time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>breast-cancer</td>\n",
       "      <td>DT</td>\n",
       "      <td>851.830484</td>\n",
       "      <td>618.070426</td>\n",
       "      <td>0.335582</td>\n",
       "      <td>0.122240</td>\n",
       "      <td>0.335352</td>\n",
       "      <td>0.122619</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>breast-cancer</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>813.595726</td>\n",
       "      <td>666.624299</td>\n",
       "      <td>0.282225</td>\n",
       "      <td>0.115824</td>\n",
       "      <td>0.282650</td>\n",
       "      <td>0.116438</td>\n",
       "      <td>16.829625</td>\n",
       "      <td>16.838201</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>diabetes</td>\n",
       "      <td>DT</td>\n",
       "      <td>875.186958</td>\n",
       "      <td>744.172624</td>\n",
       "      <td>1.025753</td>\n",
       "      <td>0.159827</td>\n",
       "      <td>1.024479</td>\n",
       "      <td>0.160137</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>diabetes</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>863.277905</td>\n",
       "      <td>773.470549</td>\n",
       "      <td>0.290464</td>\n",
       "      <td>0.144738</td>\n",
       "      <td>0.290812</td>\n",
       "      <td>0.145175</td>\n",
       "      <td>24.984394</td>\n",
       "      <td>24.983740</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>german-credit</td>\n",
       "      <td>DT</td>\n",
       "      <td>812.339909</td>\n",
       "      <td>695.125940</td>\n",
       "      <td>1.102294</td>\n",
       "      <td>0.172289</td>\n",
       "      <td>1.101392</td>\n",
       "      <td>0.172639</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>german-credit</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>799.265928</td>\n",
       "      <td>718.391474</td>\n",
       "      <td>0.285833</td>\n",
       "      <td>0.160829</td>\n",
       "      <td>0.286381</td>\n",
       "      <td>0.161631</td>\n",
       "      <td>27.032853</td>\n",
       "      <td>27.043223</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>haberman</td>\n",
       "      <td>DT</td>\n",
       "      <td>829.788765</td>\n",
       "      <td>575.471111</td>\n",
       "      <td>0.371763</td>\n",
       "      <td>0.125628</td>\n",
       "      <td>0.370851</td>\n",
       "      <td>0.125965</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>haberman</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>793.550123</td>\n",
       "      <td>637.396296</td>\n",
       "      <td>0.283939</td>\n",
       "      <td>0.114236</td>\n",
       "      <td>0.284378</td>\n",
       "      <td>0.114705</td>\n",
       "      <td>16.532525</td>\n",
       "      <td>16.538794</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>heart</td>\n",
       "      <td>DT</td>\n",
       "      <td>941.312312</td>\n",
       "      <td>748.148750</td>\n",
       "      <td>0.411391</td>\n",
       "      <td>0.123552</td>\n",
       "      <td>0.411302</td>\n",
       "      <td>0.123968</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>heart</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>930.275938</td>\n",
       "      <td>807.559000</td>\n",
       "      <td>0.286573</td>\n",
       "      <td>0.117987</td>\n",
       "      <td>0.287002</td>\n",
       "      <td>0.118509</td>\n",
       "      <td>17.570681</td>\n",
       "      <td>17.582094</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>ionosphere</td>\n",
       "      <td>DT</td>\n",
       "      <td>965.606190</td>\n",
       "      <td>854.442857</td>\n",
       "      <td>2.074230</td>\n",
       "      <td>0.148256</td>\n",
       "      <td>2.073828</td>\n",
       "      <td>0.148617</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>ionosphere</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>963.343175</td>\n",
       "      <td>902.620952</td>\n",
       "      <td>0.240100</td>\n",
       "      <td>0.131967</td>\n",
       "      <td>0.240447</td>\n",
       "      <td>0.132427</td>\n",
       "      <td>29.200948</td>\n",
       "      <td>29.201881</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>juvenile</td>\n",
       "      <td>DT</td>\n",
       "      <td>846.382474</td>\n",
       "      <td>806.573649</td>\n",
       "      <td>10.115685</td>\n",
       "      <td>0.582895</td>\n",
       "      <td>10.115326</td>\n",
       "      <td>0.583269</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>juvenile</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>843.808384</td>\n",
       "      <td>816.952741</td>\n",
       "      <td>0.298826</td>\n",
       "      <td>0.658518</td>\n",
       "      <td>0.299199</td>\n",
       "      <td>0.659014</td>\n",
       "      <td>153.543736</td>\n",
       "      <td>153.569026</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>recidivism</td>\n",
       "      <td>DT</td>\n",
       "      <td>737.757550</td>\n",
       "      <td>712.308505</td>\n",
       "      <td>3.124833</td>\n",
       "      <td>0.421708</td>\n",
       "      <td>3.123318</td>\n",
       "      <td>0.422029</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>recidivism</td>\n",
       "      <td>HS (CART)</td>\n",
       "      <td>737.116193</td>\n",
       "      <td>713.988697</td>\n",
       "      <td>0.308159</td>\n",
       "      <td>0.417665</td>\n",
       "      <td>0.308507</td>\n",
       "      <td>0.418160</td>\n",
       "      <td>56.499663</td>\n",
       "      <td>56.497488</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          dataset  algorithm  train_score  test_score  train_wall_time  \\\n",
       "0   breast-cancer         DT   851.830484  618.070426         0.335582   \n",
       "1   breast-cancer  HS (CART)   813.595726  666.624299         0.282225   \n",
       "2        diabetes         DT   875.186958  744.172624         1.025753   \n",
       "3        diabetes  HS (CART)   863.277905  773.470549         0.290464   \n",
       "4   german-credit         DT   812.339909  695.125940         1.102294   \n",
       "5   german-credit  HS (CART)   799.265928  718.391474         0.285833   \n",
       "6        haberman         DT   829.788765  575.471111         0.371763   \n",
       "7        haberman  HS (CART)   793.550123  637.396296         0.283939   \n",
       "8           heart         DT   941.312312  748.148750         0.411391   \n",
       "9           heart  HS (CART)   930.275938  807.559000         0.286573   \n",
       "10     ionosphere         DT   965.606190  854.442857         2.074230   \n",
       "11     ionosphere  HS (CART)   963.343175  902.620952         0.240100   \n",
       "12       juvenile         DT   846.382474  806.573649        10.115685   \n",
       "13       juvenile  HS (CART)   843.808384  816.952741         0.298826   \n",
       "14     recidivism         DT   737.757550  712.308505         3.124833   \n",
       "15     recidivism  HS (CART)   737.116193  713.988697         0.308159   \n",
       "\n",
       "    test_wall_time  train_cpu_time  test_cpu_time  tunning_wall_time  \\\n",
       "0         0.122240        0.335352       0.122619           0.000000   \n",
       "1         0.115824        0.282650       0.116438          16.829625   \n",
       "2         0.159827        1.024479       0.160137           0.000000   \n",
       "3         0.144738        0.290812       0.145175          24.984394   \n",
       "4         0.172289        1.101392       0.172639           0.000000   \n",
       "5         0.160829        0.286381       0.161631          27.032853   \n",
       "6         0.125628        0.370851       0.125965           0.000000   \n",
       "7         0.114236        0.284378       0.114705          16.532525   \n",
       "8         0.123552        0.411302       0.123968           0.000000   \n",
       "9         0.117987        0.287002       0.118509          17.570681   \n",
       "10        0.148256        2.073828       0.148617           0.000000   \n",
       "11        0.131967        0.240447       0.132427          29.200948   \n",
       "12        0.582895       10.115326       0.583269           0.000000   \n",
       "13        0.658518        0.299199       0.659014         153.543736   \n",
       "14        0.421708        3.123318       0.422029           0.000000   \n",
       "15        0.417665        0.308507       0.418160          56.499663   \n",
       "\n",
       "    tunning_cpu_time  \n",
       "0           0.000000  \n",
       "1          16.838201  \n",
       "2           0.000000  \n",
       "3          24.983740  \n",
       "4           0.000000  \n",
       "5          27.043223  \n",
       "6           0.000000  \n",
       "7          16.538794  \n",
       "8           0.000000  \n",
       "9          17.582094  \n",
       "10          0.000000  \n",
       "11         29.201881  \n",
       "12          0.000000  \n",
       "13        153.569026  \n",
       "14          0.000000  \n",
       "15         56.497488  "
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "time = classification.drop([\"task\", \"boot_iter\", \"scoring\", \"n_leaves\", \"max_leaves\"], axis=1).groupby([\"dataset\", \"algorithm\"]).sum().reset_index()\n",
    "time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "298114cd-eb97-4f21-84d5-bd6581626f13",
   "metadata": {},
   "outputs": [],
   "source": [
    "wall_time = time[\"train_wall_time\"] + time[\"test_wall_time\"] + time[\"tunning_wall_time\"]\n",
    "cpu_time  = time[\"train_cpu_time\"] + time[\"test_cpu_time\"] + time[\"tunning_cpu_time\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "26d913c9-18db-499f-9838-22d265b2c66a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6.116666666666666"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.ceil(wall_time.sum()) / 60"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "1c058118-53f9-4ac3-b366-7499b51895e6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6.116666666666666"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.ceil(cpu_time.sum()) / 60"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "32c1a13e-1eb1-4f16-ae76-e9b40e9444a3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "train_score          6745\n",
       "test_score           6038\n",
       "train_wall_time         3\n",
       "test_wall_time          2\n",
       "train_cpu_time          3\n",
       "test_cpu_time           2\n",
       "tunning_wall_time     343\n",
       "tunning_cpu_time      343\n",
       "dtype: int64"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.ceil(time.drop([\"dataset\", \"algorithm\"], axis = 1).loc[time[\"algorithm\"] == \"HS (CART)\"].T.sum(axis = 1)).astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "055761b9-ff0c-40e9-9d88-bf41c4edd358",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "train_score          6861\n",
       "test_score           5755\n",
       "train_wall_time        19\n",
       "test_wall_time          2\n",
       "train_cpu_time         19\n",
       "test_cpu_time           2\n",
       "tunning_wall_time       0\n",
       "tunning_cpu_time        0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.ceil(time.drop([\"dataset\", \"algorithm\"], axis = 1).loc[time[\"algorithm\"] == \"DT\"].T.sum(axis = 1)).astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "ae3d69a1-fba6-4cb8-96dd-502a271b26a2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>train_score</th>\n",
       "      <th>test_score</th>\n",
       "      <th>train_wall_time</th>\n",
       "      <th>test_wall_time</th>\n",
       "      <th>train_cpu_time</th>\n",
       "      <th>test_cpu_time</th>\n",
       "      <th>tunning_wall_time</th>\n",
       "      <th>tunning_cpu_time</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>dataset</th>\n",
       "      <th>algorithm</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">breast-cancer</th>\n",
       "      <th>DT</th>\n",
       "      <td>851.830484</td>\n",
       "      <td>618.070426</td>\n",
       "      <td>0.335582</td>\n",
       "      <td>0.122240</td>\n",
       "      <td>0.335352</td>\n",
       "      <td>0.122619</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HS (CART)</th>\n",
       "      <td>813.595726</td>\n",
       "      <td>666.624299</td>\n",
       "      <td>0.282225</td>\n",
       "      <td>0.115824</td>\n",
       "      <td>0.282650</td>\n",
       "      <td>0.116438</td>\n",
       "      <td>16.829625</td>\n",
       "      <td>16.838201</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">diabetes</th>\n",
       "      <th>DT</th>\n",
       "      <td>875.186958</td>\n",
       "      <td>744.172624</td>\n",
       "      <td>1.025753</td>\n",
       "      <td>0.159827</td>\n",
       "      <td>1.024479</td>\n",
       "      <td>0.160137</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HS (CART)</th>\n",
       "      <td>863.277905</td>\n",
       "      <td>773.470549</td>\n",
       "      <td>0.290464</td>\n",
       "      <td>0.144738</td>\n",
       "      <td>0.290812</td>\n",
       "      <td>0.145175</td>\n",
       "      <td>24.984394</td>\n",
       "      <td>24.983740</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">german-credit</th>\n",
       "      <th>DT</th>\n",
       "      <td>812.339909</td>\n",
       "      <td>695.125940</td>\n",
       "      <td>1.102294</td>\n",
       "      <td>0.172289</td>\n",
       "      <td>1.101392</td>\n",
       "      <td>0.172639</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HS (CART)</th>\n",
       "      <td>799.265928</td>\n",
       "      <td>718.391474</td>\n",
       "      <td>0.285833</td>\n",
       "      <td>0.160829</td>\n",
       "      <td>0.286381</td>\n",
       "      <td>0.161631</td>\n",
       "      <td>27.032853</td>\n",
       "      <td>27.043223</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">haberman</th>\n",
       "      <th>DT</th>\n",
       "      <td>829.788765</td>\n",
       "      <td>575.471111</td>\n",
       "      <td>0.371763</td>\n",
       "      <td>0.125628</td>\n",
       "      <td>0.370851</td>\n",
       "      <td>0.125965</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HS (CART)</th>\n",
       "      <td>793.550123</td>\n",
       "      <td>637.396296</td>\n",
       "      <td>0.283939</td>\n",
       "      <td>0.114236</td>\n",
       "      <td>0.284378</td>\n",
       "      <td>0.114705</td>\n",
       "      <td>16.532525</td>\n",
       "      <td>16.538794</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">heart</th>\n",
       "      <th>DT</th>\n",
       "      <td>941.312312</td>\n",
       "      <td>748.148750</td>\n",
       "      <td>0.411391</td>\n",
       "      <td>0.123552</td>\n",
       "      <td>0.411302</td>\n",
       "      <td>0.123968</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HS (CART)</th>\n",
       "      <td>930.275938</td>\n",
       "      <td>807.559000</td>\n",
       "      <td>0.286573</td>\n",
       "      <td>0.117987</td>\n",
       "      <td>0.287002</td>\n",
       "      <td>0.118509</td>\n",
       "      <td>17.570681</td>\n",
       "      <td>17.582094</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">ionosphere</th>\n",
       "      <th>DT</th>\n",
       "      <td>965.606190</td>\n",
       "      <td>854.442857</td>\n",
       "      <td>2.074230</td>\n",
       "      <td>0.148256</td>\n",
       "      <td>2.073828</td>\n",
       "      <td>0.148617</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HS (CART)</th>\n",
       "      <td>963.343175</td>\n",
       "      <td>902.620952</td>\n",
       "      <td>0.240100</td>\n",
       "      <td>0.131967</td>\n",
       "      <td>0.240447</td>\n",
       "      <td>0.132427</td>\n",
       "      <td>29.200948</td>\n",
       "      <td>29.201881</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">juvenile</th>\n",
       "      <th>DT</th>\n",
       "      <td>846.382474</td>\n",
       "      <td>806.573649</td>\n",
       "      <td>10.115685</td>\n",
       "      <td>0.582895</td>\n",
       "      <td>10.115326</td>\n",
       "      <td>0.583269</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HS (CART)</th>\n",
       "      <td>843.808384</td>\n",
       "      <td>816.952741</td>\n",
       "      <td>0.298826</td>\n",
       "      <td>0.658518</td>\n",
       "      <td>0.299199</td>\n",
       "      <td>0.659014</td>\n",
       "      <td>153.543736</td>\n",
       "      <td>153.569026</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">recidivism</th>\n",
       "      <th>DT</th>\n",
       "      <td>737.757550</td>\n",
       "      <td>712.308505</td>\n",
       "      <td>3.124833</td>\n",
       "      <td>0.421708</td>\n",
       "      <td>3.123318</td>\n",
       "      <td>0.422029</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HS (CART)</th>\n",
       "      <td>737.116193</td>\n",
       "      <td>713.988697</td>\n",
       "      <td>0.308159</td>\n",
       "      <td>0.417665</td>\n",
       "      <td>0.308507</td>\n",
       "      <td>0.418160</td>\n",
       "      <td>56.499663</td>\n",
       "      <td>56.497488</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                         train_score  test_score  train_wall_time  \\\n",
       "dataset       algorithm                                             \n",
       "breast-cancer DT          851.830484  618.070426         0.335582   \n",
       "              HS (CART)   813.595726  666.624299         0.282225   \n",
       "diabetes      DT          875.186958  744.172624         1.025753   \n",
       "              HS (CART)   863.277905  773.470549         0.290464   \n",
       "german-credit DT          812.339909  695.125940         1.102294   \n",
       "              HS (CART)   799.265928  718.391474         0.285833   \n",
       "haberman      DT          829.788765  575.471111         0.371763   \n",
       "              HS (CART)   793.550123  637.396296         0.283939   \n",
       "heart         DT          941.312312  748.148750         0.411391   \n",
       "              HS (CART)   930.275938  807.559000         0.286573   \n",
       "ionosphere    DT          965.606190  854.442857         2.074230   \n",
       "              HS (CART)   963.343175  902.620952         0.240100   \n",
       "juvenile      DT          846.382474  806.573649        10.115685   \n",
       "              HS (CART)   843.808384  816.952741         0.298826   \n",
       "recidivism    DT          737.757550  712.308505         3.124833   \n",
       "              HS (CART)   737.116193  713.988697         0.308159   \n",
       "\n",
       "                         test_wall_time  train_cpu_time  test_cpu_time  \\\n",
       "dataset       algorithm                                                  \n",
       "breast-cancer DT               0.122240        0.335352       0.122619   \n",
       "              HS (CART)        0.115824        0.282650       0.116438   \n",
       "diabetes      DT               0.159827        1.024479       0.160137   \n",
       "              HS (CART)        0.144738        0.290812       0.145175   \n",
       "german-credit DT               0.172289        1.101392       0.172639   \n",
       "              HS (CART)        0.160829        0.286381       0.161631   \n",
       "haberman      DT               0.125628        0.370851       0.125965   \n",
       "              HS (CART)        0.114236        0.284378       0.114705   \n",
       "heart         DT               0.123552        0.411302       0.123968   \n",
       "              HS (CART)        0.117987        0.287002       0.118509   \n",
       "ionosphere    DT               0.148256        2.073828       0.148617   \n",
       "              HS (CART)        0.131967        0.240447       0.132427   \n",
       "juvenile      DT               0.582895       10.115326       0.583269   \n",
       "              HS (CART)        0.658518        0.299199       0.659014   \n",
       "recidivism    DT               0.421708        3.123318       0.422029   \n",
       "              HS (CART)        0.417665        0.308507       0.418160   \n",
       "\n",
       "                         tunning_wall_time  tunning_cpu_time  \n",
       "dataset       algorithm                                       \n",
       "breast-cancer DT                  0.000000          0.000000  \n",
       "              HS (CART)          16.829625         16.838201  \n",
       "diabetes      DT                  0.000000          0.000000  \n",
       "              HS (CART)          24.984394         24.983740  \n",
       "german-credit DT                  0.000000          0.000000  \n",
       "              HS (CART)          27.032853         27.043223  \n",
       "haberman      DT                  0.000000          0.000000  \n",
       "              HS (CART)          16.532525         16.538794  \n",
       "heart         DT                  0.000000          0.000000  \n",
       "              HS (CART)          17.570681         17.582094  \n",
       "ionosphere    DT                  0.000000          0.000000  \n",
       "              HS (CART)          29.200948         29.201881  \n",
       "juvenile      DT                  0.000000          0.000000  \n",
       "              HS (CART)         153.543736        153.569026  \n",
       "recidivism    DT                  0.000000          0.000000  \n",
       "              HS (CART)          56.499663         56.497488  "
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "bfbb160f-a512-47ca-9e36-094345e67152",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "train_score           2181.992600\n",
       "test_score            1815.117546\n",
       "train_wall_time        637.756476\n",
       "test_wall_time          74.862484\n",
       "train_cpu_time        1092.721296\n",
       "test_cpu_time           88.473703\n",
       "tunning_wall_time    14889.286633\n",
       "tunning_cpu_time     24379.788667\n",
       "dtype: float64"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the random-forest comparison results for the classification task.\n",
    "rf = pd.read_csv(\"results/rf_comparison_classification.csv\")\n",
    "\n",
    "# Drop the identifier / configuration columns so that only the score and\n",
    "# timing columns remain, then total each remaining column.\n",
    "rf.drop(\n",
    "    columns=[\n",
    "        \"task\",\n",
    "        \"boot_iter\",\n",
    "        \"scoring\",\n",
    "        \"n_trees\",\n",
    "        \"Unnamed: 0\",\n",
    "        \"dataset\",\n",
    "        \"regularization\",\n",
    "        \"algorithm\",\n",
    "    ]\n",
    ").sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7218c30f-f32c-4a18-95d9-c99472732982",
   "metadata": {},
   "outputs": [],
   "source": [
    "# `rf` has no `train_time` column (accessing it raised AttributeError);\n",
    "# training time is stored as separate wall-clock and CPU measurements,\n",
    "# so inspect both of those columns instead.\n",
    "rf[[\"train_wall_time\", \"train_cpu_time\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "43b738bf-1a80-44cc-9424-481cc42ca8d1",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "mlds",
   "language": "python",
   "name": "mlds"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
