{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "3a79209b-1560-473e-a6ad-b4c7b807d9db",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "7931fb97-4655-45a4-aa62-8b0f95e09c2d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>task</th>\n",
       "      <th>dataset</th>\n",
       "      <th>boot_iter</th>\n",
       "      <th>algorithm</th>\n",
       "      <th>scoring</th>\n",
       "      <th>n_leaves</th>\n",
       "      <th>max_leaves</th>\n",
       "      <th>regularization</th>\n",
       "      <th>train_score</th>\n",
       "      <th>test_score</th>\n",
       "      <th>train_wall_time</th>\n",
       "      <th>test_wall_time</th>\n",
       "      <th>train_cpu_time</th>\n",
       "      <th>test_cpu_time</th>\n",
       "      <th>tunning_wall_time</th>\n",
       "      <th>tunning_cpu_time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>classification</td>\n",
       "      <td>heart</td>\n",
       "      <td>0</td>\n",
       "      <td>CCP</td>\n",
       "      <td>AUC</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0.029145</td>\n",
       "      <td>0.852812</td>\n",
       "      <td>0.788750</td>\n",
       "      <td>0.000424</td>\n",
       "      <td>0.000133</td>\n",
       "      <td>0.000425</td>\n",
       "      <td>0.000133</td>\n",
       "      <td>0.009463</td>\n",
       "      <td>0.009799</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>classification</td>\n",
       "      <td>heart</td>\n",
       "      <td>0</td>\n",
       "      <td>HS (CART-CCP)</td>\n",
       "      <td>AUC</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0.100000</td>\n",
       "      <td>0.852812</td>\n",
       "      <td>0.788750</td>\n",
       "      <td>0.000152</td>\n",
       "      <td>0.000124</td>\n",
       "      <td>0.000153</td>\n",
       "      <td>0.000124</td>\n",
       "      <td>0.107800</td>\n",
       "      <td>0.107865</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>classification</td>\n",
       "      <td>heart</td>\n",
       "      <td>0</td>\n",
       "      <td>CCP</td>\n",
       "      <td>AUC</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>0.023645</td>\n",
       "      <td>0.885500</td>\n",
       "      <td>0.826750</td>\n",
       "      <td>0.000405</td>\n",
       "      <td>0.000111</td>\n",
       "      <td>0.000405</td>\n",
       "      <td>0.000111</td>\n",
       "      <td>0.008243</td>\n",
       "      <td>0.008243</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>classification</td>\n",
       "      <td>heart</td>\n",
       "      <td>0</td>\n",
       "      <td>HS (CART-CCP)</td>\n",
       "      <td>AUC</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>25.000000</td>\n",
       "      <td>0.878375</td>\n",
       "      <td>0.843750</td>\n",
       "      <td>0.000144</td>\n",
       "      <td>0.000117</td>\n",
       "      <td>0.000145</td>\n",
       "      <td>0.000117</td>\n",
       "      <td>0.113454</td>\n",
       "      <td>0.113600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>classification</td>\n",
       "      <td>heart</td>\n",
       "      <td>0</td>\n",
       "      <td>CCP</td>\n",
       "      <td>AUC</td>\n",
       "      <td>9</td>\n",
       "      <td>8</td>\n",
       "      <td>0.009524</td>\n",
       "      <td>0.932813</td>\n",
       "      <td>0.798250</td>\n",
       "      <td>0.000412</td>\n",
       "      <td>0.000132</td>\n",
       "      <td>0.000413</td>\n",
       "      <td>0.000132</td>\n",
       "      <td>0.008329</td>\n",
       "      <td>0.008332</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12795</th>\n",
       "      <td>classification</td>\n",
       "      <td>recidivism</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>AUC</td>\n",
       "      <td>32</td>\n",
       "      <td>32</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>0.713868</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12796</th>\n",
       "      <td>classification</td>\n",
       "      <td>recidivism</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>AUC</td>\n",
       "      <td>32</td>\n",
       "      <td>32</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>0.721374</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12797</th>\n",
       "      <td>classification</td>\n",
       "      <td>recidivism</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>AUC</td>\n",
       "      <td>32</td>\n",
       "      <td>32</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>0.721035</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12798</th>\n",
       "      <td>classification</td>\n",
       "      <td>recidivism</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>AUC</td>\n",
       "      <td>32</td>\n",
       "      <td>32</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>0.732659</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12799</th>\n",
       "      <td>classification</td>\n",
       "      <td>recidivism</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>AUC</td>\n",
       "      <td>32</td>\n",
       "      <td>32</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>0.737613</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>12800 rows × 16 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                 task     dataset  boot_iter      algorithm scoring  n_leaves  \\\n",
       "0      classification       heart          0            CCP     AUC         3   \n",
       "1      classification       heart          0  HS (CART-CCP)     AUC         3   \n",
       "2      classification       heart          0            CCP     AUC         4   \n",
       "3      classification       heart          0  HS (CART-CCP)     AUC         4   \n",
       "4      classification       heart          0            CCP     AUC         9   \n",
       "...               ...         ...        ...            ...     ...       ...   \n",
       "12795  classification  recidivism          0            LBS     AUC        32   \n",
       "12796  classification  recidivism          0            LBS     AUC        32   \n",
       "12797  classification  recidivism          0            LBS     AUC        32   \n",
       "12798  classification  recidivism          0            LBS     AUC        32   \n",
       "12799  classification  recidivism          0            LBS     AUC        32   \n",
       "\n",
       "       max_leaves  regularization  train_score  test_score  train_wall_time  \\\n",
       "0               2        0.029145     0.852812    0.788750         0.000424   \n",
       "1               2        0.100000     0.852812    0.788750         0.000152   \n",
       "2               4        0.023645     0.885500    0.826750         0.000405   \n",
       "3               4       25.000000     0.878375    0.843750         0.000144   \n",
       "4               8        0.009524     0.932813    0.798250         0.000412   \n",
       "...           ...             ...          ...         ...              ...   \n",
       "12795          32       -1.000000    -1.000000    0.713868        -1.000000   \n",
       "12796          32       -1.000000    -1.000000    0.721374        -1.000000   \n",
       "12797          32       -1.000000    -1.000000    0.721035        -1.000000   \n",
       "12798          32       -1.000000    -1.000000    0.732659        -1.000000   \n",
       "12799          32       -1.000000    -1.000000    0.737613        -1.000000   \n",
       "\n",
       "       test_wall_time  train_cpu_time  test_cpu_time  tunning_wall_time  \\\n",
       "0            0.000133        0.000425       0.000133           0.009463   \n",
       "1            0.000124        0.000153       0.000124           0.107800   \n",
       "2            0.000111        0.000405       0.000111           0.008243   \n",
       "3            0.000117        0.000145       0.000117           0.113454   \n",
       "4            0.000132        0.000413       0.000132           0.008329   \n",
       "...               ...             ...            ...                ...   \n",
       "12795       -1.000000       -1.000000      -1.000000          -1.000000   \n",
       "12796       -1.000000       -1.000000      -1.000000          -1.000000   \n",
       "12797       -1.000000       -1.000000      -1.000000          -1.000000   \n",
       "12798       -1.000000       -1.000000      -1.000000          -1.000000   \n",
       "12799       -1.000000       -1.000000      -1.000000          -1.000000   \n",
       "\n",
       "       tunning_cpu_time  \n",
       "0              0.009799  \n",
       "1              0.107865  \n",
       "2              0.008243  \n",
       "3              0.113600  \n",
       "4              0.008332  \n",
       "...                 ...  \n",
       "12795         -1.000000  \n",
       "12796         -1.000000  \n",
       "12797         -1.000000  \n",
       "12798         -1.000000  \n",
       "12799         -1.000000  \n",
       "\n",
       "[12800 rows x 16 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ccp = pd.read_csv(\"results/claim_1_1_ccp_comparison_classification.csv\")\n",
    "ccp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "ffd58877-bd88-4ea6-8e28-d944f5c0abba",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>task</th>\n",
       "      <th>dataset</th>\n",
       "      <th>boot_iter</th>\n",
       "      <th>algorithm</th>\n",
       "      <th>scoring</th>\n",
       "      <th>n_leaves</th>\n",
       "      <th>max_leaves</th>\n",
       "      <th>regularization</th>\n",
       "      <th>train_score</th>\n",
       "      <th>test_score</th>\n",
       "      <th>train_wall_time</th>\n",
       "      <th>test_wall_time</th>\n",
       "      <th>train_cpu_time</th>\n",
       "      <th>test_cpu_time</th>\n",
       "      <th>tunning_wall_time</th>\n",
       "      <th>tunning_cpu_time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>classification</td>\n",
       "      <td>heart</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>AUC</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0.712500</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>classification</td>\n",
       "      <td>heart</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>AUC</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0.731816</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>classification</td>\n",
       "      <td>heart</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>AUC</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0.712392</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>classification</td>\n",
       "      <td>heart</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>AUC</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0.727941</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>classification</td>\n",
       "      <td>heart</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>AUC</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0.724702</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7995</th>\n",
       "      <td>classification</td>\n",
       "      <td>recidivism</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>AUC</td>\n",
       "      <td>32</td>\n",
       "      <td>32</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0.713868</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7996</th>\n",
       "      <td>classification</td>\n",
       "      <td>recidivism</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>AUC</td>\n",
       "      <td>32</td>\n",
       "      <td>32</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0.721374</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7997</th>\n",
       "      <td>classification</td>\n",
       "      <td>recidivism</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>AUC</td>\n",
       "      <td>32</td>\n",
       "      <td>32</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0.721035</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7998</th>\n",
       "      <td>classification</td>\n",
       "      <td>recidivism</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>AUC</td>\n",
       "      <td>32</td>\n",
       "      <td>32</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0.732659</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7999</th>\n",
       "      <td>classification</td>\n",
       "      <td>recidivism</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>AUC</td>\n",
       "      <td>32</td>\n",
       "      <td>32</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0.737613</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8000 rows × 16 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                task     dataset  boot_iter algorithm scoring  n_leaves  \\\n",
       "0     classification       heart          0       LBS     AUC         2   \n",
       "1     classification       heart          0       LBS     AUC         2   \n",
       "2     classification       heart          0       LBS     AUC         2   \n",
       "3     classification       heart          0       LBS     AUC         2   \n",
       "4     classification       heart          0       LBS     AUC         2   \n",
       "...              ...         ...        ...       ...     ...       ...   \n",
       "7995  classification  recidivism          0       LBS     AUC        32   \n",
       "7996  classification  recidivism          0       LBS     AUC        32   \n",
       "7997  classification  recidivism          0       LBS     AUC        32   \n",
       "7998  classification  recidivism          0       LBS     AUC        32   \n",
       "7999  classification  recidivism          0       LBS     AUC        32   \n",
       "\n",
       "      max_leaves  regularization  train_score  test_score  train_wall_time  \\\n",
       "0              2              -1           -1    0.712500               -1   \n",
       "1              2              -1           -1    0.731816               -1   \n",
       "2              2              -1           -1    0.712392               -1   \n",
       "3              2              -1           -1    0.727941               -1   \n",
       "4              2              -1           -1    0.724702               -1   \n",
       "...          ...             ...          ...         ...              ...   \n",
       "7995          32              -1           -1    0.713868               -1   \n",
       "7996          32              -1           -1    0.721374               -1   \n",
       "7997          32              -1           -1    0.721035               -1   \n",
       "7998          32              -1           -1    0.732659               -1   \n",
       "7999          32              -1           -1    0.737613               -1   \n",
       "\n",
       "      test_wall_time  train_cpu_time  test_cpu_time  tunning_wall_time  \\\n",
       "0                 -1              -1             -1                 -1   \n",
       "1                 -1              -1             -1                 -1   \n",
       "2                 -1              -1             -1                 -1   \n",
       "3                 -1              -1             -1                 -1   \n",
       "4                 -1              -1             -1                 -1   \n",
       "...              ...             ...            ...                ...   \n",
       "7995              -1              -1             -1                 -1   \n",
       "7996              -1              -1             -1                 -1   \n",
       "7997              -1              -1             -1                 -1   \n",
       "7998              -1              -1             -1                 -1   \n",
       "7999              -1              -1             -1                 -1   \n",
       "\n",
       "      tunning_cpu_time  \n",
       "0                   -1  \n",
       "1                   -1  \n",
       "2                   -1  \n",
       "3                   -1  \n",
       "4                   -1  \n",
       "...                ...  \n",
       "7995                -1  \n",
       "7996                -1  \n",
       "7997                -1  \n",
       "7998                -1  \n",
       "7999                -1  \n",
       "\n",
       "[8000 rows x 16 columns]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lbs = pd.read_csv(\"results/lbs_classification.csv\").dropna()\n",
    "lbs[\"task\"] = \"classification\"\n",
    "lbs[\"scoring\"] = \"AUC\"\n",
    "lbs[\"algorithm\"] = \"LBS\"\n",
    "lbs[\"boot_iter\"] = 0\n",
    "lbs[\"test_score\"] = lbs[\"score\"]\n",
    "lbs[\"max_leaves\"] = lbs[\"n_leaves\"]\n",
    "lbs = lbs.drop([\"score\"], axis = 1)\n",
    "for col in [\"train_score\", \"regularization\", \"train_wall_time\", \"test_wall_time\", \"train_cpu_time\", \"test_cpu_time\", \"tunning_wall_time\", \"tunning_cpu_time\"]:\n",
    "    lbs[col] = -1\n",
    "    \n",
    "lbs = lbs[ccp.columns]\n",
    "lbs = lbs.replace(\"german\", \"german-credit\").replace(\"breat-cancer\", \"breast-cancer\")\n",
    "\n",
    "lbs.to_csv(\"results/lbs_class.csv\", index = False)\n",
    "\n",
    "lbs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "edb56c97-7608-4fc2-ac10-affe1bcf014e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>task</th>\n",
       "      <th>dataset</th>\n",
       "      <th>boot_iter</th>\n",
       "      <th>algorithm</th>\n",
       "      <th>scoring</th>\n",
       "      <th>n_leaves</th>\n",
       "      <th>max_leaves</th>\n",
       "      <th>regularization</th>\n",
       "      <th>train_score</th>\n",
       "      <th>test_score</th>\n",
       "      <th>train_wall_time</th>\n",
       "      <th>test_wall_time</th>\n",
       "      <th>train_cpu_time</th>\n",
       "      <th>test_cpu_time</th>\n",
       "      <th>tunning_wall_time</th>\n",
       "      <th>tunning_cpu_time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>regression</td>\n",
       "      <td>friedman1</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>R2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0.410432</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>regression</td>\n",
       "      <td>friedman1</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>R2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0.386399</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>regression</td>\n",
       "      <td>friedman1</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>R2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0.392907</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>regression</td>\n",
       "      <td>friedman1</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>R2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0.306529</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>regression</td>\n",
       "      <td>friedman1</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>R2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0.195128</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7995</th>\n",
       "      <td>regression</td>\n",
       "      <td>california_housing</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>R2</td>\n",
       "      <td>32</td>\n",
       "      <td>32</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0.662050</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7996</th>\n",
       "      <td>regression</td>\n",
       "      <td>california_housing</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>R2</td>\n",
       "      <td>32</td>\n",
       "      <td>32</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0.621685</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7997</th>\n",
       "      <td>regression</td>\n",
       "      <td>california_housing</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>R2</td>\n",
       "      <td>32</td>\n",
       "      <td>32</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0.636259</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7998</th>\n",
       "      <td>regression</td>\n",
       "      <td>california_housing</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>R2</td>\n",
       "      <td>32</td>\n",
       "      <td>32</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0.630501</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7999</th>\n",
       "      <td>regression</td>\n",
       "      <td>california_housing</td>\n",
       "      <td>0</td>\n",
       "      <td>LBS</td>\n",
       "      <td>R2</td>\n",
       "      <td>32</td>\n",
       "      <td>32</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0.624231</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8000 rows × 16 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            task             dataset  boot_iter algorithm scoring  n_leaves  \\\n",
       "0     regression           friedman1          0       LBS      R2         2   \n",
       "1     regression           friedman1          0       LBS      R2         2   \n",
       "2     regression           friedman1          0       LBS      R2         2   \n",
       "3     regression           friedman1          0       LBS      R2         2   \n",
       "4     regression           friedman1          0       LBS      R2         2   \n",
       "...          ...                 ...        ...       ...     ...       ...   \n",
       "7995  regression  california_housing          0       LBS      R2        32   \n",
       "7996  regression  california_housing          0       LBS      R2        32   \n",
       "7997  regression  california_housing          0       LBS      R2        32   \n",
       "7998  regression  california_housing          0       LBS      R2        32   \n",
       "7999  regression  california_housing          0       LBS      R2        32   \n",
       "\n",
       "      max_leaves  regularization  train_score  test_score  train_wall_time  \\\n",
       "0              2              -1           -1    0.410432               -1   \n",
       "1              2              -1           -1    0.386399               -1   \n",
       "2              2              -1           -1    0.392907               -1   \n",
       "3              2              -1           -1    0.306529               -1   \n",
       "4              2              -1           -1    0.195128               -1   \n",
       "...          ...             ...          ...         ...              ...   \n",
       "7995          32              -1           -1    0.662050               -1   \n",
       "7996          32              -1           -1    0.621685               -1   \n",
       "7997          32              -1           -1    0.636259               -1   \n",
       "7998          32              -1           -1    0.630501               -1   \n",
       "7999          32              -1           -1    0.624231               -1   \n",
       "\n",
       "      test_wall_time  train_cpu_time  test_cpu_time  tunning_wall_time  \\\n",
       "0                 -1              -1             -1                 -1   \n",
       "1                 -1              -1             -1                 -1   \n",
       "2                 -1              -1             -1                 -1   \n",
       "3                 -1              -1             -1                 -1   \n",
       "4                 -1              -1             -1                 -1   \n",
       "...              ...             ...            ...                ...   \n",
       "7995              -1              -1             -1                 -1   \n",
       "7996              -1              -1             -1                 -1   \n",
       "7997              -1              -1             -1                 -1   \n",
       "7998              -1              -1             -1                 -1   \n",
       "7999              -1              -1             -1                 -1   \n",
       "\n",
       "      tunning_cpu_time  \n",
       "0                   -1  \n",
       "1                   -1  \n",
       "2                   -1  \n",
       "3                   -1  \n",
       "4                   -1  \n",
       "...                ...  \n",
       "7995                -1  \n",
       "7996                -1  \n",
       "7997                -1  \n",
       "7998                -1  \n",
       "7999                -1  \n",
       "\n",
       "[8000 rows x 16 columns]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Reference results; only the column order is used below, so the LBS export shares the CCP layout.\n",
    "ccp = pd.read_csv(\"results/claim_1_1_ccp_comparison_regression.csv\")\n",
    "\n",
    "# Dataset identifiers as they appear in the LBS results -> names written to the export.\n",
    "dataset_names = {\n",
    "    \"294_satellite_image\": \"satellite-image\",\n",
    "    \"wine_quality_red\": \"red-wine\",\n",
    "    \"diabetes\": \"diabetes-regr\",\n",
    "    \"183\": \"abalone\",\n",
    "}\n",
    "\n",
    "# Columns filled with the constant -1 for every LBS row.\n",
    "placeholder_cols = [\n",
    "    \"train_score\", \"regularization\",\n",
    "    \"train_wall_time\", \"test_wall_time\",\n",
    "    \"train_cpu_time\", \"test_cpu_time\",\n",
    "    \"tunning_wall_time\", \"tunning_cpu_time\",\n",
    "]\n",
    "\n",
    "# Keep the raw frame under its own name so re-running the cell always starts from the file contents.\n",
    "lbs_raw = pd.read_csv(\"results/lbs_regression.csv\").dropna()\n",
    "\n",
    "lbs = (\n",
    "    lbs_raw\n",
    "    .assign(\n",
    "        task=\"regression\",\n",
    "        scoring=\"R2\",\n",
    "        algorithm=\"LBS\",\n",
    "        boot_iter=0,\n",
    "        test_score=lambda df: df[\"score\"],\n",
    "        max_leaves=lambda df: df[\"n_leaves\"],\n",
    "        # Rename only within the dataset column instead of scanning every column of the frame.\n",
    "        dataset=lambda df: df[\"dataset\"].replace(dataset_names),\n",
    "        **{col: -1 for col in placeholder_cols},\n",
    "    )\n",
    "    .drop(columns=\"score\")\n",
    "    .loc[:, ccp.columns]  # same columns, same order as the CCP results\n",
    ")\n",
    "\n",
    "lbs.to_csv(\"results/lbs_reg.csv\", index=False)\n",
    "\n",
    "lbs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1bd59e68-b0ee-4c10-b190-46f50d733157",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "mlds",
   "language": "python",
   "name": "mlds"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
