{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "from os import SEEK_CUR, SEEK_END\n",
    "from datetime import datetime\n",
    "\n",
    "def readlast(f):\n",
    "    try:\n",
    "        f.seek(-2, SEEK_END)     \n",
    "        while f.read(1) != b\"\\n\":\n",
    "            f.seek(-2, SEEK_CUR)  \n",
    "    except OSError:               \n",
    "        f.seek(0)                  \n",
    "    return f.read()     \n",
    "\n",
    "\n",
    "def get_runtimes(path: Path) -> dict[int, dict[str, float]]:\n",
    "    \"\"\"Summarise the wall-clock run time per dataset from the logs under ``path``.\n",
    "\n",
    "    Expected layout: ``path/<dataset id>/<run folder>/log.log``. The duration\n",
    "    of a run is the difference between the timestamps that start the first\n",
    "    and the last line of its log file.\n",
    "\n",
    "    Args:\n",
    "        path: Folder whose sub-folders are named after integer dataset ids.\n",
    "\n",
    "    Returns:\n",
    "        A dict mapping each dataset id to a dict with the keys 'mean', 'min',\n",
    "        'max' and 'total' (seconds), aggregated over the runs of that dataset.\n",
    "        Dataset folders that contain no run folder are left out.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If a dataset folder name is not an integer, or the first or\n",
    "            last log line does not start with a matching timestamp.\n",
    "        FileNotFoundError: If a run folder has no 'log.log' file.\n",
    "    \"\"\"\n",
    "    timestamp_format = '%Y-%m-%d %H:%M:%S.%f'\n",
    "\n",
    "    def parse_timestamp(log_line: bytes) -> datetime:\n",
    "        # Format is: 2024-03-26 21:00:42.190 | INFO   | etc....\n",
    "        return datetime.strptime(log_line.split(b' | ')[0].decode('utf-8'), timestamp_format)\n",
    "\n",
    "    results = {}\n",
    "\n",
    "    for sweep_folder in path.iterdir():\n",
    "        if not sweep_folder.is_dir():\n",
    "            # We are only interested in the folders, which represent openml datasets\n",
    "            continue\n",
    "\n",
    "        dataset_id = int(sweep_folder.name)\n",
    "\n",
    "        run_times = []\n",
    "\n",
    "        for run_folder in sweep_folder.iterdir():\n",
    "            if not run_folder.is_dir():\n",
    "                # A stray file next to the run folders cannot contain a log.log\n",
    "                continue\n",
    "\n",
    "            with open(run_folder / 'log.log', 'rb') as f:\n",
    "                first = f.readline()\n",
    "                last = readlast(f)\n",
    "\n",
    "            elapsed = parse_timestamp(last) - parse_timestamp(first)\n",
    "            run_times.append(elapsed.total_seconds())\n",
    "\n",
    "        if not run_times:\n",
    "            # Without runs the mean, min and max are undefined; skipping avoids\n",
    "            # a ZeroDivisionError on the mean.\n",
    "            continue\n",
    "\n",
    "        total = sum(run_times)\n",
    "        results[dataset_id] = {\n",
    "            'mean': total / len(run_times),\n",
    "            'min': min(run_times),\n",
    "            'max': max(run_times),\n",
    "            'total': total\n",
    "        }\n",
    "\n",
    "    return results\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run-time summaries per benchmark sweep. Every pair below is parsed in the\n",
    "# order (finetune folder, zeroshot folder).\n",
    "whytrees_mixed_finetuned, whytrees_mixed_zeroshot = (\n",
    "    get_runtimes(Path(sweep_path))\n",
    "    for sweep_path in (\n",
    "        'outputs/saved/tabforestpfn/test_categorical_classification_finetune',\n",
    "        'outputs/saved/tabforestpfn/test_categorical_classification_zeroshot',\n",
    "    )\n",
    ")\n",
    "whytrees_numerical_finetuned, whytrees_numerical_zeroshot = (\n",
    "    get_runtimes(Path(sweep_path))\n",
    "    for sweep_path in (\n",
    "        'outputs/saved/tabforestpfn/test_numerical_classification_finetune',\n",
    "        'outputs/saved/tabforestpfn/test_numerical_classification_zeroshot',\n",
    "    )\n",
    ")\n",
    "tabzilla_finetuned, tabzilla_zeroshot = (\n",
    "    get_runtimes(Path(sweep_path))\n",
    "    for sweep_path in (\n",
    "        'outputs/saved/tabforestpfn/test_tabzilla_has_completed_runs_finetune',\n",
    "        'outputs/saved/tabforestpfn/test_tabzilla_has_completed_runs_zeroshot',\n",
    "    )\n",
    ")\n",
    "\n",
    "# One lookup table per mode for the two whytrees sweeps; on a shared dataset id\n",
    "# the numerical entry takes precedence over the categorical one.\n",
    "whytrees_finetuned = whytrees_mixed_finetuned | whytrees_numerical_finetuned\n",
    "whytrees_zeroshot = whytrees_mixed_zeroshot | whytrees_numerical_zeroshot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total time for zeroshot: 2.52 hours\n",
      "Total time for finetune: 18.8 hours\n",
      "Total time for all runs: 21.32 hours\n"
     ]
    }
   ],
   "source": [
    "# Add up every result set on its own. Merging the dicts before summing keeps only\n",
    "# one entry per dataset id, which would silently drop run time if the same id\n",
    "# occurred in more than one benchmark.\n",
    "zeroshot_total = sum(\n",
    "    runtime['total']\n",
    "    for runtimes in (whytrees_mixed_zeroshot, whytrees_numerical_zeroshot, tabzilla_zeroshot)\n",
    "    for runtime in runtimes.values()\n",
    ")\n",
    "finetune_total = sum(\n",
    "    runtime['total']\n",
    "    for runtimes in (whytrees_mixed_finetuned, whytrees_numerical_finetuned, tabzilla_finetuned)\n",
    "    for runtime in runtimes.values()\n",
    ")\n",
    "all_total = zeroshot_total + finetune_total\n",
    "\n",
    "# The totals are in seconds; report them in hours.\n",
    "print(f\"Total time for zeroshot: {round(zeroshot_total / 3600, 2)} hours\")\n",
    "print(f\"Total time for finetune: {round(finetune_total / 3600, 2)} hours\")\n",
    "print(f\"Total time for all runs: {round(all_total / 3600, 2)} hours\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>openml_dataset_name</th>\n",
       "      <th>n_observations</th>\n",
       "      <th>n_train</th>\n",
       "      <th>n_val</th>\n",
       "      <th>n_test</th>\n",
       "      <th>n_features</th>\n",
       "      <th>n_splits</th>\n",
       "      <th>n_classes</th>\n",
       "      <th>runtime_zeroshot</th>\n",
       "      <th>runtime_zeroshot_per_cv</th>\n",
       "      <th>runtime_finetune</th>\n",
       "      <th>runtime_finetune_per_cv</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>openml_dataset_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>44089</th>\n",
       "      <td>credit</td>\n",
       "      <td>16714</td>\n",
       "      <td>10000</td>\n",
       "      <td>2014</td>\n",
       "      <td>4700</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>17.3</td>\n",
       "      <td>8.7</td>\n",
       "      <td>205.9</td>\n",
       "      <td>102.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44120</th>\n",
       "      <td>electricity</td>\n",
       "      <td>38474</td>\n",
       "      <td>10000</td>\n",
       "      <td>8542</td>\n",
       "      <td>19932</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>15.1</td>\n",
       "      <td>15.1</td>\n",
       "      <td>150.7</td>\n",
       "      <td>150.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44121</th>\n",
       "      <td>covertype</td>\n",
       "      <td>566602</td>\n",
       "      <td>10000</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "      <td>10</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>33.5</td>\n",
       "      <td>33.5</td>\n",
       "      <td>167.3</td>\n",
       "      <td>167.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44122</th>\n",
       "      <td>pol</td>\n",
       "      <td>10082</td>\n",
       "      <td>7057</td>\n",
       "      <td>907</td>\n",
       "      <td>2118</td>\n",
       "      <td>26</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>18.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>172.3</td>\n",
       "      <td>57.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44123</th>\n",
       "      <td>house_16H</td>\n",
       "      <td>13488</td>\n",
       "      <td>9441</td>\n",
       "      <td>1214</td>\n",
       "      <td>2833</td>\n",
       "      <td>16</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>22.7</td>\n",
       "      <td>7.6</td>\n",
       "      <td>217.3</td>\n",
       "      <td>72.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44125</th>\n",
       "      <td>MagicTelescope</td>\n",
       "      <td>13376</td>\n",
       "      <td>9363</td>\n",
       "      <td>1203</td>\n",
       "      <td>2810</td>\n",
       "      <td>10</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>21.6</td>\n",
       "      <td>7.2</td>\n",
       "      <td>313.9</td>\n",
       "      <td>104.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44126</th>\n",
       "      <td>bank-marketing</td>\n",
       "      <td>10578</td>\n",
       "      <td>7404</td>\n",
       "      <td>952</td>\n",
       "      <td>2222</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>19.7</td>\n",
       "      <td>6.6</td>\n",
       "      <td>204.6</td>\n",
       "      <td>68.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44128</th>\n",
       "      <td>MiniBooNE</td>\n",
       "      <td>72998</td>\n",
       "      <td>10000</td>\n",
       "      <td>18899</td>\n",
       "      <td>44099</td>\n",
       "      <td>50</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>27.5</td>\n",
       "      <td>27.5</td>\n",
       "      <td>126.3</td>\n",
       "      <td>126.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44129</th>\n",
       "      <td>Higgs</td>\n",
       "      <td>940160</td>\n",
       "      <td>10000</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "      <td>24</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>33.5</td>\n",
       "      <td>33.5</td>\n",
       "      <td>118.8</td>\n",
       "      <td>118.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44130</th>\n",
       "      <td>eye_movements</td>\n",
       "      <td>7608</td>\n",
       "      <td>5325</td>\n",
       "      <td>684</td>\n",
       "      <td>1599</td>\n",
       "      <td>20</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>16.1</td>\n",
       "      <td>5.4</td>\n",
       "      <td>187.8</td>\n",
       "      <td>62.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44156</th>\n",
       "      <td>electricity</td>\n",
       "      <td>38474</td>\n",
       "      <td>10000</td>\n",
       "      <td>8542</td>\n",
       "      <td>19932</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>17.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>141.7</td>\n",
       "      <td>141.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44157</th>\n",
       "      <td>eye_movements</td>\n",
       "      <td>7608</td>\n",
       "      <td>5325</td>\n",
       "      <td>684</td>\n",
       "      <td>1599</td>\n",
       "      <td>23</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>16.8</td>\n",
       "      <td>5.6</td>\n",
       "      <td>195.0</td>\n",
       "      <td>65.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44159</th>\n",
       "      <td>covertype</td>\n",
       "      <td>423680</td>\n",
       "      <td>10000</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "      <td>54</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>37.1</td>\n",
       "      <td>37.1</td>\n",
       "      <td>219.2</td>\n",
       "      <td>219.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45019</th>\n",
       "      <td>Bioresponse</td>\n",
       "      <td>3434</td>\n",
       "      <td>2403</td>\n",
       "      <td>309</td>\n",
       "      <td>722</td>\n",
       "      <td>419</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>39.9</td>\n",
       "      <td>8.0</td>\n",
       "      <td>168.5</td>\n",
       "      <td>33.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45020</th>\n",
       "      <td>default-of-credit-card-clients</td>\n",
       "      <td>13272</td>\n",
       "      <td>9290</td>\n",
       "      <td>1194</td>\n",
       "      <td>2788</td>\n",
       "      <td>20</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>21.4</td>\n",
       "      <td>7.1</td>\n",
       "      <td>244.3</td>\n",
       "      <td>81.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45021</th>\n",
       "      <td>jannis</td>\n",
       "      <td>57580</td>\n",
       "      <td>10000</td>\n",
       "      <td>14274</td>\n",
       "      <td>33306</td>\n",
       "      <td>54</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>22.9</td>\n",
       "      <td>22.9</td>\n",
       "      <td>130.1</td>\n",
       "      <td>130.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45022</th>\n",
       "      <td>Diabetes130US</td>\n",
       "      <td>71090</td>\n",
       "      <td>10000</td>\n",
       "      <td>18327</td>\n",
       "      <td>42763</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>24.6</td>\n",
       "      <td>24.6</td>\n",
       "      <td>95.2</td>\n",
       "      <td>95.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45026</th>\n",
       "      <td>heloc</td>\n",
       "      <td>10000</td>\n",
       "      <td>7000</td>\n",
       "      <td>900</td>\n",
       "      <td>2100</td>\n",
       "      <td>22</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>18.2</td>\n",
       "      <td>6.1</td>\n",
       "      <td>167.3</td>\n",
       "      <td>55.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45028</th>\n",
       "      <td>california</td>\n",
       "      <td>20634</td>\n",
       "      <td>10000</td>\n",
       "      <td>3190</td>\n",
       "      <td>7444</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>11.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>112.4</td>\n",
       "      <td>112.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45035</th>\n",
       "      <td>albert</td>\n",
       "      <td>58252</td>\n",
       "      <td>10000</td>\n",
       "      <td>14475</td>\n",
       "      <td>33777</td>\n",
       "      <td>31</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>20.7</td>\n",
       "      <td>20.7</td>\n",
       "      <td>102.9</td>\n",
       "      <td>102.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45036</th>\n",
       "      <td>default-of-credit-card-clients</td>\n",
       "      <td>13272</td>\n",
       "      <td>9290</td>\n",
       "      <td>1194</td>\n",
       "      <td>2788</td>\n",
       "      <td>21</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>22.7</td>\n",
       "      <td>7.6</td>\n",
       "      <td>237.9</td>\n",
       "      <td>79.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45038</th>\n",
       "      <td>road-safety</td>\n",
       "      <td>111762</td>\n",
       "      <td>10000</td>\n",
       "      <td>30528</td>\n",
       "      <td>50000</td>\n",
       "      <td>32</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>29.8</td>\n",
       "      <td>29.8</td>\n",
       "      <td>153.4</td>\n",
       "      <td>153.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45039</th>\n",
       "      <td>compas-two-years</td>\n",
       "      <td>4966</td>\n",
       "      <td>3476</td>\n",
       "      <td>447</td>\n",
       "      <td>1043</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>14.3</td>\n",
       "      <td>4.8</td>\n",
       "      <td>129.2</td>\n",
       "      <td>43.1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                              openml_dataset_name  n_observations  n_train  \\\n",
       "openml_dataset_id                                                            \n",
       "44089                                      credit           16714    10000   \n",
       "44120                                 electricity           38474    10000   \n",
       "44121                                   covertype          566602    10000   \n",
       "44122                                         pol           10082     7057   \n",
       "44123                                   house_16H           13488     9441   \n",
       "44125                              MagicTelescope           13376     9363   \n",
       "44126                              bank-marketing           10578     7404   \n",
       "44128                                   MiniBooNE           72998    10000   \n",
       "44129                                       Higgs          940160    10000   \n",
       "44130                               eye_movements            7608     5325   \n",
       "44156                                 electricity           38474    10000   \n",
       "44157                               eye_movements            7608     5325   \n",
       "44159                                   covertype          423680    10000   \n",
       "45019                                 Bioresponse            3434     2403   \n",
       "45020              default-of-credit-card-clients           13272     9290   \n",
       "45021                                      jannis           57580    10000   \n",
       "45022                               Diabetes130US           71090    10000   \n",
       "45026                                       heloc           10000     7000   \n",
       "45028                                  california           20634    10000   \n",
       "45035                                      albert           58252    10000   \n",
       "45036              default-of-credit-card-clients           13272     9290   \n",
       "45038                                 road-safety          111762    10000   \n",
       "45039                            compas-two-years            4966     3476   \n",
       "\n",
       "                   n_val  n_test  n_features  n_splits  n_classes  \\\n",
       "openml_dataset_id                                                   \n",
       "44089               2014    4700          10         2          2   \n",
       "44120               8542   19932           7         1          2   \n",
       "44121              50000   50000          10         1          2   \n",
       "44122                907    2118          26         3          2   \n",
       "44123               1214    2833          16         3          2   \n",
       "44125               1203    2810          10         3          2   \n",
       "44126                952    2222           7         3          2   \n",
       "44128              18899   44099          50         1          2   \n",
       "44129              50000   50000          24         1          2   \n",
       "44130                684    1599          20         3          2   \n",
       "44156               8542   19932           8         1          2   \n",
       "44157                684    1599          23         3          2   \n",
       "44159              50000   50000          54         1          2   \n",
       "45019                309     722         419         5          2   \n",
       "45020               1194    2788          20         3          2   \n",
       "45021              14274   33306          54         1          2   \n",
       "45022              18327   42763           7         1          2   \n",
       "45026                900    2100          22         3          2   \n",
       "45028               3190    7444           8         1          2   \n",
       "45035              14475   33777          31         1          2   \n",
       "45036               1194    2788          21         3          2   \n",
       "45038              30528   50000          32         1          2   \n",
       "45039                447    1043          11         3          2   \n",
       "\n",
       "                   runtime_zeroshot  runtime_zeroshot_per_cv  \\\n",
       "openml_dataset_id                                              \n",
       "44089                          17.3                      8.7   \n",
       "44120                          15.1                     15.1   \n",
       "44121                          33.5                     33.5   \n",
       "44122                          18.0                      6.0   \n",
       "44123                          22.7                      7.6   \n",
       "44125                          21.6                      7.2   \n",
       "44126                          19.7                      6.6   \n",
       "44128                          27.5                     27.5   \n",
       "44129                          33.5                     33.5   \n",
       "44130                          16.1                      5.4   \n",
       "44156                          17.0                     17.0   \n",
       "44157                          16.8                      5.6   \n",
       "44159                          37.1                     37.1   \n",
       "45019                          39.9                      8.0   \n",
       "45020                          21.4                      7.1   \n",
       "45021                          22.9                     22.9   \n",
       "45022                          24.6                     24.6   \n",
       "45026                          18.2                      6.1   \n",
       "45028                          11.0                     11.0   \n",
       "45035                          20.7                     20.7   \n",
       "45036                          22.7                      7.6   \n",
       "45038                          29.8                     29.8   \n",
       "45039                          14.3                      4.8   \n",
       "\n",
       "                   runtime_finetune  runtime_finetune_per_cv  \n",
       "openml_dataset_id                                             \n",
       "44089                         205.9                    102.9  \n",
       "44120                         150.7                    150.7  \n",
       "44121                         167.3                    167.3  \n",
       "44122                         172.3                     57.4  \n",
       "44123                         217.3                     72.4  \n",
       "44125                         313.9                    104.6  \n",
       "44126                         204.6                     68.2  \n",
       "44128                         126.3                    126.3  \n",
       "44129                         118.8                    118.8  \n",
       "44130                         187.8                     62.6  \n",
       "44156                         141.7                    141.7  \n",
       "44157                         195.0                     65.0  \n",
       "44159                         219.2                    219.2  \n",
       "45019                         168.5                     33.7  \n",
       "45020                         244.3                     81.4  \n",
       "45021                         130.1                    130.1  \n",
       "45022                          95.2                     95.2  \n",
       "45026                         167.3                     55.8  \n",
       "45028                         112.4                    112.4  \n",
       "45035                         102.9                    102.9  \n",
       "45036                         237.9                     79.3  \n",
       "45038                         153.4                    153.4  \n",
       "45039                         129.2                     43.1  "
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from tabicl.core.enums import BenchmarkOrigin\n",
    "from tabicl.data.metadata import create_metadata\n",
    "from tabicl.data.benchmarks import BENCHMARKS, BenchmarkName\n",
    "\n",
    "# Metadata table for the WHYTREES benchmark origin, indexed by openml dataset id.\n",
    "metadata_whytrees = create_metadata(benchmark_origin=BenchmarkOrigin.WHYTREES)\n",
    "\n",
    "ids_used_in_paper_numerical = BENCHMARKS[BenchmarkName.NUMERICAL_CLASSIFICATION].openml_dataset_ids\n",
    "ids_used_in_paper_categorical = BENCHMARKS[BenchmarkName.CATEGORICAL_CLASSIFICATION].openml_dataset_ids\n",
    "\n",
    "# Ids of both classification benchmarks in ascending order.\n",
    "ids_used_in_paper = sorted(ids_used_in_paper_numerical + ids_used_in_paper_categorical)\n",
    "\n",
    "# Keep only those datasets and attach the mean run times. The per-split columns\n",
    "# are derived from the unrounded means; rounding to one decimal comes last.\n",
    "metadata_whytrees = (\n",
    "    metadata_whytrees\n",
    "    .loc[ids_used_in_paper]\n",
    "    .assign(\n",
    "        runtime_zeroshot=lambda table: [whytrees_zeroshot[dataset_id]['mean'] for dataset_id in table.index],\n",
    "        runtime_zeroshot_per_cv=lambda table: table['runtime_zeroshot'] / table['n_splits'],\n",
    "        runtime_finetune=lambda table: [whytrees_finetuned[dataset_id]['mean'] for dataset_id in table.index],\n",
    "        runtime_finetune_per_cv=lambda table: table['runtime_finetune'] / table['n_splits'],\n",
    "    )\n",
    "    .round({\n",
    "        'runtime_zeroshot': 1,\n",
    "        'runtime_zeroshot_per_cv': 1,\n",
    "        'runtime_finetune': 1,\n",
    "        'runtime_finetune_per_cv': 1,\n",
    "    })\n",
    ")\n",
    "\n",
    "metadata_whytrees"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\\begin{tabular}{llrrrr}\n",
      "\\toprule\n",
      " & openml_dataset_name & n_train & n_features & runtime_zeroshot_per_cv & runtime_finetune_per_cv \\\\\n",
      "openml_dataset_id &  &  &  &  &  \\\\\n",
      "\\midrule\n",
      "44089 & credit & 10000 & 10 & 9 & 103 \\\\\n",
      "44120 & electricity & 10000 & 7 & 15 & 151 \\\\\n",
      "44121 & covertype & 10000 & 10 & 34 & 167 \\\\\n",
      "44122 & pol & 7057 & 26 & 6 & 57 \\\\\n",
      "44123 & house_16H & 9441 & 16 & 8 & 72 \\\\\n",
      "44125 & MagicTelescope & 9363 & 10 & 7 & 105 \\\\\n",
      "44126 & bank-marketing & 7404 & 7 & 7 & 68 \\\\\n",
      "44128 & MiniBooNE & 10000 & 50 & 28 & 126 \\\\\n",
      "44129 & Higgs & 10000 & 24 & 34 & 119 \\\\\n",
      "44130 & eye_movements & 5325 & 20 & 5 & 63 \\\\\n",
      "44156 & electricity & 10000 & 8 & 17 & 142 \\\\\n",
      "44157 & eye_movements & 5325 & 23 & 6 & 65 \\\\\n",
      "44159 & covertype & 10000 & 54 & 37 & 219 \\\\\n",
      "45019 & Bioresponse & 2403 & 419 & 8 & 34 \\\\\n",
      "45020 & default-of-credit-card-clients & 9290 & 20 & 7 & 81 \\\\\n",
      "45021 & jannis & 10000 & 54 & 23 & 130 \\\\\n",
      "45022 & Diabetes130US & 10000 & 7 & 25 & 95 \\\\\n",
      "45026 & heloc & 7000 & 22 & 6 & 56 \\\\\n",
      "45028 & california & 10000 & 8 & 11 & 112 \\\\\n",
      "45035 & albert & 10000 & 31 & 21 & 103 \\\\\n",
      "45036 & default-of-credit-card-clients & 9290 & 21 & 8 & 79 \\\\\n",
      "45038 & road-safety & 10000 & 32 & 30 & 153 \\\\\n",
      "45039 & compas-two-years & 3476 & 11 & 5 & 43 \\\\\n",
      "\\bottomrule\n",
      "\\end{tabular}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Columns of the LaTeX table, in display order.\n",
    "latex_columns = ['openml_dataset_name', 'n_train', 'n_features', 'runtime_zeroshot_per_cv', 'runtime_finetune_per_cv']\n",
    "\n",
    "# float_format \"%.0f\" prints the float columns without decimals.\n",
    "latex_table = metadata_whytrees[latex_columns].to_latex(float_format=\"%.0f\")\n",
    "print(latex_table)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>openml_dataset_name</th>\n",
       "      <th>n_observations</th>\n",
       "      <th>n_train</th>\n",
       "      <th>n_val</th>\n",
       "      <th>n_test</th>\n",
       "      <th>n_features</th>\n",
       "      <th>n_splits</th>\n",
       "      <th>n_classes</th>\n",
       "      <th>runtime_zeroshot</th>\n",
       "      <th>runtime_zeroshot_per_cv</th>\n",
       "      <th>runtime_finetune</th>\n",
       "      <th>runtime_finetune_per_cv</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>openml_dataset_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>kr-vs-kp</td>\n",
       "      <td>3196</td>\n",
       "      <td>2556</td>\n",
       "      <td>320</td>\n",
       "      <td>320</td>\n",
       "      <td>36</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "      <td>40.8</td>\n",
       "      <td>4.1</td>\n",
       "      <td>286.0</td>\n",
       "      <td>28.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>labor</td>\n",
       "      <td>57</td>\n",
       "      <td>45</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>16</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "      <td>31.0</td>\n",
       "      <td>3.1</td>\n",
       "      <td>131.6</td>\n",
       "      <td>13.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>autos</td>\n",
       "      <td>205</td>\n",
       "      <td>163</td>\n",
       "      <td>21</td>\n",
       "      <td>21</td>\n",
       "      <td>25</td>\n",
       "      <td>10</td>\n",
       "      <td>6</td>\n",
       "      <td>33.5</td>\n",
       "      <td>3.3</td>\n",
       "      <td>112.2</td>\n",
       "      <td>11.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>lymph</td>\n",
       "      <td>148</td>\n",
       "      <td>118</td>\n",
       "      <td>15</td>\n",
       "      <td>15</td>\n",
       "      <td>18</td>\n",
       "      <td>10</td>\n",
       "      <td>4</td>\n",
       "      <td>28.5</td>\n",
       "      <td>2.8</td>\n",
       "      <td>90.8</td>\n",
       "      <td>9.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>balance-scale</td>\n",
       "      <td>625</td>\n",
       "      <td>499</td>\n",
       "      <td>63</td>\n",
       "      <td>63</td>\n",
       "      <td>4</td>\n",
       "      <td>10</td>\n",
       "      <td>3</td>\n",
       "      <td>31.5</td>\n",
       "      <td>3.1</td>\n",
       "      <td>320.8</td>\n",
       "      <td>32.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>167141</th>\n",
       "      <td>churn</td>\n",
       "      <td>5000</td>\n",
       "      <td>4000</td>\n",
       "      <td>500</td>\n",
       "      <td>500</td>\n",
       "      <td>20</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "      <td>47.6</td>\n",
       "      <td>4.8</td>\n",
       "      <td>406.0</td>\n",
       "      <td>40.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>167211</th>\n",
       "      <td>Satellite</td>\n",
       "      <td>5100</td>\n",
       "      <td>4080</td>\n",
       "      <td>510</td>\n",
       "      <td>510</td>\n",
       "      <td>36</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "      <td>53.6</td>\n",
       "      <td>5.4</td>\n",
       "      <td>396.1</td>\n",
       "      <td>39.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>168911</th>\n",
       "      <td>jasmine</td>\n",
       "      <td>2984</td>\n",
       "      <td>2386</td>\n",
       "      <td>299</td>\n",
       "      <td>299</td>\n",
       "      <td>144</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "      <td>41.4</td>\n",
       "      <td>4.1</td>\n",
       "      <td>355.0</td>\n",
       "      <td>35.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>190408</th>\n",
       "      <td>Click_prediction_small</td>\n",
       "      <td>39948</td>\n",
       "      <td>31958</td>\n",
       "      <td>3995</td>\n",
       "      <td>3995</td>\n",
       "      <td>11</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "      <td>137.7</td>\n",
       "      <td>13.8</td>\n",
       "      <td>1294.2</td>\n",
       "      <td>129.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>360948</th>\n",
       "      <td>libras</td>\n",
       "      <td>360</td>\n",
       "      <td>288</td>\n",
       "      <td>36</td>\n",
       "      <td>36</td>\n",
       "      <td>104</td>\n",
       "      <td>10</td>\n",
       "      <td>10</td>\n",
       "      <td>30.3</td>\n",
       "      <td>3.0</td>\n",
       "      <td>109.6</td>\n",
       "      <td>11.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>94 rows × 12 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                      openml_dataset_name  n_observations  n_train  n_val  \\\n",
       "openml_dataset_id                                                           \n",
       "3                                kr-vs-kp            3196     2556    320   \n",
       "4                                   labor              57       45      6   \n",
       "9                                   autos             205      163     21   \n",
       "10                                  lymph             148      118     15   \n",
       "11                          balance-scale             625      499     63   \n",
       "...                                   ...             ...      ...    ...   \n",
       "167141                              churn            5000     4000    500   \n",
       "167211                          Satellite            5100     4080    510   \n",
       "168911                            jasmine            2984     2386    299   \n",
       "190408             Click_prediction_small           39948    31958   3995   \n",
       "360948                             libras             360      288     36   \n",
       "\n",
       "                   n_test  n_features  n_splits  n_classes  runtime_zeroshot  \\\n",
       "openml_dataset_id                                                              \n",
       "3                     320          36        10          2              40.8   \n",
       "4                       6          16        10          2              31.0   \n",
       "9                      21          25        10          6              33.5   \n",
       "10                     15          18        10          4              28.5   \n",
       "11                     63           4        10          3              31.5   \n",
       "...                   ...         ...       ...        ...               ...   \n",
       "167141                500          20        10          2              47.6   \n",
       "167211                510          36        10          2              53.6   \n",
       "168911                299         144        10          2              41.4   \n",
       "190408               3995          11        10          2             137.7   \n",
       "360948                 36         104        10         10              30.3   \n",
       "\n",
       "                   runtime_zeroshot_per_cv  runtime_finetune  \\\n",
       "openml_dataset_id                                              \n",
       "3                                      4.1             286.0   \n",
       "4                                      3.1             131.6   \n",
       "9                                      3.3             112.2   \n",
       "10                                     2.8              90.8   \n",
       "11                                     3.1             320.8   \n",
       "...                                    ...               ...   \n",
       "167141                                 4.8             406.0   \n",
       "167211                                 5.4             396.1   \n",
       "168911                                 4.1             355.0   \n",
       "190408                                13.8            1294.2   \n",
       "360948                                 3.0             109.6   \n",
       "\n",
       "                   runtime_finetune_per_cv  \n",
       "openml_dataset_id                           \n",
       "3                                     28.6  \n",
       "4                                     13.2  \n",
       "9                                     11.2  \n",
       "10                                     9.1  \n",
       "11                                    32.1  \n",
       "...                                    ...  \n",
       "167141                                40.6  \n",
       "167211                                39.6  \n",
       "168911                                35.5  \n",
       "190408                               129.4  \n",
       "360948                                11.0  \n",
       "\n",
       "[94 rows x 12 columns]"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ids_used_in_paper = BENCHMARKS[BenchmarkName.TABZILLA_HAS_COMPLETED_RUNS].openml_dataset_ids\n",
    "\n",
    "metadata_tabzilla = create_metadata(BenchmarkOrigin.TABZILLA)\n",
    "metadata_tabzilla = metadata_tabzilla.loc[ids_used_in_paper]\n",
    "\n",
    "metadata_tabzilla['runtime_zeroshot'] = [tabzilla_zeroshot[dataset_id]['mean'] for dataset_id in metadata_tabzilla.index]\n",
    "metadata_tabzilla['runtime_zeroshot_per_cv'] = metadata_tabzilla['runtime_zeroshot'] / metadata_tabzilla['n_splits']\n",
    "metadata_tabzilla['runtime_finetune'] = [tabzilla_finetuned[dataset_id]['mean'] for dataset_id in metadata_tabzilla.index]\n",
    "metadata_tabzilla['runtime_finetune_per_cv'] = metadata_tabzilla['runtime_finetune'] / metadata_tabzilla['n_splits']\n",
    "\n",
    "metadata_tabzilla['runtime_zeroshot'] = metadata_tabzilla['runtime_zeroshot'].round(decimals=1)\n",
    "metadata_tabzilla['runtime_zeroshot_per_cv'] = metadata_tabzilla['runtime_zeroshot_per_cv'].round(decimals=1)\n",
    "metadata_tabzilla['runtime_finetune'] = metadata_tabzilla['runtime_finetune'].round(decimals=1)\n",
    "metadata_tabzilla['runtime_finetune_per_cv'] = metadata_tabzilla['runtime_finetune_per_cv'].round(decimals=1)\n",
    "\n",
    "metadata_tabzilla"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\\begin{tabular}{llrrrr}\n",
      "\\toprule\n",
      " & openml\\_dataset\\_name & n\\_train & n\\_features & runtime\\_zeroshot\\_per\\_cv & runtime\\_finetune\\_per\\_cv \\\\\n",
      "openml_dataset_id &  &  &  &  &  \\\\\n",
      "\\midrule\n",
      "3 & kr-vs-kp & 2556 & 36 & 4 & 29 \\\\\n",
      "4 & labor & 45 & 16 & 3 & 13 \\\\\n",
      "9 & autos & 163 & 25 & 3 & 11 \\\\\n",
      "10 & lymph & 118 & 18 & 3 & 9 \\\\\n",
      "11 & balance-scale & 499 & 4 & 3 & 32 \\\\\n",
      "12 & mfeat-factors & 1600 & 216 & 5 & 26 \\\\\n",
      "14 & mfeat-fourier & 1600 & 76 & 4 & 29 \\\\\n",
      "15 & breast-w & 559 & 9 & 3 & 19 \\\\\n",
      "16 & mfeat-karhunen & 1600 & 64 & 4 & 22 \\\\\n",
      "18 & mfeat-morphological & 1600 & 6 & 4 & 22 \\\\\n",
      "23 & cmc & 1177 & 9 & 4 & 20 \\\\\n",
      "25 & colic & 294 & 26 & 3 & 10 \\\\\n",
      "27 & colic & 294 & 22 & 3 & 11 \\\\\n",
      "29 & credit-approval & 552 & 15 & 4 & 22 \\\\\n",
      "30 & page-blocks & 4377 & 10 & 5 & 40 \\\\\n",
      "35 & dermatology & 292 & 34 & 3 & 13 \\\\\n",
      "37 & diabetes & 614 & 8 & 3 & 19 \\\\\n",
      "39 & sonar & 166 & 60 & 3 & 11 \\\\\n",
      "40 & glass & 170 & 9 & 3 & 10 \\\\\n",
      "43 & spambase & 3680 & 57 & 6 & 42 \\\\\n",
      "45 & splice & 2552 & 60 & 3 & 33 \\\\\n",
      "47 & tae & 120 & 5 & 3 & 11 \\\\\n",
      "48 & heart-c & 241 & 13 & 3 & 11 \\\\\n",
      "49 & tic-tac-toe & 766 & 9 & 3 & 20 \\\\\n",
      "50 & heart-h & 234 & 13 & 2 & 12 \\\\\n",
      "53 & vehicle & 676 & 18 & 3 & 23 \\\\\n",
      "59 & iris & 120 & 4 & 3 & 16 \\\\\n",
      "2074 & satimage & 5144 & 36 & 6 & 55 \\\\\n",
      "2079 & eucalyptus & 588 & 19 & 3 & 18 \\\\\n",
      "2867 & anneal & 718 & 38 & 3 & 26 \\\\\n",
      "3485 & scene & 1925 & 299 & 6 & 37 \\\\\n",
      "3512 & synthetic\\_control & 480 & 60 & 3 & 18 \\\\\n",
      "3540 & analcatdata\\_boxing1 & 96 & 3 & 3 & 12 \\\\\n",
      "3543 & irish & 400 & 5 & 4 & 19 \\\\\n",
      "3549 & analcatdata\\_authorship & 672 & 70 & 4 & 25 \\\\\n",
      "3560 & analcatdata\\_dmft & 637 & 4 & 3 & 20 \\\\\n",
      "3561 & profb & 536 & 9 & 3 & 16 \\\\\n",
      "3602 & visualizing\\_environmental & 88 & 3 & 3 & 10 \\\\\n",
      "3620 & fri\\_c0\\_100\\_5 & 80 & 5 & 3 & 13 \\\\\n",
      "3647 & rabe\\_266 & 96 & 2 & 3 & 14 \\\\\n",
      "3711 & elevators & 13279 & 18 & 9 & 101 \\\\\n",
      "3731 & visualizing\\_livestock & 104 & 2 & 3 & 15 \\\\\n",
      "3739 & analcatdata\\_chlamydia & 80 & 3 & 3 & 16 \\\\\n",
      "3748 & transplant & 104 & 3 & 3 & 12 \\\\\n",
      "3779 & fri\\_c3\\_100\\_5 & 80 & 5 & 3 & 13 \\\\\n",
      "3797 & socmob & 924 & 5 & 3 & 19 \\\\\n",
      "3896 & ada\\_agnostic & 3648 & 48 & 6 & 36 \\\\\n",
      "3902 & pc4 & 1166 & 37 & 4 & 23 \\\\\n",
      "3903 & pc3 & 1249 & 37 & 4 & 26 \\\\\n",
      "3904 & jm1 & 8707 & 21 & 6 & 117 \\\\\n",
      "3913 & kc2 & 416 & 21 & 3 & 26 \\\\\n",
      "3917 & kc1 & 1687 & 21 & 4 & 48 \\\\\n",
      "3918 & pc1 & 887 & 21 & 3 & 16 \\\\\n",
      "3953 & adult-census & 26048 & 14 & 14 & 175 \\\\\n",
      "9946 & wdbc & 455 & 30 & 4 & 17 \\\\\n",
      "9952 & phoneme & 4322 & 5 & 4 & 44 \\\\\n",
      "9957 & qsar-biodeg & 843 & 41 & 4 & 23 \\\\\n",
      "9960 & wall-robot-navigation & 4364 & 24 & 5 & 42 \\\\\n",
      "9964 & semeion & 1273 & 256 & 5 & 26 \\\\\n",
      "9971 & ilpd & 465 & 10 & 4 & 20 \\\\\n",
      "9978 & ozone-level-8hr & 2026 & 72 & 4 & 25 \\\\\n",
      "9984 & fertility & 80 & 9 & 3 & 13 \\\\\n",
      "10089 & acute-inflammations & 96 & 6 & 3 & 10 \\\\\n",
      "10093 & banknote-authentication & 1096 & 4 & 4 & 20 \\\\\n",
      "10101 & blood-transfusion-service-center & 598 & 4 & 3 & 20 \\\\\n",
      "14952 & PhishingWebsites & 8843 & 30 & 8 & 103 \\\\\n",
      "14954 & cylinder-bands & 432 & 37 & 4 & 16 \\\\\n",
      "14965 & bank-marketing & 36168 & 16 & 17 & 165 \\\\\n",
      "14967 & cjs & 2236 & 33 & 4 & 79 \\\\\n",
      "125920 & dresses-sales & 400 & 12 & 4 & 18 \\\\\n",
      "125921 & LED-display-domain-7digit & 400 & 7 & 4 & 16 \\\\\n",
      "145793 & yeast & 1015 & 8 & 4 & 19 \\\\\n",
      "145799 & breast-cancer & 228 & 9 & 3 & 11 \\\\\n",
      "145836 & blood-transfusion-service-center & 598 & 4 & 3 & 21 \\\\\n",
      "145847 & hill-valley & 968 & 100 & 4 & 47 \\\\\n",
      "145977 & ecoli & 268 & 7 & 3 & 12 \\\\\n",
      "145984 & ionosphere & 280 & 34 & 3 & 12 \\\\\n",
      "146024 & lung-cancer & 24 & 56 & 3 & 14 \\\\\n",
      "146063 & hayes-roth & 128 & 4 & 3 & 14 \\\\\n",
      "146065 & monks-problems-2 & 480 & 6 & 2 & 22 \\\\\n",
      "146192 & car-evaluation & 1382 & 21 & 4 & 27 \\\\\n",
      "146210 & postoperative-patient-data & 70 & 8 & 3 & 13 \\\\\n",
      "146607 & SpeedDating & 6702 & 120 & 6 & 57 \\\\\n",
      "146800 & MiceProtein & 864 & 77 & 4 & 28 \\\\\n",
      "146817 & steel-plates-fault & 1552 & 27 & 4 & 22 \\\\\n",
      "146818 & Australian & 552 & 14 & 4 & 23 \\\\\n",
      "146820 & wilt & 3871 & 5 & 4 & 30 \\\\\n",
      "146821 & car & 1382 & 6 & 4 & 30 \\\\\n",
      "167140 & dna & 2548 & 180 & 4 & 26 \\\\\n",
      "167141 & churn & 4000 & 20 & 5 & 41 \\\\\n",
      "167211 & Satellite & 4080 & 36 & 5 & 40 \\\\\n",
      "168911 & jasmine & 2386 & 144 & 4 & 36 \\\\\n",
      "190408 & Click\\_prediction\\_small & 31958 & 11 & 14 & 129 \\\\\n",
      "360948 & libras & 288 & 104 & 3 & 11 \\\\\n",
      "\\bottomrule\n",
      "\\end{tabular}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(metadata_tabzilla[['openml_dataset_name', 'n_train', 'n_features', 'runtime_zeroshot_per_cv', 'runtime_finetune_per_cv']].to_latex(float_format=\"%.0f\", escape=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1748.6, 219.2)"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "metadata_total = pd.concat([metadata_whytrees, metadata_tabzilla])\n",
    "\n",
    "max_runtime = metadata_total['runtime_finetune'].max()\n",
    "max_runtime_per_cv = metadata_total['runtime_finetune_per_cv'].max()\n",
    "\n",
    "max_runtime, max_runtime_per_cv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(988, 68.50219433198377)"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "total_number_of_cv_splits = metadata_total['n_splits'].sum()\n",
    "\n",
    "total_number_of_cv_splits, finetune_total / total_number_of_cv_splits\n",
    "\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "tabforestpfn",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
