{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>openml_dataset_name</th>\n",
       "      <th>n_observations</th>\n",
       "      <th>n_train</th>\n",
       "      <th>n_val</th>\n",
       "      <th>n_test</th>\n",
       "      <th>n_features</th>\n",
       "      <th>n_splits</th>\n",
       "      <th>n_classes</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>openml_dataset_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>44055</th>\n",
       "      <td>analcatdata_supreme</td>\n",
       "      <td>4052</td>\n",
       "      <td>2836</td>\n",
       "      <td>364</td>\n",
       "      <td>852</td>\n",
       "      <td>7</td>\n",
       "      <td>5</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44056</th>\n",
       "      <td>visualizing_soil</td>\n",
       "      <td>8641</td>\n",
       "      <td>6048</td>\n",
       "      <td>777</td>\n",
       "      <td>1816</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44059</th>\n",
       "      <td>diamonds</td>\n",
       "      <td>53940</td>\n",
       "      <td>10000</td>\n",
       "      <td>13182</td>\n",
       "      <td>30758</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "      <td>11602</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44061</th>\n",
       "      <td>Mercedes_Benz_Greener_Manufacturing</td>\n",
       "      <td>4209</td>\n",
       "      <td>2946</td>\n",
       "      <td>378</td>\n",
       "      <td>885</td>\n",
       "      <td>359</td>\n",
       "      <td>5</td>\n",
       "      <td>2545</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44062</th>\n",
       "      <td>Brazilian_houses</td>\n",
       "      <td>10692</td>\n",
       "      <td>7484</td>\n",
       "      <td>962</td>\n",
       "      <td>2246</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>5751</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44063</th>\n",
       "      <td>Bike_Sharing_Demand</td>\n",
       "      <td>17379</td>\n",
       "      <td>10000</td>\n",
       "      <td>2213</td>\n",
       "      <td>5166</td>\n",
       "      <td>11</td>\n",
       "      <td>2</td>\n",
       "      <td>869</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44065</th>\n",
       "      <td>nyc-taxi-green-dec-2016</td>\n",
       "      <td>581835</td>\n",
       "      <td>10000</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "      <td>16</td>\n",
       "      <td>1</td>\n",
       "      <td>1811</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44066</th>\n",
       "      <td>house_sales</td>\n",
       "      <td>21613</td>\n",
       "      <td>10000</td>\n",
       "      <td>3483</td>\n",
       "      <td>8130</td>\n",
       "      <td>17</td>\n",
       "      <td>1</td>\n",
       "      <td>4028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44068</th>\n",
       "      <td>particulate-matter-ukair-2017</td>\n",
       "      <td>394299</td>\n",
       "      <td>10000</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>21599</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44069</th>\n",
       "      <td>SGEMM_GPU_kernel_performance</td>\n",
       "      <td>241600</td>\n",
       "      <td>10000</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "      <td>58161</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44089</th>\n",
       "      <td>credit</td>\n",
       "      <td>16714</td>\n",
       "      <td>10000</td>\n",
       "      <td>2014</td>\n",
       "      <td>4700</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44120</th>\n",
       "      <td>electricity</td>\n",
       "      <td>38474</td>\n",
       "      <td>10000</td>\n",
       "      <td>8542</td>\n",
       "      <td>19932</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44121</th>\n",
       "      <td>covertype</td>\n",
       "      <td>566602</td>\n",
       "      <td>10000</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "      <td>10</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44122</th>\n",
       "      <td>pol</td>\n",
       "      <td>10082</td>\n",
       "      <td>7057</td>\n",
       "      <td>907</td>\n",
       "      <td>2118</td>\n",
       "      <td>26</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44123</th>\n",
       "      <td>house_16H</td>\n",
       "      <td>13488</td>\n",
       "      <td>9441</td>\n",
       "      <td>1214</td>\n",
       "      <td>2833</td>\n",
       "      <td>16</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44125</th>\n",
       "      <td>MagicTelescope</td>\n",
       "      <td>13376</td>\n",
       "      <td>9363</td>\n",
       "      <td>1203</td>\n",
       "      <td>2810</td>\n",
       "      <td>10</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44126</th>\n",
       "      <td>bank-marketing</td>\n",
       "      <td>10578</td>\n",
       "      <td>7404</td>\n",
       "      <td>952</td>\n",
       "      <td>2222</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44128</th>\n",
       "      <td>MiniBooNE</td>\n",
       "      <td>72998</td>\n",
       "      <td>10000</td>\n",
       "      <td>18899</td>\n",
       "      <td>44099</td>\n",
       "      <td>50</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44129</th>\n",
       "      <td>Higgs</td>\n",
       "      <td>940160</td>\n",
       "      <td>10000</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "      <td>24</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44130</th>\n",
       "      <td>eye_movements</td>\n",
       "      <td>7608</td>\n",
       "      <td>5325</td>\n",
       "      <td>684</td>\n",
       "      <td>1599</td>\n",
       "      <td>20</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44132</th>\n",
       "      <td>cpu_act</td>\n",
       "      <td>8192</td>\n",
       "      <td>5734</td>\n",
       "      <td>737</td>\n",
       "      <td>1721</td>\n",
       "      <td>21</td>\n",
       "      <td>3</td>\n",
       "      <td>56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44133</th>\n",
       "      <td>pol</td>\n",
       "      <td>15000</td>\n",
       "      <td>10000</td>\n",
       "      <td>1500</td>\n",
       "      <td>3500</td>\n",
       "      <td>26</td>\n",
       "      <td>2</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44134</th>\n",
       "      <td>elevators</td>\n",
       "      <td>16599</td>\n",
       "      <td>10000</td>\n",
       "      <td>1979</td>\n",
       "      <td>4620</td>\n",
       "      <td>16</td>\n",
       "      <td>2</td>\n",
       "      <td>61</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44136</th>\n",
       "      <td>wine_quality</td>\n",
       "      <td>6497</td>\n",
       "      <td>4547</td>\n",
       "      <td>585</td>\n",
       "      <td>1365</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44137</th>\n",
       "      <td>Ailerons</td>\n",
       "      <td>13750</td>\n",
       "      <td>9625</td>\n",
       "      <td>1237</td>\n",
       "      <td>2888</td>\n",
       "      <td>33</td>\n",
       "      <td>3</td>\n",
       "      <td>35</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44138</th>\n",
       "      <td>houses</td>\n",
       "      <td>20640</td>\n",
       "      <td>10000</td>\n",
       "      <td>3192</td>\n",
       "      <td>7448</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>3842</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44139</th>\n",
       "      <td>house_16H</td>\n",
       "      <td>22784</td>\n",
       "      <td>10000</td>\n",
       "      <td>3835</td>\n",
       "      <td>8949</td>\n",
       "      <td>16</td>\n",
       "      <td>1</td>\n",
       "      <td>2045</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44140</th>\n",
       "      <td>diamonds</td>\n",
       "      <td>53940</td>\n",
       "      <td>10000</td>\n",
       "      <td>13182</td>\n",
       "      <td>30758</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>11602</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44141</th>\n",
       "      <td>Brazilian_houses</td>\n",
       "      <td>10692</td>\n",
       "      <td>7484</td>\n",
       "      <td>962</td>\n",
       "      <td>2246</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>5751</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44142</th>\n",
       "      <td>Bike_Sharing_Demand</td>\n",
       "      <td>17379</td>\n",
       "      <td>10000</td>\n",
       "      <td>2213</td>\n",
       "      <td>5166</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>869</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44143</th>\n",
       "      <td>nyc-taxi-green-dec-2016</td>\n",
       "      <td>581835</td>\n",
       "      <td>10000</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "      <td>1811</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44144</th>\n",
       "      <td>house_sales</td>\n",
       "      <td>21613</td>\n",
       "      <td>10000</td>\n",
       "      <td>3483</td>\n",
       "      <td>8130</td>\n",
       "      <td>15</td>\n",
       "      <td>1</td>\n",
       "      <td>4028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44145</th>\n",
       "      <td>sulfur</td>\n",
       "      <td>10081</td>\n",
       "      <td>7056</td>\n",
       "      <td>907</td>\n",
       "      <td>2118</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>9368</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44146</th>\n",
       "      <td>medical_charges</td>\n",
       "      <td>163065</td>\n",
       "      <td>10000</td>\n",
       "      <td>45919</td>\n",
       "      <td>50000</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>154891</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44147</th>\n",
       "      <td>MiamiHousing2016</td>\n",
       "      <td>13932</td>\n",
       "      <td>9752</td>\n",
       "      <td>1254</td>\n",
       "      <td>2926</td>\n",
       "      <td>13</td>\n",
       "      <td>3</td>\n",
       "      <td>2111</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44148</th>\n",
       "      <td>superconduct</td>\n",
       "      <td>21263</td>\n",
       "      <td>10000</td>\n",
       "      <td>3378</td>\n",
       "      <td>7885</td>\n",
       "      <td>79</td>\n",
       "      <td>1</td>\n",
       "      <td>3007</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44156</th>\n",
       "      <td>electricity</td>\n",
       "      <td>38474</td>\n",
       "      <td>10000</td>\n",
       "      <td>8542</td>\n",
       "      <td>19932</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44157</th>\n",
       "      <td>eye_movements</td>\n",
       "      <td>7608</td>\n",
       "      <td>5325</td>\n",
       "      <td>684</td>\n",
       "      <td>1599</td>\n",
       "      <td>23</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44159</th>\n",
       "      <td>covertype</td>\n",
       "      <td>423680</td>\n",
       "      <td>10000</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "      <td>54</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45019</th>\n",
       "      <td>Bioresponse</td>\n",
       "      <td>3434</td>\n",
       "      <td>2403</td>\n",
       "      <td>309</td>\n",
       "      <td>722</td>\n",
       "      <td>419</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45020</th>\n",
       "      <td>default-of-credit-card-clients</td>\n",
       "      <td>13272</td>\n",
       "      <td>9290</td>\n",
       "      <td>1194</td>\n",
       "      <td>2788</td>\n",
       "      <td>20</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45021</th>\n",
       "      <td>jannis</td>\n",
       "      <td>57580</td>\n",
       "      <td>10000</td>\n",
       "      <td>14274</td>\n",
       "      <td>33306</td>\n",
       "      <td>54</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45022</th>\n",
       "      <td>Diabetes130US</td>\n",
       "      <td>71090</td>\n",
       "      <td>10000</td>\n",
       "      <td>18327</td>\n",
       "      <td>42763</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45026</th>\n",
       "      <td>heloc</td>\n",
       "      <td>10000</td>\n",
       "      <td>7000</td>\n",
       "      <td>900</td>\n",
       "      <td>2100</td>\n",
       "      <td>22</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45028</th>\n",
       "      <td>california</td>\n",
       "      <td>20634</td>\n",
       "      <td>10000</td>\n",
       "      <td>3190</td>\n",
       "      <td>7444</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45032</th>\n",
       "      <td>yprop_4_1</td>\n",
       "      <td>8885</td>\n",
       "      <td>6219</td>\n",
       "      <td>799</td>\n",
       "      <td>1867</td>\n",
       "      <td>42</td>\n",
       "      <td>3</td>\n",
       "      <td>1336</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45033</th>\n",
       "      <td>abalone</td>\n",
       "      <td>4177</td>\n",
       "      <td>2923</td>\n",
       "      <td>376</td>\n",
       "      <td>878</td>\n",
       "      <td>7</td>\n",
       "      <td>5</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45034</th>\n",
       "      <td>delays_zurich_transport</td>\n",
       "      <td>5465575</td>\n",
       "      <td>10000</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>4082</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45035</th>\n",
       "      <td>albert</td>\n",
       "      <td>58252</td>\n",
       "      <td>10000</td>\n",
       "      <td>14475</td>\n",
       "      <td>33777</td>\n",
       "      <td>31</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45036</th>\n",
       "      <td>default-of-credit-card-clients</td>\n",
       "      <td>13272</td>\n",
       "      <td>9290</td>\n",
       "      <td>1194</td>\n",
       "      <td>2788</td>\n",
       "      <td>21</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45038</th>\n",
       "      <td>road-safety</td>\n",
       "      <td>111762</td>\n",
       "      <td>10000</td>\n",
       "      <td>30528</td>\n",
       "      <td>50000</td>\n",
       "      <td>32</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45039</th>\n",
       "      <td>compas-two-years</td>\n",
       "      <td>4966</td>\n",
       "      <td>3476</td>\n",
       "      <td>447</td>\n",
       "      <td>1043</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45041</th>\n",
       "      <td>topo_2_1</td>\n",
       "      <td>8885</td>\n",
       "      <td>6219</td>\n",
       "      <td>799</td>\n",
       "      <td>1867</td>\n",
       "      <td>255</td>\n",
       "      <td>3</td>\n",
       "      <td>1336</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45042</th>\n",
       "      <td>abalone</td>\n",
       "      <td>4177</td>\n",
       "      <td>2923</td>\n",
       "      <td>376</td>\n",
       "      <td>878</td>\n",
       "      <td>8</td>\n",
       "      <td>5</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45043</th>\n",
       "      <td>seattlecrime6</td>\n",
       "      <td>52031</td>\n",
       "      <td>10000</td>\n",
       "      <td>12609</td>\n",
       "      <td>29422</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45045</th>\n",
       "      <td>delays_zurich_transport</td>\n",
       "      <td>5465575</td>\n",
       "      <td>10000</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "      <td>11</td>\n",
       "      <td>1</td>\n",
       "      <td>4082</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45046</th>\n",
       "      <td>Allstate_Claims_Severity</td>\n",
       "      <td>188318</td>\n",
       "      <td>10000</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "      <td>124</td>\n",
       "      <td>1</td>\n",
       "      <td>158223</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45047</th>\n",
       "      <td>Airlines_DepDelay_1M</td>\n",
       "      <td>1000000</td>\n",
       "      <td>10000</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>629</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45048</th>\n",
       "      <td>medical_charges</td>\n",
       "      <td>163065</td>\n",
       "      <td>10000</td>\n",
       "      <td>45919</td>\n",
       "      <td>50000</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>154891</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                   openml_dataset_name  n_observations  \\\n",
       "openml_dataset_id                                                        \n",
       "44055                              analcatdata_supreme            4052   \n",
       "44056                                 visualizing_soil            8641   \n",
       "44059                                         diamonds           53940   \n",
       "44061              Mercedes_Benz_Greener_Manufacturing            4209   \n",
       "44062                                 Brazilian_houses           10692   \n",
       "44063                              Bike_Sharing_Demand           17379   \n",
       "44065                          nyc-taxi-green-dec-2016          581835   \n",
       "44066                                      house_sales           21613   \n",
       "44068                    particulate-matter-ukair-2017          394299   \n",
       "44069                     SGEMM_GPU_kernel_performance          241600   \n",
       "44089                                           credit           16714   \n",
       "44120                                      electricity           38474   \n",
       "44121                                        covertype          566602   \n",
       "44122                                              pol           10082   \n",
       "44123                                        house_16H           13488   \n",
       "44125                                   MagicTelescope           13376   \n",
       "44126                                   bank-marketing           10578   \n",
       "44128                                        MiniBooNE           72998   \n",
       "44129                                            Higgs          940160   \n",
       "44130                                    eye_movements            7608   \n",
       "44132                                          cpu_act            8192   \n",
       "44133                                              pol           15000   \n",
       "44134                                        elevators           16599   \n",
       "44136                                     wine_quality            6497   \n",
       "44137                                         Ailerons           13750   \n",
       "44138                                           houses           20640   \n",
       "44139                                        house_16H           22784   \n",
       "44140                                         diamonds           53940   \n",
       "44141                                 Brazilian_houses           10692   \n",
       "44142                              Bike_Sharing_Demand           17379   \n",
       "44143                          nyc-taxi-green-dec-2016          581835   \n",
       "44144                                      house_sales           21613   \n",
       "44145                                           sulfur           10081   \n",
       "44146                                  medical_charges          163065   \n",
       "44147                                 MiamiHousing2016           13932   \n",
       "44148                                     superconduct           21263   \n",
       "44156                                      electricity           38474   \n",
       "44157                                    eye_movements            7608   \n",
       "44159                                        covertype          423680   \n",
       "45019                                      Bioresponse            3434   \n",
       "45020                   default-of-credit-card-clients           13272   \n",
       "45021                                           jannis           57580   \n",
       "45022                                    Diabetes130US           71090   \n",
       "45026                                            heloc           10000   \n",
       "45028                                       california           20634   \n",
       "45032                                        yprop_4_1            8885   \n",
       "45033                                          abalone            4177   \n",
       "45034                          delays_zurich_transport         5465575   \n",
       "45035                                           albert           58252   \n",
       "45036                   default-of-credit-card-clients           13272   \n",
       "45038                                      road-safety          111762   \n",
       "45039                                 compas-two-years            4966   \n",
       "45041                                         topo_2_1            8885   \n",
       "45042                                          abalone            4177   \n",
       "45043                                    seattlecrime6           52031   \n",
       "45045                          delays_zurich_transport         5465575   \n",
       "45046                         Allstate_Claims_Severity          188318   \n",
       "45047                             Airlines_DepDelay_1M         1000000   \n",
       "45048                                  medical_charges          163065   \n",
       "\n",
       "                   n_train  n_val  n_test  n_features  n_splits  n_classes  \n",
       "openml_dataset_id                                                           \n",
       "44055                 2836    364     852           7         5         10  \n",
       "44056                 6048    777    1816           4         3         40  \n",
       "44059                10000  13182   30758           9         1      11602  \n",
       "44061                 2946    378     885         359         5       2545  \n",
       "44062                 7484    962    2246          11         3       5751  \n",
       "44063                10000   2213    5166          11         2        869  \n",
       "44065                10000  50000   50000          16         1       1811  \n",
       "44066                10000   3483    8130          17         1       4028  \n",
       "44068                10000  50000   50000           6         1      21599  \n",
       "44069                10000  50000   50000           9         1      58161  \n",
       "44089                10000   2014    4700          10         2          2  \n",
       "44120                10000   8542   19932           7         1          2  \n",
       "44121                10000  50000   50000          10         1          2  \n",
       "44122                 7057    907    2118          26         3          2  \n",
       "44123                 9441   1214    2833          16         3          2  \n",
       "44125                 9363   1203    2810          10         3          2  \n",
       "44126                 7404    952    2222           7         3          2  \n",
       "44128                10000  18899   44099          50         1          2  \n",
       "44129                10000  50000   50000          24         1          2  \n",
       "44130                 5325    684    1599          20         3          2  \n",
       "44132                 5734    737    1721          21         3         56  \n",
       "44133                10000   1500    3500          26         2         11  \n",
       "44134                10000   1979    4620          16         2         61  \n",
       "44136                 4547    585    1365          11         3          7  \n",
       "44137                 9625   1237    2888          33         3         35  \n",
       "44138                10000   3192    7448           8         1       3842  \n",
       "44139                10000   3835    8949          16         1       2045  \n",
       "44140                10000  13182   30758           6         1      11602  \n",
       "44141                 7484    962    2246           8         3       5751  \n",
       "44142                10000   2213    5166           6         2        869  \n",
       "44143                10000  50000   50000           9         1       1811  \n",
       "44144                10000   3483    8130          15         1       4028  \n",
       "44145                 7056    907    2118           6         3       9368  \n",
       "44146                10000  45919   50000           3         1     154891  \n",
       "44147                 9752   1254    2926          13         3       2111  \n",
       "44148                10000   3378    7885          79         1       3007  \n",
       "44156                10000   8542   19932           8         1          2  \n",
       "44157                 5325    684    1599          23         3          2  \n",
       "44159                10000  50000   50000          54         1          2  \n",
       "45019                 2403    309     722         419         5          2  \n",
       "45020                 9290   1194    2788          20         3          2  \n",
       "45021                10000  14274   33306          54         1          2  \n",
       "45022                10000  18327   42763           7         1          2  \n",
       "45026                 7000    900    2100          22         3          2  \n",
       "45028                10000   3190    7444           8         1          2  \n",
       "45032                 6219    799    1867          42         3       1336  \n",
       "45033                 2923    376     878           7         5         28  \n",
       "45034                10000  50000   50000           8         1       4082  \n",
       "45035                10000  14475   33777          31         1          2  \n",
       "45036                 9290   1194    2788          21         3          2  \n",
       "45038                10000  30528   50000          32         1          2  \n",
       "45039                 3476    447    1043          11         3          2  \n",
       "45041                 6219    799    1867         255         3       1336  \n",
       "45042                 2923    376     878           8         5         28  \n",
       "45043                10000  12609   29422           4         1         29  \n",
       "45045                10000  50000   50000          11         1       4082  \n",
       "45046                10000  50000   50000         124         1     158223  \n",
       "45047                10000  50000   50000           5         1        629  \n",
       "45048                10000  45919   50000           3         1     154891  "
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import xarray as xr\n",
    "import pandas as pd\n",
    "from pathlib import Path\n",
    "\n",
    "from tabicl.core.enums import BenchmarkOrigin\n",
    "from tabicl.data.metadata import create_metadata\n",
    "\n",
    "# Build the metadata table for the WHYTREES benchmark origin and display it.\n",
    "metadata = create_metadata(\n",
    "    benchmark_origin=BenchmarkOrigin.WHYTREES,\n",
    ")\n",
    "metadata\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>openml_dataset_name</th>\n",
       "      <th>n_observations</th>\n",
       "      <th>n_train</th>\n",
       "      <th>n_val</th>\n",
       "      <th>n_test</th>\n",
       "      <th>n_features</th>\n",
       "      <th>n_splits</th>\n",
       "      <th>n_classes</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>openml_dataset_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>44089</th>\n",
       "      <td>credit</td>\n",
       "      <td>16714</td>\n",
       "      <td>10000</td>\n",
       "      <td>2014</td>\n",
       "      <td>4700</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44120</th>\n",
       "      <td>electricity</td>\n",
       "      <td>38474</td>\n",
       "      <td>10000</td>\n",
       "      <td>8542</td>\n",
       "      <td>19932</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44121</th>\n",
       "      <td>covertype</td>\n",
       "      <td>566602</td>\n",
       "      <td>10000</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "      <td>10</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44122</th>\n",
       "      <td>pol</td>\n",
       "      <td>10082</td>\n",
       "      <td>7057</td>\n",
       "      <td>907</td>\n",
       "      <td>2118</td>\n",
       "      <td>26</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44123</th>\n",
       "      <td>house_16H</td>\n",
       "      <td>13488</td>\n",
       "      <td>9441</td>\n",
       "      <td>1214</td>\n",
       "      <td>2833</td>\n",
       "      <td>16</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44125</th>\n",
       "      <td>MagicTelescope</td>\n",
       "      <td>13376</td>\n",
       "      <td>9363</td>\n",
       "      <td>1203</td>\n",
       "      <td>2810</td>\n",
       "      <td>10</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44126</th>\n",
       "      <td>bank-marketing</td>\n",
       "      <td>10578</td>\n",
       "      <td>7404</td>\n",
       "      <td>952</td>\n",
       "      <td>2222</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44128</th>\n",
       "      <td>MiniBooNE</td>\n",
       "      <td>72998</td>\n",
       "      <td>10000</td>\n",
       "      <td>18899</td>\n",
       "      <td>44099</td>\n",
       "      <td>50</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44129</th>\n",
       "      <td>Higgs</td>\n",
       "      <td>940160</td>\n",
       "      <td>10000</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "      <td>24</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44130</th>\n",
       "      <td>eye_movements</td>\n",
       "      <td>7608</td>\n",
       "      <td>5325</td>\n",
       "      <td>684</td>\n",
       "      <td>1599</td>\n",
       "      <td>20</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44156</th>\n",
       "      <td>electricity</td>\n",
       "      <td>38474</td>\n",
       "      <td>10000</td>\n",
       "      <td>8542</td>\n",
       "      <td>19932</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44157</th>\n",
       "      <td>eye_movements</td>\n",
       "      <td>7608</td>\n",
       "      <td>5325</td>\n",
       "      <td>684</td>\n",
       "      <td>1599</td>\n",
       "      <td>23</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44159</th>\n",
       "      <td>covertype</td>\n",
       "      <td>423680</td>\n",
       "      <td>10000</td>\n",
       "      <td>50000</td>\n",
       "      <td>50000</td>\n",
       "      <td>54</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45019</th>\n",
       "      <td>Bioresponse</td>\n",
       "      <td>3434</td>\n",
       "      <td>2403</td>\n",
       "      <td>309</td>\n",
       "      <td>722</td>\n",
       "      <td>419</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45020</th>\n",
       "      <td>default-of-credit-card-clients</td>\n",
       "      <td>13272</td>\n",
       "      <td>9290</td>\n",
       "      <td>1194</td>\n",
       "      <td>2788</td>\n",
       "      <td>20</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45021</th>\n",
       "      <td>jannis</td>\n",
       "      <td>57580</td>\n",
       "      <td>10000</td>\n",
       "      <td>14274</td>\n",
       "      <td>33306</td>\n",
       "      <td>54</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45022</th>\n",
       "      <td>Diabetes130US</td>\n",
       "      <td>71090</td>\n",
       "      <td>10000</td>\n",
       "      <td>18327</td>\n",
       "      <td>42763</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45026</th>\n",
       "      <td>heloc</td>\n",
       "      <td>10000</td>\n",
       "      <td>7000</td>\n",
       "      <td>900</td>\n",
       "      <td>2100</td>\n",
       "      <td>22</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45028</th>\n",
       "      <td>california</td>\n",
       "      <td>20634</td>\n",
       "      <td>10000</td>\n",
       "      <td>3190</td>\n",
       "      <td>7444</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45035</th>\n",
       "      <td>albert</td>\n",
       "      <td>58252</td>\n",
       "      <td>10000</td>\n",
       "      <td>14475</td>\n",
       "      <td>33777</td>\n",
       "      <td>31</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45036</th>\n",
       "      <td>default-of-credit-card-clients</td>\n",
       "      <td>13272</td>\n",
       "      <td>9290</td>\n",
       "      <td>1194</td>\n",
       "      <td>2788</td>\n",
       "      <td>21</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45038</th>\n",
       "      <td>road-safety</td>\n",
       "      <td>111762</td>\n",
       "      <td>10000</td>\n",
       "      <td>30528</td>\n",
       "      <td>50000</td>\n",
       "      <td>32</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45039</th>\n",
       "      <td>compas-two-years</td>\n",
       "      <td>4966</td>\n",
       "      <td>3476</td>\n",
       "      <td>447</td>\n",
       "      <td>1043</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                              openml_dataset_name  n_observations  n_train  \\\n",
       "openml_dataset_id                                                            \n",
       "44089                                      credit           16714    10000   \n",
       "44120                                 electricity           38474    10000   \n",
       "44121                                   covertype          566602    10000   \n",
       "44122                                         pol           10082     7057   \n",
       "44123                                   house_16H           13488     9441   \n",
       "44125                              MagicTelescope           13376     9363   \n",
       "44126                              bank-marketing           10578     7404   \n",
       "44128                                   MiniBooNE           72998    10000   \n",
       "44129                                       Higgs          940160    10000   \n",
       "44130                               eye_movements            7608     5325   \n",
       "44156                                 electricity           38474    10000   \n",
       "44157                               eye_movements            7608     5325   \n",
       "44159                                   covertype          423680    10000   \n",
       "45019                                 Bioresponse            3434     2403   \n",
       "45020              default-of-credit-card-clients           13272     9290   \n",
       "45021                                      jannis           57580    10000   \n",
       "45022                               Diabetes130US           71090    10000   \n",
       "45026                                       heloc           10000     7000   \n",
       "45028                                  california           20634    10000   \n",
       "45035                                      albert           58252    10000   \n",
       "45036              default-of-credit-card-clients           13272     9290   \n",
       "45038                                 road-safety          111762    10000   \n",
       "45039                            compas-two-years            4966     3476   \n",
       "\n",
       "                   n_val  n_test  n_features  n_splits  n_classes  \n",
       "openml_dataset_id                                                  \n",
       "44089               2014    4700          10         2          2  \n",
       "44120               8542   19932           7         1          2  \n",
       "44121              50000   50000          10         1          2  \n",
       "44122                907    2118          26         3          2  \n",
       "44123               1214    2833          16         3          2  \n",
       "44125               1203    2810          10         3          2  \n",
       "44126                952    2222           7         3          2  \n",
       "44128              18899   44099          50         1          2  \n",
       "44129              50000   50000          24         1          2  \n",
       "44130                684    1599          20         3          2  \n",
       "44156               8542   19932           8         1          2  \n",
       "44157                684    1599          23         3          2  \n",
       "44159              50000   50000          54         1          2  \n",
       "45019                309     722         419         5          2  \n",
       "45020               1194    2788          20         3          2  \n",
       "45021              14274   33306          54         1          2  \n",
       "45022              18327   42763           7         1          2  \n",
       "45026                900    2100          22         3          2  \n",
       "45028               3190    7444           8         1          2  \n",
       "45035              14475   33777          31         1          2  \n",
       "45036               1194    2788          21         3          2  \n",
       "45038              30528   50000          32         1          2  \n",
       "45039                447    1043          11         3          2  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from tabicl.data.benchmarks import BENCHMARKS, BenchmarkName\n",
    "\n",
    "# OpenML dataset ids of the numerical and the categorical classification benchmarks.\n",
    "ids_used_in_paper_numerical = BENCHMARKS[BenchmarkName.NUMERICAL_CLASSIFICATION].openml_dataset_ids\n",
    "ids_used_in_paper_categorical = BENCHMARKS[BenchmarkName.CATEGORICAL_CLASSIFICATION].openml_dataset_ids\n",
    "\n",
    "# Concatenation yields a new list; sorted() returns it in ascending id order.\n",
    "ids_used_in_paper = sorted(ids_used_in_paper_numerical + ids_used_in_paper_categorical)\n",
    "\n",
    "# Keep only the rows of the selected datasets and display the reduced table.\n",
    "metadata = metadata.loc[ids_used_in_paper]\n",
    "metadata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shorten dataset names longer than 15 characters to their first 15 characters plus '...'.\n",
    "dataset_names = metadata['openml_dataset_name']\n",
    "is_long_name = dataset_names.str.len() > 15\n",
    "metadata.loc[is_long_name, 'openml_dataset_name'] = dataset_names[is_long_name].str[:15] + '...'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\\begin{tabular}{llrrrrrrr}\n",
      "\\toprule\n",
      " & openml\\_dataset\\_name & n\\_observations & n\\_train & n\\_val & n\\_test & n\\_features & n\\_splits & n\\_classes \\\\\n",
      "openml_dataset_id &  &  &  &  &  &  &  &  \\\\\n",
      "\\midrule\n",
      "44089 & credit & 16714 & 10000 & 2014 & 4700 & 10 & 2 & 2 \\\\\n",
      "44120 & electricity & 38474 & 10000 & 8542 & 19932 & 7 & 1 & 2 \\\\\n",
      "44121 & covertype & 566602 & 10000 & 50000 & 50000 & 10 & 1 & 2 \\\\\n",
      "44122 & pol & 10082 & 7057 & 907 & 2118 & 26 & 3 & 2 \\\\\n",
      "44123 & house\\_16H & 13488 & 9441 & 1214 & 2833 & 16 & 3 & 2 \\\\\n",
      "44125 & MagicTelescope & 13376 & 9363 & 1203 & 2810 & 10 & 3 & 2 \\\\\n",
      "44126 & bank-marketing & 10578 & 7404 & 952 & 2222 & 7 & 3 & 2 \\\\\n",
      "44128 & MiniBooNE & 72998 & 10000 & 18899 & 44099 & 50 & 1 & 2 \\\\\n",
      "44129 & Higgs & 940160 & 10000 & 50000 & 50000 & 24 & 1 & 2 \\\\\n",
      "44130 & eye\\_movements & 7608 & 5325 & 684 & 1599 & 20 & 3 & 2 \\\\\n",
      "44156 & electricity & 38474 & 10000 & 8542 & 19932 & 8 & 1 & 2 \\\\\n",
      "44157 & eye\\_movements & 7608 & 5325 & 684 & 1599 & 23 & 3 & 2 \\\\\n",
      "44159 & covertype & 423680 & 10000 & 50000 & 50000 & 54 & 1 & 2 \\\\\n",
      "45019 & Bioresponse & 3434 & 2403 & 309 & 722 & 419 & 5 & 2 \\\\\n",
      "45020 & default-of-cred... & 13272 & 9290 & 1194 & 2788 & 20 & 3 & 2 \\\\\n",
      "45021 & jannis & 57580 & 10000 & 14274 & 33306 & 54 & 1 & 2 \\\\\n",
      "45022 & Diabetes130US & 71090 & 10000 & 18327 & 42763 & 7 & 1 & 2 \\\\\n",
      "45026 & heloc & 10000 & 7000 & 900 & 2100 & 22 & 3 & 2 \\\\\n",
      "45028 & california & 20634 & 10000 & 3190 & 7444 & 8 & 1 & 2 \\\\\n",
      "45035 & albert & 58252 & 10000 & 14475 & 33777 & 31 & 1 & 2 \\\\\n",
      "45036 & default-of-cred... & 13272 & 9290 & 1194 & 2788 & 21 & 3 & 2 \\\\\n",
      "45038 & road-safety & 111762 & 10000 & 30528 & 50000 & 32 & 1 & 2 \\\\\n",
      "45039 & compas-two-year... & 4966 & 3476 & 447 & 1043 & 11 & 3 & 2 \\\\\n",
      "\\bottomrule\n",
      "\\end{tabular}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Export the table as LaTeX. The index is first turned into a regular column because\n",
    "# to_latex(escape=True) emitted the index name `openml_dataset_id` unescaped in its own\n",
    "# header row, and a bare underscore is an error in LaTeX text mode. As a column header\n",
    "# the name goes through the same escaping as the other column names.\n",
    "print(metadata.reset_index().to_latex(index=False, escape=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>openml_dataset_name</th>\n",
       "      <th>n_observations</th>\n",
       "      <th>n_train</th>\n",
       "      <th>n_val</th>\n",
       "      <th>n_test</th>\n",
       "      <th>n_features</th>\n",
       "      <th>n_splits</th>\n",
       "      <th>n_classes</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>openml_dataset_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>kr-vs-kp</td>\n",
       "      <td>3196</td>\n",
       "      <td>2556</td>\n",
       "      <td>320</td>\n",
       "      <td>320</td>\n",
       "      <td>36</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>labor</td>\n",
       "      <td>57</td>\n",
       "      <td>45</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>16</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>arrhythmia</td>\n",
       "      <td>452</td>\n",
       "      <td>360</td>\n",
       "      <td>46</td>\n",
       "      <td>46</td>\n",
       "      <td>279</td>\n",
       "      <td>10</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>letter</td>\n",
       "      <td>20000</td>\n",
       "      <td>16000</td>\n",
       "      <td>2000</td>\n",
       "      <td>2000</td>\n",
       "      <td>16</td>\n",
       "      <td>10</td>\n",
       "      <td>26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>audiology</td>\n",
       "      <td>226</td>\n",
       "      <td>180</td>\n",
       "      <td>23</td>\n",
       "      <td>23</td>\n",
       "      <td>69</td>\n",
       "      <td>10</td>\n",
       "      <td>24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>189355</th>\n",
       "      <td>dionis</td>\n",
       "      <td>416188</td>\n",
       "      <td>332950</td>\n",
       "      <td>41619</td>\n",
       "      <td>41619</td>\n",
       "      <td>60</td>\n",
       "      <td>10</td>\n",
       "      <td>355</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>189356</th>\n",
       "      <td>albert</td>\n",
       "      <td>425240</td>\n",
       "      <td>340192</td>\n",
       "      <td>42524</td>\n",
       "      <td>42524</td>\n",
       "      <td>78</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>190408</th>\n",
       "      <td>Click_prediction_small</td>\n",
       "      <td>39948</td>\n",
       "      <td>31958</td>\n",
       "      <td>3995</td>\n",
       "      <td>3995</td>\n",
       "      <td>11</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>190410</th>\n",
       "      <td>philippine</td>\n",
       "      <td>5832</td>\n",
       "      <td>4664</td>\n",
       "      <td>584</td>\n",
       "      <td>584</td>\n",
       "      <td>308</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>360948</th>\n",
       "      <td>libras</td>\n",
       "      <td>360</td>\n",
       "      <td>288</td>\n",
       "      <td>36</td>\n",
       "      <td>36</td>\n",
       "      <td>104</td>\n",
       "      <td>10</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>176 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                      openml_dataset_name  n_observations  n_train  n_val  \\\n",
       "openml_dataset_id                                                           \n",
       "3                                kr-vs-kp            3196     2556    320   \n",
       "4                                   labor              57       45      6   \n",
       "5                              arrhythmia             452      360     46   \n",
       "6                                  letter           20000    16000   2000   \n",
       "7                               audiology             226      180     23   \n",
       "...                                   ...             ...      ...    ...   \n",
       "189355                             dionis          416188   332950  41619   \n",
       "189356                             albert          425240   340192  42524   \n",
       "190408             Click_prediction_small           39948    31958   3995   \n",
       "190410                         philippine            5832     4664    584   \n",
       "360948                             libras             360      288     36   \n",
       "\n",
       "                   n_test  n_features  n_splits  n_classes  \n",
       "openml_dataset_id                                           \n",
       "3                     320          36        10          2  \n",
       "4                       6          16        10          2  \n",
       "5                      46         279        10         13  \n",
       "6                    2000          16        10         26  \n",
       "7                      23          69        10         24  \n",
       "...                   ...         ...       ...        ...  \n",
       "189355              41619          60        10        355  \n",
       "189356              42524          78        10          2  \n",
       "190408               3995          11        10          2  \n",
       "190410                584         308        10          2  \n",
       "360948                 36         104        10         10  \n",
       "\n",
       "[176 rows x 8 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rebind `metadata` to the table for the TABZILLA benchmark origin and display it.\n",
    "metadata = create_metadata(benchmark_origin=BenchmarkOrigin.TABZILLA)\n",
    "metadata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>openml_dataset_name</th>\n",
       "      <th>n_observations</th>\n",
       "      <th>n_train</th>\n",
       "      <th>n_val</th>\n",
       "      <th>n_test</th>\n",
       "      <th>n_features</th>\n",
       "      <th>n_splits</th>\n",
       "      <th>n_classes</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>openml_dataset_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>kr-vs-kp</td>\n",
       "      <td>3196</td>\n",
       "      <td>2556</td>\n",
       "      <td>320</td>\n",
       "      <td>320</td>\n",
       "      <td>36</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>labor</td>\n",
       "      <td>57</td>\n",
       "      <td>45</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>16</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>autos</td>\n",
       "      <td>205</td>\n",
       "      <td>163</td>\n",
       "      <td>21</td>\n",
       "      <td>21</td>\n",
       "      <td>25</td>\n",
       "      <td>10</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>lymph</td>\n",
       "      <td>148</td>\n",
       "      <td>118</td>\n",
       "      <td>15</td>\n",
       "      <td>15</td>\n",
       "      <td>18</td>\n",
       "      <td>10</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>balance-scale</td>\n",
       "      <td>625</td>\n",
       "      <td>499</td>\n",
       "      <td>63</td>\n",
       "      <td>63</td>\n",
       "      <td>4</td>\n",
       "      <td>10</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>167141</th>\n",
       "      <td>churn</td>\n",
       "      <td>5000</td>\n",
       "      <td>4000</td>\n",
       "      <td>500</td>\n",
       "      <td>500</td>\n",
       "      <td>20</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>167211</th>\n",
       "      <td>Satellite</td>\n",
       "      <td>5100</td>\n",
       "      <td>4080</td>\n",
       "      <td>510</td>\n",
       "      <td>510</td>\n",
       "      <td>36</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>168911</th>\n",
       "      <td>jasmine</td>\n",
       "      <td>2984</td>\n",
       "      <td>2386</td>\n",
       "      <td>299</td>\n",
       "      <td>299</td>\n",
       "      <td>144</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>190408</th>\n",
       "      <td>Click_prediction_small</td>\n",
       "      <td>39948</td>\n",
       "      <td>31958</td>\n",
       "      <td>3995</td>\n",
       "      <td>3995</td>\n",
       "      <td>11</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>360948</th>\n",
       "      <td>libras</td>\n",
       "      <td>360</td>\n",
       "      <td>288</td>\n",
       "      <td>36</td>\n",
       "      <td>36</td>\n",
       "      <td>104</td>\n",
       "      <td>10</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>94 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                      openml_dataset_name  n_observations  n_train  n_val  \\\n",
       "openml_dataset_id                                                           \n",
       "3                                kr-vs-kp            3196     2556    320   \n",
       "4                                   labor              57       45      6   \n",
       "9                                   autos             205      163     21   \n",
       "10                                  lymph             148      118     15   \n",
       "11                          balance-scale             625      499     63   \n",
       "...                                   ...             ...      ...    ...   \n",
       "167141                              churn            5000     4000    500   \n",
       "167211                          Satellite            5100     4080    510   \n",
       "168911                            jasmine            2984     2386    299   \n",
       "190408             Click_prediction_small           39948    31958   3995   \n",
       "360948                             libras             360      288     36   \n",
       "\n",
       "                   n_test  n_features  n_splits  n_classes  \n",
       "openml_dataset_id                                           \n",
       "3                     320          36        10          2  \n",
       "4                       6          16        10          2  \n",
       "9                      21          25        10          6  \n",
       "10                     15          18        10          4  \n",
       "11                     63           4        10          3  \n",
       "...                   ...         ...       ...        ...  \n",
       "167141                500          20        10          2  \n",
       "167211                510          36        10          2  \n",
       "168911                299         144        10          2  \n",
       "190408               3995          11        10          2  \n",
       "360948                 36         104        10         10  \n",
       "\n",
       "[94 rows x 8 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from tabicl.data.benchmarks import BENCHMARKS, BenchmarkName\n",
    "\n",
    "ids_used_in_paper = BENCHMARKS[BenchmarkName.TABZILLA_HAS_COMPLETED_RUNS].openml_dataset_ids\n",
    "\n",
    "metadata = metadata.loc[ids_used_in_paper]\n",
    "metadata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shorten long dataset names before the table is exported to LaTeX below.\n",
    "MAX_NAME_CHARS = 15  # characters kept in front of the ellipsis\n",
    "ELLIPSIS = '...'\n",
    "\n",
    "names = metadata['openml_dataset_name']\n",
    "# Truncate only names that are longer than the truncated form itself\n",
    "# (15 + 3 characters). A plain `> 15` cutoff would turn 16- and 17-character\n",
    "# names into longer 18-character strings and hide information for no gain.\n",
    "too_long = names.str.len() > MAX_NAME_CHARS + len(ELLIPSIS)\n",
    "# Single mask and a single `.loc` assignment (no chained indexing); re-running\n",
    "# the cell is a no-op because truncated names are exactly 18 characters long.\n",
    "metadata.loc[too_long, 'openml_dataset_name'] = names[too_long].str[:MAX_NAME_CHARS] + ELLIPSIS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\\begin{tabular}{llrrrrrrr}\n",
      "\\toprule\n",
      " & openml\\_dataset\\_name & n\\_observations & n\\_train & n\\_val & n\\_test & n\\_features & n\\_splits & n\\_classes \\\\\n",
      "openml_dataset_id &  &  &  &  &  &  &  &  \\\\\n",
      "\\midrule\n",
      "3 & kr-vs-kp & 3196 & 2556 & 320 & 320 & 36 & 10 & 2 \\\\\n",
      "4 & labor & 57 & 45 & 6 & 6 & 16 & 10 & 2 \\\\\n",
      "9 & autos & 205 & 163 & 21 & 21 & 25 & 10 & 6 \\\\\n",
      "10 & lymph & 148 & 118 & 15 & 15 & 18 & 10 & 4 \\\\\n",
      "11 & balance-scale & 625 & 499 & 63 & 63 & 4 & 10 & 3 \\\\\n",
      "12 & mfeat-factors & 2000 & 1600 & 200 & 200 & 216 & 10 & 10 \\\\\n",
      "14 & mfeat-fourier & 2000 & 1600 & 200 & 200 & 76 & 10 & 10 \\\\\n",
      "15 & breast-w & 699 & 559 & 70 & 70 & 9 & 10 & 2 \\\\\n",
      "16 & mfeat-karhunen & 2000 & 1600 & 200 & 200 & 64 & 10 & 10 \\\\\n",
      "18 & mfeat-morpholog... & 2000 & 1600 & 200 & 200 & 6 & 10 & 10 \\\\\n",
      "23 & cmc & 1473 & 1177 & 148 & 148 & 9 & 10 & 3 \\\\\n",
      "25 & colic & 368 & 294 & 37 & 37 & 26 & 10 & 2 \\\\\n",
      "27 & colic & 368 & 294 & 37 & 37 & 22 & 10 & 2 \\\\\n",
      "29 & credit-approval & 690 & 552 & 69 & 69 & 15 & 10 & 2 \\\\\n",
      "30 & page-blocks & 5473 & 4377 & 548 & 548 & 10 & 10 & 5 \\\\\n",
      "35 & dermatology & 366 & 292 & 37 & 37 & 34 & 10 & 6 \\\\\n",
      "37 & diabetes & 768 & 614 & 77 & 77 & 8 & 10 & 2 \\\\\n",
      "39 & sonar & 208 & 166 & 21 & 21 & 60 & 10 & 2 \\\\\n",
      "40 & glass & 214 & 170 & 22 & 22 & 9 & 10 & 6 \\\\\n",
      "43 & spambase & 4601 & 3680 & 460 & 461 & 57 & 10 & 2 \\\\\n",
      "45 & splice & 3190 & 2552 & 319 & 319 & 60 & 10 & 3 \\\\\n",
      "47 & tae & 151 & 120 & 15 & 16 & 5 & 10 & 3 \\\\\n",
      "48 & heart-c & 303 & 241 & 31 & 31 & 13 & 10 & 2 \\\\\n",
      "49 & tic-tac-toe & 958 & 766 & 96 & 96 & 9 & 10 & 2 \\\\\n",
      "50 & heart-h & 294 & 234 & 30 & 30 & 13 & 10 & 2 \\\\\n",
      "53 & vehicle & 846 & 676 & 85 & 85 & 18 & 10 & 4 \\\\\n",
      "59 & iris & 150 & 120 & 15 & 15 & 4 & 10 & 3 \\\\\n",
      "2074 & satimage & 6430 & 5144 & 643 & 643 & 36 & 10 & 6 \\\\\n",
      "2079 & eucalyptus & 736 & 588 & 74 & 74 & 19 & 10 & 5 \\\\\n",
      "2867 & anneal & 898 & 718 & 90 & 90 & 38 & 10 & 5 \\\\\n",
      "3485 & scene & 2407 & 1925 & 241 & 241 & 299 & 10 & 2 \\\\\n",
      "3512 & synthetic\\_contr... & 600 & 480 & 60 & 60 & 60 & 10 & 6 \\\\\n",
      "3540 & analcatdata\\_box... & 120 & 96 & 12 & 12 & 3 & 10 & 2 \\\\\n",
      "3543 & irish & 500 & 400 & 50 & 50 & 5 & 10 & 2 \\\\\n",
      "3549 & analcatdata\\_aut... & 841 & 672 & 84 & 85 & 70 & 10 & 4 \\\\\n",
      "3560 & analcatdata\\_dmf... & 797 & 637 & 80 & 80 & 4 & 10 & 6 \\\\\n",
      "3561 & profb & 672 & 536 & 68 & 68 & 9 & 10 & 2 \\\\\n",
      "3602 & visualizing\\_env... & 111 & 88 & 11 & 12 & 3 & 10 & 2 \\\\\n",
      "3620 & fri\\_c0\\_100\\_5 & 100 & 80 & 10 & 10 & 5 & 10 & 2 \\\\\n",
      "3647 & rabe\\_266 & 120 & 96 & 12 & 12 & 2 & 10 & 2 \\\\\n",
      "3711 & elevators & 16599 & 13279 & 1660 & 1660 & 18 & 10 & 2 \\\\\n",
      "3731 & visualizing\\_liv... & 130 & 104 & 13 & 13 & 2 & 10 & 2 \\\\\n",
      "3739 & analcatdata\\_chl... & 100 & 80 & 10 & 10 & 3 & 10 & 2 \\\\\n",
      "3748 & transplant & 131 & 104 & 13 & 14 & 3 & 10 & 2 \\\\\n",
      "3779 & fri\\_c3\\_100\\_5 & 100 & 80 & 10 & 10 & 5 & 10 & 2 \\\\\n",
      "3797 & socmob & 1156 & 924 & 116 & 116 & 5 & 10 & 2 \\\\\n",
      "3896 & ada\\_agnostic & 4562 & 3648 & 457 & 457 & 48 & 10 & 2 \\\\\n",
      "3902 & pc4 & 1458 & 1166 & 146 & 146 & 37 & 10 & 2 \\\\\n",
      "3903 & pc3 & 1563 & 1249 & 157 & 157 & 37 & 10 & 2 \\\\\n",
      "3904 & jm1 & 10885 & 8707 & 1089 & 1089 & 21 & 10 & 2 \\\\\n",
      "3913 & kc2 & 522 & 416 & 53 & 53 & 21 & 10 & 2 \\\\\n",
      "3917 & kc1 & 2109 & 1687 & 211 & 211 & 21 & 10 & 2 \\\\\n",
      "3918 & pc1 & 1109 & 887 & 111 & 111 & 21 & 10 & 2 \\\\\n",
      "3953 & adult-census & 32561 & 26048 & 3256 & 3257 & 14 & 10 & 2 \\\\\n",
      "9946 & wdbc & 569 & 455 & 57 & 57 & 30 & 10 & 2 \\\\\n",
      "9952 & phoneme & 5404 & 4322 & 541 & 541 & 5 & 10 & 2 \\\\\n",
      "9957 & qsar-biodeg & 1055 & 843 & 106 & 106 & 41 & 10 & 2 \\\\\n",
      "9960 & wall-robot-navi... & 5456 & 4364 & 546 & 546 & 24 & 10 & 4 \\\\\n",
      "9964 & semeion & 1593 & 1273 & 160 & 160 & 256 & 10 & 10 \\\\\n",
      "9971 & ilpd & 583 & 465 & 59 & 59 & 10 & 10 & 2 \\\\\n",
      "9978 & ozone-level-8hr & 2534 & 2026 & 254 & 254 & 72 & 10 & 2 \\\\\n",
      "9984 & fertility & 100 & 80 & 10 & 10 & 9 & 10 & 2 \\\\\n",
      "10089 & acute-inflammat... & 120 & 96 & 12 & 12 & 6 & 10 & 2 \\\\\n",
      "10093 & banknote-authen... & 1372 & 1096 & 138 & 138 & 4 & 10 & 2 \\\\\n",
      "10101 & blood-transfusi... & 748 & 598 & 75 & 75 & 4 & 10 & 2 \\\\\n",
      "14952 & PhishingWebsite... & 11055 & 8843 & 1106 & 1106 & 30 & 10 & 2 \\\\\n",
      "14954 & cylinder-bands & 540 & 432 & 54 & 54 & 37 & 10 & 2 \\\\\n",
      "14965 & bank-marketing & 45211 & 36168 & 4521 & 4522 & 16 & 10 & 2 \\\\\n",
      "14967 & cjs & 2796 & 2236 & 280 & 280 & 33 & 10 & 6 \\\\\n",
      "125920 & dresses-sales & 500 & 400 & 50 & 50 & 12 & 10 & 2 \\\\\n",
      "125921 & LED-display-dom... & 500 & 400 & 50 & 50 & 7 & 10 & 10 \\\\\n",
      "145793 & yeast & 1269 & 1015 & 127 & 127 & 8 & 10 & 4 \\\\\n",
      "145799 & breast-cancer & 286 & 228 & 29 & 29 & 9 & 10 & 2 \\\\\n",
      "145836 & blood-transfusi... & 748 & 598 & 75 & 75 & 4 & 10 & 2 \\\\\n",
      "145847 & hill-valley & 1212 & 968 & 122 & 122 & 100 & 10 & 2 \\\\\n",
      "145977 & ecoli & 336 & 268 & 34 & 34 & 7 & 10 & 8 \\\\\n",
      "145984 & ionosphere & 351 & 280 & 35 & 36 & 34 & 10 & 2 \\\\\n",
      "146024 & lung-cancer & 32 & 24 & 4 & 4 & 56 & 10 & 3 \\\\\n",
      "146063 & hayes-roth & 160 & 128 & 16 & 16 & 4 & 10 & 3 \\\\\n",
      "146065 & monks-problems-... & 601 & 480 & 60 & 61 & 6 & 10 & 2 \\\\\n",
      "146192 & car-evaluation & 1728 & 1382 & 173 & 173 & 21 & 10 & 4 \\\\\n",
      "146210 & postoperative-p... & 88 & 70 & 9 & 9 & 8 & 10 & 2 \\\\\n",
      "146607 & SpeedDating & 8378 & 6702 & 838 & 838 & 120 & 10 & 2 \\\\\n",
      "146800 & MiceProtein & 1080 & 864 & 108 & 108 & 77 & 10 & 8 \\\\\n",
      "146817 & steel-plates-fa... & 1941 & 1552 & 194 & 195 & 27 & 10 & 7 \\\\\n",
      "146818 & Australian & 690 & 552 & 69 & 69 & 14 & 10 & 2 \\\\\n",
      "146820 & wilt & 4839 & 3871 & 484 & 484 & 5 & 10 & 2 \\\\\n",
      "146821 & car & 1728 & 1382 & 173 & 173 & 6 & 10 & 4 \\\\\n",
      "167140 & dna & 3186 & 2548 & 319 & 319 & 180 & 10 & 3 \\\\\n",
      "167141 & churn & 5000 & 4000 & 500 & 500 & 20 & 10 & 2 \\\\\n",
      "167211 & Satellite & 5100 & 4080 & 510 & 510 & 36 & 10 & 2 \\\\\n",
      "168911 & jasmine & 2984 & 2386 & 299 & 299 & 144 & 10 & 2 \\\\\n",
      "190408 & Click\\_predictio... & 39948 & 31958 & 3995 & 3995 & 11 & 10 & 2 \\\\\n",
      "360948 & libras & 360 & 288 & 36 & 36 & 104 & 10 & 10 \\\\\n",
      "\\bottomrule\n",
      "\\end{tabular}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(metadata.to_latex(escape=True))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "tab",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
