{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><svg style=\"position: absolute; width: 0; height: 0; overflow: hidden\">\n",
       "<defs>\n",
       "<symbol id=\"icon-database\" viewBox=\"0 0 32 32\">\n",
       "<path d=\"M16 0c-8.837 0-16 2.239-16 5v4c0 2.761 7.163 5 16 5s16-2.239 16-5v-4c0-2.761-7.163-5-16-5z\"></path>\n",
       "<path d=\"M16 17c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z\"></path>\n",
       "<path d=\"M16 26c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z\"></path>\n",
       "</symbol>\n",
       "<symbol id=\"icon-file-text2\" viewBox=\"0 0 32 32\">\n",
       "<path d=\"M28.681 7.159c-0.694-0.947-1.662-2.053-2.724-3.116s-2.169-2.030-3.116-2.724c-1.612-1.182-2.393-1.319-2.841-1.319h-15.5c-1.378 0-2.5 1.121-2.5 2.5v27c0 1.378 1.122 2.5 2.5 2.5h23c1.378 0 2.5-1.122 2.5-2.5v-19.5c0-0.448-0.137-1.23-1.319-2.841zM24.543 5.457c0.959 0.959 1.712 1.825 2.268 2.543h-4.811v-4.811c0.718 0.556 1.584 1.309 2.543 2.268zM28 29.5c0 0.271-0.229 0.5-0.5 0.5h-23c-0.271 0-0.5-0.229-0.5-0.5v-27c0-0.271 0.229-0.5 0.5-0.5 0 0 15.499-0 15.5 0v7c0 0.552 0.448 1 1 1h7v19.5z\"></path>\n",
       "<path d=\"M23 26h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
       "<path d=\"M23 22h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
       "<path d=\"M23 18h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
       "</symbol>\n",
       "</defs>\n",
       "</svg>\n",
       "<style>/* CSS stylesheet for displaying xarray objects in jupyterlab.\n",
       " *\n",
       " */\n",
       "\n",
       ":root {\n",
       "  --xr-font-color0: var(--jp-content-font-color0, rgba(0, 0, 0, 1));\n",
       "  --xr-font-color2: var(--jp-content-font-color2, rgba(0, 0, 0, 0.54));\n",
       "  --xr-font-color3: var(--jp-content-font-color3, rgba(0, 0, 0, 0.38));\n",
       "  --xr-border-color: var(--jp-border-color2, #e0e0e0);\n",
       "  --xr-disabled-color: var(--jp-layout-color3, #bdbdbd);\n",
       "  --xr-background-color: var(--jp-layout-color0, white);\n",
       "  --xr-background-color-row-even: var(--jp-layout-color1, white);\n",
       "  --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n",
       "}\n",
       "\n",
       "html[theme=dark],\n",
       "body[data-theme=dark],\n",
       "body.vscode-dark {\n",
       "  --xr-font-color0: rgba(255, 255, 255, 1);\n",
       "  --xr-font-color2: rgba(255, 255, 255, 0.54);\n",
       "  --xr-font-color3: rgba(255, 255, 255, 0.38);\n",
       "  --xr-border-color: #1F1F1F;\n",
       "  --xr-disabled-color: #515151;\n",
       "  --xr-background-color: #111111;\n",
       "  --xr-background-color-row-even: #111111;\n",
       "  --xr-background-color-row-odd: #313131;\n",
       "}\n",
       "\n",
       ".xr-wrap {\n",
       "  display: block !important;\n",
       "  min-width: 300px;\n",
       "  max-width: 700px;\n",
       "}\n",
       "\n",
       ".xr-text-repr-fallback {\n",
       "  /* fallback to plain text repr when CSS is not injected (untrusted notebook) */\n",
       "  display: none;\n",
       "}\n",
       "\n",
       ".xr-header {\n",
       "  padding-top: 6px;\n",
       "  padding-bottom: 6px;\n",
       "  margin-bottom: 4px;\n",
       "  border-bottom: solid 1px var(--xr-border-color);\n",
       "}\n",
       "\n",
       ".xr-header > div,\n",
       ".xr-header > ul {\n",
       "  display: inline;\n",
       "  margin-top: 0;\n",
       "  margin-bottom: 0;\n",
       "}\n",
       "\n",
       ".xr-obj-type,\n",
       ".xr-array-name {\n",
       "  margin-left: 2px;\n",
       "  margin-right: 10px;\n",
       "}\n",
       "\n",
       ".xr-obj-type {\n",
       "  color: var(--xr-font-color2);\n",
       "}\n",
       "\n",
       ".xr-sections {\n",
       "  padding-left: 0 !important;\n",
       "  display: grid;\n",
       "  grid-template-columns: 150px auto auto 1fr 20px 20px;\n",
       "}\n",
       "\n",
       ".xr-section-item {\n",
       "  display: contents;\n",
       "}\n",
       "\n",
       ".xr-section-item input {\n",
       "  display: none;\n",
       "}\n",
       "\n",
       ".xr-section-item input + label {\n",
       "  color: var(--xr-disabled-color);\n",
       "}\n",
       "\n",
       ".xr-section-item input:enabled + label {\n",
       "  cursor: pointer;\n",
       "  color: var(--xr-font-color2);\n",
       "}\n",
       "\n",
       ".xr-section-item input:enabled + label:hover {\n",
       "  color: var(--xr-font-color0);\n",
       "}\n",
       "\n",
       ".xr-section-summary {\n",
       "  grid-column: 1;\n",
       "  color: var(--xr-font-color2);\n",
       "  font-weight: 500;\n",
       "}\n",
       "\n",
       ".xr-section-summary > span {\n",
       "  display: inline-block;\n",
       "  padding-left: 0.5em;\n",
       "}\n",
       "\n",
       ".xr-section-summary-in:disabled + label {\n",
       "  color: var(--xr-font-color2);\n",
       "}\n",
       "\n",
       ".xr-section-summary-in + label:before {\n",
       "  display: inline-block;\n",
       "  content: '►';\n",
       "  font-size: 11px;\n",
       "  width: 15px;\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       ".xr-section-summary-in:disabled + label:before {\n",
       "  color: var(--xr-disabled-color);\n",
       "}\n",
       "\n",
       ".xr-section-summary-in:checked + label:before {\n",
       "  content: '▼';\n",
       "}\n",
       "\n",
       ".xr-section-summary-in:checked + label > span {\n",
       "  display: none;\n",
       "}\n",
       "\n",
       ".xr-section-summary,\n",
       ".xr-section-inline-details {\n",
       "  padding-top: 4px;\n",
       "  padding-bottom: 4px;\n",
       "}\n",
       "\n",
       ".xr-section-inline-details {\n",
       "  grid-column: 2 / -1;\n",
       "}\n",
       "\n",
       ".xr-section-details {\n",
       "  display: none;\n",
       "  grid-column: 1 / -1;\n",
       "  margin-bottom: 5px;\n",
       "}\n",
       "\n",
       ".xr-section-summary-in:checked ~ .xr-section-details {\n",
       "  display: contents;\n",
       "}\n",
       "\n",
       ".xr-array-wrap {\n",
       "  grid-column: 1 / -1;\n",
       "  display: grid;\n",
       "  grid-template-columns: 20px auto;\n",
       "}\n",
       "\n",
       ".xr-array-wrap > label {\n",
       "  grid-column: 1;\n",
       "  vertical-align: top;\n",
       "}\n",
       "\n",
       ".xr-preview {\n",
       "  color: var(--xr-font-color3);\n",
       "}\n",
       "\n",
       ".xr-array-preview,\n",
       ".xr-array-data {\n",
       "  padding: 0 5px !important;\n",
       "  grid-column: 2;\n",
       "}\n",
       "\n",
       ".xr-array-data,\n",
       ".xr-array-in:checked ~ .xr-array-preview {\n",
       "  display: none;\n",
       "}\n",
       "\n",
       ".xr-array-in:checked ~ .xr-array-data,\n",
       ".xr-array-preview {\n",
       "  display: inline-block;\n",
       "}\n",
       "\n",
       ".xr-dim-list {\n",
       "  display: inline-block !important;\n",
       "  list-style: none;\n",
       "  padding: 0 !important;\n",
       "  margin: 0;\n",
       "}\n",
       "\n",
       ".xr-dim-list li {\n",
       "  display: inline-block;\n",
       "  padding: 0;\n",
       "  margin: 0;\n",
       "}\n",
       "\n",
       ".xr-dim-list:before {\n",
       "  content: '(';\n",
       "}\n",
       "\n",
       ".xr-dim-list:after {\n",
       "  content: ')';\n",
       "}\n",
       "\n",
       ".xr-dim-list li:not(:last-child):after {\n",
       "  content: ',';\n",
       "  padding-right: 5px;\n",
       "}\n",
       "\n",
       ".xr-has-index {\n",
       "  font-weight: bold;\n",
       "}\n",
       "\n",
       ".xr-var-list,\n",
       ".xr-var-item {\n",
       "  display: contents;\n",
       "}\n",
       "\n",
       ".xr-var-item > div,\n",
       ".xr-var-item label,\n",
       ".xr-var-item > .xr-var-name span {\n",
       "  background-color: var(--xr-background-color-row-even);\n",
       "  margin-bottom: 0;\n",
       "}\n",
       "\n",
       ".xr-var-item > .xr-var-name:hover span {\n",
       "  padding-right: 5px;\n",
       "}\n",
       "\n",
       ".xr-var-list > li:nth-child(odd) > div,\n",
       ".xr-var-list > li:nth-child(odd) > label,\n",
       ".xr-var-list > li:nth-child(odd) > .xr-var-name span {\n",
       "  background-color: var(--xr-background-color-row-odd);\n",
       "}\n",
       "\n",
       ".xr-var-name {\n",
       "  grid-column: 1;\n",
       "}\n",
       "\n",
       ".xr-var-dims {\n",
       "  grid-column: 2;\n",
       "}\n",
       "\n",
       ".xr-var-dtype {\n",
       "  grid-column: 3;\n",
       "  text-align: right;\n",
       "  color: var(--xr-font-color2);\n",
       "}\n",
       "\n",
       ".xr-var-preview {\n",
       "  grid-column: 4;\n",
       "}\n",
       "\n",
       ".xr-index-preview {\n",
       "  grid-column: 2 / 5;\n",
       "  color: var(--xr-font-color2);\n",
       "}\n",
       "\n",
       ".xr-var-name,\n",
       ".xr-var-dims,\n",
       ".xr-var-dtype,\n",
       ".xr-preview,\n",
       ".xr-attrs dt {\n",
       "  white-space: nowrap;\n",
       "  overflow: hidden;\n",
       "  text-overflow: ellipsis;\n",
       "  padding-right: 10px;\n",
       "}\n",
       "\n",
       ".xr-var-name:hover,\n",
       ".xr-var-dims:hover,\n",
       ".xr-var-dtype:hover,\n",
       ".xr-attrs dt:hover {\n",
       "  overflow: visible;\n",
       "  width: auto;\n",
       "  z-index: 1;\n",
       "}\n",
       "\n",
       ".xr-var-attrs,\n",
       ".xr-var-data,\n",
       ".xr-index-data {\n",
       "  display: none;\n",
       "  background-color: var(--xr-background-color) !important;\n",
       "  padding-bottom: 5px !important;\n",
       "}\n",
       "\n",
       ".xr-var-attrs-in:checked ~ .xr-var-attrs,\n",
       ".xr-var-data-in:checked ~ .xr-var-data,\n",
       ".xr-index-data-in:checked ~ .xr-index-data {\n",
       "  display: block;\n",
       "}\n",
       "\n",
       ".xr-var-data > table {\n",
       "  float: right;\n",
       "}\n",
       "\n",
       ".xr-var-name span,\n",
       ".xr-var-data,\n",
       ".xr-index-name div,\n",
       ".xr-index-data,\n",
       ".xr-attrs {\n",
       "  padding-left: 25px !important;\n",
       "}\n",
       "\n",
       ".xr-attrs,\n",
       ".xr-var-attrs,\n",
       ".xr-var-data,\n",
       ".xr-index-data {\n",
       "  grid-column: 1 / -1;\n",
       "}\n",
       "\n",
       "dl.xr-attrs {\n",
       "  padding: 0;\n",
       "  margin: 0;\n",
       "  display: grid;\n",
       "  grid-template-columns: 125px auto;\n",
       "}\n",
       "\n",
       ".xr-attrs dt,\n",
       ".xr-attrs dd {\n",
       "  padding: 0;\n",
       "  margin: 0;\n",
       "  float: left;\n",
       "  padding-right: 10px;\n",
       "  width: auto;\n",
       "}\n",
       "\n",
       ".xr-attrs dt {\n",
       "  font-weight: normal;\n",
       "  grid-column: 1;\n",
       "}\n",
       "\n",
       ".xr-attrs dt:hover span {\n",
       "  display: inline-block;\n",
       "  background: var(--xr-background-color);\n",
       "  padding-right: 10px;\n",
       "}\n",
       "\n",
       ".xr-attrs dd {\n",
       "  grid-column: 2;\n",
       "  white-space: pre-wrap;\n",
       "  word-break: break-all;\n",
       "}\n",
       "\n",
       ".xr-icon-database,\n",
       ".xr-icon-file-text2,\n",
       ".xr-no-icon {\n",
       "  display: inline-block;\n",
       "  vertical-align: middle;\n",
       "  width: 1em;\n",
       "  height: 1.5em !important;\n",
       "  stroke-width: 0;\n",
       "  stroke: currentColor;\n",
       "  fill: currentColor;\n",
       "}\n",
       "</style><pre class='xr-text-repr-fallback'>&lt;xarray.Dataset&gt;\n",
       "Dimensions:                (observation: 226, feature: 69, split: 10)\n",
       "Coordinates:\n",
       "  * observation            (observation) int32 0 1 2 3 4 ... 221 222 223 224 225\n",
       "  * feature                (feature) int32 0 1 2 3 4 5 6 ... 63 64 65 66 67 68\n",
       "  * split                  (split) int32 0 1 2 3 4 5 6 7 8 9\n",
       "Data variables:\n",
       "    x                      (observation, feature) float32 ...\n",
       "    split_index_train      (observation, split) bool ...\n",
       "    split_index_val        (observation, split) bool ...\n",
       "    split_index_test       (observation, split) bool ...\n",
       "    y                      (observation) int32 ...\n",
       "    attribute_names        (feature) object ...\n",
       "    categorical_indicator  (feature) bool ...\n",
       "Attributes:\n",
       "    openml_dataset_id:    7\n",
       "    openml_dataset_name:  audiology</pre><div class='xr-wrap' style='display:none'><div class='xr-header'><div class='xr-obj-type'>xarray.Dataset</div></div><ul class='xr-sections'><li class='xr-section-item'><input id='section-a3f69853-388d-435d-9da1-ddc2e50a3b48' class='xr-section-summary-in' type='checkbox' disabled ><label for='section-a3f69853-388d-435d-9da1-ddc2e50a3b48' class='xr-section-summary'  title='Expand/collapse section'>Dimensions:</label><div class='xr-section-inline-details'><ul class='xr-dim-list'><li><span class='xr-has-index'>observation</span>: 226</li><li><span class='xr-has-index'>feature</span>: 69</li><li><span class='xr-has-index'>split</span>: 10</li></ul></div><div class='xr-section-details'></div></li><li class='xr-section-item'><input id='section-a622a8ab-2297-461b-aeb2-92a041681b47' class='xr-section-summary-in' type='checkbox'  checked><label for='section-a622a8ab-2297-461b-aeb2-92a041681b47' class='xr-section-summary' >Coordinates: <span>(3)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-var-name'><span class='xr-has-index'>observation</span></div><div class='xr-var-dims'>(observation)</div><div class='xr-var-dtype'>int32</div><div class='xr-var-preview xr-preview'>0 1 2 3 4 5 ... 221 222 223 224 225</div><input id='attrs-7976738e-a9cd-4dff-81c0-57b646566ead' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-7976738e-a9cd-4dff-81c0-57b646566ead' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-510542cf-0081-4f59-9404-de4758af61c9' class='xr-var-data-in' type='checkbox'><label for='data-510542cf-0081-4f59-9404-de4758af61c9' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><pre>array([  0,   1,   2, ..., 223, 224, 225], dtype=int32)</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span class='xr-has-index'>feature</span></div><div class='xr-var-dims'>(feature)</div><div class='xr-var-dtype'>int32</div><div class='xr-var-preview xr-preview'>0 1 2 3 4 5 6 ... 63 64 65 66 67 68</div><input id='attrs-4f0df5b5-7159-47ca-b628-747fa93f76a6' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-4f0df5b5-7159-47ca-b628-747fa93f76a6' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-0c87ee02-d31d-4bf2-acd8-bea7914bafe9' class='xr-var-data-in' type='checkbox'><label for='data-0c87ee02-d31d-4bf2-acd8-bea7914bafe9' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><pre>array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,\n",
       "       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,\n",
       "       36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,\n",
       "       54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68], dtype=int32)</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span class='xr-has-index'>split</span></div><div class='xr-var-dims'>(split)</div><div class='xr-var-dtype'>int32</div><div class='xr-var-preview xr-preview'>0 1 2 3 4 5 6 7 8 9</div><input id='attrs-3e94b93f-e6df-4373-90e6-d09f6723e2ca' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-3e94b93f-e6df-4373-90e6-d09f6723e2ca' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-7ff74fdd-b397-4cb4-b25b-d519c4181f40' class='xr-var-data-in' type='checkbox'><label for='data-7ff74fdd-b397-4cb4-b25b-d519c4181f40' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><pre>array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int32)</pre></div></li></ul></div></li><li class='xr-section-item'><input id='section-3f92f123-dfa2-462c-a12f-b9cd0b00863a' class='xr-section-summary-in' type='checkbox'  checked><label for='section-3f92f123-dfa2-462c-a12f-b9cd0b00863a' class='xr-section-summary' >Data variables: <span>(7)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-var-name'><span>x</span></div><div class='xr-var-dims'>(observation, feature)</div><div class='xr-var-dtype'>float32</div><div class='xr-var-preview xr-preview'>...</div><input id='attrs-0d8f1c07-a075-44ae-89fb-d565407e7d49' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-0d8f1c07-a075-44ae-89fb-d565407e7d49' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-f23edae6-20e7-4703-be9c-71ec839fc16a' class='xr-var-data-in' type='checkbox'><label for='data-f23edae6-20e7-4703-be9c-71ec839fc16a' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><pre>[15594 values with dtype=float32]</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span>split_index_train</span></div><div class='xr-var-dims'>(observation, split)</div><div class='xr-var-dtype'>bool</div><div class='xr-var-preview xr-preview'>...</div><input id='attrs-fbb89a77-f47b-4dee-8079-3ebf3fb7497a' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-fbb89a77-f47b-4dee-8079-3ebf3fb7497a' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-feafd62f-30ea-4482-9a8b-328573a1d11b' class='xr-var-data-in' type='checkbox'><label for='data-feafd62f-30ea-4482-9a8b-328573a1d11b' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><pre>[2260 values with dtype=bool]</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span>split_index_val</span></div><div class='xr-var-dims'>(observation, split)</div><div class='xr-var-dtype'>bool</div><div class='xr-var-preview xr-preview'>...</div><input id='attrs-796d8fb3-3732-4afc-bb21-2bb200886ecf' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-796d8fb3-3732-4afc-bb21-2bb200886ecf' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-d95f5486-8715-4b11-aebb-81105c653a88' class='xr-var-data-in' type='checkbox'><label for='data-d95f5486-8715-4b11-aebb-81105c653a88' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><pre>[2260 values with dtype=bool]</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span>split_index_test</span></div><div class='xr-var-dims'>(observation, split)</div><div class='xr-var-dtype'>bool</div><div class='xr-var-preview xr-preview'>...</div><input id='attrs-2b2d17cc-1238-47d6-ba1b-3fed77de8153' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-2b2d17cc-1238-47d6-ba1b-3fed77de8153' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-a351fb99-1a78-4994-8830-aedcf2d2fa89' class='xr-var-data-in' type='checkbox'><label for='data-a351fb99-1a78-4994-8830-aedcf2d2fa89' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><pre>[2260 values with dtype=bool]</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span>y</span></div><div class='xr-var-dims'>(observation)</div><div class='xr-var-dtype'>int32</div><div class='xr-var-preview xr-preview'>...</div><input id='attrs-be14d27e-2250-42d8-90b9-8816429feab8' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-be14d27e-2250-42d8-90b9-8816429feab8' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-7e9404a5-ba00-4a31-b907-5055923c9eb7' class='xr-var-data-in' type='checkbox'><label for='data-7e9404a5-ba00-4a31-b907-5055923c9eb7' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><pre>[226 values with dtype=int32]</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span>attribute_names</span></div><div class='xr-var-dims'>(feature)</div><div class='xr-var-dtype'>object</div><div class='xr-var-preview xr-preview'>...</div><input id='attrs-538a3e49-40d2-4e41-9ca5-9de945af29c0' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-538a3e49-40d2-4e41-9ca5-9de945af29c0' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-aac6275f-ed50-40d4-b2a5-3be27e00dbf0' class='xr-var-data-in' type='checkbox'><label for='data-aac6275f-ed50-40d4-b2a5-3be27e00dbf0' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><pre>[69 values with dtype=object]</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span>categorical_indicator</span></div><div class='xr-var-dims'>(feature)</div><div class='xr-var-dtype'>bool</div><div class='xr-var-preview xr-preview'>...</div><input id='attrs-9bdbe37d-6c23-476e-9889-e95aa3cbdd6b' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-9bdbe37d-6c23-476e-9889-e95aa3cbdd6b' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-d07d52d6-1439-4ff6-b7d2-90fe54f312b3' class='xr-var-data-in' type='checkbox'><label for='data-d07d52d6-1439-4ff6-b7d2-90fe54f312b3' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><pre>[69 values with dtype=bool]</pre></div></li></ul></div></li><li class='xr-section-item'><input id='section-31a4f11f-b250-456c-9b98-967276ecebca' class='xr-section-summary-in' type='checkbox'  ><label for='section-31a4f11f-b250-456c-9b98-967276ecebca' class='xr-section-summary' >Indexes: <span>(3)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-index-name'><div>observation</div></div><div class='xr-index-preview'>PandasIndex</div><div></div><input id='index-f1fdad60-78ba-448a-960e-feb67290cfdc' class='xr-index-data-in' type='checkbox'/><label for='index-f1fdad60-78ba-448a-960e-feb67290cfdc' title='Show/Hide index repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-index-data'><pre>PandasIndex(Index([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,\n",
       "       ...\n",
       "       216, 217, 218, 219, 220, 221, 222, 223, 224, 225],\n",
       "      dtype=&#x27;int32&#x27;, name=&#x27;observation&#x27;, length=226))</pre></div></li><li class='xr-var-item'><div class='xr-index-name'><div>feature</div></div><div class='xr-index-preview'>PandasIndex</div><div></div><input id='index-9462d37b-7ab4-4e12-871c-4539b474f56f' class='xr-index-data-in' type='checkbox'/><label for='index-9462d37b-7ab4-4e12-871c-4539b474f56f' title='Show/Hide index repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-index-data'><pre>PandasIndex(Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,\n",
       "       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,\n",
       "       36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,\n",
       "       54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68],\n",
       "      dtype=&#x27;int32&#x27;, name=&#x27;feature&#x27;))</pre></div></li><li class='xr-var-item'><div class='xr-index-name'><div>split</div></div><div class='xr-index-preview'>PandasIndex</div><div></div><input id='index-0280120c-88e0-4f36-b044-56d723301271' class='xr-index-data-in' type='checkbox'/><label for='index-0280120c-88e0-4f36-b044-56d723301271' title='Show/Hide index repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-index-data'><pre>PandasIndex(Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=&#x27;int32&#x27;, name=&#x27;split&#x27;))</pre></div></li></ul></div></li><li class='xr-section-item'><input id='section-2a3f4aec-6774-46d8-a615-69ff1b889578' class='xr-section-summary-in' type='checkbox'  checked><label for='section-2a3f4aec-6774-46d8-a615-69ff1b889578' class='xr-section-summary' >Attributes: <span>(2)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><dl class='xr-attrs'><dt><span>openml_dataset_id :</span></dt><dd>7</dd><dt><span>openml_dataset_name :</span></dt><dd>audiology</dd></dl></div></li></ul></div></div>"
      ],
      "text/plain": [
       "<xarray.Dataset>\n",
       "Dimensions:                (observation: 226, feature: 69, split: 10)\n",
       "Coordinates:\n",
       "  * observation            (observation) int32 0 1 2 3 4 ... 221 222 223 224 225\n",
       "  * feature                (feature) int32 0 1 2 3 4 5 6 ... 63 64 65 66 67 68\n",
       "  * split                  (split) int32 0 1 2 3 4 5 6 7 8 9\n",
       "Data variables:\n",
       "    x                      (observation, feature) float32 ...\n",
       "    split_index_train      (observation, split) bool ...\n",
       "    split_index_val        (observation, split) bool ...\n",
       "    split_index_test       (observation, split) bool ...\n",
       "    y                      (observation) int32 ...\n",
       "    attribute_names        (feature) object ...\n",
       "    categorical_indicator  (feature) bool ...\n",
       "Attributes:\n",
       "    openml_dataset_id:    7\n",
       "    openml_dataset_name:  audiology"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import xarray as xr\n",
    "from pathlib import Path\n",
    "\n",
    "path = Path('data/datasets/tabzilla_7.nc')\n",
    "ds = xr.open_dataset(path)\n",
    "ds\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 2,  2,  3,  3,  2,  2,  2,  2,  6,  6, 14, 14,  6,  6,  2,  2, 18,\n",
       "       14,  2,  2,  2, 10, 18, 14,  3,  3,  7,  7, 10, 11,  2,  2,  7,  7,\n",
       "        7,  2,  2,  2,  7, 18,  2,  2,  6,  6, 18,  7,  3,  3,  6, 18,  7,\n",
       "        7,  7,  6,  0,  7,  6, 18,  7,  7, 15,  7,  3, 18,  8,  2,  2, 18,\n",
       "        2,  7,  7, 23,  9,  7, 18,  2,  2,  2, 18,  8, 18,  7,  7,  2,  2,\n",
       "        6,  6,  3,  3,  7, 18,  7, 18,  3,  3,  7,  7, 18,  2,  2,  3, 18,\n",
       "        1,  5,  5,  7, 15, 14, 14,  2, 17, 17, 19, 19, 22, 18, 21, 22,  7,\n",
       "        6,  4, 22, 22, 22,  7,  2, 19, 15, 19, 12,  2,  7,  6, 18, 23, 13,\n",
       "        7, 14, 18, 22, 11, 12, 16, 20, 11, 21,  7,  2,  2,  7,  7,  6,  2,\n",
       "        2,  3,  2,  9,  2,  7,  9,  9,  2, 18,  7,  3,  3, 21,  2,  2,  6,\n",
       "        2,  6,  6,  7,  7, 22, 18,  6,  6,  3,  7,  2, 18,  3,  3,  3,  3,\n",
       "        2,  2,  2,  2, 14, 14,  7,  7,  2,  2,  7,  7,  7,  7,  6, 13,  2,\n",
       "        2,  7,  7,  7,  7,  2,  2, 22,  7,  7,  9,  9, 18,  3,  3,  2,  2,\n",
       "        2, 11, 21,  2,  2], dtype=int32)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "ds.y.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{9910: 2,\n",
       " 168337: 2,\n",
       " 168335: 2,\n",
       " 3917: 2,\n",
       " 14: 10,\n",
       " 50: 2,\n",
       " 45: 3,\n",
       " 146606: 2,\n",
       " 3896: 2,\n",
       " 31: 2,\n",
       " 3711: 2,\n",
       " 9977: 2,\n",
       " 168911: 2,\n",
       " 22: 10,\n",
       " 146818: 2,\n",
       " 219: 2,\n",
       " 146607: 2,\n",
       " 29: 2,\n",
       " 9952: 2,\n",
       " 167119: 3,\n",
       " 3797: 2,\n",
       " 146065: 2,\n",
       " 25: 2,\n",
       " 189354: 2,\n",
       " 10: 4,\n",
       " 14969: 5,\n",
       " 3561: 2,\n",
       " 53: 4,\n",
       " 9890: 10,\n",
       " 11: 3,\n",
       " 7: 24,\n",
       " 189356: 2,\n",
       " 9981: 9,\n",
       " 14964: 10,\n",
       " 9956: 100,\n",
       " 9957: 2}"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset_paths = list(Path('data/datasets').glob('tabzilla*'))\n",
    "\n",
    "classes = {}\n",
    "\n",
    "for path in dataset_paths:\n",
    "\n",
    "    dataset = xr.load_dataset(path)\n",
    "    n_classes = len(np.unique(dataset['y']))\n",
    "    openml_id = dataset.attrs['openml_dataset_id']\n",
    "\n",
    "    classes[openml_id] = n_classes\n",
    "\n",
    "classes"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "tab",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
