{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from pathlib import Path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Project root: two directory levels above the current working directory.\n",
    "# Kept as a plain string, as before, for any code that concatenates it.\n",
    "p_project = str(Path(os.getcwd()).parents[1])\n",
    "\n",
    "# All data locations are Path objects so the `/` joins below are valid\n",
    "# (joining two plain strings with `/` raises TypeError).\n",
    "path_eicu = Path(p_project) / 'data' / 'eicu'\n",
    "path_processed = path_eicu / 'processed'\n",
    "path_data = path_processed / 'all_data.csv'\n",
    "path_pat = path_eicu / 'raw' / 'eicu-2.0' / 'patient.csv.gz'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the table at `path_data`, give the two key columns short names and\n",
    "# index the frame by ID.\n",
    "column_aliases = {'patientunitstayid': 'ID', 'itemoffset': 'Time'}\n",
    "all_df = pd.read_csv(path_data)\n",
    "all_df = all_df.rename(columns=column_aliases)\n",
    "all_df = all_df.set_index('ID')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Time</th>\n",
       "      <th>FiO2</th>\n",
       "      <th>GCS Total</th>\n",
       "      <th>Heart Rate</th>\n",
       "      <th>Invasive BP Diastolic</th>\n",
       "      <th>Invasive BP Systolic</th>\n",
       "      <th>O2 Saturation</th>\n",
       "      <th>Respiratory Rate</th>\n",
       "      <th>Temperature (C)</th>\n",
       "      <th>glucose</th>\n",
       "      <th>pH</th>\n",
       "      <th>Motor</th>\n",
       "      <th>Eyes</th>\n",
       "      <th>MAP (mmHg)</th>\n",
       "      <th>Verbal</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1578183</th>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1578183</th>\n",
       "      <td>13</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>73.0</td>\n",
       "      <td>84.0</td>\n",
       "      <td>181.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>34.722222</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1578183</th>\n",
       "      <td>19</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>154.0</td>\n",
       "      <td>188.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1578183</th>\n",
       "      <td>34</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>86.0</td>\n",
       "      <td>208.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1578183</th>\n",
       "      <td>49</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Time  FiO2  GCS Total  Heart Rate  Invasive BP Diastolic  \\\n",
       "ID                                                                  \n",
       "1578183     4   NaN        4.0         NaN                    NaN   \n",
       "1578183    13   NaN        NaN        73.0                   84.0   \n",
       "1578183    19   NaN        4.0        75.0                  154.0   \n",
       "1578183    34   NaN        4.0        73.0                   86.0   \n",
       "1578183    49   NaN       10.0         NaN                    NaN   \n",
       "\n",
       "         Invasive BP Systolic  O2 Saturation  Respiratory Rate  \\\n",
       "ID                                                               \n",
       "1578183                   NaN            NaN               NaN   \n",
       "1578183                 181.0          100.0              17.0   \n",
       "1578183                 188.0          100.0              20.0   \n",
       "1578183                 208.0          100.0              15.0   \n",
       "1578183                   NaN            NaN               NaN   \n",
       "\n",
       "         Temperature (C)  glucose  pH  Motor  Eyes  MAP (mmHg)  Verbal  \n",
       "ID                                                                      \n",
       "1578183              NaN      NaN NaN    NaN   NaN         NaN     NaN  \n",
       "1578183        34.722222      NaN NaN    NaN   NaN         NaN     NaN  \n",
       "1578183              NaN      NaN NaN    NaN   NaN         NaN     NaN  \n",
       "1578183              NaN      NaN NaN    NaN   NaN         NaN     NaN  \n",
       "1578183              NaN      NaN NaN    NaN   NaN         NaN     NaN  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first rows to check the ID index and the available columns.\n",
    "all_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Measurements kept for the analysis; `Time` is kept alongside them.\n",
    "var_to_consider = ['glucose', 'Invasive BP Diastolic', 'Invasive BP Systolic',\n",
    "                   'O2 Saturation', 'Respiratory Rate', 'Motor', 'Eyes', 'MAP (mmHg)',\n",
    "                   'Heart Rate', 'GCS Total', 'Verbal', 'pH', 'FiO2', 'Temperature (C)']\n",
    "\n",
    "cols_needed = var_to_consider + ['Time']\n",
    "\n",
    "# Remove every other column in a single, non-mutating call instead of dropping\n",
    "# them one by one in place while iterating over the frame's own columns.\n",
    "unneeded_cols = [col_name for col_name in all_df.columns if col_name not in cols_needed]\n",
    "all_df = all_df.drop(columns=unneeded_cols)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(8478327, 15)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (rows, columns) after column selection, before the Time-window filter.\n",
    "all_df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep rows whose Time lies in the half-open window [0, 2880).\n",
    "# NOTE(review): Time is presumably an offset in minutes (2880 = 48 h) - confirm.\n",
    "in_window = all_df['Time'].ge(0) & all_df['Time'].lt(2880)\n",
    "all_df = all_df.loc[in_window]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3334730, 15)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (rows, columns) after keeping only rows with 0 <= Time < 2880.\n",
    "all_df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of rows available for each ID (ID is the index level of all_df).\n",
    "grouped_by_id = all_df.groupby(level='ID')\n",
    "tmp_df = grouped_by_id.size()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot: >"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjEAAAGdCAYAAADjWSL8AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAwpklEQVR4nO3dfVBUZ57//U8HmlYIdEQGGlZCnInjmqCpXUyg3Wx8BLSCTGIqZsctytS6ajZRQ6mVjaZyB3cTcd261SxuXNe11IgWubcSM6nSdMBfRlwL8YGEihrLcmrU0R0Qk+FBlDQdPL8/Up47LT41dgcueL+quuSc8+XiOl868ePV53Q7LMuyBAAAYJj7ensCAAAAPUGIAQAARiLEAAAAIxFiAACAkQgxAADASIQYAABgJEIMAAAwEiEGAAAYKbq3JxAp165d0x//+EfFx8fL4XD09nQAAMBdsCxLly9fVlpamu677/ZrLf02xPzxj39Uenp6b08DAAD0wPnz5zVs2LDb1vTbEBMfHy/phyYkJCTc83iBQECVlZXKy8uT0+m85/HwA/oaGfQ1MuhrZNDXyDC1r21tbUpPT7f/Hr+dfhtirr+ElJCQELYQExsbq4SEBKOeDH0dfY0M+hoZ9DUy6GtkmN7Xu7kUhAt7AQCAkQgxAADASIQYAABgJEIMAAAwEiEGAAAYiRADAACMRIgBAABGIsQAAAAjEWIAAICRCDEAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIwU3dsTwE/nodd3B22fXfV0L80EAIB7x0oMAAAwEiEGAAAYKaQQs2HDBo0ZM0YJCQlKSEiQ1+vVp59+ah9/8cUX5XA4gh45OTlBY/j9fi1cuFBJSUmKi4tTYWGhLly4EFTT3NysoqIiud1uud1uFRUVqaWlpednOQA99Prubg8AAPqTkELMsGHDtGrVKh09elRHjx7VpEmT9Ktf/UonTpywa6ZOnaqGhgb7sWfPnqAxiouLtWvXLlVUVOjAgQNqb29XQUGBurq67JpZs2apvr5ePp9PPp9P9fX1KioqusdTBQAA/UlIF/ZOnz49aPudd97Rhg0bVFtbq0cffVSS5HK55PF4bvr9ra2t2rx5s7Zv364pU6ZIksrLy5Wenq69e/cqPz9fJ0+elM/nU21trbKzsyVJmzZtktfr1alTpzRy5MiQTxIAAPQ/Pb47qaurS//93/+tK1euyOv12vv37dun5ORkPfDAAxo/frzeeecdJScnS5Lq6uoUCASUl5dn16elpSkzM1M1NTXKz8/XwYMH5Xa77QAjSTk5OXK73aqpqblliPH7/fL7/fZ2W1ubJCkQCCgQCPT0NG3XxwjHWD8FV5R1x5q+cC6m9dUU9DUy6Gtk0NfIMLWvocw35BBz7Ngxeb1efffdd7r//vu1a9cuPfLII5KkadOm6fnnn1dGRobOnDmjN998U5MmTVJdXZ1cLpcaGxsVExOjIUOGBI2ZkpKixsZGSVJjY6Mden4sOTnZrrmZ0tJSrVixotv+yspKxcbGhnqat1RVVRW2sSJp9RN3rrnxpb7eZEpfTUNfI4O+RgZ9jQzT+nr16tW7rg05xIwcOVL19fVqaWnRhx9+qNmzZ6u6ulqPPPKIXnjhBbsuMzNTY8eOVUZGhnbv3q0ZM2bcckzLsuRwOOztH399q5obLVu2TIsXL7a329ralJ6erry8PCUkJIR6mt0EAgFVVVUpNzdXTqfznseLtMySz+5Yc7wk/yeYye2Z1ldT0NfIoK+RQV8jw9S+Xn8l5W6EHGJiYmL08MMPS5LGjh2rI0eO6N1339XGjRu71aampiojI0OnT5+WJHk8HnV2dqq5uTloNaapqUnjxo2zay5evNhtrEuXLiklJeWW83K5XHK5XN32O53OsP7ywj1epPi7bh34rutL52FKX01DXyODvkYGfY0M0/oaylzv+X1iLMsKuhblx7799ludP39eqamp
kqSsrCw5nc6gpa2GhgYdP37cDjFer1etra06fPiwXXPo0CG1trbaNQAAACGtxCxfvlzTpk1Tenq6Ll++rIqKCu3bt08+n0/t7e0qKSnRc889p9TUVJ09e1bLly9XUlKSnn32WUmS2+3WnDlztGTJEg0dOlSJiYlaunSpRo8ebd+tNGrUKE2dOlVz5861V3fmzZungoIC7kwCAAC2kELMxYsXVVRUpIaGBrndbo0ZM0Y+n0+5ubnq6OjQsWPH9P7776ulpUWpqamaOHGiPvjgA8XHx9tjrF27VtHR0Zo5c6Y6Ojo0efJkbd26VVFRUXbNjh07tGjRIvsupsLCQq1fvz5MpwwAAPqDkELM5s2bb3ls8ODB+uyzO19MOmjQIJWVlamsrOyWNYmJiSovLw9lagAAYIDhs5MAAICRCDEAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAI0X39gTQtzz0+u6g7bOrnu6lmQAAcHuEmAHsxsACAIBJeDkJAAAYiRADAACMRIgBAABGIsQAAAAjEWIAAICRCDEAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAI4UUYjZs2KAxY8YoISFBCQkJ8nq9+vTTT+3jlmWppKREaWlpGjx4sCZMmKATJ04EjeH3+7Vw4UIlJSUpLi5OhYWFunDhQlBNc3OzioqK5Ha75Xa7VVRUpJaWlp6fJQAA6HdCCjHDhg3TqlWrdPToUR09elSTJk3Sr371KzuorF69WmvWrNH69et15MgReTwe5ebm6vLly/YYxcXF2rVrlyoqKnTgwAG1t7eroKBAXV1dds2sWbNUX18vn88nn8+n+vp6FRUVhemUAQBAfxAdSvH06dODtt955x1t2LBBtbW1euSRR7Ru3Tq98cYbmjFjhiRp27ZtSklJ0c6dOzV//ny1trZq8+bN2r59u6ZMmSJJKi8vV3p6uvbu3av8/HydPHlSPp9PtbW1ys7OliRt2rRJXq9Xp06d0siRI8Nx3gAAwHA9viamq6tLFRUVunLlirxer86cOaPGxkbl5eXZNS6XS+PHj1dNTY0kqa6uToFAIKgmLS1NmZmZds3BgwfldrvtACNJOTk5crvddg0AAEBIKzGSdOzYMXm9Xn333Xe6//77tWvXLj3yyCN2wEhJSQmqT0lJ0blz5yRJjY2NiomJ0ZAhQ7rVNDY22jXJycndfm5ycrJdczN+v19+v9/ebmtrkyQFAgEFAoFQT7Ob62OEY6yfgivKCss4kT5f0/pqCvoaGfQ1MuhrZJja11DmG3KIGTlypOrr69XS0qIPP/xQs2fPVnV1tX3c4XAE1VuW1W3fjW6suVn9ncYpLS3VihUruu2vrKxUbGzsbX9+KKqqqsI2ViStfiI84+zZsyc8A92BKX01DX2NDPoaGfQ1Mkzr69WrV++6NuQQExMTo4cffliSNHbsWB05ckTvvvuu/vEf/1HSDyspqampdn1TU5O9OuPxeNTZ2anm5uag1ZimpiaNGzfOrrl48WK3n3vp0qVuqzw/tmzZMi1evNjebmtrU3p6uvLy8pSQkBDqaXYTCARUVVWl3NxcOZ3Oex4v0jJLPgvLOMdL8sMyzq2Y1ldT0NfIoK+RQV8jw9S+Xn8l5W6EHGJuZFmW/H6/hg8fLo/Ho6qqKv3FX/yFJKmzs1PV1dX6l3/5F0lSVlaWnE6nqqqqNHPmTElSQ0ODjh8/rtWrV0uSvF6vWltbdfjwYT3xxA/LCYcOHVJra6sddG7G5XLJ5XJ12+90OsP6ywv3eJHi77r96tfd+qnO1ZS+moa+RgZ9jQz6Ghmm9TWUuYYUYpYvX65p06YpPT1dly9fVkVF
hfbt2yefzyeHw6Hi4mKtXLlSI0aM0IgRI7Ry5UrFxsZq1qxZkiS32605c+ZoyZIlGjp0qBITE7V06VKNHj3avltp1KhRmjp1qubOnauNGzdKkubNm6eCggLuTAIAALaQQszFixdVVFSkhoYGud1ujRkzRj6fT7m5uZKk1157TR0dHXr55ZfV3Nys7OxsVVZWKj4+3h5j7dq1io6O1syZM9XR0aHJkydr69atioqKsmt27NihRYsW2XcxFRYWav369eE4XwAA0E+EFGI2b9582+MOh0MlJSUqKSm5Zc2gQYNUVlamsrKyW9YkJiaqvLw8lKkBAIABhs9OAgAARrrnC3vRNzz0+u7engIAAD8pVmIAAICRCDEAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAIxFiAACAkQgxAADASIQYAABgJEIMAAAwEiEGAAAYiRADAACMRIgBAABGIsQAAAAjEWIAAICRCDEAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBIhBgAAGCmkEFNaWqrHH39c8fHxSk5O1jPPPKNTp04F1bz44otyOBxBj5ycnKAav9+vhQsXKikpSXFxcSosLNSFCxeCapqbm1VUVCS32y23262ioiK1tLT07CwBAEC/E1KIqa6u1iuvvKLa2lpVVVXp+++/V15enq5cuRJUN3XqVDU0NNiPPXv2BB0vLi7Wrl27VFFRoQMHDqi9vV0FBQXq6uqya2bNmqX6+nr5fD75fD7V19erqKjoHk4VAAD0J9GhFPt8vqDtLVu2KDk5WXV1dXrqqafs/S6XSx6P56ZjtLa2avPmzdq+fbumTJkiSSovL1d6err27t2r/Px8nTx5Uj6fT7W1tcrOzpYkbdq0SV6vV6dOndLIkSNDOkkAAND/hBRibtTa2ipJSkxMDNq/b98+JScn64EHHtD48eP1zjvvKDk5WZJUV1enQCCgvLw8uz4tLU2ZmZmqqalRfn6+Dh48KLfbbQcYScrJyZHb7VZNTc1NQ4zf75ff77e329raJEmBQECBQOBeTtMe58d/9jWuKCsi40b6fPt6X01FXyODvkYGfY0MU/saynx7HGIsy9LixYv15JNPKjMz094/bdo0Pf/888rIyNCZM2f05ptvatKkSaqrq5PL5VJjY6NiYmI0ZMiQoPFSUlLU2NgoSWpsbLRDz48lJyfbNTcqLS3VihUruu2vrKxUbGxsT0+zm6qqqrCNFU6rn4jMuDe+FBgpfbWvpqOvkUFfI4O+RoZpfb169epd1/Y4xCxYsEBfffWVDhw4ELT/hRdesL/OzMzU2LFjlZGRod27d2vGjBm3HM+yLDkcDnv7x1/fqubHli1bpsWLF9vbbW1tSk9PV15enhISEu76vG4lEAioqqpKubm5cjqd9zxeuGWWfBaRcY+X5Edk3Ov6el9NRV8jg75GBn2NDFP7ev2VlLvRoxCzcOFCffLJJ9q/f7+GDRt229rU1FRlZGTo9OnTkiSPx6POzk41NzcHrcY0NTVp3Lhxds3Fixe7jXXp0iWlpKTc9Oe4XC65XK5u+51OZ1h/eeEeL1z8XTcPd/fqpzrXvtpX09HXyKCvkUFfI8O0voYy15DuTrIsSwsWLNBHH32kzz//XMOHD7/j93z77bc6f/68UlNTJUlZWVlyOp1By1sNDQ06fvy4HWK8Xq9aW1t1+PBhu+bQoUNqbW21awAAwMAW0krMK6+8op07d+o3v/mN4uPj7etT3G63Bg8erPb2dpWUlOi5555Tamqqzp49q+XLlyspKUnPPvusXTtnzhwtWbJEQ4cOVWJiopYuXarRo0fbdyuNGjVKU6dO1dy5c7Vx40ZJ0rx581RQUMCdSQAAQFKIIWbDhg2S
pAkTJgTt37Jli1588UVFRUXp2LFjev/999XS0qLU1FRNnDhRH3zwgeLj4+36tWvXKjo6WjNnzlRHR4cmT56srVu3Kioqyq7ZsWOHFi1aZN/FVFhYqPXr1/f0PAEAQD8TUoixrNvfxjt48GB99tmdLzAdNGiQysrKVFZWdsuaxMRElZeXhzI9AAAwgPDZSQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAIxFiAACAkQgxAADASIQYAABgJEIMAAAwEiEGAAAYiRADAACMRIgBAABGIsQAAAAjEWIAAICRCDEAAMBIhBgAAGAkQgwAADBSdG9PAH3bQ6/v7rbv7Kqne2EmAAAEYyUGAAAYiRADAACMRIgBAABGIsQAAAAjEWIAAICRCDEAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkQAwAAjBRSiCktLdXjjz+u+Ph4JScn65lnntGpU6eCaizLUklJidLS0jR48GBNmDBBJ06cCKrx+/1auHChkpKSFBcXp8LCQl24cCGoprm5WUVFRXK73XK73SoqKlJLS0vPzhIAAPQ7IYWY6upqvfLKK6qtrVVVVZW+//575eXl6cqVK3bN6tWrtWbNGq1fv15HjhyRx+NRbm6uLl++bNcUFxdr165dqqio0IEDB9Te3q6CggJ1dXXZNbNmzVJ9fb18Pp98Pp/q6+tVVFQUhlMGAAD9QXQoxT6fL2h7y5YtSk5OVl1dnZ566ilZlqV169bpjTfe0IwZMyRJ27ZtU0pKinbu3Kn58+ertbVVmzdv1vbt2zVlyhRJUnl5udLT07V3717l5+fr5MmT8vl8qq2tVXZ2tiRp06ZN8nq9OnXqlEaOHBmOcwcAAAYLKcTcqLW1VZKUmJgoSTpz5owaGxuVl5dn17hcLo0fP141NTWaP3++6urqFAgEgmrS0tKUmZmpmpoa5efn6+DBg3K73XaAkaScnBy53W7V1NTcNMT4/X75/X57u62tTZIUCAQUCATu5TTtcX78Z1/jirJ+sp8Vzh709b6air5GBn2NDPoaGab2NZT59jjEWJalxYsX68knn1RmZqYkqbGxUZKUkpISVJuSkqJz587ZNTExMRoyZEi3muvf39jYqOTk5G4/Mzk52a65UWlpqVasWNFtf2VlpWJjY0M8u1urqqoK21jhtPqJn+5n7dmzJ+xj9tW+mo6+RgZ9jQz6Ghmm9fXq1at3XdvjELNgwQJ99dVXOnDgQLdjDocjaNuyrG77bnRjzc3qbzfOsmXLtHjxYnu7ra1N6enpysvLU0JCwm1/9t0IBAKqqqpSbm6unE7nPY8Xbpkln/1kP+t4SX7YxurrfTUVfY0M+hoZ9DUyTO3r9VdS7kaPQszChQv1ySefaP/+/Ro2bJi93+PxSPphJSU1NdXe39TUZK/OeDwedXZ2qrm5OWg1pqmpSePGjbNrLl682O3nXrp0qdsqz3Uul0sul6vbfqfTGdZfXrjHCxd/1+1DYjhF4vz7al9NR18jg75GBn2NDNP6GspcQ7o7ybIsLViwQB999JE+//xzDR8+POj48OHD5fF4gpauOjs7VV1dbQeUrKwsOZ3OoJqGhgYdP37crvF6vWptbdXhw4ftmkOHDqm1tdWuAQAAA1tIKzGvvPKKdu7cqd/85jeKj4+3r09xu90aPHiwHA6HiouLtXLlSo0YMUIjRozQypUrFRsbq1mzZtm1c+bM0ZIlSzR06FAlJiZq6dKlGj16tH230qhRozR16lTNnTtXGzdulCTNmzdPBQUF3JkEAAAkhRhiNmzYIEmaMGFC0P4tW7boxRdflCS99tpr6ujo0Msvv6zm5mZlZ2ersrJS8fHxdv3atWsVHR2tmTNnqqOjQ5MnT9bWrVsVFRVl1+zYsUOLFi2y72IqLCzU+vXre3KOAACgHwopxFjWnW/j
dTgcKikpUUlJyS1rBg0apLKyMpWVld2yJjExUeXl5aFMDwAADCB8dhIAADASIQYAABiJEAMAAIxEiAEAAEYixAAAACPd0wdAYmB66PXdQdtnVz3dSzMBAAxkrMQAAAAjEWIAAICRCDEAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAIxFiAACAkQgxAADASIQYAABgJEIMAAAwEiEGAAAYiRADAACMRIgBAABGIsQAAAAjRff2BGC+h17f3W3f2VVP98JMAAADCSEGEXFjsCHUAADCjZeTAACAkQgxAADASIQYAABgJEIMAAAwEiEGAAAYKeQQs3//fk2fPl1paWlyOBz6+OOPg46/+OKLcjgcQY+cnJygGr/fr4ULFyopKUlxcXEqLCzUhQsXgmqam5tVVFQkt9stt9utoqIitbS0hHyCAACgfwo5xFy5ckWPPfaY1q9ff8uaqVOnqqGhwX7s2bMn6HhxcbF27dqliooKHThwQO3t7SooKFBXV5ddM2vWLNXX18vn88nn86m+vl5FRUWhThcAAPRTIb9PzLRp0zRt2rTb1rhcLnk8npsea21t1ebNm7V9+3ZNmTJFklReXq709HTt3btX+fn5OnnypHw+n2pra5WdnS1J2rRpk7xer06dOqWRI0eGOm0AANDPROTN7vbt26fk5GQ98MADGj9+vN555x0lJydLkurq6hQIBJSXl2fXp6WlKTMzUzU1NcrPz9fBgwfldrvtACNJOTk5crvdqqmpuWmI8fv98vv99nZbW5skKRAIKBAI3PM5XR8jHGNFgivK6u0p3Nat+tbX+2oq+hoZ9DUy6GtkmNrXUOYb9hAzbdo0Pf/888rIyNCZM2f05ptvatKkSaqrq5PL5VJjY6NiYmI0ZMiQoO9LSUlRY2OjJKmxsdEOPT+WnJxs19yotLRUK1as6La/srJSsbGxYTizH1RVVYVtrHBa/URvz+D2bnxJ8UZ9ta+mo6+RQV8jg75Ghml9vXr16l3Xhj3EvPDCC/bXmZmZGjt2rDIyMrR7927NmDHjlt9nWZYcDoe9/eOvb1XzY8uWLdPixYvt7ba2NqWnpysvL08JCQk9OZUggUBAVVVVys3NldPpvOfxwi2z5LPensJtHS/Jv+n+vt5XU9HXyKCvkUFfI8PUvl5/JeVuRPyzk1JTU5WRkaHTp09Lkjwejzo7O9Xc3By0GtPU1KRx48bZNRcvXuw21qVLl5SSknLTn+NyueRyubrtdzqdYf3lhXu8cPF33Tzc9RV36llf7avp6Gtk0NfIoK+RYVpfQ5lrxN8n5ttvv9X58+eVmpoqScrKypLT6Qxa3mpoaNDx48ftEOP1etXa2qrDhw/bNYcOHVJra6tdAwAABraQV2La29v1u9/9zt4+c+aM6uvrlZiYqMTERJWUlOi5555Tamqqzp49q+XLlyspKUnPPvusJMntdmvOnDlasmSJhg4dqsTERC1dulSjR4+271YaNWqUpk6dqrlz52rjxo2SpHnz5qmgoIA7kwAAgKQehJijR49q4sSJ9vb161Bmz56tDRs26NixY3r//ffV0tKi1NRUTZw4UR988IHi4+Pt71m7dq2io6M1c+ZMdXR0aPLkydq6dauioqLsmh07dmjRokX2XUyFhYW3fW8aAAAwsIQcYiZMmCDLuvXtvJ99ducLTAcNGqSysjKVlZXdsiYxMVHl5eWhTg8AAAwQfHYSAAAwEiEGAAAYiRADAACMRIgBAABGIsQAAAAjEWIAAICRCDEAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAIxFiAACAkQgxAADASIQYAABgpOjengAGhode391t39lV
T/fCTAAA/QUrMQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAI/E+MQa62XuuAAAw0LASAwAAjESIAQAARiLEoNc89PpuZZZ8Jkn2nwAA3C1CDAAAMBIhBgAAGIkQAwAAjBRyiNm/f7+mT5+utLQ0ORwOffzxx0HHLctSSUmJ0tLSNHjwYE2YMEEnTpwIqvH7/Vq4cKGSkpIUFxenwsJCXbhwIaimublZRUVFcrvdcrvdKioqUktLS8gnCAAA+qeQQ8yVK1f02GOPaf369Tc9vnr1aq1Zs0br16/XkSNH5PF4lJubq8uXL9s1xcXF2rVrlyoqKnTgwAG1t7eroKBAXV1dds2sWbNUX18vn88nn8+n+vp6FRUV9eAUAQBAfxTym91NmzZN06ZNu+kxy7K0bt06vfHGG5oxY4Ykadu2bUpJSdHOnTs1f/58tba2avPmzdq+fbumTJkiSSovL1d6err27t2r/Px8nTx5Uj6fT7W1tcrOzpYkbdq0SV6vV6dOndLIkSN7er4AAKCfCOs79p45c0aNjY3Ky8uz97lcLo0fP141NTWaP3++6urqFAgEgmrS0tKUmZmpmpoa5efn6+DBg3K73XaAkaScnBy53W7V1NTcNMT4/X75/X57u62tTZIUCAQUCATu+dyujxGOse6VK8rq7SmEjes+y/6zL/S2v+hLz9f+hL5GBn2NDFP7Gsp8wxpiGhsbJUkpKSlB+1NSUnTu3Dm7JiYmRkOGDOlWc/37GxsblZyc3G385ORku+ZGpaWlWrFiRbf9lZWVio2NDf1kbqGqqipsY/XU6id6ewbh989jr2nPnj29PY1+py88X/sj+hoZ9DUyTOvr1atX77o2Ip+d5HA4grYty+q270Y31tys/nbjLFu2TIsXL7a329ralJ6erry8PCUkJIQy/ZsKBAKqqqpSbm6unE7nPY93L/rTG8O57rP0z2Ov6c2j96nu/5na29PpN/rS87U/oa+RQV8jw9S+Xn8l5W6ENcR4PB5JP6ykpKam2vubmprs1RmPx6POzk41NzcHrcY0NTVp3Lhxds3Fixe7jX/p0qVuqzzXuVwuuVyubvudTmdYf3nhHq8n/F23D4Qm8l9z9Hpf+6O+8Hztj+hrZNDXyDCtr6HMNazvEzN8+HB5PJ6gpavOzk5VV1fbASUrK0tOpzOopqGhQcePH7drvF6vWltbdfjwYbvm0KFDam1ttWsAAMDAFvJKTHt7u373u9/Z22fOnFF9fb0SExP14IMPqri4WCtXrtSIESM0YsQIrVy5UrGxsZo1a5Ykye12a86cOVqyZImGDh2qxMRELV26VKNHj7bvVho1apSmTp2quXPnauPGjZKkefPmqaCggDuTAACApB6EmKNHj2rixIn29vXrUGbPnq2tW7fqtddeU0dHh15++WU1NzcrOztblZWVio+Pt79n7dq1io6O1syZM9XR0aHJkydr69atioqKsmt27NihRYsW2XcxFRYW3vK9aQAAwMATcoiZMGGCLOvWt/g6HA6VlJSopKTkljWDBg1SWVmZysrKblmTmJio8vLyUKeHfuah13cHbZ9d9XQvzQQA0Nfw2UkAAMBIEbnFGuiJG1ddAAC4HVZiAACAkQgxAADASIQYAABgJEIMAAAwEiEGAAAYiRADAACMRIgBAABGIsQAAAAjEWIAAICRCDEAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIxEiAEAAEaK7u0J4M4een13b08BAIA+h5UYAABgJEIMAAAwEiEGAAAYiWtiYJSbXR90dtXTvTATAEBvYyUGAAAYiRADAACMRIgBAABGIsQAAAAjcWEvjHfjxb5c6AsAAwMrMQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkLe3sZF6UCANAzrMQAAAAjsRLTx9zss4Fw71jxAoD+h5UYAABgJFZiMCDxadgAYD5CDPodXpIDgIGBl5MAAICRwh5iSkpK5HA4gh4e
j8c+blmWSkpKlJaWpsGDB2vChAk6ceJE0Bh+v18LFy5UUlKS4uLiVFhYqAsXLoR7qgAAwGARWYl59NFH1dDQYD+OHTtmH1u9erXWrFmj9evX68iRI/J4PMrNzdXly5ftmuLiYu3atUsVFRU6cOCA2tvbVVBQoK6urkhMFwAAGCgi18RER0cHrb5cZ1mW1q1bpzfeeEMzZsyQJG3btk0pKSnauXOn5s+fr9bWVm3evFnbt2/XlClTJEnl5eVKT0/X3r17lZ+fH4kpAwAAw0QkxJw+fVppaWlyuVzKzs7WypUr9fOf/1xnzpxRY2Oj8vLy7FqXy6Xx48erpqZG8+fPV11dnQKBQFBNWlqaMjMzVVNTc8sQ4/f75ff77e22tjZJUiAQUCAQuOdzuj5GOMb6MVeUFdbxTOO6zwr6szeF+3fbmyL1fB3o6Gtk0NfIMLWvocw37CEmOztb77//vn75y1/q4sWLevvttzVu3DidOHFCjY2NkqSUlJSg70lJSdG5c+ckSY2NjYqJidGQIUO61Vz//pspLS3VihUruu2vrKxUbGzsvZ6WraqqKmxjSdLqJ8I6nLH+eey13p6C9uzZ09tTCLtwP1/xA/oaGfQ1Mkzr69WrV++6NuwhZtq0afbXo0ePltfr1S9+8Qtt27ZNOTk5kiSHwxH0PZZlddt3ozvVLFu2TIsXL7a329ralJ6erry8PCUkJPTkVIIEAgFVVVUpNzdXTqfznse7LrPks7CNZSLXfZb+eew1vXn0Pvmv3f45EGnHS/rPS5WRer4OdPQ1MuhrZJja1+uvpNyNiL9PTFxcnEaPHq3Tp0/rmWeekfTDaktqaqpd09TUZK/OeDwedXZ2qrm5OWg1pqmpSePGjbvlz3G5XHK5XN32O53OsP7ywj2ev6t3/+LuK/zXHL3eC5P+I79b4X6+4gf0NTLoa2SY1tdQ5hrx94nx+/06efKkUlNTNXz4cHk8nqClrc7OTlVXV9sBJSsrS06nM6imoaFBx48fv22IAQAAA0vYV2KWLl2q6dOn68EHH1RTU5PefvtttbW1afbs2XI4HCouLtbKlSs1YsQIjRgxQitXrlRsbKxmzZolSXK73ZozZ46WLFmioUOHKjExUUuXLtXo0aPtu5UAAADCHmIuXLigX//61/rmm2/0s5/9TDk5OaqtrVVGRoYk6bXXXlNHR4defvllNTc3Kzs7W5WVlYqPj7fHWLt2raKjozVz5kx1dHRo8uTJ2rp1q6KiosI9XQAAYKiwh5iKiorbHnc4HCopKVFJScktawYNGqSysjKVlZWFeXbArfGhkABgFj47CQAAGIlPsY6gG/9lz7/qzcPvEAD6LkLMT+hmL1fAfAQdAOgdvJwEAACMRIgBAABGIsQAAAAjcU0MEAKuawKAvoOVGAAAYCRCDAAAMBIhBgAAGIkQAwAAjMSFvUAfwZvmAUBoWIkBAABGIsQAAAAj8XISEGaRfC+Zu3nJ6aHXd8sVZWn1E1JmyWc69U5BxOYDAL2JlRgAAGAkVmKAPop3BwaA2yPEAAYj6AAYyHg5CQAAGIkQAwAAjESIAQAARuKamDDh2gQAAH5arMQAAAAjsRID9HM3WyXkc5kA9AeEGGAA4sMmAfQHvJwEAACMRIgBAABGIsQAAAAjEWIAAICRuLAXAHcwATASKzEAAMBIhBgAAGAkQgwAADASIQYAABiJC3sB3BTv6gugr2MlBgAAGImVGAB3hduwAfQ1rMQAAAAj9fmVmPfee0//+q//qoaGBj366KNat26d/vqv/7q3p3XTf5UCAICfTp8OMR988IGKi4v13nvv6a/+6q+0ceNGTZs2TV9//bUefPDB3p4eMODdTZjnJScAkdKnQ8yaNWs0Z84c/f3f/70kad26dfrss8+0YcMGlZaW9vLsANwN7nICECl9NsR0dnaqrq5Or7/+etD+vLw81dTUdKv3+/3y+/32dmtrqyTpT3/6kwKBwD3PJxAI6OrVq/r222/l
dDoV/f2Vex4TUvQ1S1evXlN04D51XXP09nT6jb7c14eX/n89+r5DyyYHbWeX/p871oTbjf8fQHjQ18gwta+XL1+WJFmWdcfaPhtivvnmG3V1dSklJSVof0pKihobG7vVl5aWasWKFd32Dx8+PGJzRHjM6u0J9FP9ra9J/294agCY4fLly3K73bet6bMh5jqHI/hfkZZlddsnScuWLdPixYvt7WvXrulPf/qThg4detP6ULW1tSk9PV3nz59XQkLCPY+HH9DXyKCvkUFfI4O+RoapfbUsS5cvX1ZaWtoda/tsiElKSlJUVFS3VZempqZuqzOS5HK55HK5gvY98MADYZ9XQkKCUU8GU9DXyKCvkUFfI4O+RoaJfb3TCsx1ffZ9YmJiYpSVlaWqqqqg/VVVVRo3blwvzQoAAPQVfXYlRpIWL16soqIijR07Vl6vV//5n/+pP/zhD3rppZd6e2oAAKCX9ekQ88ILL+jbb7/VP/3TP6mhoUGZmZnas2ePMjIyfvK5uFwuvfXWW91essK9oa+RQV8jg75GBn2NjIHQV4d1N/cwAQAA9DF99poYAACA2yHEAAAAIxFiAACAkQgxAADASISYu/Tee+9p+PDhGjRokLKysvQ///M/vT2lPmv//v2aPn260tLS5HA49PHHHwcdtyxLJSUlSktL0+DBgzVhwgSdOHEiqMbv92vhwoVKSkpSXFycCgsLdeHChZ/wLPqe0tJSPf7444qPj1dycrKeeeYZnTp1KqiG3oZuw4YNGjNmjP2GYF6vV59++ql9nJ6GR2lpqRwOh4qLi+199DZ0JSUlcjgcQQ+Px2MfH3A9tXBHFRUVltPptDZt2mR9/fXX1quvvmrFxcVZ586d6+2p9Ul79uyx3njjDevDDz+0JFm7du0KOr5q1SorPj7e+vDDD61jx45ZL7zwgpWammq1tbXZNS+99JL1Z3/2Z1ZVVZX1xRdfWBMnTrQee+wx6/vvv/+Jz6bvyM/Pt7Zs2WIdP37cqq+vt55++mnrwQcftNrb2+0aehu6Tz75xNq9e7d16tQp69SpU9by5cstp9NpHT9+3LIsehoOhw8fth566CFrzJgx1quvvmrvp7ehe+utt6xHH33UamhosB9NTU328YHWU0LMXXjiiSesl156KWjfn//5n1uvv/56L83IHDeGmGvXrlkej8datWqVve+7776z3G639R//8R+WZVlWS0uL5XQ6rYqKCrvmf//3f6377rvP8vl8P9nc+7qmpiZLklVdXW1ZFr0NpyFDhlj/9V//RU/D4PLly9aIESOsqqoqa/z48XaIobc989Zbb1mPPfbYTY8NxJ7yctIddHZ2qq6uTnl5eUH78/LyVFNT00uzMteZM2fU2NgY1E+Xy6Xx48fb/ayrq1MgEAiqSUtLU2ZmJj3/kdbWVklSYmKiJHobDl1dXaqoqNCVK1fk9XrpaRi88sorevrppzVlypSg/fS2506fPq20tDQNHz5cf/M3f6Pf//73kgZmT/v0O/b2Bd988426urq6fehkSkpKtw+nxJ1d79nN+nnu3Dm7JiYmRkOGDOlWQ89/YFmWFi9erCeffFKZmZmS6O29OHbsmLxer7777jvdf//92rVrlx555BH7f+r0tGcqKir0xRdf6MiRI92O8XztmezsbL3//vv65S9/qYsXL+rtt9/WuHHjdOLEiQHZU0LMXXI4HEHblmV124e715N+0vP/34IFC/TVV1/pwIED3Y7R29CNHDlS9fX1amlp0YcffqjZs2erurraPk5PQ3f+/Hm9+uqrqqys1KBBg25ZR29DM23aNPvr0aNHy+v16he/+IW2bdumnJwcSQOrp7ycdAdJSUmKiorqllCbmpq6pV3c2fWr6G/XT4/Ho87OTjU3N9+yZiBbuHChPvnkE/32t7/VsGHD7P30tudiYmL08MMPa+zYsSotLdVjjz2md999l57eg7q6OjU1NSkrK0vR0dGKjo5WdXW1/u3f/k3R0dF2
b+jtvYmLi9Po0aN1+vTpAfl8JcTcQUxMjLKyslRVVRW0v6qqSuPGjeulWZlr+PDh8ng8Qf3s7OxUdXW13c+srCw5nc6gmoaGBh0/fnxA99yyLC1YsEAfffSRPv/8cw0fPjzoOL0NH8uy5Pf76ek9mDx5so4dO6b6+nr7MXbsWP3t3/6t6uvr9fOf/5zehoHf79fJkyeVmpo6MJ+vvXE1sWmu32K9efNm6+uvv7aKi4utuLg46+zZs709tT7p8uXL1pdffml9+eWXliRrzZo11pdffmnfkr5q1SrL7XZbH330kXXs2DHr17/+9U1vARw2bJi1d+9e64svvrAmTZpk7C2A4fIP//APltvttvbt2xd0e+XVq1ftGnobumXLlln79++3zpw5Y3311VfW8uXLrfvuu8+qrKy0LIuehtOP706yLHrbE0uWLLH27dtn/f73v7dqa2utgoICKz4+3v77aKD1lBBzl/793//dysjIsGJiYqy//Mu/tG9rRXe//e1vLUndHrNnz7Ys64fbAN966y3L4/FYLpfLeuqpp6xjx44FjdHR0WEtWLDASkxMtAYPHmwVFBRYf/jDH3rhbPqOm/VUkrVlyxa7ht6G7u/+7u/s/7Z/9rOfWZMnT7YDjGXR03C6McTQ29Bdf98Xp9NppaWlWTNmzLBOnDhhHx9oPXVYlmX1zhoQAABAz3FNDAAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAIxFiAACAkQgxAADASIQYAABgJEIMAAAwEiEGAAAYiRADAACMRIgBAABG+r8tdb9BHIFjUAAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Distribution of the number of rows per ID (tmp_df), using 100 bins.\n",
    "tmp_df.hist(bins=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    40005.000000\n",
       "mean        83.357830\n",
       "std         43.954611\n",
       "min          3.000000\n",
       "25%         55.000000\n",
       "50%         70.000000\n",
       "75%        101.000000\n",
       "max        527.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics of the per-ID row counts held in tmp_df.\n",
    "tmp_df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rows per ID in each half of the window, split at Time == split_time.\n",
    "# Both halves are half-open, mirroring the [0, 2880) filter applied earlier:\n",
    "# a row at exactly Time == split_time is counted in the second half rather\n",
    "# than being left out of both.\n",
    "split_time = 1440\n",
    "ids_before = all_df.loc[all_df['Time'] < split_time].groupby('ID').size()\n",
    "ids_after = all_df.loc[all_df['Time'] >= split_time].groupby('ID').size()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ids_before_selected:  (30389,)\n",
      "ids_after_selected:  (18569,)\n"
     ]
    }
   ],
   "source": [
    "# Keep the IDs that have at least `min_rows` rows in ids_before / ids_after,\n",
    "# then report how many IDs pass in each case.\n",
    "min_rows = 30\n",
    "ids_before_selected = ids_before[ids_before.ge(min_rows)].index\n",
    "ids_after_selected = ids_after[ids_after.ge(min_rows)].index\n",
    "\n",
    "print(\"ids_before_selected: \", ids_before_selected.shape)\n",
    "print(\"ids_after_selected: \", ids_after_selected.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# For every ID, count how many variables have at least one non-missing value.\n",
    "# `Time` is excluded by name rather than by position, so the count does not\n",
    "# depend on column order, and the built-in groupby reduction avoids calling a\n",
    "# Python lambda once per ID.\n",
    "observed = all_df.drop(columns='Time').notna()\n",
    "df_var_num = observed.groupby('ID').any().sum(axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot: >"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjoAAAGdCAYAAAAbudkLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAvt0lEQVR4nO3df3BU9b3/8deahCXJhJWESZatQXEmIhKqDCoEvIUOJMFLSDtMy7WpKyoXuAOCEVCg1Bq8Q5BYgXuTEYHLiGPg4h+K9aqNCa3F5oZfhqYVZKBOUwQlhNGw4ZebNTnfP/jmXDcbkNhNTvLh+ZjJ4H72vWff551DfHF2T9ZlWZYlAAAAA93gdAMAAADdhaADAACMRdABAADGIugAAABjEXQAAICxCDoAAMBYBB0AAGAsgg4AADBWrNMNOKmtrU2ff/65kpKS5HK5nG4HAABcA8uydO7cOfl8Pt1ww9XP2VzXQefzzz9Xenq6020AAIDv4MSJE7rpppuuWnNdB52kpCRJlwc1YMAAh7vpHqFQSJWVlcrJyVFcXJzT7fQazCUSM4nETDrHXCIxk85111yam5uVnp5u/3/8aq7roNP+ctWAAQOMDjoJCQkaMGAAf/m+gblEYiaRmEnnmEskZtK57p7LtbztpMtvRv7ggw80bdo0+Xw+uVwuvfnmm/Z9oVBIS5cu1ciRI5WYmCifz6eHHnpIn3/+edg2gsGgFixYoEGDBikxMVH5+fk6efJkWE1TU5P8fr88Ho88Ho/8fr/Onj0bVvPpp59q2rRpSkxM1KBBg7Rw4UK1tLR0dZcAAIChuhx0Lly4oDvvvFNlZWUR9128eFEHDx7U008/rYMHD+qNN97QsWPHlJ+fH1ZXWFionTt3aseOHaqurtb58+eVl5en1tZWu6agoEB1dXWqqKhQRUWF6urq5Pf77ftbW1s1depUXbhwQdXV1dqxY4def/11LV68uKu7BAAADNXll67uv/9+3X///Z3e5/F4VFVVFbZWWlqqe++9V59++qmGDBmiQCCgLVu26NVXX9XkyZMlSeXl5UpPT9euXbuUm5urI0eOqKKiQnv37tWYMWMkSZs3b1ZWVpaOHj2qYcOGqbKyUh9//LFOnDghn88nSXrhhRf08MMPa9WqVca+FAUAAK5dt79HJxAIyOVy6cYbb5Qk1dbWKhQKKScnx67x+XzKzMxUTU2NcnNztWfPHnk8HjvkSNLYsWPl8XhUU1OjYcOGac+ePcrMzLRDjiTl5uYqGAyqtrZWP/zhDyN6CQaDCgaD9u3m5mZJl19yC4VC0d71XqF9v0zdv++KuURiJpGYSeeYSyRm0rnumktXttetQeerr77SsmXLVFBQYJ9haWhoUL9+/TRw4MCw2rS0NDU0NNg1qampEdtLTU0Nq0lLSwu7f+DAgerXr59d09Hq1au1cuXKiPXKykolJCR0fQf7kI5n2nAZc4nETCIxk84xl0jMpHPRnsvFixevubbbgk4oFNIDDzygtrY2vfjii99ab1lW2LunO3sn9Xep+ably5dr0aJF9u32y9NycnKMfakrFAqpqqpK2dnZXAnwDcwlEjOJxEw6x1wiMZPOdddc2l+RuRbdEnRCoZBmzJih+vp6/f73vw8LEV6vVy0tLWpqago7q9PY2Khx48bZNadPn47Y7pkzZ+yzOF6vV/v27Qu7v6mpSaFQKOJMTzu32y232x2xHhcXZ/yBeT3s43fBXCIxk0jMpHPMJRIz6Vy059KVbUX9s67aQ85f//pX7dq1SykpKWH3jx49WnFxcWGnsU6dOqVDhw7ZQScrK0uBQED79++3a/bt26dAIBBWc+jQIZ06dcquqayslNvt1ujRo6O9WwAAoA/q8hmd8+fP65NPPrFv19fXq66uTsnJyfL5fPrJT36igwcP6u2331Zra6v9fpnk5GT169dPHo9Hs2bN0uLFi5WSkqLk5GQtWbJEI0eOtK/CGj58
uKZMmaLZs2dr48aNkqQ5c+YoLy9Pw4YNkyTl5OTojjvukN/v1/PPP68vv/xSS5Ys0ezZs419GQoAAHRNl4POhx9+GHZFU/t7XmbOnKmioiK99dZbkqS77ror7HHvv/++Jk6cKElat26dYmNjNWPGDF26dEmTJk3S1q1bFRMTY9dv27ZNCxcutK/Oys/PD/vdPTExMXrnnXc0b948jR8/XvHx8SooKNCvf/3rru4SAAAwVJeDzsSJE2VZ1hXvv9p97fr376/S0lKVlpZesSY5OVnl5eVX3c6QIUP09ttvf+vzAQCA61PU36MDAADQWxB0AACAsQg6AADAWN3+ERAAAETLLcveiVj7+3NTHegEfQVndAAAgLEIOgAAwFgEHQAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsQg6AADAWAQdAABgLIIOAAAwFkEHAAAYi6ADAACMRdABAADGIugAAABjEXQAAICxCDoAAMBYBB0AAGAsgg4AADAWQQcAABiLoAMAAIxF0AEAAMYi6AAAAGMRdAAAgLEIOgAAwFgEHQAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsQg6AADAWAQdAABgLIIOAAAwFkEHAAAYi6ADAACMRdABAADGIugAAABjEXQAAICxCDoAAMBYBB0AAGAsgg4AADAWQQcAABiLoAMAAIzV5aDzwQcfaNq0afL5fHK5XHrzzTfD7rcsS0VFRfL5fIqPj9fEiRN1+PDhsJpgMKgFCxZo0KBBSkxMVH5+vk6ePBlW09TUJL/fL4/HI4/HI7/fr7Nnz4bVfPrpp5o2bZoSExM1aNAgLVy4UC0tLV3dJQAAYKguB50LFy7ozjvvVFlZWaf3l5SUaO3atSorK9OBAwfk9XqVnZ2tc+fO2TWFhYXauXOnduzYoerqap0/f155eXlqbW21awoKClRXV6eKigpVVFSorq5Ofr/fvr+1tVVTp07VhQsXVF1drR07duj111/X4sWLu7pLAADAULFdfcD999+v+++/v9P7LMvS+vXrtWLFCk2fPl2S9MorrygtLU3bt2/X3LlzFQgEtGXLFr366quaPHmyJKm8vFzp6enatWuXcnNzdeTIEVVUVGjv3r0aM2aMJGnz5s3KysrS0aNHNWzYMFVWVurjjz/WiRMn5PP5JEkvvPCCHn74Ya1atUoDBgz4TgMBAADm6HLQuZr6+no1NDQoJyfHXnO73ZowYYJqamo0d+5c1dbWKhQKhdX4fD5lZmaqpqZGubm52rNnjzwejx1yJGns2LHyeDyqqanRsGHDtGfPHmVmZtohR5Jyc3MVDAZVW1urH/7whxH9BYNBBYNB+3Zzc7MkKRQKKRQKRXMUvUb7fpm6f98Vc4nETCIxk845ORd3jBWx1hu+PxwrneuuuXRle1ENOg0NDZKktLS0sPW0tDQdP37crunXr58GDhwYUdP++IaGBqWmpkZsPzU1Naym4/MMHDhQ/fr1s2s6Wr16tVauXBmxXllZqYSEhGvZxT6rqqrK6RZ6JeYSiZlEYiadc2IuJfdGrr377rs93seVcKx0LtpzuXjx4jXXRjXotHO5XGG3LcuKWOuoY01n9d+l5puWL1+uRYsW2bebm5uVnp6unJwcY1/qCoVCqqqqUnZ2tuLi4pxup9dgLpGYSSRm0jkn55JZ9F7E2qGi3B7toTMcK53rrrm0vyJzLaIadLxer6TLZ1sGDx5srzc2NtpnX7xer1paWtTU1BR2VqexsVHjxo2za06fPh2x/TNnzoRtZ9++fWH3NzU1KRQKRZzpaed2u+V2uyPW4+LijD8wr4d9/C6YSyRmEomZdM6JuQRbI/8h25u+NxwrnYv2XLqyraj+Hp2hQ4fK6/WGnaJqaWnR7t277RAzevRoxcXFhdWcOnVKhw4dsmuysrIUCAS0f/9+u2bfvn0KBAJhNYcOHdKpU6fsmsrKSrndbo0ePTqa
uwUAAPqoLp/ROX/+vD755BP7dn19verq6pScnKwhQ4aosLBQxcXFysjIUEZGhoqLi5WQkKCCggJJksfj0axZs7R48WKlpKQoOTlZS5Ys0ciRI+2rsIYPH64pU6Zo9uzZ2rhxoyRpzpw5ysvL07BhwyRJOTk5uuOOO+T3+/X888/ryy+/1JIlSzR79mxjX4YCAABd0+Wg8+GHH4Zd0dT+npeZM2dq69ateuqpp3Tp0iXNmzdPTU1NGjNmjCorK5WUlGQ/Zt26dYqNjdWMGTN06dIlTZo0SVu3blVMTIxds23bNi1cuNC+Ois/Pz/sd/fExMTonXfe0bx58zR+/HjFx8eroKBAv/71r7s+BQAAYKQuB52JEyfKsiIv72vncrlUVFSkoqKiK9b0799fpaWlKi0tvWJNcnKyysvLr9rLkCFD9Pbbb39rzwAA4PrEZ10BAABjEXQAAICxCDoAAMBYBB0AAGAsgg4AADAWQQcAABiLoAMAAIxF0AEAAMYi6AAAAGMRdAAAgLEIOgAAwFgEHQAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsQg6AADAWAQdAABgLIIOAAAwFkEHAAAYi6ADAACMRdABAADGIugAAABjEXQAAICxCDoAAMBYsU43AAAAouOWZe9ErP39uakOdNJ7cEYHAAAYi6ADAACMRdABAADGIugAAABjEXQAAICxCDoAAMBYBB0AAGAsgg4AADAWQQcAABiLoAMAAIxF0AEAAMYi6AAAAGMRdAAAgLEIOgAAwFgEHQAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsaIedL7++mv98pe/1NChQxUfH69bb71Vzz77rNra2uway7JUVFQkn8+n+Ph4TZw4UYcPHw7bTjAY1IIFCzRo0CAlJiYqPz9fJ0+eDKtpamqS3++Xx+ORx+OR3+/X2bNno71LAACgj4p60FmzZo1eeukllZWV6ciRIyopKdHzzz+v0tJSu6akpERr165VWVmZDhw4IK/Xq+zsbJ07d86uKSws1M6dO7Vjxw5VV1fr/PnzysvLU2trq11TUFCguro6VVRUqKKiQnV1dfL7/dHeJQAA0EfFRnuDe/bs0Y9+9CNNnTpVknTLLbfov//7v/Xhhx9Kunw2Z/369VqxYoWmT58uSXrllVeUlpam7du3a+7cuQoEAtqyZYteffVVTZ48WZJUXl6u9PR07dq1S7m5uTpy5IgqKiq0d+9ejRkzRpK0efNmZWVl6ejRoxo2bFi0dw0AAPQxUQ869913n1566SUdO3ZMt912m/785z+rurpa69evlyTV19eroaFBOTk59mPcbrcmTJigmpoazZ07V7W1tQqFQmE1Pp9PmZmZqqmpUW5urvbs2SOPx2OHHEkaO3asPB6PampqOg06wWBQwWDQvt3c3CxJCoVCCoVC0R5Fr9C+X6bu33fFXCIxk0jMpHNOzsUdY0Ws9YbvT285VnrbfLprLl3ZXtSDztKlSxUIBHT77bcrJiZGra2tWrVqlX72s59JkhoaGiRJaWlpYY9LS0vT8ePH7Zp+/fpp4MCBETXtj29oaFBqamrE86empto1Ha1evVorV66MWK+srFRCQkIX97RvqaqqcrqFXom5RGImkZhJ55yYS8m9kWvvvvtuj/dxJU4fK711PtGey8WLF6+5NupB57XXXlN5ebm2b9+uESNGqK6uToWFhfL5fJo5c6Zd53K5wh5nWVbEWkcdazqrv9p2li9frkWLFtm3m5ublZ6erpycHA0YMOCa9q+vCYVCqqqqUnZ2tuLi4pxup9dgLpGYSSRm0jkn55JZ9F7E2qGi3B7toTO95VjpbfPprrm0vyJzLaIedJ588kktW7ZMDzzwgCRp5MiROn78uFavXq2ZM2fK6/VKunxGZvDgwfbjGhsb7bM8Xq9XLS0tampqCjur09jYqHHjxtk1p0+fjnj+M2fORJwtaud2
u+V2uyPW4+LijP8hdj3s43fBXCIxk0jMpHNOzCXYGvkP2d70vXH6WOmt84n2XLqyrahfdXXx4kXdcEP4ZmNiYuzLy4cOHSqv1xt2GqulpUW7d++2Q8zo0aMVFxcXVnPq1CkdOnTIrsnKylIgEND+/fvtmn379ikQCNg1AADg+hb1MzrTpk3TqlWrNGTIEI0YMUJ/+tOftHbtWj366KOSLr/cVFhYqOLiYmVkZCgjI0PFxcVKSEhQQUGBJMnj8WjWrFlavHixUlJSlJycrCVLlmjkyJH2VVjDhw/XlClTNHv2bG3cuFGSNGfOHOXl5XHFFQAAkNQNQae0tFRPP/205s2bp8bGRvl8Ps2dO1e/+tWv7JqnnnpKly5d0rx589TU1KQxY8aosrJSSUlJds26desUGxurGTNm6NKlS5o0aZK2bt2qmJgYu2bbtm1auHChfXVWfn6+ysrKor1LAACgj4p60ElKStL69evty8k743K5VFRUpKKioivW9O/fX6WlpWG/aLCj5ORklZeX/wPdAgAAk/FZVwAAwFgEHQAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsQg6AADAWAQdAABgLIIOAAAwFkEHAAAYi6ADAACMRdABAADGIugAAABjEXQAAICxCDoAAMBYBB0AAGAsgg4AADAWQQcAABiLoAMAAIxF0AEAAMYi6AAAAGMRdAAAgLEIOgAAwFgEHQAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsQg6AADAWAQdAABgLIIOAAAwFkEHAAAYi6ADAACMRdABAADGIugAAABjEXQAAICxCDoAAMBYBB0AAGAsgg4AADAWQQcAABiLoAMAAIxF0AEAAMYi6AAAAGMRdAAAgLEIOgAAwFjdEnQ+++wzPfjgg0pJSVFCQoLuuusu1dbW2vdblqWioiL5fD7Fx8dr4sSJOnz4cNg2gsGgFixYoEGDBikxMVH5+fk6efJkWE1TU5P8fr88Ho88Ho/8fr/Onj3bHbsEAAD6oKgHnaamJo0fP15xcXH67W9/q48//lgvvPCCbrzxRrumpKREa9euVVlZmQ4cOCCv16vs7GydO3fOriksLNTOnTu1Y8cOVVdX6/z588rLy1Nra6tdU1BQoLq6OlVUVKiiokJ1dXXy+/3R3iUAANBHxUZ7g2vWrFF6erpefvlle+2WW26x/9uyLK1fv14rVqzQ9OnTJUmvvPKK0tLStH37ds2dO1eBQEBbtmzRq6++qsmTJ0uSysvLlZ6erl27dik3N1dHjhxRRUWF9u7dqzFjxkiSNm/erKysLB09elTDhg2L9q4BAIA+JupB56233lJubq5++tOfavfu3fre976nefPmafbs2ZKk+vp6NTQ0KCcnx36M2+3WhAkTVFNTo7lz56q2tlahUCisxufzKTMzUzU1NcrNzdWePXvk8XjskCNJY8eOlcfjUU1NTadBJxgMKhgM2rebm5slSaFQSKFQKNqj6BXa98vU/fuumEskZhKJmXTOybm4Y6yItd7w/ektx0pvm093zaUr24t60Pnb3/6mDRs2aNGiRfrFL36h/fv3a+HChXK73XrooYfU0NAgSUpLSwt7XFpamo4fPy5JamhoUL9+/TRw4MCImvbHNzQ0KDU1NeL5U1NT7ZqOVq9erZUrV0asV1ZWKiEhoes724dUVVU53UKvxFwiMZNIzKRzTsyl5N7ItXfffbfH+7gSp4+V3jqfaM/l4sWL11wb9aDT1tamu+++W8XFxZKkUaNG6fDhw9qwYYMeeughu87lcoU9zrKsiLWOOtZ0Vn+17SxfvlyLFi2ybzc3Nys9PV05OTkaMGDAt+9cHxQKhVRVVaXs7GzFxcU53U6vwVwiMZNIzKRzTs4ls+i9iLVDRbk92kNnesux0tvm011zaX9F5lpEPegMHjxYd9xxR9ja8OHD9frrr0uSvF6vpMtnZAYPHmzXNDY2
2md5vF6vWlpa1NTUFHZWp7GxUePGjbNrTp8+HfH8Z86ciThb1M7tdsvtdkesx8XFGf9D7HrYx++CuURiJpGYSeecmEuwNfIfsr3pe+P0sdJb5xPtuXRlW1G/6mr8+PE6evRo2NqxY8d08803S5KGDh0qr9cbdhqrpaVFu3fvtkPM6NGjFRcXF1Zz6tQpHTp0yK7JyspSIBDQ/v377Zp9+/YpEAjYNQAA4PoW9TM6TzzxhMaNG6fi4mLNmDFD+/fv16ZNm7Rp0yZJl19uKiwsVHFxsTIyMpSRkaHi4mIlJCSooKBAkuTxeDRr1iwtXrxYKSkpSk5O1pIlSzRy5Ej7Kqzhw4drypQpmj17tjZu3ChJmjNnjvLy8rjiCgAASOqGoHPPPfdo586dWr58uZ599lkNHTpU69ev189//nO75qmnntKlS5c0b948NTU1acyYMaqsrFRSUpJds27dOsXGxmrGjBm6dOmSJk2apK1btyomJsau2bZtmxYuXGhfnZWfn6+ysrJo7xIAAOijoh50JCkvL095eXlXvN/lcqmoqEhFRUVXrOnfv79KS0tVWlp6xZrk5GSVl5f/I60CAACD8VlXAADAWAQdAABgLIIOAAAwFkEHAAAYi6ADAACMRdABAADGIugAAABjEXQAAICxCDoAAMBYBB0AAGAsgg4AADAWQQcAABiLoAMAAIxF0AEAAMYi6AAAAGMRdAAAgLEIOgAAwFgEHQAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsQg6AADAWAQdAABgLIIOAAAwFkEHAAAYi6ADAACMRdABAADGIugAAABjEXQAAICxCDoAAMBYBB0AAGAsgg4AADAWQQcAABiLoAMAAIxF0AEAAMYi6AAAAGPFOt0AAAB91S3L3pEkuWMsldwrZRa9p6Or8hzuCt/EGR0AAGAsgg4AADAWQQcAABiLoAMAAIxF0AEAAMYi6AAAAGMRdAAAgLEIOgAAwFgEHQAAYKxuDzqrV6+Wy+VSYWGhvWZZloqKiuTz+RQfH6+JEyfq8OHDYY8LBoNasGCBBg0apMTEROXn5+vkyZNhNU1NTfL7/fJ4PPJ4PPL7/Tp79mx37xIAAOgjujXoHDhwQJs2bdL3v//9sPWSkhKtXbtWZWVlOnDggLxer7Kzs3Xu3Dm7prCwUDt37tSOHTtUXV2t8+fPKy8vT62trXZNQUGB6urqVFFRoYqKCtXV1cnv93fnLgEAgD6k24LO+fPn9fOf/1ybN2/WwIED7XXLsrR+/XqtWLFC06dPV2Zmpl555RVdvHhR27dvlyQFAgFt2bJFL7zwgiZPnqxRo0apvLxcH330kXbt2iVJOnLkiCoqKvRf//VfysrKUlZWljZv3qy3335bR48e7a7dAgAAfUi3BZ358+dr6tSpmjx5cth6fX29GhoalJOTY6+53W5NmDBBNTU1kqTa2lqFQqGwGp/Pp8zMTLtmz5498ng8GjNmjF0zduxYeTweuwYAAFzfuuXTy3fs2KGDBw/qwIEDEfc1NDRIktLS0sLW09LSdPz4cbumX79+YWeC2mvaH9/Q0KDU1NSI7aempto1HQWDQQWDQft2c3OzJCkUCikUCl3r7vUp7ftl6v59V8wlEjOJxEw65+Rc3DFWxJqT35/2ftw3/N+fvaGfb3Kyn+46VrqyvagHnRMnTujxxx9XZWWl+vfvf8U6l8sVdtuyrIi1jjrWdFZ/te2sXr1aK1eujFivrKxUQkLCVZ+7r6uqqnK6hV6JuURiJpGYSeecmEvJvZFr7777bo/30a5jP/9+d1uv6kdydj7ton2sXLx48Zprox50amtr1djYqNGjR9trra2t+uCDD1RWVma/f6ahoUGDBw+2axobG+2zPF6vVy0tLWpqago7q9PY2Khx48bZNadPn454/jNnzkScLWq3fPlyLVq0yL7d3Nys9PR05eTkaMCAAf/AXvdeoVBIVVVVys7OVlxcnNPt
9BrMJRIzicRMOufkXDKL3otYO1SU26M9fFN7P+4bLP373W16+sMbVPurKY73801Ozqe7jpX2V2SuRdSDzqRJk/TRRx+FrT3yyCO6/fbbtXTpUt16663yer2qqqrSqFGjJEktLS3avXu31qxZI0kaPXq04uLiVFVVpRkzZkiSTp06pUOHDqmkpESSlJWVpUAgoP379+veey9H2H379ikQCNhhqCO32y232x2xHhcXZ/wPsethH78L5hKJmURiJp1zYi7B1sgz9k5+bzr2E2xz9ap+JGfn880eotlHV7YV9aCTlJSkzMzMsLXExESlpKTY64WFhSouLlZGRoYyMjJUXFyshIQEFRQUSJI8Ho9mzZqlxYsXKyUlRcnJyVqyZIlGjhxpv7l5+PDhmjJlimbPnq2NGzdKkubMmaO8vDwNGzYs2rsFAAD6oG55M/K3eeqpp3Tp0iXNmzdPTU1NGjNmjCorK5WUlGTXrFu3TrGxsZoxY4YuXbqkSZMmaevWrYqJibFrtm3bpoULF9pXZ+Xn56usrKzH9wcAAPROPRJ0/vCHP4TddrlcKioqUlFR0RUf079/f5WWlqq0tPSKNcnJySovL49SlwAAwDR81hUAADAWQQcAABiLoAMAAIxF0AEAAMYi6AAAAGMRdAAAgLEIOgAAwFgEHQAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsQg6AADAWLFONwAAvc0ty96RJLljLJXc63AzAP4hnNEBAADGIugAAABjEXQAAICxCDoAAMBYBB0AAGAsgg4AADAWQQcAABiL36MDAL1c++/1aff356Y61AnQ93BGBwAAGIugAwAAjEXQAaBblr2jzKL3JMn+EwBMQNABAADGIugAAABjEXQAAICxCDoAAMBYBB0AAGAsgg4AADAWQQcAABiLoAMAAIxF0AEAAMYi6AAAAGMRdAAAgLEIOgAAwFixTjcAAB3dsuydiLW/PzfVgU7Qmcyi91Ry7+U/g60uvjfo1TijAwAAjEXQAQAAxiLoAAAAYxF0AACAsQg6AADAWAQdAABgLIIOAAAwFkEHAAAYi6ADAACMRdABAADGinrQWb16te655x4lJSUpNTVVP/7xj3X06NGwGsuyVFRUJJ/Pp/j4eE2cOFGHDx8OqwkGg1qwYIEGDRqkxMRE5efn6+TJk2E1TU1N8vv98ng88ng88vv9Onv2bLR3CQAA9FFRDzq7d+/W/PnztXfvXlVVVenrr79WTk6OLly4YNeUlJRo7dq1Kisr04EDB+T1epWdna1z587ZNYWFhdq5c6d27Nih6upqnT9/Xnl5eWptbbVrCgoKVFdXp4qKClVUVKiurk5+vz/auwQAAPqoqH+oZ0VFRdjtl19+WampqaqtrdUPfvADWZal9evXa8WKFZo+fbok6ZVXXlFaWpq2b9+uuXPnKhAIaMuWLXr11Vc1efJkSVJ5ebnS09O1a9cu5ebm6siRI6qoqNDevXs1ZswYSdLmzZuVlZWlo0ePatiwYdHeNQAA0Md0+6eXBwIBSVJycrIkqb6+Xg0NDcrJybFr3G63JkyYoJqaGs2dO1e1tbUKhUJhNT6fT5mZmaqpqVFubq727Nkjj8djhxxJGjt2rDwej2pqajoNOsFgUMFg0L7d3NwsSQqFQgqFQtHd8V6ifb9M3b/virmEc8dYct9gXf7vGyzH5+KOsSLWerKn9udvn0lvm4fj/dzg3HycPjY66nisOP33p7fNp7t+1nZley7LsiKnEiWWZelHP/qRmpqa9Mc//lGSVFNTo/Hjx+uzzz6Tz+eza+fMmaPjx4/rvffe0/bt2/XII4+EhRJJysnJ0dChQ7Vx40YVFxdr69atOnbsWFjNbbfdpkceeUTLly+P6KeoqEgrV66MWN++fbsSEhKiscsAAKCbXbx4UQUFBQoEAhowYMBVa7v1jM5jjz2mv/zlL6quro64z+Vyhd22LCtiraOONZ3VX207y5cv
16JFi+zbzc3NSk9PV05OzrcOqq8KhUKqqqpSdna24uLinG6n12Au4TKL3pP7Bkv/fnebnv7wBtX+aorj/XR0qCi3x5+/fSZOHycd59GTs+jM6Gcr7GMl2OZy5HvzTU7Oo+Ox4vTfn942n+76Wdv+isy16Lags2DBAr311lv64IMPdNNNN9nrXq9XktTQ0KDBgwfb642NjUpLS7NrWlpa1NTUpIEDB4bVjBs3zq45ffp0xPOeOXPG3k5Hbrdbbrc7Yj0uLs74/9ldD/v4XTCXy4Kt//ePg2Cby/GZfLOfdj3ZU8fnd/o46awfJwXbXPafwdaePV6cPjY66tiP039/ett8vtlDNPvoyraiftWVZVl67LHH9MYbb+j3v/+9hg4dGnb/0KFD5fV6VVVVZa+1tLRo9+7ddogZPXq04uLiwmpOnTqlQ4cO2TVZWVkKBALav3+/XbNv3z4FAgG7BgAAXN+ifkZn/vz52r59u37zm98oKSlJDQ0NkiSPx6P4+Hi5XC4VFhaquLhYGRkZysjIUHFxsRISElRQUGDXzpo1S4sXL1ZKSoqSk5O1ZMkSjRw50r4Ka/jw4ZoyZYpmz56tjRs3Srr8Pp+8vDyuuAIAAJK6Iehs2LBBkjRx4sSw9ZdfflkPP/ywJOmpp57SpUuXNG/ePDU1NWnMmDGqrKxUUlKSXb9u3TrFxsZqxowZunTpkiZNmqStW7cqJibGrtm2bZsWLlxoX52Vn5+vsrKyaO8SAADoo6IedK7lIi6Xy6WioiIVFRVdsaZ///4qLS1VaWnpFWuSk5NVXl7+XdoEAADXAT7rCgAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsQg6AADAWAQdAABgLIIOAAAwFkEHAAAYi6ADAACMRdABAADGIugAAABjEXQAAICxCDoAAMBYBB0AAGAsgg4AADAWQQcAABiLoAMAAIxF0AEAAMYi6AAAAGMRdAAAgLEIOgAAwFgEHQAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsQg6AADAWAQdAABgLIIOAAAwFkEHAAAYi6ADAACMRdABAADGIugAAABjEXQAAICxCDoAAMBYBB0AAGAsgg4AADAWQQcAABiLoAMAAIxF0AEAAMYi6AAAAGMRdAAAgLEIOgAAwFgEHQAAYKxYpxsAnJBZ9J5K7r38Z7DVJUn6+3NTHe4KABBtnNEBAADG6vNndF588UU9//zzOnXqlEaMGKH169frn/7pn5xuCwCA684ty94Ju+2OsVRyr0PN/H99Oui89tprKiws1Isvvqjx48dr48aNuv/++/Xxxx9ryJAhTrfnqPaDrf0gyyx6T0dX5Tnezzddzy8VdZzH9TwLAOhOffqlq7Vr12rWrFn613/9Vw0fPlzr169Xenq6NmzY4HRrAACgF+izZ3RaWlpUW1urZcuWha3n5OSopqam08cEg0EFg0H7diAQkCR9+eWXCoVCUe9xzOrfhd3et3xS1J/jSmK/vnD5zzZLFy+2KTZ0g7744osee/4r9fNNjvYTumDPpbXN1eP9dJyHk7OQLvfTW46V9n46cuL70z6TL774QnFxcT32/Ffqp53j358Of3+c/LsjOfyzhJ+1V33+7vo7dO7cOUmSZVnfXmz1UZ999pklyfrf//3fsPVVq1ZZt912W6ePeeaZZyxJfPHFF1988cWXAV8nTpz41rzQZ8/otHO5XGG3LcuKWGu3fPlyLVq0yL7d1tamL7/8UikpKVd8TF/X3Nys9PR0nThxQgMGDHC6nV6DuURiJpGYSeeYSyRm0rnumotlWTp37px8Pt+31vbZoDNo0CDFxMSooaEhbL2xsVFpaWmdPsbtdsvtdoet3Xjjjd3VYq8yYMAA/vJ1grlEYiaRmEnnmEskZtK57piLx+O5pro++2bkfv36afTo0aqqqgpbr6qq0rhx4xzqCgAA9CZ99oyOJC1atEh+v1933323srKytGnTJn36
6af6t3/7N6dbAwAAvUCfDjr/8i//oi+++ELPPvusTp06pczMTL377ru6+eabnW6t13C73XrmmWciXrK73jGXSMwkEjPpHHOJxEw61xvm4rKsa7k2CwAAoO/ps+/RAQAA+DYEHQAAYCyCDgAAMBZBBwAAGIugY6jVq1frnnvuUVJSklJTU/XjH/9YR48edbqtXmX16tVyuVwqLCx0uhXHffbZZ3rwwQeVkpKihIQE3XXXXaqtrXW6Lcd8/fXX+uUvf6mhQ4cqPj5et956q5599lm1tbU53VqP+eCDDzRt2jT5fD65XC69+eabYfdblqWioiL5fD7Fx8dr4sSJOnz4sDPN9qCrzSUUCmnp0qUaOXKkEhMT5fP59NBDD+nzzz93ruEe8G3HyjfNnTtXLpdL69ev77H+CDqG2r17t+bPn6+9e/eqqqpKX3/9tXJycnThQuQHvl2PDhw4oE2bNun73/++0604rqmpSePHj1dcXJx++9vf6uOPP9YLL7xw3fzW8M6sWbNGL730ksrKynTkyBGVlJTo+eefV2lpqdOt9ZgLFy7ozjvvVFlZWaf3l5SUaO3atSorK9OBAwfk9XqVnZ1tf9iiqa42l4sXL+rgwYN6+umndfDgQb3xxhs6duyY8vPzHei053zbsdLuzTff1L59+67pYxui6h/9cE30DY2NjZYka/fu3U634rhz585ZGRkZVlVVlTVhwgTr8ccfd7olRy1dutS67777nG6jV5k6dar16KOPhq1Nnz7devDBBx3qyFmSrJ07d9q329raLK/Xaz333HP22ldffWV5PB7rpZdecqBDZ3ScS2f2799vSbKOHz/eM0057EozOXnypPW9733POnTokHXzzTdb69at67GeOKNznQgEApKk5ORkhztx3vz58zV16lRNnjzZ6VZ6hbfeekt33323fvrTnyo1NVWjRo3S5s2bnW7LUffdd59+97vf6dixY5KkP//5z6qurtY///M/O9xZ71BfX6+Ghgbl5OTYa263WxMmTFBNTY2DnfU+gUBALpfruj5D2tbWJr/fryeffFIjRozo8efv078ZGdfGsiwtWrRI9913nzIzM51ux1E7duzQwYMHdeDAAadb6TX+9re/acOGDVq0aJF+8YtfaP/+/Vq4cKHcbrceeughp9tzxNKlSxUIBHT77bcrJiZGra2tWrVqlX72s5853Vqv0P5hyh0/QDktLU3Hjx93oqVe6auvvtKyZctUUFBwXX/Q55o1axQbG6uFCxc68vwEnevAY489pr/85S+qrq52uhVHnThxQo8//rgqKyvVv39/p9vpNdra2nT33XeruLhYkjRq1CgdPnxYGzZsuG6Dzmuvvaby8nJt375dI0aMUF1dnQoLC+Xz+TRz5kyn2+s1XC5X2G3LsiLWrlehUEgPPPCA2tra9OKLLzrdjmNqa2v1H//xHzp48KBjxwYvXRluwYIFeuutt/T+++/rpptucrodR9XW1qqxsVGjR49WbGysYmNjtXv3bv3nf/6nYmNj1dra6nSLjhg8eLDuuOOOsLXhw4fr008/dagj5z355JNatmyZHnjgAY0cOVJ+v19PPPGEVq9e7XRrvYLX65X0f2d22jU2Nkac5bkehUIhzZgxQ/X19aqqqrquz+b88Y9/VGNjo4YMGWL/3D1+/LgWL16sW265pUd64IyOoSzL0oIFC7Rz50794Q9/0NChQ51uyXGTJk3SRx99FLb2yCOP6Pbbb9fSpUsVExPjUGfOGj9+fMSvHjh27Nh1/eG4Fy9e1A03hP87MCYm5rq6vPxqhg4dKq/Xq6qqKo0aNUqS1NLSot27d2vNmjUOd+es9pDz17/+Ve+//75SUlKcbslRfr8/4v2Qubm58vv9euSRR3qkB4KOoebPn6/t27frN7/5jZKSkux/eXk8HsXHxzvcnTOSkpIi3qOUmJiolJSU6/q9S0888YTGjRun4uJizZgxQ/v379emTZu0adMmp1tzzLRp07Rq
1SoNGTJEI0aM0J/+9CetXbtWjz76qNOt9Zjz58/rk08+sW/X19errq5OycnJGjJkiAoLC1VcXKyMjAxlZGSouLhYCQkJKigocLDr7ne1ufh8Pv3kJz/RwYMH9fbbb6u1tdX+2ZucnKx+/fo51Xa3+rZjpWPYi4uLk9fr1bBhw3qmwR67vgs9SlKnXy+//LLTrfUqXF5+2f/8z/9YmZmZltvttm6//XZr06ZNTrfkqObmZuvxxx+3hgwZYvXv39+69dZbrRUrVljBYNDp1nrM+++/3+nPkJkzZ1qWdfkS82eeecbyer2W2+22fvCDH1gfffSRs033gKvNpb6+/oo/e99//32nW+8233asdNTTl5e7LMuyeiZSAQAA9CzejAwAAIxF0AEAAMYi6AAAAGMRdAAAgLEIOgAAwFgEHQAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsf4fzjSkqrb4tbcAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "df_var_num.hist(bins=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ids_selected_by_vars:  (27673,)\n"
     ]
    }
   ],
   "source": [
    "ids_selected_by_vars = df_var_num.loc[df_var_num >= 10].index\n",
    "\n",
    "print(\"ids_selected_by_vars: \", ids_selected_by_vars.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "12312"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ids_selected = set(ids_before_selected) & set(ids_after_selected) & set(ids_selected_by_vars)\n",
    "len(ids_selected)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_df = all_df.loc[all_df.index.isin(ids_selected)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a label code (int) for the labels.\n",
    "label_dict = dict(zip(var_to_consider, (\"Value_\" + str(i) for i in range(len(var_to_consider)))))\n",
    "all_df.rename(label_dict, axis=1, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Time</th>\n",
       "      <th>Value_12</th>\n",
       "      <th>Value_9</th>\n",
       "      <th>Value_8</th>\n",
       "      <th>Value_1</th>\n",
       "      <th>Value_2</th>\n",
       "      <th>Value_3</th>\n",
       "      <th>Value_4</th>\n",
       "      <th>Value_13</th>\n",
       "      <th>Value_0</th>\n",
       "      <th>Value_11</th>\n",
       "      <th>Value_5</th>\n",
       "      <th>Value_6</th>\n",
       "      <th>Value_7</th>\n",
       "      <th>Value_10</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1578183</th>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1578183</th>\n",
       "      <td>13</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>73.0</td>\n",
       "      <td>84.0</td>\n",
       "      <td>181.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>34.722222</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1578183</th>\n",
       "      <td>19</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>154.0</td>\n",
       "      <td>188.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1578183</th>\n",
       "      <td>34</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>86.0</td>\n",
       "      <td>208.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1578183</th>\n",
       "      <td>49</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Time  Value_12  Value_9  Value_8  Value_1  Value_2  Value_3  Value_4  \\\n",
       "ID                                                                              \n",
       "1578183     4       NaN      4.0      NaN      NaN      NaN      NaN      NaN   \n",
       "1578183    13       NaN      NaN     73.0     84.0    181.0    100.0     17.0   \n",
       "1578183    19       NaN      4.0     75.0    154.0    188.0    100.0     20.0   \n",
       "1578183    34       NaN      4.0     73.0     86.0    208.0    100.0     15.0   \n",
       "1578183    49       NaN     10.0      NaN      NaN      NaN      NaN      NaN   \n",
       "\n",
       "          Value_13  Value_0  Value_11  Value_5  Value_6  Value_7  Value_10  \n",
       "ID                                                                          \n",
       "1578183        NaN      NaN       NaN      NaN      NaN      NaN       NaN  \n",
       "1578183  34.722222      NaN       NaN      NaN      NaN      NaN       NaN  \n",
       "1578183        NaN      NaN       NaN      NaN      NaN      NaN       NaN  \n",
       "1578183        NaN      NaN       NaN      NaN      NaN      NaN       NaN  \n",
       "1578183        NaN      NaN       NaN      NaN      NaN      NaN       NaN  "
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_value = all_df.iloc[:, 1:]\n",
    "\n",
    "arr_mask = (~np.isnan(df_value.values)).astype(float)\n",
    "\n",
    "df_mask = pd.DataFrame(arr_mask, columns=['Mask_'+i_str[6:] for i_str in df_value.columns.tolist()], index=all_df.index)\n",
    "\n",
    "df_eicu_data = pd.concat([all_df.fillna(0), df_mask], axis=1)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Time</th>\n",
       "      <th>Value_12</th>\n",
       "      <th>Value_9</th>\n",
       "      <th>Value_8</th>\n",
       "      <th>Value_1</th>\n",
       "      <th>Value_2</th>\n",
       "      <th>Value_3</th>\n",
       "      <th>Value_4</th>\n",
       "      <th>Value_13</th>\n",
       "      <th>Value_0</th>\n",
       "      <th>...</th>\n",
       "      <th>Mask_2</th>\n",
       "      <th>Mask_3</th>\n",
       "      <th>Mask_4</th>\n",
       "      <th>Mask_13</th>\n",
       "      <th>Mask_0</th>\n",
       "      <th>Mask_11</th>\n",
       "      <th>Mask_5</th>\n",
       "      <th>Mask_6</th>\n",
       "      <th>Mask_7</th>\n",
       "      <th>Mask_10</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1578183</th>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1578183</th>\n",
       "      <td>13</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>84.0</td>\n",
       "      <td>181.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>34.722222</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1578183</th>\n",
       "      <td>19</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>154.0</td>\n",
       "      <td>188.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1578183</th>\n",
       "      <td>34</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>86.0</td>\n",
       "      <td>208.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1578183</th>\n",
       "      <td>49</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 29 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         Time  Value_12  Value_9  Value_8  Value_1  Value_2  Value_3  Value_4  \\\n",
       "ID                                                                              \n",
       "1578183     4       0.0      4.0      0.0      0.0      0.0      0.0      0.0   \n",
       "1578183    13       0.0      0.0     73.0     84.0    181.0    100.0     17.0   \n",
       "1578183    19       0.0      4.0     75.0    154.0    188.0    100.0     20.0   \n",
       "1578183    34       0.0      4.0     73.0     86.0    208.0    100.0     15.0   \n",
       "1578183    49       0.0     10.0      0.0      0.0      0.0      0.0      0.0   \n",
       "\n",
       "          Value_13  Value_0  ...  Mask_2  Mask_3  Mask_4  Mask_13  Mask_0  \\\n",
       "ID                           ...                                            \n",
       "1578183   0.000000      0.0  ...     0.0     0.0     0.0      0.0     0.0   \n",
       "1578183  34.722222      0.0  ...     1.0     1.0     1.0      1.0     0.0   \n",
       "1578183   0.000000      0.0  ...     1.0     1.0     1.0      0.0     0.0   \n",
       "1578183   0.000000      0.0  ...     1.0     1.0     1.0      0.0     0.0   \n",
       "1578183   0.000000      0.0  ...     0.0     0.0     0.0      0.0     0.0   \n",
       "\n",
       "         Mask_11  Mask_5  Mask_6  Mask_7  Mask_10  \n",
       "ID                                                 \n",
       "1578183      0.0     0.0     0.0     0.0      0.0  \n",
       "1578183      0.0     0.0     0.0     0.0      0.0  \n",
       "1578183      0.0     0.0     0.0     0.0      0.0  \n",
       "1578183      0.0     0.0     0.0     0.0      0.0  \n",
       "1578183      0.0     0.0     0.0     0.0      0.0  \n",
       "\n",
       "[5 rows x 29 columns]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_eicu_data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "pat_df = pd.read_csv(path_pat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "labels_mortality = pat_df[pat_df['hospitaldischargestatus'].isin(['Expired', 'Alive'])][['patientunitstayid', 'hospitaldischargestatus']]\n",
    "\n",
    "labels_mortality['hospitaldischargestatus'] = labels_mortality['hospitaldischargestatus'].replace({\"Expired\": 1, \"Alive\": 0})\n",
    "\n",
    "labels_mortality.rename(columns={'patientunitstayid': 'ID', 'hospitaldischargestatus': 'labels'}, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "labels_mortality = labels_mortality[labels_mortality['ID'].isin(df_eicu_data.index)]\n",
    "df_eicu_data = df_eicu_data[df_eicu_data.index.isin(labels_mortality['ID'])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(12312, 2)"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "labels_mortality.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_eicu_data.to_csv(path_processed/'eicu_data.csv')\n",
    "labels_mortality.to_csv(path_processed/'eicu_labels.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Value_7</th>\n",
       "      <th>Value_8</th>\n",
       "      <th>Mask_7</th>\n",
       "      <th>Mask_8</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2054261</th>\n",
       "      <td>0.0</td>\n",
       "      <td>113.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2054261</th>\n",
       "      <td>0.0</td>\n",
       "      <td>126.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2054261</th>\n",
       "      <td>0.0</td>\n",
       "      <td>109.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2054261</th>\n",
       "      <td>0.0</td>\n",
       "      <td>117.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2054261</th>\n",
       "      <td>0.0</td>\n",
       "      <td>117.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Value_7  Value_8  Mask_7  Mask_8\n",
       "ID                                       \n",
       "2054261      0.0    113.0     0.0     1.0\n",
       "2054261      0.0    126.0     0.0     1.0\n",
       "2054261      0.0    109.0     0.0     1.0\n",
       "2054261      0.0    117.0     0.0     1.0\n",
       "2054261      0.0    117.0     0.0     1.0"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the last five rows of two value columns next to their matching mask columns.\n",
    "df_eicu_data.loc[:, ['Value_7', 'Value_8', 'Mask_7', 'Mask_8']].tail(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    10144\n",
       "1     2168\n",
       "Name: labels, dtype: int64"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count how many rows carry each distinct value of the `labels` column.\n",
    "labels_mortality.loc[:, 'labels'].value_counts()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "leit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "5c6db37f2dbfa0dc7724e0c837d07e3540b86643967779554e04bc9c17696e47"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
