{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from pathlib import Path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "p_project = str(Path(os.getcwd()).parents[1])\n",
    "path_eicu = p_project + '/data/eicu'\n",
    "path_processed = path_eicu + \"/extracted\"\n",
    "path_data = path_processed + \"/all_data.csv\"\n",
    "path_pat = path_eicu + \"/raw/eicu-2.0/patient.csv.gz\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_df = pd.read_csv(path_data).rename(columns={'patientunitstayid': 'ID', 'itemoffset': 'Time'}).set_index('ID')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Time</th>\n",
       "      <th>FiO2</th>\n",
       "      <th>GCS Total</th>\n",
       "      <th>Heart Rate</th>\n",
       "      <th>Invasive BP Diastolic</th>\n",
       "      <th>Invasive BP Systolic</th>\n",
       "      <th>O2 Saturation</th>\n",
       "      <th>Respiratory Rate</th>\n",
       "      <th>Temperature (C)</th>\n",
       "      <th>glucose</th>\n",
       "      <th>pH</th>\n",
       "      <th>Motor</th>\n",
       "      <th>Eyes</th>\n",
       "      <th>MAP (mmHg)</th>\n",
       "      <th>Verbal</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1578183</th>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1578183</th>\n",
       "      <td>13</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>73.0</td>\n",
       "      <td>84.0</td>\n",
       "      <td>181.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>34.722222</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1578183</th>\n",
       "      <td>19</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>154.0</td>\n",
       "      <td>188.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1578183</th>\n",
       "      <td>34</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>86.0</td>\n",
       "      <td>208.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1578183</th>\n",
       "      <td>49</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Time  FiO2  GCS Total  Heart Rate  Invasive BP Diastolic  \\\n",
       "ID                                                                  \n",
       "1578183     4   NaN        4.0         NaN                    NaN   \n",
       "1578183    13   NaN        NaN        73.0                   84.0   \n",
       "1578183    19   NaN        4.0        75.0                  154.0   \n",
       "1578183    34   NaN        4.0        73.0                   86.0   \n",
       "1578183    49   NaN       10.0         NaN                    NaN   \n",
       "\n",
       "         Invasive BP Systolic  O2 Saturation  Respiratory Rate  \\\n",
       "ID                                                               \n",
       "1578183                   NaN            NaN               NaN   \n",
       "1578183                 181.0          100.0              17.0   \n",
       "1578183                 188.0          100.0              20.0   \n",
       "1578183                 208.0          100.0              15.0   \n",
       "1578183                   NaN            NaN               NaN   \n",
       "\n",
       "         Temperature (C)  glucose  pH  Motor  Eyes  MAP (mmHg)  Verbal  \n",
       "ID                                                                      \n",
       "1578183              NaN      NaN NaN    NaN   NaN         NaN     NaN  \n",
       "1578183        34.722222      NaN NaN    NaN   NaN         NaN     NaN  \n",
       "1578183              NaN      NaN NaN    NaN   NaN         NaN     NaN  \n",
       "1578183              NaN      NaN NaN    NaN   NaN         NaN     NaN  \n",
       "1578183              NaN      NaN NaN    NaN   NaN         NaN     NaN  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "var_to_consider = ['glucose', 'Invasive BP Diastolic', 'Invasive BP Systolic',\n",
    "                   'O2 Saturation', 'Respiratory Rate', 'Motor', 'Eyes', 'MAP (mmHg)',\n",
    "                   'Heart Rate', 'GCS Total', 'Verbal', 'pH', 'FiO2', 'Temperature (C)']\n",
    "\n",
    "cols_needed = var_to_consider + ['Time']\n",
    "for col_name in all_df.columns:\n",
    "    if not col_name in cols_needed:\n",
    "        all_df.drop(col_name, axis=1, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(8478327, 15)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_df = all_df[(all_df['Time']>=0) & (all_df['Time']<2880)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3334730, 15)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "tmp_df = all_df.groupby('ID').size()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Axes: >"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjAAAAGdCAYAAAAMm0nCAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAMclJREFUeJzt3X10VPWdx/FPEjIDESYRMJlkCTEVC0QIIEiYVVmUkIBZqpU/fKDCVtQDGzyFuAjpIgaoDcUiUouwPT7EPYUi9ohVgpAhFBANoFlSntqssrixK5O0UhgehyG5+4cnt47hKWSG5Je8X+fkwL33O7/53W+m5eN9mijLsiwBAAAYJLq1JwAAANBcBBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHE6tfYEIqWhoUFffvmlunXrpqioqNaeDgAAuAKWZenEiRNKSUlRdPTFj7O02wDz5ZdfKjU1tbWnAQAArsIXX3yhXr16XXR7uw0w3bp1k/R1A1wuV4vHCwaDKisrU05OjmJjY1s8Hr5GXyODvkYGfY0M+hoZpvbV7/crNTXV/nf8YtptgGk8beRyucIWYOLi4uRyuYz6ILR19DUy6Gtk0NfIoK+RYXpfL3f5BxfxAgAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABinU2tPANfGjXNKm6z7fFFeK8wEAICW4wgMAAAwDgEGAAAYp1kBZsWKFcrMzJTL5ZLL5ZLH49H7779vbx81apSioqJCfqZOnRoyRk1NjfLy8hQXF6fExETNmjVL58+fD6nZunWrbr31VjmdTvXp00clJSVXv4cd1I1zSkN+AABoT5p1DUyvXr20aNEi3XzzzbIsS2+88Ybuvfde7dmzR7fccosk6fHHH9eCBQvs18TFxdl/r6+vV15entxutz766CMdOXJEkyZNUmxsrH76059Kkg4fPqy8vDxNnTpVq1atUnl5uR577DElJycrNzc3HPsMAAAM16wAM378+JDl5557TitWrNDOnTvtABMXFye3233B15eVlengwYPavHmzkpKSNHjwYC1cuFCzZ89WUVGRHA6HVq5cqfT0dC1ZskSS1L9/f+3YsUNLly4lwAAAAEktuAupvr5eb731lk6dOiWPx2OvX7VqlX7961/L7XZr/PjxeuaZZ+yjMBUVFRo4cKCSkpLs+tzcXE2bNk0HDhzQkCFDVFFRoezs7JD3ys3N1YwZMy45n0AgoEAgYC/7/X5JUjAYVDAYvNrdtDWOEY6xrgVnjHXZmrawL6b11RT0NTLoa2TQ18gwta9XOt9mB5h9+/bJ4/Ho7Nmz6tq1q9atW6eMjAxJ0sMPP6y0tDSlpKRo7969mj17tqqrq/X2229Lknw+X0h4kWQv+3y+S9b4/X6dOXNGXbp0ueC8iouLNX/+/Cbry8rKQk5jtZTX6w3bWJG0ePjlazZs2BD5iVwhU/pqGvoaGfQ1MuhrZJjW19OnT19RXbMDTN++fVVVVaXjx4/rt7/9rSZPnqxt27YpIyNDTzzxhF03cOBAJScna/To0Tp06JBuuumm5r5VsxQWFqqgoMBe9vv9Sk1NVU5OjlwuV4vHDwaD8nq9GjNmjGJjY1s8XqQNKNp02Zr9Ra1/Ss60vpqCvkYGfY0M+hoZpva18QzK5TQ7wDgcDvXp00eSNHToUH388cdatmyZ/uM//qNJbVZWliTps88+00033SS3263du3eH1NTW1kqSfd2M2+22132zxuVyXfToiyQ5nU45nc4m62NjY8P6iwv3eJESqI+6bE1b2g9T+moa+hoZ9DUy6GtkmNbXK51ri58D09DQEHLtyTdVVVVJkpKTkyVJHo9H+/btU11dnV3j9Xrlcrns01Aej0fl5eUh43i93pDrbAAAQMfW
rCMwhYWFGjdunHr37q0TJ05o9erV2rp1qzZt2qRDhw5p9erVuueee9SjRw/t3btXM2fO1MiRI5WZmSlJysnJUUZGhh555BEtXrxYPp9Pc+fOVX5+vn30ZOrUqfrlL3+pp59+Wo8++qi2bNmitWvXqrSUZ5kAAICvNSvA1NXVadKkSTpy5Iji4+OVmZmpTZs2acyYMfriiy+0efNmvfjiizp16pRSU1M1YcIEzZ071359TEyM1q9fr2nTpsnj8ei6667T5MmTQ54bk56ertLSUs2cOVPLli1Tr1699Morr3ALNQAAsDUrwLz66qsX3Zaamqpt27Zddoy0tLTL3v0yatQo7dmzpzlTAwAAHQjfhQQAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYp1NrTwBty41zSkOWP1+U10ozAQDg4ggwHdi3wwoAAKbgFBIAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGaVaAWbFihTIzM+VyueRyueTxePT+++/b28+ePav8/Hz16NFDXbt21YQJE1RbWxsyRk1NjfLy8hQXF6fExETNmjVL58+fD6nZunWrbr31VjmdTvXp00clJSVXv4cAAKDdaVaA6dWrlxYtWqTKykp98sknuvvuu3XvvffqwIEDkqSZM2fqvffe01tvvaVt27bpyy+/1P3332+/vr6+Xnl5eTp37pw++ugjvfHGGyopKdG8efPsmsOHDysvL0933XWXqqqqNGPGDD322GPatGlTmHYZAACYrlNzisePHx+y/Nxzz2nFihXauXOnevXqpVdffVWrV6/W3XffLUl6/fXX1b9/f+3cuVMjRoxQWVmZDh48qM2bNyspKUmDBw/WwoULNXv2bBUVFcnhcGjlypVKT0/XkiVLJEn9+/fXjh07tHTpUuXm5oZptwEAgMmu+hqY+vp6rVmzRqdOnZLH41FlZaWCwaCys7Ptmn79+ql3796qqKiQJFVUVGjgwIFKSkqya3Jzc+X3++2jOBUVFSFjNNY0jgEAANCsIzCStG/fPnk8Hp09e1Zdu3bVunXrlJGRoaqqKjkcDiUkJITUJyUlyefzSZJ8Pl9IeGnc3rjtUjV+v19nzpxRly5dLjivQCCgQCBgL/v9fklSMBhUMBhs7m420ThGOMa6FpwxVljGifT+mtZXU9DXyKCvkUFfI8PUvl7pfJsdYPr27auqqiodP35cv/3tbzV58mRt27at2RMMt+LiYs2fP7/J+rKyMsXFxYXtfbxeb9jGiqTFw8MzzoYNG8Iz0GWY0lfT0NfIoK+RQV8jw7S+nj59+orqmh1gHA6H+vTpI0kaOnSoPv74Yy1btkwPPPCAzp07p2PHjoUchamtrZXb7ZYkud1u7d69O2S8xruUvlnz7TuXamtr5XK5Lnr0RZIKCwtVUFBgL/v9fqWmpionJ0cul6u5u9lEMBiU1+vVmDFjFBsb2+LxIm1AUXguet5fFNnrjkzrqynoa2TQ18igr5Fhal8bz6BcTrMDzLc1NDQoEAho6NChio2NVXl5uSZMmCBJqq6uVk1NjTwejyTJ4/HoueeeU11dnRITEyV9nQxdLpcyMjLsmm//V7/X67XHuBin0ymn09lkfWxsbFh/ceEeL1IC9VFhGeda7aspfTUNfY0M+hoZ9DUyTOvrlc61WQGmsLBQ48aNU+/evXXixAmtXr1aW7du1aZNmxQfH68pU6aooKBA3bt3l8vl0pNPPimPx6MRI0ZIknJycpSRkaFHHnlEixcvls/n09y5c5Wfn2+Hj6lTp+qXv/ylnn76aT36
6KPasmWL1q5dq9LS0ma2AAAAtFfNCjB1dXWaNGmSjhw5ovj4eGVmZmrTpk0aM2aMJGnp0qWKjo7WhAkTFAgElJubq5dfftl+fUxMjNavX69p06bJ4/Houuuu0+TJk7VgwQK7Jj09XaWlpZo5c6aWLVumXr166ZVXXuEWagAAYGtWgHn11Vcvub1z585avny5li9fftGatLS0y14YOmrUKO3Zs6c5UwMAAB0I34UEAACM0+KLeNH6bpzD9UEAgI6FIzAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjNOsAFNcXKzbbrtN3bp1U2Jiou677z5VV1eH1IwaNUpRUVEhP1OnTg2pqampUV5enuLi4pSYmKhZs2bp/PnzITVbt27VrbfeKqfTqT59+qikpOTq9hAAALQ7zQow27ZtU35+vnbu3Cmv16tgMKicnBydOnUqpO7xxx/XkSNH7J/Fixfb2+rr65WXl6dz587po48+0htvvKGSkhLNmzfPrjl8+LDy8vJ01113qaqqSjNmzNBjjz2mTZs2tXB3AQBAe9CpOcUbN24MWS4pKVFiYqIqKys1cuRIe31cXJzcbvcFxygrK9PBgwe1efNmJSUlafDgwVq4cKFmz56toqIiORwOrVy5Uunp6VqyZIkkqX///tqxY4eWLl2q3Nzc5u4jAABoZ5oVYL7t+PHjkqTu3buHrF+1apV+/etfy+12a/z48XrmmWcUFxcnSaqoqNDAgQOVlJRk1+fm5mratGk6cOCAhgwZooqKCmVnZ4eMmZubqxkzZlx0LoFAQIFAwF72+/2SpGAwqGAw2JLdtMf55p9tiTPGitjYkd7fttxXk9HXyKCvkUFfI8PUvl7pfK86wDQ0NGjGjBm6/fbbNWDAAHv9ww8/rLS0NKWkpGjv3r2aPXu2qqur9fbbb0uSfD5fSHiRZC/7fL5L1vj9fp05c0ZdunRpMp/i4mLNnz+/yfqysjI7PIWD1+sN21jhsnh45MbesGFD5Ab/hrbY1/aAvkYGfY0M+hoZpvX19OnTV1R31QEmPz9f+/fv144dO0LWP/HEE/bfBw4cqOTkZI0ePVqHDh3STTfddLVvd1mFhYUqKCiwl/1+v1JTU5WTkyOXy9Xi8YPBoLxer8aMGaPY2NgWjxdOA4oid23Q/qLInrJry301GX2NDPoaGfQ1Mkzta+MZlMu5qgAzffp0rV+/Xtu3b1evXr0uWZuVlSVJ+uyzz3TTTTfJ7XZr9+7dITW1tbWSZF8343a77XXfrHG5XBc8+iJJTqdTTqezyfrY2Niw/uLCPV44BOqjIjb2tdrXttjX9oC+RgZ9jQz6Ghmm9fVK59qsu5Asy9L06dO1bt06bdmyRenp6Zd9TVVVlSQpOTlZkuTxeLRv3z7V1dXZNV6vVy6XSxkZGXZNeXl5yDher1cej6c50wUAAO1UswJMfn6+fv3rX2v16tXq1q2bfD6ffD6fzpw5I0k6dOiQFi5cqMrKSn3++ed69913NWnSJI0cOVKZmZmSpJycHGVkZOiRRx7RH/7wB23atElz585Vfn6+fQRl6tSp+p//+R89/fTT+tOf/qSXX35Za9eu1cyZM8O8+wAAwETNCjArVqzQ8ePHNWrUKCUnJ9s/b775piTJ4XBo8+bNysnJUb9+/fTUU09pwoQJeu+99+wxYmJitH79esXExMjj8egHP/iBJk2apAULFtg16enpKi0tldfr1aBBg7RkyRK98sor3EINAAAk
NfMaGMu69O26qamp2rZt22XHSUtLu+zdLaNGjdKePXuaMz0AANBB8F1IAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAME6n1p4A2rYb55Q2Wff5orxWmAkAAH/HERgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwTrMCTHFxsW677TZ169ZNiYmJuu+++1RdXR1Sc/bsWeXn56tHjx7q2rWrJkyYoNra2pCampoa5eXlKS4uTomJiZo1a5bOnz8fUrN161bdeuutcjqd6tOnj0pKSq5uDwEAQLvTrACzbds25efna+fOnfJ6vQoGg8rJydGpU6fsmpkzZ+q9997TW2+9pW3btunLL7/U/fffb2+vr69XXl6ezp07p48++khvvPGGSkpKNG/ePLvm8OHDysvL01133aWqqirNmDFDjz32mDZt2hSGXQYAAKbr1JzijRs3hiyXlJQoMTFRlZWVGjlypI4fP65XX31Vq1ev1t133y1Jev3119W/f3/t3LlTI0aMUFlZmQ4ePKjNmzcrKSlJgwcP1sKFCzV79mwVFRXJ4XBo5cqVSk9P15IlSyRJ/fv3144dO7R06VLl5uaGadcBAICpmhVgvu348eOSpO7du0uSKisrFQwGlZ2dbdf069dPvXv3VkVFhUaMGKGKigoNHDhQSUlJdk1ubq6mTZumAwcOaMiQIaqoqAgZo7FmxowZF51LIBBQIBCwl/1+vyQpGAwqGAy2ZDftcb75Z1vijLGu6fuFswdtua8mo6+RQV8jg75Ghql9vdL5XnWAaWho0IwZM3T77bdrwIABkiSfzyeHw6GEhISQ2qSkJPl8Prvmm+GlcXvjtkvV+P1+nTlzRl26dGkyn+LiYs2fP7/J+rKyMsXFxV3dTl6A1+sN21jhsnj4tX2/DRs2hH3MttjX9oC+RgZ9jQz6Ghmm9fX06dNXVHfVASY/P1/79+/Xjh07rnaIsCosLFRBQYG97Pf7lZqaqpycHLlcrhaPHwwG5fV6NWbMGMXGxrZ4vHAaUHRtrw3aXxS+03htua8mo6+RQV8jg75Ghql9bTyDcjlXFWCmT5+u9evXa/v27erVq5e93u1269y5czp27FjIUZja2lq53W67Zvfu3SHjNd6l9M2ab9+5VFtbK5fLdcGjL5LkdDrldDqbrI+NjQ3rLy7c44VDoD7qmr5fJPa/Lfa1PaCvkUFfI4O+RoZpfb3SuTbrLiTLsjR9+nStW7dOW7ZsUXp6esj2oUOHKjY2VuXl5fa66upq1dTUyOPxSJI8Ho/27dunuro6u8br9crlcikjI8Ou+eYYjTWNYwAAgI6tWUdg8vPztXr1av3ud79Tt27d7GtW4uPj1aVLF8XHx2vKlCkqKChQ9+7d5XK59OSTT8rj8WjEiBGSpJycHGVkZOiRRx7R4sWL5fP5NHfuXOXn59tHUKZOnapf/vKXevrpp/Xoo49qy5YtWrt2rUpLS8O8+wAAwETNOgKzYsUKHT9+XKNGjVJycrL98+abb9o1S5cu1T//8z9rwoQJGjlypNxut95++217e0xMjNavX6+YmBh5PB794Ac/0KRJk7RgwQK7Jj09XaWlpfJ6vRo0aJCWLFmiV155hVuoAQCApGYegbGsy9+u27lzZy1fvlzLly+/aE1aWtpl72QZNWqU9uzZ05zpAQCADoLvQgIAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYJwWfZkjOqYb54Q+j+fzRXmtNBMAQEfFERgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYh
wAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGCcTq09AZjvxjmlTdZ9viivFWYCAOgoCDCIiG+HGgINACCcOIUEAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4zQ4w27dv1/jx45WSkqKoqCi98847Idv/5V/+RVFRUSE/Y8eODak5evSoJk6cKJfLpYSEBE2ZMkUnT54Mqdm7d6/uvPNOde7cWampqVq8eHHz9w4AALRLzQ4wp06d0qBBg7R8+fKL1owdO1ZHjhyxf37zm9+EbJ84caIOHDggr9er9evXa/v27XriiSfs7X6/Xzk5OUpLS1NlZaWef/55FRUV6Ve/+lVzpwsAANqhZj8HZty4cRo3btwla5xOp9xu9wW3/fGPf9TGjRv18ccfa9iwYZKkl156Sffcc49+/vOfKyUlRatWrdK5c+f02muvyeFw6JZbblFVVZVeeOGFkKADAAA6pog8yG7r1q1KTEzU9ddfr7vvvls/+clP1KNHD0lSRUWFEhIS7PAiSdnZ2YqOjtauXbv0/e9/XxUVFRo5cqQcDoddk5ubq5/97Gf629/+puuvv77JewYCAQUCAXvZ7/dLkoLBoILBYIv3qXGMcIwVbs4Yq7WncFkX61tb7qvJ6Gtk0NfIoK+RYWpfr3S+YQ8wY8eO1f3336/09HQdOnRIP/7xjzVu3DhVVFQoJiZGPp9PiYmJoZPo1Endu3eXz+eTJPl8PqWnp4fUJCUl2dsuFGCKi4s1f/78JuvLysoUFxcXrt2T1+sN21jhsnh4a8/g8jZs2HDJ7W2xr+0BfY0M+hoZ9DUyTOvr6dOnr6gu7AHmwQcftP8+cOBAZWZm6qabbtLWrVs1evTocL+drbCwUAUFBfay3+9XamqqcnJy5HK5Wjx+MBiU1+vVmDFjFBsb2+LxwmlA0abWnsJl7S/KveD6ttxXk9HXyKCvkUFfI8PUvjaeQbmciH8X0ne+8x317NlTn332mUaPHi232626urqQmvPnz+vo0aP2dTNut1u1tbUhNY3LF7u2xul0yul0NlkfGxsb1l9cuMcLh0B9VGtP4bIu17O22Nf2gL5GBn2NDPoaGab19UrnGvHnwPz5z3/WV199peTkZEmSx+PRsWPHVFlZadds2bJFDQ0NysrKsmu2b98ech7M6/Wqb9++Fzx9BAAAOpZmB5iTJ0+qqqpKVVVVkqTDhw+rqqpKNTU1OnnypGbNmqWdO3fq888/V3l5ue6991716dNHublfn0Lo37+/xo4dq8cff1y7d+/Whx9+qOnTp+vBBx9USkqKJOnhhx+Ww+HQlClTdODAAb355ptatmxZyCkiAADQcTU7wHzyyScaMmSIhgwZIkkqKCjQkCFDNG/ePMXExGjv3r363ve+p+9+97uaMmWKhg4dqg8++CDk9M6qVavUr18/jR49Wvfcc4/uuOOOkGe8xMfHq6ysTIcPH9bQoUP11FNPad68edxCDQAAJF3FNTCjRo2SZV38tt1Nmy5/QWn37t21evXqS9ZkZmbqgw8+aO70AABAB8B3IQEAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMbp1NoTQMdw45zSJus+X5TXCjMBALQHHIEBAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiH58AY6ELPVAEAoCPhCAwAADAOAQYAABiHAINWc+OcUg0o2iRJ9p8AAFwJAgwAADAO
AQYAABiHAAMAAIzT7ACzfft2jR8/XikpKYqKitI777wTst2yLM2bN0/Jycnq0qWLsrOz9emnn4bUHD16VBMnTpTL5VJCQoKmTJmikydPhtTs3btXd955pzp37qzU1FQtXry4+XsHAADapWYHmFOnTmnQoEFavnz5BbcvXrxYv/jFL7Ry5Urt2rVL1113nXJzc3X27Fm7ZuLEiTpw4IC8Xq/Wr1+v7du364knnrC3+/1+5eTkKC0tTZWVlXr++edVVFSkX/3qV1exiwAAoL1p9oPsxo0bp3Hjxl1wm2VZevHFFzV37lzde++9kqT//M//VFJSkt555x09+OCD+uMf/6iNGzfq448/1rBhwyRJL730ku655x79/Oc/V0pKilatWqVz587ptddek8Ph0C233KKqqiq98MILIUEHAAB0TGF9Eu/hw4fl8/mUnZ1tr4uPj1dWVpYqKir04IMPqqKiQgkJCXZ4kaTs7GxFR0dr165d+v73v6+KigqNHDlSDofDrsnNzdXPfvYz/e1vf9P111/f5L0DgYACgYC97Pf7JUnBYFDBYLDF+9Y4RjjGailnjNXaUwgbZ7Rl/9kWettetKXPa3tCXyODvkaGqX290vmGNcD4fD5JUlJSUsj6pKQke5vP51NiYmLoJDp1Uvfu3UNq0tPTm4zRuO1CAaa4uFjz589vsr6srExxcXFXuUdNeb3esI11tRYPb+0ZhN/CYQ3asGFDa0+j3WkLn9f2iL5GBn2NDNP6evr06SuqazffhVRYWKiCggJ72e/3KzU1VTk5OXK5XC0ePxgMyuv1asyYMYqNjW3xeC3Rnh765oy2tHBYg575JFqV88a29nTajbb0eW1P6Gtk0NfIMLWvjWdQLiesAcbtdkuSamtrlZycbK+vra3V4MGD7Zq6urqQ150/f15Hjx61X+92u1VbWxtS07jcWPNtTqdTTqezyfrY2Niw/uLCPd7VCNRHter7R0KgIarV+9oetYXPa3tEXyODvkaGaX290rmG9Tkw6enpcrvdKi8vt9f5/X7t2rVLHo9HkuTxeHTs2DFVVlbaNVu2bFFDQ4OysrLsmu3bt4ecB/N6verbt+8FTx8BAICOpdkB5uTJk6qqqlJVVZWkry/craqqUk1NjaKiojRjxgz95Cc/0bvvvqt9+/Zp0qRJSklJ0X333SdJ6t+/v8aOHavHH39cu3fv1ocffqjp06frwQcfVEpKiiTp4YcflsPh0JQpU3TgwAG9+eabWrZsWcgpIgAA0HE1+xTSJ598orvuustebgwVkydPVklJiZ5++mmdOnVKTzzxhI4dO6Y77rhDGzduVOfOne3XrFq1StOnT9fo0aMVHR2tCRMm6Be/+IW9PT4+XmVlZcrPz9fQoUPVs2dPzZs3j1uoAQCApKsIMKNGjZJlXfw23qioKC1YsEALFiy4aE337t21evXqS75PZmamPvjgg+ZOD+3MjXNKQ5Y/X5TXSjMBALQlfBcSAAAwTru5jRrm+/bRFgAALoYjMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjdGrtCeDSbpxT2tpTAACgzeEIDAAAMA4BBgAAGIcAAwAAjMM1MDDKha4J+nxRXivMBADQmjgCAwAAjEOAAQAAxiHAAAAA4xBgAACAcbiIF8b79oW9XNQLAO0fR2AAAIBxCDAAAMA4BBgAAGAcAgwAADAOF/G2Ip4qCwDA1eEIDAAAMA5HYNqYCx2VQctxqzUAtC8cgQEAAMbhCAw6JK4/AgCzEWDQ7nAaDgDaP04hAQAA44Q9wBQVFSkqKirkp1+/fvb2s2fPKj8/Xz169FDXrl01YcIE1dbWhoxRU1OjvLw8xcXFKTExUbNmzdL58+fDPVUAAGCoiJxCuuWWW7R58+a/v0mnv7/NzJkzVVpaqrfeekvx
8fGaPn267r//fn344YeSpPr6euXl5cntduujjz7SkSNHNGnSJMXGxuqnP/1pJKYLAAAME5EA06lTJ7nd7ibrjx8/rldffVWrV6/W3XffLUl6/fXX1b9/f+3cuVMjRoxQWVmZDh48qM2bNyspKUmDBw/WwoULNXv2bBUVFcnhcERiygAAwCARCTCffvqpUlJS1LlzZ3k8HhUXF6t3796qrKxUMBhUdna2XduvXz/17t1bFRUVGjFihCoqKjRw4EAlJSXZNbm5uZo2bZoOHDigIUOGXPA9A4GAAoGAvez3+yVJwWBQwWCwxfvUOEY4xmrkjLHCNpapnNFWyJ+tKZy/29YWic8r6Guk0NfIMLWvVzrfsAeYrKwslZSUqG/fvjpy5Ijmz5+vO++8U/v375fP55PD4VBCQkLIa5KSkuTz+SRJPp8vJLw0bm/cdjHFxcWaP39+k/VlZWWKi4tr4V79ndfrDdtYi4eHbSjjLRzW0NpT0IYNG1p7CmEXzs8r/o6+RgZ9jQzT+nr69Okrqgt7gBk3bpz998zMTGVlZSktLU1r165Vly5dwv12tsLCQhUUFNjLfr9fqampysnJkcvlavH4wWBQXq9XY8aMUWxsbIvHk6QBRZvCMo7JnNGWFg5r0DOfRCvQENWqc9lflNuq7x9Okfi8gr5GCn2NDFP72ngG5XIi/hyYhIQEffe739Vnn32mMWPG6Ny5czp27FjIUZja2lr7mhm3263du3eHjNF4l9KFrqtp5HQ65XQ6m6yPjY0N6y8unOMF6lv3H+y2JNAQ1er9MOl/4Fcq3J9/fI2+RgZ9jQzT+nqlc434c2BOnjypQ4cOKTk5WUOHDlVsbKzKy8vt7dXV1aqpqZHH45EkeTwe7du3T3V1dXaN1+uVy+VSRkZGpKcLAAAMEPYjMP/2b/+m8ePHKy0tTV9++aWeffZZxcTE6KGHHlJ8fLymTJmigoICde/eXS6XS08++aQ8Ho9GjBghScrJyVFGRoYeeeQRLV68WD6fT3PnzlV+fv4Fj7AAAICOJ+wB5s9//rMeeughffXVV7rhhht0xx13aOfOnbrhhhskSUuXLlV0dLQmTJigQCCg3Nxcvfzyy/brY2JitH79ek2bNk0ej0fXXXedJk+erAULFoR7qgAAwFBhDzBr1qy55PbOnTtr+fLlWr58+UVr0tLS2uUdIWjbvv0dSny5IwC0XXwXEgAAMA7fRh0hF/pGZP6L3iz8DgGg7SLAXEMX+gcRZiPkAEDr4BQSAAAwDgEGAAAYhwADAACMwzUwQDNwHRMAtA0cgQEAAMYhwAAAAOMQYAAAgHEIMAAAwDhcxAu0ATwQDwCahyMwAADAOAQYAABgHE4hAWEWqWfFXMlppsYaZ4ylxcOlAUWbVP3cP0dkPgDQmjgCAwAAjMMRGKCN4qm/AHBxBBjAYIQcAB0Vp5AAAIBxCDAAAMA4BBgAAGAcroEJE65FAADg2uEIDAAAMA5HYIB27ttHB/mOJQDtAQEG6GD44kgA7QGnkAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4X8QLgTiUAxuEIDAAAMA4BBgAAGIcAAwAAjEOAAQAAxuEiXgBN8LReAG0dR2AAAIBxOAID4IpwqzWAtoQjMAAAwDht+gjM8uXL9fzzz8vn82nQoEF66aWXNHz48Nae1gWvDwAAANdOmw0wb775pgoKCrRy5UplZWXpxRdfVG5urqqrq5WYmNja0wM6vCsJ8pxmAhApbTbAvPDCC3r88cf1wx/+UJK0cuVKlZaW6rXXXtOcOXNaeXYArgR3MwGIlDYZYM6dO6fKykoVFhba66Kjo5Wdna2KiooLviYQCCgQCNjLx48flyQdPXpUwWCwxXMKBoM6ffq0vvrqK3U6f6rF4+FrnRosnT7doE7BaNU3RLX2dNqNttzXPv+2ttmv2VU4usm6rOLyy9aE2zf/fyA2Njbi79dR0NfIMLWvJ06ckCRZlnXJujYZYP7617+qvr5e
SUlJIeuTkpL0pz/96YKvKS4u1vz585usT09Pj8gcET4Pt/YE2qn21NeeS8JTA8AcJ06cUHx8/EW3t8kAczUKCwtVUFBgLzc0NOjo0aPq0aOHoqJa/l+gfr9fqamp+uKLL+RyuVo8Hr5GXyODvkYGfY0M+hoZpvbVsiydOHFCKSkpl6xrkwGmZ8+eiomJUW1tbcj62tpaud3uC77G6XTK6XSGrEtISAj73Fwul1EfBFPQ18igr5FBXyODvkaGiX291JGXRm3yOTAOh0NDhw5Vefnfz3E3NDSovLxcHo+nFWcGAADagjZ5BEaSCgoKNHnyZA0bNkzDhw/Xiy++qFOnTtl3JQEAgI6rzQaYBx54QH/5y180b948+Xw+DR48WBs3bmxyYe+14nQ69eyzzzY5TYWWoa+RQV8jg75GBn2NjPbe1yjrcvcpAQAAtDFt8hoYAACASyHAAAAA4xBgAACAcQgwAADAOASYK7B8+XLdeOON6ty5s7KysrR79+7WnlKbtn37do0fP14pKSmKiorSO++8E7LdsizNmzdPycnJ6tKli7Kzs/Xpp5+G1Bw9elQTJ06Uy+VSQkKCpkyZopMnT17DvWh7iouLddttt6lbt25KTEzUfffdp+rq6pCas2fPKj8/Xz169FDXrl01YcKEJg+ErKmpUV5enuLi4pSYmKhZs2bp/Pnz13JX2pQVK1YoMzPTftiXx+PR+++/b2+np+GxaNEiRUVFacaMGfY6ett8RUVFioqKCvnp16+fvb1D9dTCJa1Zs8ZyOBzWa6+9Zh04cMB6/PHHrYSEBKu2tra1p9Zmbdiwwfr3f/936+2337YkWevWrQvZvmjRIis+Pt565513rD/84Q/W9773PSs9Pd06c+aMXTN27Fhr0KBB1s6dO60PPvjA6tOnj/XQQw9d4z1pW3Jzc63XX3/d2r9/v1VVVWXdc889Vu/eva2TJ0/aNVOnTrVSU1Ot8vJy65NPPrFGjBhh/eM//qO9/fz589aAAQOs7Oxsa8+ePdaGDRusnj17WoWFha2xS23Cu+++a5WWllr//d//bVVXV1s//vGPrdjYWGv//v2WZdHTcNi9e7d14403WpmZmdaPfvQjez29bb5nn33WuuWWW6wjR47YP3/5y1/s7R2ppwSYyxg+fLiVn59vL9fX11spKSlWcXFxK87KHN8OMA0NDZbb7baef/55e92xY8csp9Np/eY3v7Esy7IOHjxoSbI+/vhju+b999+3oqKirP/7v/+7ZnNv6+rq6ixJ1rZt2yzL+rqPsbGx1ltvvWXX/PGPf7QkWRUVFZZlfR0uo6OjLZ/PZ9esWLHCcrlcViAQuLY70IZdf/311iuvvEJPw+DEiRPWzTffbHm9Xuuf/umf7ABDb6/Os88+aw0aNOiC2zpaTzmFdAnnzp1TZWWlsrOz7XXR0dHKzs5WRUVFK87MXIcPH5bP5wvpaXx8vLKysuyeVlRUKCEhQcOGDbNrsrOzFR0drV27dl3zObdVx48flyR1795dklRZWalgMBjS2379+ql3794hvR04cGDIAyFzc3Pl9/t14MCBazj7tqm+vl5r1qzRqVOn5PF46GkY5OfnKy8vL6SHEp/Xlvj000+VkpKi73znO5o4caJqamokdbyettkn8bYFf/3rX1VfX9/k6b9JSUn605/+1EqzMpvP55OkC/a0cZvP51NiYmLI9k6dOql79+52TUfX0NCgGTNm6Pbbb9eAAQMkfd03h8PR5EtMv93bC/W+cVtHtW/fPnk8Hp09e1Zdu3bVunXrlJGRoaqqKnraAmvWrNF//dd/6eOPP26yjc/r1cnKylJJSYn69u2rI0eOaP78+brzzju1f//+DtdTAgxgoPz8fO3fv187duxo7am0C3379lVVVZWOHz+u3/72t5o8ebK2bdvW2tMy2hdffKEf/ehH8nq96ty5c2tPp90YN26c/ffMzExlZWUpLS1Na9euVZcuXVpxZtcep5AuoWfPnoqJ
iWlyBXdtba3cbncrzcpsjX27VE/dbrfq6upCtp8/f15Hjx6l75KmT5+u9evX6/e//7169eplr3e73Tp37pyOHTsWUv/t3l6o943bOiqHw6E+ffpo6NChKi4u1qBBg7Rs2TJ62gKVlZWqq6vTrbfeqk6dOqlTp07atm2bfvGLX6hTp05KSkqit2GQkJCg7373u/rss8863OeVAHMJDodDQ4cOVXl5ub2uoaFB5eXl8ng8rTgzc6Wnp8vtdof01O/3a9euXXZPPR6Pjh07psrKSrtmy5YtamhoUFZW1jWfc1thWZamT5+udevWacuWLUpPTw/ZPnToUMXGxob0trq6WjU1NSG93bdvX0hA9Hq9crlcysjIuDY7YoCGhgYFAgF62gKjR4/Wvn37VFVVZf8MGzZMEydOtP9Ob1vu5MmTOnTokJKTkzve57W1ryJu69asWWM5nU6rpKTEOnjwoPXEE09YCQkJIVdwI9SJEyesPXv2WHv27LEkWS+88IK1Z88e63//938ty/r6NuqEhATrd7/7nbV3717r3nvvveBt1EOGDLF27dpl7dixw7r55ps7/G3U06ZNs+Lj462tW7eG3EJ5+vRpu2bq1KlW7969rS1btliffPKJ5fF4LI/HY29vvIUyJyfHqqqqsjZu3GjdcMMNRt5CGS5z5syxtm3bZh0+fNjau3evNWfOHCsqKsoqKyuzLIuehtM370KyLHp7NZ566ilr69at1uHDh60PP/zQys7Otnr27GnV1dVZltWxekqAuQIvvfSS1bt3b8vhcFjDhw+3du7c2dpTatN+//vfW5Ka/EyePNmyrK9vpX7mmWespKQky+l0WqNHj7aqq6tDxvjqq6+shx56yOratavlcrmsH/7wh9aJEydaYW/ajgv1VJL1+uuv2zVnzpyx/vVf/9W6/vrrrbi4OOv73/++deTIkZBxPv/8c2vcuHFWly5drJ49e1pPPfWUFQwGr/HetB2PPvqolZaWZjkcDuuGG26wRo8ebYcXy6Kn4fTtAENvm++BBx6wkpOTLYfDYf3DP/yD9cADD1ifffaZvb0j9TTKsiyrdY79AAAAXB2ugQEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOP8PdZM0ScNn4Y8AAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "tmp_df.hist(bins=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    40005.000000\n",
       "mean        83.357830\n",
       "std         43.954611\n",
       "min          3.000000\n",
       "25%         55.000000\n",
       "50%         70.000000\n",
       "75%        101.000000\n",
       "max        527.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tmp_df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "ids_before = all_df.loc[all_df['Time'] < 1440].groupby('ID').size()\n",
    "ids_after = all_df.loc[all_df['Time'] > 1440].groupby('ID').size()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ids_before_selected:  (30389,)\n",
      "ids_after_selected:  (18569,)\n"
     ]
    }
   ],
   "source": [
    "ids_before_selected = ids_before.loc[ids_before >= 30].index\n",
    "ids_after_selected = ids_after.loc[ids_after >= 30].index\n",
    "\n",
    "print(\"ids_before_selected: \", ids_before_selected.shape)\n",
    "print(\"ids_after_selected: \", ids_after_selected.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_var_num = all_df.iloc[:, 1:].notna().groupby(\"ID\").apply(lambda x: x.sum(axis=0).gt(0).sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Axes: >"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjkAAAGdCAYAAADwjmIIAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAALw9JREFUeJzt3X10VPWdx/FPEpJJoCQhePJUA2ZbK8+CIBgfWC0hAeMDirjRCBxMYbWJEtKDgAtpABWJgjwuKW2Vepa06K5QBBozBSUqIUAwFZAiPUWwcibZUwgjZEmGZPYPd2YdJjzZJHfyy/t1zhy49/7m3u/vm5vw4d6ZTJDb7XYLAADAMMFWFwAAANAWCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACN1sboAKzU3N+vkyZPq3r27goKCrC4HAABcBbfbra+//lqJiYkKDr709ZpOHXJOnjyppKQkq8sAAADfwZdffqnrr7/+kts7dcjp3r27pG+aFBkZaXE1bcPlcqmsrExpaWkKDQ21upyAQE9aRl/80RN/9KRl9MVfW/bE6XQqKSnJ++/4pXTqkOO5RRUZGWl0yOnatasiIyP5xvs/9KRl9MUfPfFHT1pGX/y1R0+u9FKTa37hcXl5ue6//34lJiYqKChImzZt8m5zuVyaNWuWBg4cqG7duikxMVGTJk3SyZMnffZx6tQpZWVlKTIyUtHR0crOztbZs2d9xnz66ae66667FB4erqSkJBUVFfnV8vbbb6tPnz4KDw/XwIEDtW3btmudDgAAMNQ1h5xz587p5ptv1urVq/221dfXa//+/Zo3b57279+vd955R0eOHNEDDzzgMy4rK0uHDh2S3W7Xli1bVF5ermnTpnm3O51OpaWlqXfv3qqqqtIrr7yiwsJCrV271jtm165deuyxx5Sdna1PPvlE48aN07hx43Tw4MFrnRIAADDQNd+uGjt2rMaOHdvitqioKNntdp91q1at0vDhw3XixAn16tVLhw8fVmlpqfbu3athw4ZJklauXKl7771Xr776qhITE7V+/Xo1Njbq9ddfV1hYmPr376/q6motXbrUG4aWL1+uMWPGaObMmZKkhQsXym63a9WqVSouLr7WaQEAAMO0+Wtyzpw5o6CgIEVHR0uSKioqFB0d7Q04kpSamqrg4GBVVlbqoYceUkVFhUaOHKmwsDDvmPT0dC1evFinT59Wjx49VFFRofz8fJ9jpaen+9w+u1hDQ4MaGhq8y06nU9I3t9lcLlcrzDbweOZl6vy+C3rSMvrij574oyctoy/+2rInV7vPNg0558+f16xZs/TYY495X9jrcDgUGxvrW0SXLoqJiZHD4fCOSU5O9hkTFxfn3dajRw85HA7vum+P8eyjJYsWLdL8+fP91peVlalr167XPsEO5OIrbKAnl0Jf/NETf/SkZfTFX1v0pL6+/qrGtVnIcblcevTRR+V2u7VmzZq2Osw1mTNnjs/VH89b0NLS0ox+d5Xdbtfo0aN5xf//oSctoy/+6Ik/etIy+uKvLXviuRNzJW0ScjwB5/jx49qxY4dPgIiPj1dtba3P+AsXLujUqVOKj4/3jqmpqfEZ41m+0hjP9pbYbDbZbDa/9aGhocaflJ1hjteKnrSMvvijJ/7oScvoi7+26MnV7q/VP7vKE3COHj2qP/7xj+rZs6fP9pSUFNXV1amqqsq7bseOHWpubtaIESO8Y8rLy33uudntdt10003q0aOHd8z27dt99m2325WSktLaUwIAAB3QNYecs2fPqrq6WtXV1ZKkY8eOqbq6WidOnJDL5dIjjzyiffv2af369WpqapLD4ZDD4VBjY6MkqW/fvhozZoymTp2qPXv26OOPP1Zubq4yMzOVmJgoSXr88ccVFham7OxsHTp0SBs2bNDy5ct9bjVNnz5dpaWlWrJkif785z+rsLBQ+/btU25ubiu0BQAAdHTX
HHL27dunIUOGaMiQIZKk/Px8DRkyRAUFBfrqq6+0efNm/e1vf9PgwYOVkJDgfezatcu7j/Xr16tPnz4aNWqU7r33Xt15550+vwMnKipKZWVlOnbsmIYOHaqf/exnKigo8PldOrfffrtKSkq0du1a3XzzzfrP//xPbdq0SQMGDPhH+gEAAAxxza/Jufvuu+V2uy+5/XLbPGJiYlRSUnLZMYMGDdKHH3542TETJkzQhAkTrng8AADQ+bT6a3IAAAACASEHAAAYiZADAACM1OYf6wAAQGu5YfZWn+UvXs6wqBJ0BFzJAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMNI1h5zy8nLdf//9SkxMVFBQkDZt2uSz3e12q6CgQAkJCYqIiFBqaqqOHj3qM+bUqVPKyspSZGSkoqOjlZ2drbNnz/qM+fTTT3XXXXcpPDxcSUlJKioq8qvl7bffVp8+fRQeHq6BAwdq27Zt1zodAABgqGsOOefOndPNN9+s1atXt7i9qKhIK1asUHFxsSorK9WtWzelp6fr/Pnz3jFZWVk6dOiQ7Ha7tmzZovLyck2bNs273el0Ki0tTb1791ZVVZVeeeUVFRYWau3atd4xu3bt0mOPPabs7Gx98sknGjdunMaNG6eDBw9e65QAAICBulzrE8aOHauxY8e2uM3tdmvZsmWaO3euHnzwQUnSm2++qbi4OG3atEmZmZk6fPiwSktLtXfvXg0bNkyStHLlSt1777169dVXlZiYqPXr16uxsVGvv/66wsLC1L9/f1VXV2vp0qXeMLR8+XKNGTNGM2fOlCQtXLhQdrtdq1atUnFx8XdqBgAAMMc1h5zLOXbsmBwOh1JTU73roqKiNGLECFVUVCgzM1MVFRWKjo72BhxJSk1NVXBwsCorK/XQQw+poqJCI0eOVFhYmHdMenq6Fi9erNOnT6tHjx6qqKhQfn6+z/HT09P9bp99W0NDgxoaGrzLTqdTkuRyueRyuf7R6Qckz7xMnd93QU9aRl/80RN/VvfEFuL2WQ6Ur43VfQlEbdmTq91nq4Ych8MhSYqLi/NZHxcX593mcDgUGxvrW0SXLoqJifEZk5yc7LcPz7YePXrI4XBc9jgtWbRokebPn++3vqysTF27dr2aKXZYdrvd6hICDj1pGX3xR0/8WdWTouG+y4H2WkzOFX9t0ZP6+vqrGteqISfQzZkzx+fqj9PpVFJSktLS0hQZGWlhZW3H5XLJbrdr9OjRCg0NtbqcgEBPWkZf/NETf1b3ZEDhez7LBwvT272Glljdl0DUlj3x3Im5klYNOfHx8ZKkmpoaJSQkeNfX1NRo8ODB3jG1tbU+z7tw4YJOnTrlfX58fLxqamp8xniWrzTGs70lNptNNpvNb31oaKjxJ2VnmOO1oictoy/+6Ik/q3rS0BTkV0cg4Vzx1xY9udr9tervyUlOTlZ8fLy2b9/uXed0OlVZWamUlBRJUkpKiurq6lRVVeUds2PHDjU3N2vEiBHeMeXl5T733Ox2u2666Sb16NHDO+bbx/GM8RwHAAB0btcccs6ePavq6mpVV1dL+ubFxtXV1Tpx4oSCgoKUl5enF154QZs3b9aBAwc0adIkJSYmaty4cZKkvn37asyYMZo6dar27Nmjjz/+WLm5ucrMzFRiYqIk6fHHH1dYWJiys7N1
6NAhbdiwQcuXL/e51TR9+nSVlpZqyZIl+vOf/6zCwkLt27dPubm5/3hXAABAh3fNt6v27dune+65x7vsCR6TJ0/WunXr9Nxzz+ncuXOaNm2a6urqdOedd6q0tFTh4eHe56xfv165ubkaNWqUgoODNX78eK1YscK7PSoqSmVlZcrJydHQoUN13XXXqaCgwOd36dx+++0qKSnR3Llz9fzzz+vGG2/Upk2bNGDAgO/UCAAAYJZrDjl333233G73JbcHBQVpwYIFWrBgwSXHxMTEqKSk5LLHGTRokD788MPLjpkwYYImTJhw+YIBAECnxGdXAQAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASF2sLgAAALSOG2Zv9Vn+4uUMiyoJDFzJAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYKRWDzlNTU2aN2+ekpOTFRERoR/84AdauHCh3G63d4zb7VZBQYESEhIUERGh1NRUHT161Gc/p06dUlZWliIjIxUdHa3s7GydPXvWZ8ynn36qu+66S+Hh4UpKSlJRUVFrTwcAAHRQrR5yFi9erDVr1mjVqlU6fPiwFi9erKKiIq1cudI7pqioSCtWrFBxcbEqKyvVrVs3paen6/z5894xWVlZOnTokOx2u7Zs2aLy8nJNmzbNu93pdCotLU29e/dWVVWVXnnlFRUWFmrt2rWtPSUAANABdWntHe7atUsPPvigMjIyJEk33HCDfvvb32rPnj2SvrmKs2zZMs2dO1cPPvigJOnNN99UXFycNm3apMzMTB0+fFilpaXau3evhg0bJklauXKl7r33Xr366qtKTEzU+vXr1djYqNdff11hYWHq37+/qqurtXTpUp8wBAAAOqdWDzm333671q5dq88//1w/+tGP9Kc//UkfffSRli5dKkk6duyYHA6HUlNTvc+JiorSiBEjVFFRoczMTFVUVCg6OtobcCQpNTVVwcHBqqys1EMPPaSKigqNHDlSYWFh3jHp6elavHixTp8+rR49evjV1tDQoIaGBu+y0+mUJLlcLrlcrtZuRUDwzMvU+X0X9KRl9MUfPfFndU9sIW6f5UD52ljdF49A6k9b9uRq99nqIWf27NlyOp3q06ePQkJC1NTUpBdffFFZWVmSJIfDIUmKi4vzeV5cXJx3m8PhUGxsrG+hXbooJibGZ0xycrLfPjzbWgo5ixYt0vz58/3Wl5WVqWvXrt9luh2G3W63uoSAQ09aRl/80RN/VvWkaLjv8rZt2yyp41KsPlcCsT9t0ZP6+vqrGtfqIeett97S+vXrVVJS4r2FlJeXp8TERE2ePLm1D3dN5syZo/z8fO+y0+lUUlKS0tLSFBkZaWFlbcflcslut2v06NEKDQ21upyAQE9aRl/80RN/VvdkQOF7PssHC9PbvYaWWN0Xj0DqT1v2xHMn5kpaPeTMnDlTs2fPVmZmpiRp4MCBOn78uBYtWqTJkycrPj5eklRTU6OEhATv82pqajR48GBJUnx8vGpra332e+HCBZ06dcr7/Pj4eNXU1PiM8Sx7xlzMZrPJZrP5rQ8NDTX+B1hnmOO1oictoy/+6Ik/q3rS0BTkV0cgsfpcCcT+tEVPrnZ/rf7uqvr6egUH++42JCREzc3NkqTk5GTFx8dr+/bt3u1Op1OVlZVKSUmRJKWkpKiurk5VVVXeMTt27FBzc7NGjBjhHVNe
Xu5zX85ut+umm25q8VYVAADoXFo95Nx///168cUXtXXrVn3xxRfauHGjli5dqoceekiSFBQUpLy8PL3wwgvavHmzDhw4oEmTJikxMVHjxo2TJPXt21djxozR1KlTtWfPHn388cfKzc1VZmamEhMTJUmPP/64wsLClJ2drUOHDmnDhg1avny5z+0oAADQebX67aqVK1dq3rx5+ulPf6ra2lolJibqX//1X1VQUOAd89xzz+ncuXOaNm2a6urqdOedd6q0tFTh4eHeMevXr1dubq5GjRql4OBgjR8/XitWrPBuj4qKUllZmXJycjR06FBdd911Kigo4O3jAABAUhuEnO7du2vZsmVatmzZJccEBQVpwYIFWrBgwSXHxMTEqKSk5LLHGjRokD788MPvWioAADAYn10FAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACM1CYh56uvvtITTzyhnj17KiIiQgMHDtS+ffu8291utwoKCpSQkKCIiAilpqbq6NGjPvs4deqUsrKyFBkZqejoaGVnZ+vs2bM+Yz799FPdddddCg8PV1JSkoqKitpiOgAAoANq9ZBz+vRp3XHHHQoNDdUf/vAHffbZZ1qyZIl69OjhHVNUVKQVK1aouLhYlZWV6tatm9LT03X+/HnvmKysLB06dEh2u11btmxReXm5pk2b5t3udDqVlpam3r17q6qqSq+88ooKCwu1du3a1p4SAADogLq09g4XL16spKQkvfHGG951ycnJ3r+73W4tW7ZMc+fO1YMPPihJevPNNxUXF6dNmzYpMzNThw8fVmlpqfbu3athw4ZJklauXKl7771Xr776qhITE7V+/Xo1Njbq9ddfV1hYmPr376/q6motXbrUJwwBAIDOqdVDzubNm5Wenq4JEyZo586d+v73v6+f/vSnmjp1qiTp2LFjcjgcSk1N9T4nKipKI0aMUEVFhTIzM1VRUaHo6GhvwJGk1NRUBQcHq7KyUg899JAqKio0cuRIhYWFecekp6dr8eLFOn36tM+VI4+GhgY1NDR4l51OpyTJ5XLJ5XK1disCgmdeps7vu6AnLaMv/uiJP6t7Ygtx+ywHytfG6r54BFJ/2rInV7vPVg85f/3rX7VmzRrl5+fr+eef1969e/Xss88qLCxMkydPlsPhkCTFxcX5PC8uLs67zeFwKDY21rfQLl0UExPjM+bbV4i+vU+Hw9FiyFm0aJHmz5/vt76srExdu3b9jjPuGOx2u9UlBBx60jL64o+e+LOqJ0XDfZe3bdtmSR2XYvW5Eoj9aYue1NfXX9W4Vg85zc3NGjZsmF566SVJ0pAhQ3Tw4EEVFxdr8uTJrX24azJnzhzl5+d7l51Op5KSkpSWlqbIyEgLK2s7LpdLdrtdo0ePVmhoqNXlBAR60jL64o+e+LO6JwMK3/NZPliY3u41tMTqvngEUn/asieeOzFX0uohJyEhQf369fNZ17dvX/3Xf/2XJCk+Pl6SVFNTo4SEBO+YmpoaDR482DumtrbWZx8XLlzQqVOnvM+Pj49XTU2NzxjPsmfMxWw2m2w2m9/60NBQ43+AdYY5Xit60jL64o+e+LOqJw1NQX51BBKrz5VA7E9b9ORq99fq76664447dOTIEZ91n3/+uXr37i3pmxchx8fHa/v2
7d7tTqdTlZWVSklJkSSlpKSorq5OVVVV3jE7duxQc3OzRowY4R1TXl7uc1/ObrfrpptuavFWFQAA6FxaPeTMmDFDu3fv1ksvvaS//OUvKikp0dq1a5WTkyNJCgoKUl5enl544QVt3rxZBw4c0KRJk5SYmKhx48ZJ+ubKz5gxYzR16lTt2bNHH3/8sXJzc5WZmanExERJ0uOPP66wsDBlZ2fr0KFD2rBhg5YvX+5zOwoAAHRerX676tZbb9XGjRs1Z84cLViwQMnJyVq2bJmysrK8Y5577jmdO3dO06ZNU11dne68806VlpYqPDzcO2b9+vXKzc3VqFGjFBwcrPHjx2vFihXe7VFRUSorK1NOTo6GDh2q6667TgUFBbx9HAAASGqDkCNJ9913n+67775Lbg8KCtKCBQu0YMGCS46JiYlRSUnJZY8zaNAgffjhh9+5TgAAYC4+uwoAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgpC5WFwAAQEd1w+ytkiRbiFtFwy0uBn64kgMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjNTmIefll19WUFCQ8vLyvOvOnz+vnJwc9ezZU9/73vc0fvx41dTU+DzvxIkTysjIUNeuXRUbG6uZM2fqwoULPmM++OAD3XLLLbLZbPrhD3+odevWtfV0AABAB9GmIWfv3r36xS9+oUGDBvmsnzFjht599129/fbb2rlzp06ePKmHH37Yu72pqUkZGRlqbGzUrl279Jvf/Ebr1q1TQUGBd8yxY8eUkZGhe+65R9XV1crLy9NPfvITvffee205JQAA0EG0Wcg5e/assrKy9Mtf/lI9evTwrj9z5ox+/etfa+nSpfrxj3+soUOH6o033tCuXbu0e/duSVJZWZk+++wz/cd//IcGDx6ssWPHauHChVq9erUaGxslScXFxUpOTtaSJUvUt29f5ebm6pFHHtFrr73WVlMCAAAdSJuFnJycHGVkZCg1NdVnfVVVlVwul8/6Pn36qFevXqqoqJAkVVRUaODAgYqLi/OOSU9Pl9Pp1KFDh7xjLt53enq6dx8AAKBza5NPIf/d736n/fv3a+/evX7bHA6HwsLCFB0d7bM+Li5ODofDO+bbAcez3bPtcmOcTqf+53/+RxEREX7HbmhoUENDg3fZ6XRKklwul1wu1zXOsmPwzMvU+X0X9KRl9MUfPfFndU9sIW6fZau/Np56bMHf/Bko9XhYWU9bnitXu89WDzlffvmlpk+fLrvdrvDw8Nbe/T9k0aJFmj9/vt/6srIyde3a1YKK2o/dbre6hIBDT1pGX/zRE39W9aRouO/ytm3bLKnD4+J6rD5XAq0/Utv0pL6+/qrGtXrIqaqqUm1trW655RbvuqamJpWXl2vVqlV677331NjYqLq6Op+rOTU1NYqPj5ckxcfHa8+ePT779bz76ttjLn5HVk1NjSIjI1u8iiNJc+bMUX5+vnfZ6XQqKSlJaWlpioyM/O6TDmAul0t2u12jR49WaGio1eUEBHrSMvrij574s7onAwp931xysDC93Wv4Nk89tmC3Fg5rtvxcCaT+tOW54rkTcyWtHnJGjRqlAwcO+KybMmWK+vTpo1mzZikpKUmhoaHavn27xo8fL0k6cuSITpw4oZSUFElSSkqKXnzxRdXW1io2NlbSN0kwMjJS/fr18465OKHa7XbvPlpi
s9lks9n81oeGhhr/A6wzzPFa0ZOW0Rd/9MSfVT1paAryq8NKLdVjZU2B1h9PDa1dx9Xur9VDTvfu3TVgwACfdd26dVPPnj2967Ozs5Wfn6+YmBhFRkbqmWeeUUpKim677TZJUlpamvr166eJEyeqqKhIDodDc+fOVU5OjjekPPXUU1q1apWee+45Pfnkk9qxY4feeustbd26tbWnBAAAOqA2eeHxlbz22msKDg7W+PHj1dDQoPT0dP37v/+7d3tISIi2bNmip59+WikpKerWrZsmT56sBQsWeMckJydr69atmjFjhpYvX67rr79ev/rVr5Sebu2lSwAAEBjaJeR88MEHPsvh4eFavXq1Vq9efcnn9O7d+4ovmLr77rv1ySeftEaJAADAMHx2FQAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIXawuAAACyQ2zt3r/bgtxq2i4hcUA+IdwJQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCR+Tw4ABLBv/94ejy9ezrCgEqDj4UoOAAAwEiEHAAAYidtVQCfnuR3i+QiDAYXv6ciL91lcFQD847iSAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICR+IBOAAHH86GhHl+8nGFRJbjYgML3vB/k2tAUJImvDwIXV3IAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEitHnIWLVqkW2+9Vd27d1dsbKzGjRunI0eO+Iw5f/68cnJy1LNnT33ve9/T+PHjVVNT4zPmxIkTysjIUNeuXRUbG6uZM2fqwoULPmM++OAD3XLLLbLZbPrhD3+odevWtfZ0AABAB9XqIWfnzp3KycnR7t27Zbfb5XK5lJaWpnPnznnHzJgxQ++++67efvtt7dy5UydPntTDDz/s3d7U1KSMjAw1NjZq165d+s1vfqN169apoKDAO+bYsWPKyMjQPffco+rqauXl5eknP/mJ3nvvvdaeEgAA6IBa/QM6S0tLfZbXrVun2NhYVVVVaeTIkTpz5ox+/etfq6SkRD/+8Y8lSW+88Yb69u2r3bt367bbblNZWZk+++wz/fGPf1RcXJwGDx6shQsXatasWSosLFRYWJiKi4uVnJysJUuWSJL69u2rjz76SK+99prS09Nbe1oAAKCDafNPIT9z5owkKSYmRpJUVVUll8ul1NRU75g+ffqoV69eqqio0G233aaKigoNHDhQcXFx3jHp6el6+umndejQIQ0ZMkQVFRU++/CMycvLu2QtDQ0Namho8C47nU5Jksvlksvl+ofnGog88zJ1ft8FPfFlC3F/82fw//9pdW88NXm0Zz3fPranJ1b24+JeSBbXE+x7vkjWfX3a+9gtufj7J1Dq8bCynrb8WXu1+wxyu93+30GtpLm5WQ888IDq6ur00UcfSZJKSko0ZcoUn7AhScOHD9c999yjxYsXa9q0aTp+/LjPraf6+np169ZN27Zt09ixY/WjH/1IU6ZM0Zw5c7xjtm3bpoyMDNXX1ysiIsKvnsLCQs2fP99vfUlJibp27dpa0wYAAG2ovr5ejz/+uM6cOaPIyMhLjmvTKzk5OTk6ePCgN+BYbc6cOcrPz/cuO51OJSUlKS0t7bJN6shcLpfsdrtGjx6t0NBQq8sJCPTE14DCb/4zYQt2a+GwZs3bF6yqgjEBUZPHwcL2uwX97WN7emLluXJxL6T27cfFhi4o9Z4nDc1B7V6PledGSy7+/rH650og9actf9Z67sRcSZuFnNzcXG3ZskXl5eW6/vrrvevj4+PV2Niouro6RUdHe9fX1NQoPj7eO2bP
nj0++/O8++rbYy5+R1ZNTY0iIyNbvIojSTabTTabzW99aGio8f/YdYY5Xit68o2GpiDf5eYgy/tycU3tWc/Fx/Yc36qeXKoeq3iCTUNzkLc2K78+gXiuWvr1CbD+eGpo7Tqudn+t/u4qt9ut3Nxcbdy4UTt27FBycrLP9qFDhyo0NFTbt2/3rjty5IhOnDihlJQUSVJKSooOHDig2tpa7xi73a7IyEj169fPO+bb+/CM8ewDAAB0bq1+JScnJ0clJSX6/e9/r+7du8vhcEiSoqKiFBERoaioKGVnZys/P18xMTGKjIzUM888o5SUFN12222SpLS0NPXr108TJ05UUVGRHA6H5s6dq5ycHO+VmKeeekqrVq3Sc889pyeffFI7duzQW2+9pa1bt7b2lAAAQAfU6ldy1qxZozNnzujuu+9WQkKC97FhwwbvmNdee0333Xefxo8fr5EjRyo+Pl7vvPOOd3tISIi2bNmikJAQpaSk6IknntCkSZO0YMEC75jk5GRt3bpVdrtdN998s5YsWaJf/epXvH0cAABIaoMrOVfzZq3w8HCtXr1aq1evvuSY3r17a9u2bZfdz913361PPvnkmmsEAADm47OrAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARupidQGAVQYUvqeGpiBJ0hcvZ1hcDQCgtXElBwAAGKnDh5zVq1frhhtuUHh4uEaMGKE9e/ZYXRIAAJ3SDbO3eh8DCt+zupyOfbtqw4YNys/PV3FxsUaMGKFly5YpPT1dR44cUWxsrNXlWeqG2VslSbYQt4qGW1yM/r8ej858e+jiXkidux8A0FY69JWcpUuXaurUqZoyZYr69eun4uJide3aVa+//rrVpQEAAIt12Cs5jY2Nqqqq0pw5c7zrgoODlZqaqoqKihaf09DQoIaGBu/ymTNnJEmnTp2Sy+Vq1fpGLNrut65yzqhWPcbldLlw7ps/m92qr2/W3//+d4WGhrbb8S9Vj8ff//53iyqRXC6X6uvr1cUVrKbmoHav5+JetPfxL3bxudLFFWxpPd+uycOqr08gfP8E3PniOuc9TwLh+ydQztVAOFe+XY9He/envb5/vv76a0mS2+2+/EB3B/XVV1+5Jbl37drls37mzJnu4cOHt/icn//8525JPHjw4MGDBw8DHl9++eVls0KHvZLzXcyZM0f5+fne5ebmZp06dUo9e/ZUUFCQhZW1HafTqaSkJH355ZeKjIy0upyAQE9aRl/80RN/9KRl9MVfW/bE7Xbr66+/VmJi4mXHddiQc9111ykkJEQ1NTU+62tqahQfH9/ic2w2m2w2m8+66OjotioxoERGRvKNdxF60jL64o+e+KMnLaMv/tqqJ1FRUVcc02FfeBwWFqahQ4dq+/b/f+1Lc3Oztm/frpSUFAsrAwAAgaDDXsmRpPz8fE2ePFnDhg3T8OHDtWzZMp07d05TpkyxujQAAGCxDh1y/uVf/kX//d//rYKCAjkcDg0ePFilpaWKi4uzurSAYbPZ9POf/9zvNl1nRk9aRl/80RN/9KRl9MVfIPQkyO2+0vuvAAAAOp4O+5ocAACAyyHkAAAAIxFyAACAkQg5AADASIQcAy1atEi33nqrunfvrtjYWI0bN05HjhyxuqyA8/LLLysoKEh5eXlWl2Kpr776Sk88
8YR69uypiIgIDRw4UPv27bO6LEs1NTVp3rx5Sk5OVkREhH7wgx9o4cKFV/6cHIOUl5fr/vvvV2JiooKCgrRp0yaf7W63WwUFBUpISFBERIRSU1N19OhRa4ptJ5fricvl0qxZszRw4EB169ZNiYmJmjRpkk6ePGldwe3kSufKtz311FMKCgrSsmXL2qU2Qo6Bdu7cqZycHO3evVt2u10ul0tpaWk6d87/g/46q7179+oXv/iFBg0aZHUpljp9+rTuuOMOhYaG6g9/+IM+++wzLVmyRD169LC6NEstXrxYa9as0apVq3T48GEtXrxYRUVFWrlypdWltZtz587p5ptv1urVq1vcXlRUpBUrVqi4uFiVlZXq1q2b0tPTdf78+XautP1crif19fXav3+/5s2bp/379+udd97RkSNH9MADD1hQafu60rnisXHjRu3evfuKH8XQqlrjwzIR2Gpra92S3Dt37rS6lIDw9ddfu2+88Ua33W53//M//7N7+vTpVpdkmVmzZrnvvPNOq8sIOBkZGe4nn3zSZ93DDz/szsrKsqgia0lyb9y40bvc3Nzsjo+Pd7/yyivedXV1dW6bzeb+7W9/a0GF7e/inrRkz549bknu48ePt09RAeBSffnb3/7m/v73v+8+ePCgu3fv3u7XXnutXerhSk4ncObMGUlSTEyMxZUEhpycHGVkZCg1NdXqUiy3efNmDRs2TBMmTFBsbKyGDBmiX/7yl1aXZbnbb79d27dv1+effy5J+tOf/qSPPvpIY8eOtbiywHDs2DE5HA6f76GoqCiNGDFCFRUVFlYWWM6cOaOgoKBO8xmJl9Lc3KyJEydq5syZ6t+/f7seu0P/xmNcWXNzs/Ly8nTHHXdowIABVpdjud/97nfav3+/9u7da3UpAeGvf/2r1qxZo/z8fD3//PPau3evnn32WYWFhWny5MlWl2eZ2bNny+l0qk+fPgoJCVFTU5NefPFFZWVlWV1aQHA4HJLk99vl4+LivNs6u/Pnz2vWrFl67LHHOv0Hdi5evFhdunTRs88+2+7HJuQYLicnRwcPHtRHH31kdSmW+/LLLzV9+nTZ7XaFh4dbXU5AaG5u1rBhw/TSSy9JkoYMGaKDBw+quLi4U4ect956S+vXr1dJSYn69++v6upq5eXlKTExsVP3BVfH5XLp0Ucfldvt1po1a6wux1JVVVVavny59u/fr6CgoHY/PrerDJabm6stW7bo/fff1/XXX291OZarqqpSbW2tbrnlFnXp0kVdunTRzp07tWLFCnXp0kVNTU1Wl9juEhIS1K9fP591ffv21YkTJyyqKDDMnDlTs2fPVmZmpgYOHKiJEydqxowZWrRokdWlBYT4+HhJUk1Njc/6mpoa77bOyhNwjh8/Lrvd3umv4nz44Yeqra1Vr169vD93jx8/rp/97Ge64YYb2vz4XMkxkNvt1jPPPKONGzfqgw8+UHJystUlBYRRo0bpwIEDPuumTJmiPn36aNasWQoJCbGoMuvccccdfr9e4PPPP1fv3r0tqigw1NfXKzjY9/+AISEham5utqiiwJKcnKz4+Hht375dgwcPliQ5nU5VVlbq6aeftrY4C3kCztGjR/X++++rZ8+eVpdkuYkTJ/q9/jE9PV0TJ07UlClT2vz4hBwD5eTkqKSkRL///e/VvXt37z3yqKgoRUREWFyddbp37+73uqRu3bqpZ8+enfb1SjNmzNDtt9+ul156SY8++qj27NmjtWvXau3atVaXZqn7779fL774onr16qX+/fvrk08+0dKlS/Xkk09aXVq7OXv2rP7yl794l48dO6bq6mrFxMSoV69eysvL0wsvvKAbb7xRycnJmjdvnhITEzVu3Djrim5jl+tJQkKCHnnkEe3fv19btmxRU1OT92dvTEyMwsLCrCq7zV3pXLk47IWGhio+Pl433XRT2xfXLu/hQruS1OLjjTfesLq0gNPZ30Ludrvd7777rnvAgAFum83m7tOnj3vt
2rVWl2Q5p9Ppnj59urtXr17u8PBw9z/90z+5/+3f/s3d0NBgdWnt5v3332/x58jkyZPdbvc3byOfN2+eOy4uzm2z2dyjRo1yHzlyxNqi29jlenLs2LFL/ux9//33rS69TV3pXLlYe76FPMjt7kS/whMAAHQavPAYAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACP9L/y0N3ltDT3aAAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Histogram of df_var_num using 100 bins (df_var_num is built in an earlier cell).\n",
    "hist_bins = 100\n",
    "df_var_num.hist(bins=hist_bins)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ids_selected_by_vars:  (27673,)\n"
     ]
    }
   ],
   "source": [
    "# Select the index entries of df_var_num whose value is at least 10.\n",
    "# NOTE(review): df_var_num presumably holds one count per stay ID -- confirm against the cell that builds it.\n",
    "meets_threshold = df_var_num >= 10\n",
    "ids_selected_by_vars = df_var_num[meets_threshold].index\n",
    "\n",
    "print(\"ids_selected_by_vars: \", ids_selected_by_vars.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "12312"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# An ID is kept only if it appears in all three selections.\n",
    "ids_selected = set(ids_before_selected).intersection(ids_after_selected, ids_selected_by_vars)\n",
    "len(ids_selected)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop every row whose ID (the index) is not in ids_selected.\n",
    "row_is_selected = all_df.index.isin(ids_selected)\n",
    "all_df = all_df[row_is_selected]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Map each name in var_to_consider to a positional column name 'Value_<position>'\n",
    "# and rename the matching columns of all_df accordingly.\n",
    "label_dict = {var_name: \"Value_\" + str(position) for position, var_name in enumerate(var_to_consider)}\n",
    "all_df = all_df.rename(columns=label_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Time</th>\n",
       "      <th>Value_6</th>\n",
       "      <th>Value_12</th>\n",
       "      <th>Value_9</th>\n",
       "      <th>Value_8</th>\n",
       "      <th>Value_1</th>\n",
       "      <th>Value_2</th>\n",
       "      <th>Value_5</th>\n",
       "      <th>Value_3</th>\n",
       "      <th>Value_4</th>\n",
       "      <th>Value_13</th>\n",
       "      <th>Value_10</th>\n",
       "      <th>Value_0</th>\n",
       "      <th>Value_11</th>\n",
       "      <th>Value_7</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1069486</th>\n",
       "      <td>7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>74.0</td>\n",
       "      <td>64.0</td>\n",
       "      <td>115.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>37.222222</td>\n",
       "      <td>NaN</td>\n",
       "      <td>247.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1069486</th>\n",
       "      <td>13</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>249.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1069486</th>\n",
       "      <td>20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>74.0</td>\n",
       "      <td>64.0</td>\n",
       "      <td>110.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>37.300000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1069486</th>\n",
       "      <td>60</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>74.0</td>\n",
       "      <td>60.0</td>\n",
       "      <td>106.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1069486</th>\n",
       "      <td>115</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>74.0</td>\n",
       "      <td>62.0</td>\n",
       "      <td>110.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>37.500000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Time  Value_6  Value_12  Value_9  Value_8  Value_1  Value_2  Value_5  \\\n",
       "ID                                                                              \n",
       "1069486     7      NaN       NaN      NaN     74.0     64.0    115.0      NaN   \n",
       "1069486    13      NaN       NaN      NaN      NaN      NaN      NaN      NaN   \n",
       "1069486    20      NaN       NaN      NaN     74.0     64.0    110.0      NaN   \n",
       "1069486    60      NaN       NaN      NaN     74.0     60.0    106.0      NaN   \n",
       "1069486   115      NaN       NaN      NaN     74.0     62.0    110.0      NaN   \n",
       "\n",
       "         Value_3  Value_4   Value_13  Value_10  Value_0  Value_11  Value_7  \n",
       "ID                                                                          \n",
       "1069486     99.0     18.0  37.222222       NaN    247.0       NaN      NaN  \n",
       "1069486      NaN      NaN        NaN       NaN    249.0       NaN      NaN  \n",
       "1069486     99.0     10.0  37.300000       NaN      NaN       NaN      NaN  \n",
       "1069486     99.0     14.0        NaN       NaN      NaN       NaN      NaN  \n",
       "1069486     98.0     15.0  37.500000       NaN      NaN       NaN      NaN  "
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of all_df.\n",
    "all_df.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Measurement columns: every column of all_df except the first one ('Time').\n",
    "df_value = all_df.iloc[:, 1:]\n",
    "\n",
    "# Observation mask: 1.0 where a value is present, 0.0 where it is missing.\n",
    "# notna() also handles non-float columns, on which np.isnan would raise a TypeError.\n",
    "df_mask = df_value.notna().astype(float)\n",
    "\n",
    "# Name each mask column after its value column: 'Value_<k>' -> 'Mask_<k>'.\n",
    "# A column without the 'Value_' prefix keeps its full name after 'Mask_'\n",
    "# instead of being truncated by a fixed-length slice.\n",
    "value_prefix = 'Value_'\n",
    "df_mask.columns = [\n",
    "    'Mask_' + (col[len(value_prefix):] if col.startswith(value_prefix) else col)\n",
    "    for col in df_value.columns\n",
    "]\n",
    "arr_mask = df_mask.to_numpy()  # kept so the name arr_mask stays available, as before\n",
    "\n",
    "# Final table: all_df with missing entries zero-filled, followed by the mask columns.\n",
    "df_eicu_data = pd.concat([all_df.fillna(0), df_mask], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Time</th>\n",
       "      <th>Value_6</th>\n",
       "      <th>Value_12</th>\n",
       "      <th>Value_9</th>\n",
       "      <th>Value_8</th>\n",
       "      <th>Value_1</th>\n",
       "      <th>Value_2</th>\n",
       "      <th>Value_5</th>\n",
       "      <th>Value_3</th>\n",
       "      <th>Value_4</th>\n",
       "      <th>...</th>\n",
       "      <th>Mask_1</th>\n",
       "      <th>Mask_2</th>\n",
       "      <th>Mask_5</th>\n",
       "      <th>Mask_3</th>\n",
       "      <th>Mask_4</th>\n",
       "      <th>Mask_13</th>\n",
       "      <th>Mask_10</th>\n",
       "      <th>Mask_0</th>\n",
       "      <th>Mask_11</th>\n",
       "      <th>Mask_7</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1069486</th>\n",
       "      <td>7</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>74.0</td>\n",
       "      <td>64.0</td>\n",
       "      <td>115.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>99.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1069486</th>\n",
       "      <td>13</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1069486</th>\n",
       "      <td>20</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>74.0</td>\n",
       "      <td>64.0</td>\n",
       "      <td>110.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>99.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1069486</th>\n",
       "      <td>60</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>74.0</td>\n",
       "      <td>60.0</td>\n",
       "      <td>106.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>99.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1069486</th>\n",
       "      <td>115</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>74.0</td>\n",
       "      <td>62.0</td>\n",
       "      <td>110.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>98.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 29 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         Time  Value_6  Value_12  Value_9  Value_8  Value_1  Value_2  Value_5  \\\n",
       "ID                                                                              \n",
       "1069486     7      0.0       0.0      0.0     74.0     64.0    115.0      0.0   \n",
       "1069486    13      0.0       0.0      0.0      0.0      0.0      0.0      0.0   \n",
       "1069486    20      0.0       0.0      0.0     74.0     64.0    110.0      0.0   \n",
       "1069486    60      0.0       0.0      0.0     74.0     60.0    106.0      0.0   \n",
       "1069486   115      0.0       0.0      0.0     74.0     62.0    110.0      0.0   \n",
       "\n",
       "         Value_3  Value_4  ...  Mask_1  Mask_2  Mask_5  Mask_3  Mask_4  \\\n",
       "ID                         ...                                           \n",
       "1069486     99.0     18.0  ...     1.0     1.0     0.0     1.0     1.0   \n",
       "1069486      0.0      0.0  ...     0.0     0.0     0.0     0.0     0.0   \n",
       "1069486     99.0     10.0  ...     1.0     1.0     0.0     1.0     1.0   \n",
       "1069486     99.0     14.0  ...     1.0     1.0     0.0     1.0     1.0   \n",
       "1069486     98.0     15.0  ...     1.0     1.0     0.0     1.0     1.0   \n",
       "\n",
       "         Mask_13  Mask_10  Mask_0  Mask_11  Mask_7  \n",
       "ID                                                  \n",
       "1069486      1.0      0.0     1.0      0.0     0.0  \n",
       "1069486      0.0      0.0     1.0      0.0     0.0  \n",
       "1069486      1.0      0.0     0.0      0.0     0.0  \n",
       "1069486      0.0      0.0     0.0      0.0     0.0  \n",
       "1069486      1.0      0.0     0.0      0.0     0.0  \n",
       "\n",
       "[5 rows x 29 columns]"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the combined value + mask table.\n",
    "df_eicu_data.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the eICU patient table; the gzip compression is inferred from the file extension.\n",
    "pat_df = pd.read_csv(path_pat, compression='infer')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_3660448/553277826.py:3: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n",
      "  labels_mortality['hospitaldischargestatus'] = labels_mortality['hospitaldischargestatus'].replace({\"Expired\": 1, \"Alive\": 0})\n"
     ]
    }
   ],
   "source": [
    "# Hospital-mortality label per unit stay: 1 = 'Expired', 0 = 'Alive'.\n",
    "# Stays with any other (or missing) discharge status are dropped.\n",
    "status_to_label = {'Expired': 1, 'Alive': 0}\n",
    "has_known_status = pat_df['hospitaldischargestatus'].isin(list(status_to_label))\n",
    "\n",
    "# .loc[...].copy() yields an independent frame, so the column assignment below\n",
    "# cannot trigger chained-assignment warnings.\n",
    "labels_mortality = pat_df.loc[has_known_status, ['patientunitstayid', 'hospitaldischargestatus']].copy()\n",
    "\n",
    "# Series.map is used instead of Series.replace: replace() emitted a FutureWarning about\n",
    "# silent downcasting; map() with an explicit cast yields the same 0/1 integers.\n",
    "labels_mortality['hospitaldischargestatus'] = labels_mortality['hospitaldischargestatus'].map(status_to_label).astype('int64')\n",
    "\n",
    "labels_mortality = labels_mortality.rename(columns={'patientunitstayid': 'ID', 'hospitaldischargestatus': 'labels'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only stays present in both tables: first trim the labels to IDs found in the data,\n",
    "# then trim the data to IDs that still have a label.\n",
    "label_has_data = labels_mortality['ID'].isin(df_eicu_data.index)\n",
    "labels_mortality = labels_mortality.loc[label_has_data]\n",
    "data_has_label = df_eicu_data.index.isin(labels_mortality['ID'])\n",
    "df_eicu_data = df_eicu_data.loc[data_has_label]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(12312, 2)"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Size of the label table as (rows, columns).\n",
    "n_label_rows, n_label_cols = labels_mortality.shape\n",
    "(n_label_rows, n_label_cols)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the processed table (ID index included) and the labels (no index column).\n",
    "data_csv_path = f'{path_processed}/eicu_data.csv'\n",
    "labels_csv_path = f'{path_processed}/eicu_labels.csv'\n",
    "df_eicu_data.to_csv(data_csv_path)\n",
    "labels_mortality.to_csv(labels_csv_path, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Value_7</th>\n",
       "      <th>Value_8</th>\n",
       "      <th>Mask_7</th>\n",
       "      <th>Mask_8</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>165828</th>\n",
       "      <td>0.0</td>\n",
       "      <td>62.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>165828</th>\n",
       "      <td>100.0</td>\n",
       "      <td>62.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>165828</th>\n",
       "      <td>0.0</td>\n",
       "      <td>60.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>165828</th>\n",
       "      <td>0.0</td>\n",
       "      <td>62.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>165828</th>\n",
       "      <td>0.0</td>\n",
       "      <td>66.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Value_7  Value_8  Mask_7  Mask_8\n",
       "ID                                      \n",
       "165828      0.0     62.0     0.0     1.0\n",
       "165828    100.0     62.0     1.0     1.0\n",
       "165828      0.0     60.0     0.0     1.0\n",
       "165828      0.0     62.0     0.0     1.0\n",
       "165828      0.0     66.0     0.0     1.0"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the last five rows of the Value_7/Value_8 columns next to the Mask_7/Mask_8 columns.\n",
    "df_eicu_data.loc[:, ['Value_7', 'Value_8', 'Mask_7', 'Mask_8']].tail(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "labels\n",
       "0    10144\n",
       "1     2168\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count how many rows carry each distinct value of the 'labels' column.\n",
    "labels_mortality.loc[:, 'labels'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "leit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "5c6db37f2dbfa0dc7724e0c837d07e3540b86643967779554e04bc9c17696e47"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
