{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Loading libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import List, Union, Dict\n",
    "import sys\n",
    "import os\n",
    "import yaml\n",
    "import warnings\n",
    "import datetime\n",
    "from functools import partial\n",
    "sys.path.insert(1, '..')\n",
    "os.chdir('..')\n",
    "\n",
    "import seaborn as sns\n",
    "sns.set_style('whitegrid')\n",
    "import matplotlib.pyplot as plt\n",
    "import statsmodels.api as sm\n",
    "import sklearn\n",
    "import optuna\n",
    "import darts\n",
    "\n",
    "from darts import models\n",
    "from darts import metrics\n",
    "from darts import TimeSeries\n",
    "from darts.dataprocessing.transformers import Scaler\n",
    "from torch.optim.lr_scheduler import StepLR\n",
    "from pytorch_lightning.callbacks.early_stopping import EarlyStopping\n",
    "\n",
    "from statsforecast.models import AutoARIMA\n",
    "\n",
    "from data_formatter.base import *"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Processing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# read-in file with CGM data (S1 in the paper source)\n",
    "data = pd.read_csv('./raw_data/pbio.2005143.s010', sep='\\t')\n",
    "# set types and column names\n",
    "data = data.rename(columns={'DisplayTime': 'time', 'GlucoseValue': 'gl', 'subjectId': 'id'})\n",
    "data = data[['time', 'gl', 'id']]\n",
    "data['time'] = pd.to_datetime(data['time'])\n",
    "data['gl'] = data['gl'].replace('Low', 40)\n",
    "data['gl'] = data['gl'].replace('High', 400)\n",
    "data['gl'] = data['gl'].astype(float)\n",
    "data['id'] = data['id'].astype(str)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['userID', 'Age', 'BMI', 'A1C', 'FBG', 'ogtt.2hr', 'insulin', 'hs.CRP', 'Tchol', 'Trg', 'HDL', 'LDL', 'mean_glucose', 'sd_glucose', 'range_glucose', 'min_glucose', 'max_glucose', 'quartile.25_glucose', 'median_glucose', 'quartile.75_glucose', 'mean_slope', 'max_slope', 'number_Random140', 'number_Random200', 'percent_below.80', 'percent_above.130', 'se_glucose_mean', 'numGE', 'mage', 'j_index', 'IQR', 'modd', 'distance_traveled', 'coef_variation', 'number_Random140_normByDays', 'number_Random200_normByDays', 'numGE_normByDays', 'distance_traveled_normByDays', 'diagnosis', 'freq_low', 'freq_moderate', 'freq_severe', 'glucotype', 'Height', 'Weight', 'Insulin_rate_dd', 'perc_cgm_prediabetic_range', 'perc_cgm_diabetic_range', 'SSPG']\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "49"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# read-in covariate file (S5 in the paper source)\n",
    "import sqlite3\n",
    "dbfile = './raw_data/pbio.2005143.s014.db'\n",
    "# Create a SQL connection to our SQLite database\n",
    "con = sqlite3.connect(dbfile)\n",
    "\n",
    "# initialize array to store data\n",
    "raw_covs = []\n",
    "\n",
    "cursor = con.execute('SELECT * FROM clinical')\n",
    "names = [description[0] for description in cursor.description]\n",
    "\n",
    "# reading all table names\n",
    "for row in con.execute(\"SELECT * FROM clinical\"):\n",
    "    raw_covs.append(row)\n",
    "\n",
    "# Be sure to close the connection\n",
    "con.close()\n",
    "\n",
    "# display(covs)\n",
    "print(names)\n",
    "display(len(raw_covs[0])) # num rows of db covariates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>...</th>\n",
       "      <th>39</th>\n",
       "      <th>40</th>\n",
       "      <th>41</th>\n",
       "      <th>42</th>\n",
       "      <th>43</th>\n",
       "      <th>44</th>\n",
       "      <th>45</th>\n",
       "      <th>46</th>\n",
       "      <th>47</th>\n",
       "      <th>48</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1636-69-001</td>\n",
       "      <td>59.0</td>\n",
       "      <td>21.70</td>\n",
       "      <td>6.7</td>\n",
       "      <td>109.0</td>\n",
       "      <td>205.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>204.0</td>\n",
       "      <td>135.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.147059</td>\n",
       "      <td>0.369748</td>\n",
       "      <td>0.483193</td>\n",
       "      <td>severe</td>\n",
       "      <td>176.3</td>\n",
       "      <td>68.0</td>\n",
       "      <td>0.10150</td>\n",
       "      <td>0.190404</td>\n",
       "      <td>0.0262106</td>\n",
       "      <td>91</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1636-69-026</td>\n",
       "      <td>67.0</td>\n",
       "      <td>28.90</td>\n",
       "      <td>6.2</td>\n",
       "      <td>97.0</td>\n",
       "      <td>152.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.2</td>\n",
       "      <td>208.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.004202</td>\n",
       "      <td>0.289916</td>\n",
       "      <td>0.705882</td>\n",
       "      <td>severe</td>\n",
       "      <td>157.5</td>\n",
       "      <td>76.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0831202</td>\n",
       "      <td>0</td>\n",
       "      <td>133</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1636-69-028</td>\n",
       "      <td>50.0</td>\n",
       "      <td>27.30</td>\n",
       "      <td>5.2</td>\n",
       "      <td>91.0</td>\n",
       "      <td>121.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.4</td>\n",
       "      <td>127.0</td>\n",
       "      <td>25.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.008403</td>\n",
       "      <td>0.424370</td>\n",
       "      <td>0.567227</td>\n",
       "      <td>severe</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0714286</td>\n",
       "      <td>0.0015444</td>\n",
       "      <td>75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1636-69-032</td>\n",
       "      <td>59.0</td>\n",
       "      <td>25.00</td>\n",
       "      <td>5.7</td>\n",
       "      <td>82.0</td>\n",
       "      <td>142.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>224.0</td>\n",
       "      <td>138.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.021008</td>\n",
       "      <td>0.491597</td>\n",
       "      <td>0.487395</td>\n",
       "      <td>moderate</td>\n",
       "      <td>169.4</td>\n",
       "      <td>68.2</td>\n",
       "      <td>0.01575</td>\n",
       "      <td>0.0147643</td>\n",
       "      <td>0</td>\n",
       "      <td>87</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1636-69-035</td>\n",
       "      <td>60.0</td>\n",
       "      <td>28.20</td>\n",
       "      <td>5.5</td>\n",
       "      <td>87.0</td>\n",
       "      <td>118.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.2</td>\n",
       "      <td>224.0</td>\n",
       "      <td>85.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.029412</td>\n",
       "      <td>0.352941</td>\n",
       "      <td>0.617647</td>\n",
       "      <td>severe</td>\n",
       "      <td>176.5</td>\n",
       "      <td>82.5</td>\n",
       "      <td>0.05642</td>\n",
       "      <td>0.15465</td>\n",
       "      <td>0.00592128</td>\n",
       "      <td>160</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1636-69-048</td>\n",
       "      <td>60.0</td>\n",
       "      <td>33.50</td>\n",
       "      <td>5.2</td>\n",
       "      <td>87.0</td>\n",
       "      <td>130.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.8</td>\n",
       "      <td>197.0</td>\n",
       "      <td>213.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.121849</td>\n",
       "      <td>0.693277</td>\n",
       "      <td>0.184874</td>\n",
       "      <td>moderate</td>\n",
       "      <td>151.9</td>\n",
       "      <td>77.5</td>\n",
       "      <td>-0.02675</td>\n",
       "      <td>0.00188071</td>\n",
       "      <td>0</td>\n",
       "      <td>119</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1636-69-053</td>\n",
       "      <td>60.0</td>\n",
       "      <td>26.20</td>\n",
       "      <td>5.0</td>\n",
       "      <td>85.0</td>\n",
       "      <td>128.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.4</td>\n",
       "      <td>196.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.054622</td>\n",
       "      <td>0.714286</td>\n",
       "      <td>0.231092</td>\n",
       "      <td>moderate</td>\n",
       "      <td>164.5</td>\n",
       "      <td>75.7</td>\n",
       "      <td>0.10525</td>\n",
       "      <td>0.0505717</td>\n",
       "      <td>0.00322486</td>\n",
       "      <td>188</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1636-69-060</td>\n",
       "      <td>55.0</td>\n",
       "      <td>28.30</td>\n",
       "      <td>5.2</td>\n",
       "      <td>91.0</td>\n",
       "      <td>120.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.4</td>\n",
       "      <td>181.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.121849</td>\n",
       "      <td>0.878151</td>\n",
       "      <td>severe</td>\n",
       "      <td>181.5</td>\n",
       "      <td>95.7</td>\n",
       "      <td>0.04108</td>\n",
       "      <td>0.0851419</td>\n",
       "      <td>0</td>\n",
       "      <td>179</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1636-69-064</td>\n",
       "      <td>51.0</td>\n",
       "      <td>28.30</td>\n",
       "      <td>5.2</td>\n",
       "      <td>82.0</td>\n",
       "      <td>137.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1.1</td>\n",
       "      <td>225.0</td>\n",
       "      <td>123.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.239496</td>\n",
       "      <td>0.760504</td>\n",
       "      <td>severe</td>\n",
       "      <td>150.5</td>\n",
       "      <td>65.3</td>\n",
       "      <td>0.05908</td>\n",
       "      <td>0.16646</td>\n",
       "      <td>0.022697</td>\n",
       "      <td>190</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1636-69-069</td>\n",
       "      <td>56.0</td>\n",
       "      <td>29.30</td>\n",
       "      <td>5.5</td>\n",
       "      <td>90.0</td>\n",
       "      <td>111.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>189.0</td>\n",
       "      <td>131.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.050420</td>\n",
       "      <td>0.462185</td>\n",
       "      <td>0.487395</td>\n",
       "      <td>severe</td>\n",
       "      <td>184.5</td>\n",
       "      <td>191.0</td>\n",
       "      <td>0.04925</td>\n",
       "      <td>0.0792059</td>\n",
       "      <td>0</td>\n",
       "      <td>250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>1636-69-090</td>\n",
       "      <td>76.0</td>\n",
       "      <td>22.00</td>\n",
       "      <td>5.6</td>\n",
       "      <td>106.0</td>\n",
       "      <td>194.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>159.0</td>\n",
       "      <td>34.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.155462</td>\n",
       "      <td>0.340336</td>\n",
       "      <td>0.504202</td>\n",
       "      <td>severe</td>\n",
       "      <td>169.6</td>\n",
       "      <td>63.8</td>\n",
       "      <td>0.12083</td>\n",
       "      <td>0.0887701</td>\n",
       "      <td>0</td>\n",
       "      <td>NA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>1636-69-091</td>\n",
       "      <td>62.0</td>\n",
       "      <td>23.80</td>\n",
       "      <td>6.5</td>\n",
       "      <td>127.0</td>\n",
       "      <td>151.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>1.1</td>\n",
       "      <td>293.0</td>\n",
       "      <td>343.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.046219</td>\n",
       "      <td>0.680672</td>\n",
       "      <td>0.273109</td>\n",
       "      <td>moderate</td>\n",
       "      <td>176.0</td>\n",
       "      <td>73.6</td>\n",
       "      <td>0.12308</td>\n",
       "      <td>0.0360577</td>\n",
       "      <td>0</td>\n",
       "      <td>155</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>1636-69-100</td>\n",
       "      <td>45.0</td>\n",
       "      <td>28.40</td>\n",
       "      <td>5.3</td>\n",
       "      <td>83.0</td>\n",
       "      <td>66.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>219.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.159664</td>\n",
       "      <td>0.655462</td>\n",
       "      <td>0.184874</td>\n",
       "      <td>moderate</td>\n",
       "      <td>184.0</td>\n",
       "      <td>96.0</td>\n",
       "      <td>-0.00608</td>\n",
       "      <td>0.00797832</td>\n",
       "      <td>0</td>\n",
       "      <td>NA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>1636-69-104</td>\n",
       "      <td>56.0</td>\n",
       "      <td>24.60</td>\n",
       "      <td>4.6</td>\n",
       "      <td>82.0</td>\n",
       "      <td>104.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2.5</td>\n",
       "      <td>259.0</td>\n",
       "      <td>67.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.415966</td>\n",
       "      <td>0.466387</td>\n",
       "      <td>0.117647</td>\n",
       "      <td>moderate</td>\n",
       "      <td>165.3</td>\n",
       "      <td>67.0</td>\n",
       "      <td>0.02833</td>\n",
       "      <td>0.0061745</td>\n",
       "      <td>0</td>\n",
       "      <td>129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>1636-69-107</td>\n",
       "      <td>53.0</td>\n",
       "      <td>38.00</td>\n",
       "      <td>5.6</td>\n",
       "      <td>98.0</td>\n",
       "      <td>120.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>2.3</td>\n",
       "      <td>217.0</td>\n",
       "      <td>125.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.037815</td>\n",
       "      <td>0.352941</td>\n",
       "      <td>0.609244</td>\n",
       "      <td>severe</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0517828</td>\n",
       "      <td>0</td>\n",
       "      <td>223</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>1636-69-111</td>\n",
       "      <td>48.0</td>\n",
       "      <td>29.20</td>\n",
       "      <td>5.2</td>\n",
       "      <td>95.0</td>\n",
       "      <td>97.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>0.5</td>\n",
       "      <td>194.0</td>\n",
       "      <td>64.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.079832</td>\n",
       "      <td>0.663866</td>\n",
       "      <td>0.256303</td>\n",
       "      <td>moderate</td>\n",
       "      <td>176.1</td>\n",
       "      <td>90.5</td>\n",
       "      <td>0.01233</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>1636-69-114</td>\n",
       "      <td>65.0</td>\n",
       "      <td>29.50</td>\n",
       "      <td>5.8</td>\n",
       "      <td>91.0</td>\n",
       "      <td>117.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.3</td>\n",
       "      <td>194.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.474790</td>\n",
       "      <td>0.525210</td>\n",
       "      <td>severe</td>\n",
       "      <td>169.6</td>\n",
       "      <td>84.6</td>\n",
       "      <td>0.03908</td>\n",
       "      <td>0.0170576</td>\n",
       "      <td>0</td>\n",
       "      <td>NA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>1636-69-123</td>\n",
       "      <td>54.0</td>\n",
       "      <td>23.80</td>\n",
       "      <td>5.3</td>\n",
       "      <td>86.0</td>\n",
       "      <td>116.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>0.7</td>\n",
       "      <td>157.0</td>\n",
       "      <td>27.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>0.407563</td>\n",
       "      <td>0.092437</td>\n",
       "      <td>low</td>\n",
       "      <td>171.0</td>\n",
       "      <td>76.2</td>\n",
       "      <td>0.04600</td>\n",
       "      <td>0.00271967</td>\n",
       "      <td>0</td>\n",
       "      <td>NA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>1636-70-1002</td>\n",
       "      <td>51.0</td>\n",
       "      <td>28.80</td>\n",
       "      <td>5.2</td>\n",
       "      <td>88.0</td>\n",
       "      <td>98.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>4.4</td>\n",
       "      <td>182.0</td>\n",
       "      <td>147.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.092437</td>\n",
       "      <td>0.668067</td>\n",
       "      <td>0.239496</td>\n",
       "      <td>moderate</td>\n",
       "      <td>172.0</td>\n",
       "      <td>87.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0287611</td>\n",
       "      <td>0</td>\n",
       "      <td>175</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>1636-70-1003</td>\n",
       "      <td>51.0</td>\n",
       "      <td>26.20</td>\n",
       "      <td>5.2</td>\n",
       "      <td>76.0</td>\n",
       "      <td>107.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.9</td>\n",
       "      <td>206.0</td>\n",
       "      <td>32.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.357143</td>\n",
       "      <td>0.474790</td>\n",
       "      <td>0.168067</td>\n",
       "      <td>moderate</td>\n",
       "      <td>166.3</td>\n",
       "      <td>75.3</td>\n",
       "      <td>0.02342</td>\n",
       "      <td>0.00338409</td>\n",
       "      <td>0</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>1636-70-1005</td>\n",
       "      <td>66.0</td>\n",
       "      <td>27.20</td>\n",
       "      <td>5.5</td>\n",
       "      <td>94.0</td>\n",
       "      <td>152.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.4</td>\n",
       "      <td>243.0</td>\n",
       "      <td>95.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.021008</td>\n",
       "      <td>0.273109</td>\n",
       "      <td>0.705882</td>\n",
       "      <td>severe</td>\n",
       "      <td>184.0</td>\n",
       "      <td>89.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0507868</td>\n",
       "      <td>0.00249593</td>\n",
       "      <td>99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>1636-70-1008</td>\n",
       "      <td>62.0</td>\n",
       "      <td>28.00</td>\n",
       "      <td>5.4</td>\n",
       "      <td>82.0</td>\n",
       "      <td>124.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>1.8</td>\n",
       "      <td>177.0</td>\n",
       "      <td>164.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.138655</td>\n",
       "      <td>0.596639</td>\n",
       "      <td>0.264706</td>\n",
       "      <td>moderate</td>\n",
       "      <td>172.0</td>\n",
       "      <td>92.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.056118</td>\n",
       "      <td>0.00550176</td>\n",
       "      <td>229</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>1636-70-1010</td>\n",
       "      <td>68.0</td>\n",
       "      <td>33.00</td>\n",
       "      <td>5.5</td>\n",
       "      <td>94.0</td>\n",
       "      <td>190.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>3.5</td>\n",
       "      <td>143.0</td>\n",
       "      <td>93.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.033613</td>\n",
       "      <td>0.231092</td>\n",
       "      <td>0.735294</td>\n",
       "      <td>severe</td>\n",
       "      <td>164.2</td>\n",
       "      <td>88.1</td>\n",
       "      <td>0.06458</td>\n",
       "      <td>0.068886</td>\n",
       "      <td>0.000164799</td>\n",
       "      <td>65</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>2133-001</td>\n",
       "      <td>27.0</td>\n",
       "      <td>26.40</td>\n",
       "      <td>5.2</td>\n",
       "      <td>86.0</td>\n",
       "      <td>101.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.1</td>\n",
       "      <td>216.0</td>\n",
       "      <td>134.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.722689</td>\n",
       "      <td>0.168067</td>\n",
       "      <td>0.109244</td>\n",
       "      <td>low</td>\n",
       "      <td>168.0</td>\n",
       "      <td>74.5</td>\n",
       "      <td>0.06025</td>\n",
       "      <td>0.0244977</td>\n",
       "      <td>0</td>\n",
       "      <td>110.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>2133-002</td>\n",
       "      <td>29.0</td>\n",
       "      <td>21.40</td>\n",
       "      <td>5.3</td>\n",
       "      <td>76.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>1.7</td>\n",
       "      <td>193.0</td>\n",
       "      <td>116.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.222689</td>\n",
       "      <td>0.617647</td>\n",
       "      <td>0.159664</td>\n",
       "      <td>moderate</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0163119</td>\n",
       "      <td>0</td>\n",
       "      <td>143.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>2133-003</td>\n",
       "      <td>36.0</td>\n",
       "      <td>24.60</td>\n",
       "      <td>5.4</td>\n",
       "      <td>91.0</td>\n",
       "      <td>131.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>0.4</td>\n",
       "      <td>199.0</td>\n",
       "      <td>244.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.151261</td>\n",
       "      <td>0.542017</td>\n",
       "      <td>0.306723</td>\n",
       "      <td>moderate</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0423782</td>\n",
       "      <td>0</td>\n",
       "      <td>110.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>2133-004</td>\n",
       "      <td>54.0</td>\n",
       "      <td>28.09</td>\n",
       "      <td>6.0</td>\n",
       "      <td>145.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>11.0</td>\n",
       "      <td>1.6</td>\n",
       "      <td>148.0</td>\n",
       "      <td>130.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.058824</td>\n",
       "      <td>0.117647</td>\n",
       "      <td>0.823529</td>\n",
       "      <td>severe</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.155905</td>\n",
       "      <td>0.017363</td>\n",
       "      <td>173.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>2133-006</td>\n",
       "      <td>29.0</td>\n",
       "      <td>20.00</td>\n",
       "      <td>5.3</td>\n",
       "      <td>80.0</td>\n",
       "      <td>67.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>84.0</td>\n",
       "      <td>26.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.571429</td>\n",
       "      <td>0.365546</td>\n",
       "      <td>0.063025</td>\n",
       "      <td>low</td>\n",
       "      <td>172.0</td>\n",
       "      <td>58.2</td>\n",
       "      <td>0.03917</td>\n",
       "      <td>0.00263574</td>\n",
       "      <td>0</td>\n",
       "      <td>61.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>2133-007</td>\n",
       "      <td>48.0</td>\n",
       "      <td>26.47</td>\n",
       "      <td>5.5</td>\n",
       "      <td>95.0</td>\n",
       "      <td>127.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>8.6</td>\n",
       "      <td>179.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.012605</td>\n",
       "      <td>0.403361</td>\n",
       "      <td>0.584034</td>\n",
       "      <td>severe</td>\n",
       "      <td>162.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>0.12933</td>\n",
       "      <td>0.0609236</td>\n",
       "      <td>0.000388048</td>\n",
       "      <td>218.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>2133-008</td>\n",
       "      <td>31.0</td>\n",
       "      <td>19.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0.378151</td>\n",
       "      <td>0.563025</td>\n",
       "      <td>0.058824</td>\n",
       "      <td>moderate</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>2133-009</td>\n",
       "      <td>25.0</td>\n",
       "      <td>19.50</td>\n",
       "      <td>5.2</td>\n",
       "      <td>84.0</td>\n",
       "      <td>113.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.3</td>\n",
       "      <td>151.0</td>\n",
       "      <td>94.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.025210</td>\n",
       "      <td>0.424370</td>\n",
       "      <td>0.550420</td>\n",
       "      <td>severe</td>\n",
       "      <td>167.0</td>\n",
       "      <td>54.6</td>\n",
       "      <td>0.06867</td>\n",
       "      <td>0.0426516</td>\n",
       "      <td>0</td>\n",
       "      <td>NA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>2133-010</td>\n",
       "      <td>29.0</td>\n",
       "      <td>24.50</td>\n",
       "      <td>NaN</td>\n",
       "      <td>91.0</td>\n",
       "      <td>125.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>0.6</td>\n",
       "      <td>204.0</td>\n",
       "      <td>43.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.210084</td>\n",
       "      <td>0.689076</td>\n",
       "      <td>0.100840</td>\n",
       "      <td>moderate</td>\n",
       "      <td>170.0</td>\n",
       "      <td>70.9</td>\n",
       "      <td>0.08983</td>\n",
       "      <td>0.00968013</td>\n",
       "      <td>0</td>\n",
       "      <td>NA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>2133-011</td>\n",
       "      <td>32.0</td>\n",
       "      <td>24.00</td>\n",
       "      <td>5.4</td>\n",
       "      <td>88.0</td>\n",
       "      <td>114.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>191.0</td>\n",
       "      <td>34.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.214286</td>\n",
       "      <td>0.592437</td>\n",
       "      <td>0.193277</td>\n",
       "      <td>moderate</td>\n",
       "      <td>155.0</td>\n",
       "      <td>58.2</td>\n",
       "      <td>0.03733</td>\n",
       "      <td>0.0370724</td>\n",
       "      <td>0.00248423</td>\n",
       "      <td>69.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>2133-012</td>\n",
       "      <td>27.0</td>\n",
       "      <td>20.00</td>\n",
       "      <td>5.3</td>\n",
       "      <td>83.0</td>\n",
       "      <td>113.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.1</td>\n",
       "      <td>137.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.294118</td>\n",
       "      <td>0.415966</td>\n",
       "      <td>0.289916</td>\n",
       "      <td>moderate</td>\n",
       "      <td>182.0</td>\n",
       "      <td>66.5</td>\n",
       "      <td>0.01683</td>\n",
       "      <td>0.00857449</td>\n",
       "      <td>0.00214362</td>\n",
       "      <td>NA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>2133-013</td>\n",
       "      <td>65.0</td>\n",
       "      <td>23.60</td>\n",
       "      <td>5.4</td>\n",
       "      <td>89.0</td>\n",
       "      <td>85.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>199.0</td>\n",
       "      <td>88.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.197479</td>\n",
       "      <td>0.596639</td>\n",
       "      <td>0.205882</td>\n",
       "      <td>moderate</td>\n",
       "      <td>162.8</td>\n",
       "      <td>62.5</td>\n",
       "      <td>0.05308</td>\n",
       "      <td>0.0173885</td>\n",
       "      <td>0.00131234</td>\n",
       "      <td>NA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>2133-015</td>\n",
       "      <td>51.0</td>\n",
       "      <td>33.60</td>\n",
       "      <td>5.7</td>\n",
       "      <td>96.0</td>\n",
       "      <td>181.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>5.4</td>\n",
       "      <td>237.0</td>\n",
       "      <td>167.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.037815</td>\n",
       "      <td>0.462185</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>severe</td>\n",
       "      <td>160.0</td>\n",
       "      <td>86.1</td>\n",
       "      <td>0.11250</td>\n",
       "      <td>0.0610169</td>\n",
       "      <td>0.00169492</td>\n",
       "      <td>158.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>2133-017</td>\n",
       "      <td>47.0</td>\n",
       "      <td>40.40</td>\n",
       "      <td>5.3</td>\n",
       "      <td>98.0</td>\n",
       "      <td>164.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>3.8</td>\n",
       "      <td>189.0</td>\n",
       "      <td>104.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.037815</td>\n",
       "      <td>0.432773</td>\n",
       "      <td>0.529412</td>\n",
       "      <td>severe</td>\n",
       "      <td>162.4</td>\n",
       "      <td>106.5</td>\n",
       "      <td>0.05458</td>\n",
       "      <td>0.0691709</td>\n",
       "      <td>0</td>\n",
       "      <td>NA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>2133-018</td>\n",
       "      <td>64.0</td>\n",
       "      <td>26.40</td>\n",
       "      <td>6.4</td>\n",
       "      <td>103.0</td>\n",
       "      <td>256.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>101.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.315126</td>\n",
       "      <td>0.684874</td>\n",
       "      <td>severe</td>\n",
       "      <td>158.6</td>\n",
       "      <td>66.4</td>\n",
       "      <td>0.08600</td>\n",
       "      <td>0.108421</td>\n",
       "      <td>0.0612928</td>\n",
       "      <td>185.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>2133-019</td>\n",
       "      <td>64.0</td>\n",
       "      <td>29.93</td>\n",
       "      <td>6.2</td>\n",
       "      <td>95.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>251.0</td>\n",
       "      <td>64.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.151261</td>\n",
       "      <td>0.382353</td>\n",
       "      <td>0.466387</td>\n",
       "      <td>severe</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.115385</td>\n",
       "      <td>0.00238727</td>\n",
       "      <td>NA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>2133-020</td>\n",
       "      <td>53.0</td>\n",
       "      <td>24.75</td>\n",
       "      <td>5.1</td>\n",
       "      <td>82.0</td>\n",
       "      <td>97.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>172.0</td>\n",
       "      <td>53.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.289916</td>\n",
       "      <td>0.386555</td>\n",
       "      <td>0.323529</td>\n",
       "      <td>moderate</td>\n",
       "      <td>161.8</td>\n",
       "      <td>64.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0504971</td>\n",
       "      <td>0.000261643</td>\n",
       "      <td>57.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>2133-021</td>\n",
       "      <td>56.0</td>\n",
       "      <td>27.87</td>\n",
       "      <td>6.0</td>\n",
       "      <td>119.0</td>\n",
       "      <td>187.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.5</td>\n",
       "      <td>177.0</td>\n",
       "      <td>126.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.025210</td>\n",
       "      <td>0.109244</td>\n",
       "      <td>0.865546</td>\n",
       "      <td>severe</td>\n",
       "      <td>180.5</td>\n",
       "      <td>88.9</td>\n",
       "      <td>0.05383</td>\n",
       "      <td>0.290249</td>\n",
       "      <td>0.0274251</td>\n",
       "      <td>130.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>2133-022</td>\n",
       "      <td>60.0</td>\n",
       "      <td>26.00</td>\n",
       "      <td>4.9</td>\n",
       "      <td>85.0</td>\n",
       "      <td>116.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.7</td>\n",
       "      <td>176.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.058824</td>\n",
       "      <td>0.529412</td>\n",
       "      <td>0.411765</td>\n",
       "      <td>moderate</td>\n",
       "      <td>156.8</td>\n",
       "      <td>63.7</td>\n",
       "      <td>0.05625</td>\n",
       "      <td>0.0551983</td>\n",
       "      <td>0.000535906</td>\n",
       "      <td>111.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>2133-023</td>\n",
       "      <td>52.0</td>\n",
       "      <td>25.90</td>\n",
       "      <td>5.2</td>\n",
       "      <td>91.0</td>\n",
       "      <td>65.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>3.1</td>\n",
       "      <td>192.0</td>\n",
       "      <td>98.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.487395</td>\n",
       "      <td>0.441176</td>\n",
       "      <td>0.071429</td>\n",
       "      <td>low</td>\n",
       "      <td>181.5</td>\n",
       "      <td>85.8</td>\n",
       "      <td>0.01342</td>\n",
       "      <td>0.00838399</td>\n",
       "      <td>0</td>\n",
       "      <td>70.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>2133-024</td>\n",
       "      <td>28.0</td>\n",
       "      <td>24.50</td>\n",
       "      <td>5.2</td>\n",
       "      <td>124.0</td>\n",
       "      <td>95.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>136.0</td>\n",
       "      <td>274.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.214286</td>\n",
       "      <td>0.462185</td>\n",
       "      <td>0.323529</td>\n",
       "      <td>moderate</td>\n",
       "      <td>180.0</td>\n",
       "      <td>79.4</td>\n",
       "      <td>0.00467</td>\n",
       "      <td>0.0200524</td>\n",
       "      <td>0.000462749</td>\n",
       "      <td>49.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>2133-025</td>\n",
       "      <td>64.0</td>\n",
       "      <td>23.60</td>\n",
       "      <td>5.1</td>\n",
       "      <td>91.0</td>\n",
       "      <td>139.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>166.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.357143</td>\n",
       "      <td>0.584034</td>\n",
       "      <td>0.058824</td>\n",
       "      <td>moderate</td>\n",
       "      <td>177.0</td>\n",
       "      <td>73.9</td>\n",
       "      <td>0.08883</td>\n",
       "      <td>0.00873274</td>\n",
       "      <td>0</td>\n",
       "      <td>NA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>2133-026</td>\n",
       "      <td>34.0</td>\n",
       "      <td>21.30</td>\n",
       "      <td>5.1</td>\n",
       "      <td>91.0</td>\n",
       "      <td>77.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>252.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.588235</td>\n",
       "      <td>0.327731</td>\n",
       "      <td>0.084034</td>\n",
       "      <td>low</td>\n",
       "      <td>177.0</td>\n",
       "      <td>68.1</td>\n",
       "      <td>0.01658</td>\n",
       "      <td>0.00406769</td>\n",
       "      <td>0</td>\n",
       "      <td>73.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>2133-027</td>\n",
       "      <td>58.0</td>\n",
       "      <td>26.30</td>\n",
       "      <td>5.5</td>\n",
       "      <td>107.0</td>\n",
       "      <td>102.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>187.0</td>\n",
       "      <td>190.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.226891</td>\n",
       "      <td>0.697479</td>\n",
       "      <td>0.075630</td>\n",
       "      <td>moderate</td>\n",
       "      <td>175.0</td>\n",
       "      <td>80.6</td>\n",
       "      <td>0.01042</td>\n",
       "      <td>0.0885068</td>\n",
       "      <td>0.00615385</td>\n",
       "      <td>55.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>2133-028</td>\n",
       "      <td>25.0</td>\n",
       "      <td>24.90</td>\n",
       "      <td>5.0</td>\n",
       "      <td>89.0</td>\n",
       "      <td>63.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>172.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.915966</td>\n",
       "      <td>0.084034</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>low</td>\n",
       "      <td>186.4</td>\n",
       "      <td>86.7</td>\n",
       "      <td>0.02642</td>\n",
       "      <td>0.00129947</td>\n",
       "      <td>0</td>\n",
       "      <td>NA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>2133-030</td>\n",
       "      <td>31.0</td>\n",
       "      <td>22.98</td>\n",
       "      <td>5.4</td>\n",
       "      <td>90.0</td>\n",
       "      <td>84.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>168.0</td>\n",
       "      <td>41.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.310924</td>\n",
       "      <td>0.508403</td>\n",
       "      <td>0.180672</td>\n",
       "      <td>moderate</td>\n",
       "      <td>170.1</td>\n",
       "      <td>66.5</td>\n",
       "      <td>0.01858</td>\n",
       "      <td>0.00757657</td>\n",
       "      <td>0</td>\n",
       "      <td>45.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>2133-032</td>\n",
       "      <td>35.0</td>\n",
       "      <td>26.30</td>\n",
       "      <td>5.1</td>\n",
       "      <td>93.0</td>\n",
       "      <td>134.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>187.0</td>\n",
       "      <td>44.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.071429</td>\n",
       "      <td>0.680672</td>\n",
       "      <td>0.247899</td>\n",
       "      <td>moderate</td>\n",
       "      <td>177.5</td>\n",
       "      <td>82.6</td>\n",
       "      <td>0.03308</td>\n",
       "      <td>0.0177891</td>\n",
       "      <td>0</td>\n",
       "      <td>59.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>2133-033</td>\n",
       "      <td>42.0</td>\n",
       "      <td>19.60</td>\n",
       "      <td>5.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.2</td>\n",
       "      <td>150.0</td>\n",
       "      <td>65.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.352941</td>\n",
       "      <td>0.449580</td>\n",
       "      <td>0.197479</td>\n",
       "      <td>moderate</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0103956</td>\n",
       "      <td>0</td>\n",
       "      <td>51.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>2133-035</td>\n",
       "      <td>61.0</td>\n",
       "      <td>30.10</td>\n",
       "      <td>5.4</td>\n",
       "      <td>105.0</td>\n",
       "      <td>108.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>237.0</td>\n",
       "      <td>94.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.042017</td>\n",
       "      <td>0.684874</td>\n",
       "      <td>0.273109</td>\n",
       "      <td>moderate</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0437872</td>\n",
       "      <td>0</td>\n",
       "      <td>NA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>2133-036</td>\n",
       "      <td>30.0</td>\n",
       "      <td>39.20</td>\n",
       "      <td>5.9</td>\n",
       "      <td>98.0</td>\n",
       "      <td>177.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>148.0</td>\n",
       "      <td>71.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.121849</td>\n",
       "      <td>0.428571</td>\n",
       "      <td>0.449580</td>\n",
       "      <td>severe</td>\n",
       "      <td>156.7</td>\n",
       "      <td>96.1</td>\n",
       "      <td>0.07858</td>\n",
       "      <td>0.0678299</td>\n",
       "      <td>0.00270337</td>\n",
       "      <td>301.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>2133-037</td>\n",
       "      <td>28.0</td>\n",
       "      <td>25.40</td>\n",
       "      <td>4.9</td>\n",
       "      <td>83.0</td>\n",
       "      <td>107.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.8</td>\n",
       "      <td>131.0</td>\n",
       "      <td>63.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.319328</td>\n",
       "      <td>0.470588</td>\n",
       "      <td>0.210084</td>\n",
       "      <td>moderate</td>\n",
       "      <td>169.0</td>\n",
       "      <td>72.5</td>\n",
       "      <td>0.06008</td>\n",
       "      <td>0.00843143</td>\n",
       "      <td>0</td>\n",
       "      <td>64.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>2133-039</td>\n",
       "      <td>46.0</td>\n",
       "      <td>36.10</td>\n",
       "      <td>5.1</td>\n",
       "      <td>100.0</td>\n",
       "      <td>240.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>5.5</td>\n",
       "      <td>157.0</td>\n",
       "      <td>108.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.079832</td>\n",
       "      <td>0.516807</td>\n",
       "      <td>0.403361</td>\n",
       "      <td>moderate</td>\n",
       "      <td>159.8</td>\n",
       "      <td>91.9</td>\n",
       "      <td>0.14900</td>\n",
       "      <td>0.086758</td>\n",
       "      <td>0.0127854</td>\n",
       "      <td>335.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>55</th>\n",
       "      <td>2133-040</td>\n",
       "      <td>39.0</td>\n",
       "      <td>21.00</td>\n",
       "      <td>5.2</td>\n",
       "      <td>86.0</td>\n",
       "      <td>105.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.2</td>\n",
       "      <td>183.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.344538</td>\n",
       "      <td>0.495798</td>\n",
       "      <td>0.159664</td>\n",
       "      <td>moderate</td>\n",
       "      <td>159.0</td>\n",
       "      <td>53.3</td>\n",
       "      <td>0.11333</td>\n",
       "      <td>0.018755</td>\n",
       "      <td>0</td>\n",
       "      <td>148.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56</th>\n",
       "      <td>2133-041</td>\n",
       "      <td>51.0</td>\n",
       "      <td>27.30</td>\n",
       "      <td>4.9</td>\n",
       "      <td>93.0</td>\n",
       "      <td>74.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.4</td>\n",
       "      <td>125.0</td>\n",
       "      <td>53.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.109244</td>\n",
       "      <td>0.264706</td>\n",
       "      <td>0.626050</td>\n",
       "      <td>severe</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.099324</td>\n",
       "      <td>0.00156006</td>\n",
       "      <td>58.00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>57 rows × 49 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              0     1      2    3      4      5     6     7      8      9   \\\n",
       "0    1636-69-001  59.0  21.70  6.7  109.0  205.0   9.0   0.3  204.0  135.0   \n",
       "1    1636-69-026  67.0  28.90  6.2   97.0  152.0   7.0   1.2  208.0   76.0   \n",
       "2    1636-69-028  50.0  27.30  5.2   91.0  121.0   4.0   4.4  127.0   25.0   \n",
       "3    1636-69-032  59.0  25.00  5.7   82.0  142.0   5.0   0.2  224.0  138.0   \n",
       "4    1636-69-035  60.0  28.20  5.5   87.0  118.0   NaN   0.2  224.0   85.0   \n",
       "5    1636-69-048  60.0  33.50  5.2   87.0  130.0   6.0   4.8  197.0  213.0   \n",
       "6    1636-69-053  60.0  26.20  5.0   85.0  128.0   5.0   0.4  196.0   90.0   \n",
       "7    1636-69-060  55.0  28.30  5.2   91.0  120.0   NaN   1.4  181.0   79.0   \n",
       "8    1636-69-064  51.0  28.30  5.2   82.0  137.0   6.0   1.1  225.0  123.0   \n",
       "9    1636-69-069  56.0  29.30  5.5   90.0  111.0   7.0   1.4  189.0  131.0   \n",
       "10   1636-69-090  76.0  22.00  5.6  106.0  194.0  14.0   0.0  159.0   34.0   \n",
       "11   1636-69-091  62.0  23.80  6.5  127.0  151.0  18.0   1.1  293.0  343.0   \n",
       "12   1636-69-100  45.0  28.40  5.3   83.0   66.0   3.0   0.3  219.0   90.0   \n",
       "13   1636-69-104  56.0  24.60  4.6   82.0  104.0   5.0   2.5  259.0   67.0   \n",
       "14   1636-69-107  53.0  38.00  5.6   98.0  120.0  12.0   2.3  217.0  125.0   \n",
       "15   1636-69-111  48.0  29.20  5.2   95.0   97.0  11.0   0.5  194.0   64.0   \n",
       "16   1636-69-114  65.0  29.50  5.8   91.0  117.0   5.0   1.3  194.0   69.0   \n",
       "17   1636-69-123  54.0  23.80  5.3   86.0  116.0   8.0   0.7  157.0   27.0   \n",
       "18  1636-70-1002  51.0  28.80  5.2   88.0   98.0  20.0   4.4  182.0  147.0   \n",
       "19  1636-70-1003  51.0  26.20  5.2   76.0  107.0   7.0   0.9  206.0   32.0   \n",
       "20  1636-70-1005  66.0  27.20  5.5   94.0  152.0   5.0   0.4  243.0   95.0   \n",
       "21  1636-70-1008  62.0  28.00  5.4   82.0  124.0  18.0   1.8  177.0  164.0   \n",
       "22  1636-70-1010  68.0  33.00  5.5   94.0  190.0   9.0   3.5  143.0   93.0   \n",
       "23      2133-001  27.0  26.40  5.2   86.0  101.0   7.0   1.1  216.0  134.0   \n",
       "24      2133-002  29.0  21.40  5.3   76.0   90.0   9.0   1.7  193.0  116.0   \n",
       "25      2133-003  36.0  24.60  5.4   91.0  131.0  13.0   0.4  199.0  244.0   \n",
       "26      2133-004  54.0  28.09  6.0  145.0    NaN  11.0   1.6  148.0  130.0   \n",
       "27      2133-006  29.0  20.00  5.3   80.0   67.0   6.0   0.2   84.0   26.0   \n",
       "28      2133-007  48.0  26.47  5.5   95.0  127.0  10.0   8.6  179.0   68.0   \n",
       "29      2133-008  31.0  19.00  NaN    NaN    NaN   NaN   NaN    NaN    NaN   \n",
       "30      2133-009  25.0  19.50  5.2   84.0  113.0   5.0   3.3  151.0   94.0   \n",
       "31      2133-010  29.0  24.50  NaN   91.0  125.0   8.0   0.6  204.0   43.0   \n",
       "32      2133-011  32.0  24.00  5.4   88.0  114.0   5.0   0.0  191.0   34.0   \n",
       "33      2133-012  27.0  20.00  5.3   83.0  113.0   4.0   2.1  137.0   23.0   \n",
       "34      2133-013  65.0  23.60  5.4   89.0   85.0   7.0   0.3  199.0   88.0   \n",
       "35      2133-015  51.0  33.60  5.7   96.0  181.0  10.0   5.4  237.0  167.0   \n",
       "36      2133-017  47.0  40.40  5.3   98.0  164.0  11.0   3.8  189.0  104.0   \n",
       "37      2133-018  64.0  26.40  6.4  103.0  256.0   7.0   0.0  101.0   58.0   \n",
       "38      2133-019  64.0  29.93  6.2   95.0    NaN   6.0   1.4  251.0   64.0   \n",
       "39      2133-020  53.0  24.75  5.1   82.0   97.0   2.0   0.2  172.0   53.0   \n",
       "40      2133-021  56.0  27.87  6.0  119.0  187.0   5.0   0.5  177.0  126.0   \n",
       "41      2133-022  60.0  26.00  4.9   85.0  116.0   6.0   0.7  176.0   40.0   \n",
       "42      2133-023  52.0  25.90  5.2   91.0   65.0   6.0   3.1  192.0   98.0   \n",
       "43      2133-024  28.0  24.50  5.2  124.0   95.0  20.0   0.2  136.0  274.0   \n",
       "44      2133-025  64.0  23.60  5.1   91.0  139.0   2.0   1.4  166.0   36.0   \n",
       "45      2133-026  34.0  21.30  5.1   91.0   77.0   4.0   0.0  252.0   48.0   \n",
       "46      2133-027  58.0  26.30  5.5  107.0  102.0   8.0  11.0  187.0  190.0   \n",
       "47      2133-028  25.0  24.90  5.0   89.0   63.0   3.0   0.2  172.0   68.0   \n",
       "48      2133-030  31.0  22.98  5.4   90.0   84.0   3.0   0.2  168.0   41.0   \n",
       "49      2133-032  35.0  26.30  5.1   93.0  134.0   3.0   0.3  187.0   44.0   \n",
       "50      2133-033  42.0  19.60  5.0   90.0  100.0   NaN   0.2  150.0   65.0   \n",
       "51      2133-035  61.0  30.10  5.4  105.0  108.0   8.0   1.0  237.0   94.0   \n",
       "52      2133-036  30.0  39.20  5.9   98.0  177.0  12.0   9.0  148.0   71.0   \n",
       "53      2133-037  28.0  25.40  4.9   83.0  107.0   5.0   3.8  131.0   63.0   \n",
       "54      2133-039  46.0  36.10  5.1  100.0  240.0  14.0   5.5  157.0  108.0   \n",
       "55      2133-040  39.0  21.00  5.2   86.0  105.0   4.0   1.2  183.0   76.0   \n",
       "56      2133-041  51.0  27.30  4.9   93.0   74.0   3.0   0.4  125.0   53.0   \n",
       "\n",
       "    ...        39        40        41        42     43     44       45  \\\n",
       "0   ...  0.147059  0.369748  0.483193    severe  176.3   68.0  0.10150   \n",
       "1   ...  0.004202  0.289916  0.705882    severe  157.5   76.0      NaN   \n",
       "2   ...  0.008403  0.424370  0.567227    severe    NaN    NaN      NaN   \n",
       "3   ...  0.021008  0.491597  0.487395  moderate  169.4   68.2  0.01575   \n",
       "4   ...  0.029412  0.352941  0.617647    severe  176.5   82.5  0.05642   \n",
       "5   ...  0.121849  0.693277  0.184874  moderate  151.9   77.5 -0.02675   \n",
       "6   ...  0.054622  0.714286  0.231092  moderate  164.5   75.7  0.10525   \n",
       "7   ...  0.000000  0.121849  0.878151    severe  181.5   95.7  0.04108   \n",
       "8   ...  0.000000  0.239496  0.760504    severe  150.5   65.3  0.05908   \n",
       "9   ...  0.050420  0.462185  0.487395    severe  184.5  191.0  0.04925   \n",
       "10  ...  0.155462  0.340336  0.504202    severe  169.6   63.8  0.12083   \n",
       "11  ...  0.046219  0.680672  0.273109  moderate  176.0   73.6  0.12308   \n",
       "12  ...  0.159664  0.655462  0.184874  moderate  184.0   96.0 -0.00608   \n",
       "13  ...  0.415966  0.466387  0.117647  moderate  165.3   67.0  0.02833   \n",
       "14  ...  0.037815  0.352941  0.609244    severe    NaN    NaN      NaN   \n",
       "15  ...  0.079832  0.663866  0.256303  moderate  176.1   90.5  0.01233   \n",
       "16  ...  0.000000  0.474790  0.525210    severe  169.6   84.6  0.03908   \n",
       "17  ...  0.500000  0.407563  0.092437       low  171.0   76.2  0.04600   \n",
       "18  ...  0.092437  0.668067  0.239496  moderate  172.0   87.0      NaN   \n",
       "19  ...  0.357143  0.474790  0.168067  moderate  166.3   75.3  0.02342   \n",
       "20  ...  0.021008  0.273109  0.705882    severe  184.0   89.0      NaN   \n",
       "21  ...  0.138655  0.596639  0.264706  moderate  172.0   92.8      NaN   \n",
       "22  ...  0.033613  0.231092  0.735294    severe  164.2   88.1  0.06458   \n",
       "23  ...  0.722689  0.168067  0.109244       low  168.0   74.5  0.06025   \n",
       "24  ...  0.222689  0.617647  0.159664  moderate    NaN    NaN      NaN   \n",
       "25  ...  0.151261  0.542017  0.306723  moderate    NaN    NaN      NaN   \n",
       "26  ...  0.058824  0.117647  0.823529    severe    NaN    NaN      NaN   \n",
       "27  ...  0.571429  0.365546  0.063025       low  172.0   58.2  0.03917   \n",
       "28  ...  0.012605  0.403361  0.584034    severe  162.0   70.0  0.12933   \n",
       "29  ...  0.378151  0.563025  0.058824  moderate    NaN    NaN      NaN   \n",
       "30  ...  0.025210  0.424370  0.550420    severe  167.0   54.6  0.06867   \n",
       "31  ...  0.210084  0.689076  0.100840  moderate  170.0   70.9  0.08983   \n",
       "32  ...  0.214286  0.592437  0.193277  moderate  155.0   58.2  0.03733   \n",
       "33  ...  0.294118  0.415966  0.289916  moderate  182.0   66.5  0.01683   \n",
       "34  ...  0.197479  0.596639  0.205882  moderate  162.8   62.5  0.05308   \n",
       "35  ...  0.037815  0.462185  0.500000    severe  160.0   86.1  0.11250   \n",
       "36  ...  0.037815  0.432773  0.529412    severe  162.4  106.5  0.05458   \n",
       "37  ...  0.000000  0.315126  0.684874    severe  158.6   66.4  0.08600   \n",
       "38  ...  0.151261  0.382353  0.466387    severe    NaN    NaN      NaN   \n",
       "39  ...  0.289916  0.386555  0.323529  moderate  161.8   64.8      NaN   \n",
       "40  ...  0.025210  0.109244  0.865546    severe  180.5   88.9  0.05383   \n",
       "41  ...  0.058824  0.529412  0.411765  moderate  156.8   63.7  0.05625   \n",
       "42  ...  0.487395  0.441176  0.071429       low  181.5   85.8  0.01342   \n",
       "43  ...  0.214286  0.462185  0.323529  moderate  180.0   79.4  0.00467   \n",
       "44  ...  0.357143  0.584034  0.058824  moderate  177.0   73.9  0.08883   \n",
       "45  ...  0.588235  0.327731  0.084034       low  177.0   68.1  0.01658   \n",
       "46  ...  0.226891  0.697479  0.075630  moderate  175.0   80.6  0.01042   \n",
       "47  ...  0.915966  0.084034  0.000000       low  186.4   86.7  0.02642   \n",
       "48  ...  0.310924  0.508403  0.180672  moderate  170.1   66.5  0.01858   \n",
       "49  ...  0.071429  0.680672  0.247899  moderate  177.5   82.6  0.03308   \n",
       "50  ...  0.352941  0.449580  0.197479  moderate    NaN    NaN      NaN   \n",
       "51  ...  0.042017  0.684874  0.273109  moderate    NaN    NaN      NaN   \n",
       "52  ...  0.121849  0.428571  0.449580    severe  156.7   96.1  0.07858   \n",
       "53  ...  0.319328  0.470588  0.210084  moderate  169.0   72.5  0.06008   \n",
       "54  ...  0.079832  0.516807  0.403361  moderate  159.8   91.9  0.14900   \n",
       "55  ...  0.344538  0.495798  0.159664  moderate  159.0   53.3  0.11333   \n",
       "56  ...  0.109244  0.264706  0.626050    severe    NaN    NaN      NaN   \n",
       "\n",
       "            46           47      48  \n",
       "0     0.190404    0.0262106      91  \n",
       "1    0.0831202            0     133  \n",
       "2    0.0714286    0.0015444      75  \n",
       "3    0.0147643            0      87  \n",
       "4      0.15465   0.00592128     160  \n",
       "5   0.00188071            0     119  \n",
       "6    0.0505717   0.00322486     188  \n",
       "7    0.0851419            0     179  \n",
       "8      0.16646     0.022697     190  \n",
       "9    0.0792059            0     250  \n",
       "10   0.0887701            0      NA  \n",
       "11   0.0360577            0     155  \n",
       "12  0.00797832            0      NA  \n",
       "13   0.0061745            0     129  \n",
       "14   0.0517828            0     223  \n",
       "15           0            0      NA  \n",
       "16   0.0170576            0      NA  \n",
       "17  0.00271967            0      NA  \n",
       "18   0.0287611            0     175  \n",
       "19  0.00338409            0      70  \n",
       "20   0.0507868   0.00249593      99  \n",
       "21    0.056118   0.00550176     229  \n",
       "22    0.068886  0.000164799      65  \n",
       "23   0.0244977            0  110.00  \n",
       "24   0.0163119            0  143.00  \n",
       "25   0.0423782            0  110.00  \n",
       "26    0.155905     0.017363  173.00  \n",
       "27  0.00263574            0   61.00  \n",
       "28   0.0609236  0.000388048  218.00  \n",
       "29           0            0      NA  \n",
       "30   0.0426516            0      NA  \n",
       "31  0.00968013            0      NA  \n",
       "32   0.0370724   0.00248423   69.00  \n",
       "33  0.00857449   0.00214362      NA  \n",
       "34   0.0173885   0.00131234      NA  \n",
       "35   0.0610169   0.00169492  158.00  \n",
       "36   0.0691709            0      NA  \n",
       "37    0.108421    0.0612928  185.00  \n",
       "38    0.115385   0.00238727      NA  \n",
       "39   0.0504971  0.000261643   57.00  \n",
       "40    0.290249    0.0274251  130.00  \n",
       "41   0.0551983  0.000535906  111.00  \n",
       "42  0.00838399            0   70.00  \n",
       "43   0.0200524  0.000462749   49.00  \n",
       "44  0.00873274            0      NA  \n",
       "45  0.00406769            0   73.00  \n",
       "46   0.0885068   0.00615385   55.00  \n",
       "47  0.00129947            0      NA  \n",
       "48  0.00757657            0   45.00  \n",
       "49   0.0177891            0   59.00  \n",
       "50   0.0103956            0   51.00  \n",
       "51   0.0437872            0      NA  \n",
       "52   0.0678299   0.00270337  301.00  \n",
       "53  0.00843143            0   64.00  \n",
       "54    0.086758    0.0127854  335.00  \n",
       "55    0.018755            0  148.00  \n",
       "56    0.099324   0.00156006   58.00  \n",
       "\n",
       "[57 rows x 49 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the raw covariate rows as a DataFrame to inspect missing values.\n",
    "# The rendered table shows both NaN entries and literal 'NA' strings, so both\n",
    "# forms of missingness have to be handled downstream.\n",
    "pd.DataFrame(raw_covs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build a column-oriented view of the covariates: one list per covariate name,\n",
    "# holding that covariate's value for every row of raw_covs (in row order).\n",
    "\n",
    "# start with an empty list for every column name\n",
    "covs_dict = {name: [] for name in names}\n",
    "\n",
    "# append each row's values to their columns; position i of a row is stored under names[i]\n",
    "# (enumerate keeps the index and the name in step instead of a hand-maintained counter)\n",
    "for row in raw_covs:\n",
    "    for i, name in enumerate(names):\n",
    "        covs_dict[name].append(row[i])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "# of rows: 57\n",
      "userID 57\n",
      "Age 57\n",
      "BMI 57\n",
      "A1C 57\n",
      "FBG 57\n",
      "ogtt.2hr 57\n",
      "insulin 57\n",
      "hs.CRP 57\n",
      "Tchol 57\n",
      "Trg 57\n",
      "HDL 57\n",
      "LDL 57\n",
      "mean_glucose 57\n",
      "sd_glucose 57\n",
      "range_glucose 57\n",
      "min_glucose 57\n",
      "max_glucose 57\n",
      "quartile.25_glucose 57\n",
      "median_glucose 57\n",
      "quartile.75_glucose 57\n",
      "mean_slope 57\n",
      "max_slope 57\n",
      "number_Random140 57\n",
      "number_Random200 57\n",
      "percent_below.80 57\n",
      "percent_above.130 57\n",
      "se_glucose_mean 57\n",
      "numGE 57\n",
      "mage 57\n",
      "j_index 57\n",
      "IQR 57\n",
      "modd 57\n",
      "distance_traveled 57\n",
      "coef_variation 57\n",
      "number_Random140_normByDays 57\n",
      "number_Random200_normByDays 57\n",
      "numGE_normByDays 57\n",
      "distance_traveled_normByDays 57\n",
      "diagnosis 57\n",
      "freq_low 57\n",
      "freq_moderate 57\n",
      "freq_severe 57\n",
      "glucotype 57\n",
      "Height 57\n",
      "Weight 57\n",
      "Insulin_rate_dd 57\n",
      "perc_cgm_prediabetic_range 57\n",
      "perc_cgm_diabetic_range 57\n",
      "SSPG 57\n"
     ]
    }
   ],
   "source": [
    "# check to make sure all data has same lengths\n",
    "print(\"# of rows:\", len(raw_covs))\n",
    "for name in names:\n",
    "    print(name, len(covs_dict[name]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "# construct covariate columns based on number of userID measurements in data\n",
    "# get number of measurements for each id\n",
    "num_measurements = []\n",
    "for id in covs_dict['userID']:\n",
    "    num_measurements.append(len(data.loc[data['id'] == id]))\n",
    "\n",
    "# create columns\n",
    "cols = dict()\n",
    "for name in names:\n",
    "    # exclude userID column\n",
    "    if name != 'userID':\n",
    "        cols[name] = []\n",
    "\n",
    "# fill columns\n",
    "i = 0\n",
    "for num in num_measurements:\n",
    "    # get current subject\n",
    "    curr_id = covs_dict['userID'][i]\n",
    "    \n",
    "    for col in cols:\n",
    "        # check for NAs in covariate data for current subject\n",
    "        if covs_dict[col][covs_dict[\"userID\"].index(curr_id)] in [None, \"NA\"]: #if has NAs\n",
    "            cols[col].extend([-1]*num)\n",
    "        else: #if has no NAs\n",
    "            # convert glucotype/diagnosis strings to integer classifications\n",
    "            if col == 'glucotype':\n",
    "                if covs_dict[col][covs_dict[\"userID\"].index(curr_id)] == 'low':\n",
    "                    cols[col].extend([0]*num) # 0 = 'low' group for glucotype\n",
    "                elif covs_dict[col][covs_dict[\"userID\"].index(curr_id)] == 'moderate':\n",
    "                    cols[col].extend([1]*num) # 1 = 'moderate' group for glucotype\n",
    "                elif covs_dict[col][covs_dict[\"userID\"].index(curr_id)] == 'severe':\n",
    "                    cols[col].extend([2]*num) # 2 = 'severe' group for glucotype\n",
    "            elif col == 'diagnosis':\n",
    "                if covs_dict[col][covs_dict[\"userID\"].index(curr_id)] == 'non-diabetic':\n",
    "                    cols[col].extend([0]*num) # 0 = 'non-diabetic' group for glucotype\n",
    "                elif covs_dict[col][covs_dict[\"userID\"].index(curr_id)] == 'pre-diabetic':\n",
    "                    cols[col].extend([1]*num) # 1 = 'pre-diabetic' group for glucotype\n",
    "                elif covs_dict[col][covs_dict[\"userID\"].index(curr_id)] == 'diabetic':\n",
    "                    cols[col].extend([2]*num) # 2 = 'diabetic' group for glucotype\n",
    "            else:\n",
    "                cols[col].extend([covs_dict[col][covs_dict[\"userID\"].index(curr_id)]]*num)\n",
    "    i += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame.from_dict(cols)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Data Table: 105426\n",
      "Covariate Table: 48\n"
     ]
    }
   ],
   "source": [
    "# check length of covariate table and data table\n",
    "print(\"Data Table:\", len(data))\n",
    "print(\"Covariate Table:\", len(cols))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "# add covariates by column\n",
    "data_covariates = data.copy()\n",
    "\n",
    "for column in df.columns:\n",
    "    data_covariates[column] = df[column]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{0, 1, 2}"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "set(data_covariates['glucotype'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{0, 1, 2}"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "set(data_covariates['diagnosis'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>time</th>\n",
       "      <th>gl</th>\n",
       "      <th>id</th>\n",
       "      <th>Age</th>\n",
       "      <th>BMI</th>\n",
       "      <th>A1C</th>\n",
       "      <th>FBG</th>\n",
       "      <th>ogtt.2hr</th>\n",
       "      <th>insulin</th>\n",
       "      <th>hs.CRP</th>\n",
       "      <th>...</th>\n",
       "      <th>freq_low</th>\n",
       "      <th>freq_moderate</th>\n",
       "      <th>freq_severe</th>\n",
       "      <th>glucotype</th>\n",
       "      <th>Height</th>\n",
       "      <th>Weight</th>\n",
       "      <th>Insulin_rate_dd</th>\n",
       "      <th>perc_cgm_prediabetic_range</th>\n",
       "      <th>perc_cgm_diabetic_range</th>\n",
       "      <th>SSPG</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2014-02-03 03:42:12</td>\n",
       "      <td>93.0</td>\n",
       "      <td>1636-69-001</td>\n",
       "      <td>59.0</td>\n",
       "      <td>21.7</td>\n",
       "      <td>6.7</td>\n",
       "      <td>109.0</td>\n",
       "      <td>205.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>...</td>\n",
       "      <td>0.147059</td>\n",
       "      <td>0.369748</td>\n",
       "      <td>0.483193</td>\n",
       "      <td>2</td>\n",
       "      <td>176.3</td>\n",
       "      <td>68.0</td>\n",
       "      <td>0.1015</td>\n",
       "      <td>0.190404</td>\n",
       "      <td>0.026211</td>\n",
       "      <td>91.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2014-02-03 03:47:12</td>\n",
       "      <td>93.0</td>\n",
       "      <td>1636-69-001</td>\n",
       "      <td>59.0</td>\n",
       "      <td>21.7</td>\n",
       "      <td>6.7</td>\n",
       "      <td>109.0</td>\n",
       "      <td>205.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>...</td>\n",
       "      <td>0.147059</td>\n",
       "      <td>0.369748</td>\n",
       "      <td>0.483193</td>\n",
       "      <td>2</td>\n",
       "      <td>176.3</td>\n",
       "      <td>68.0</td>\n",
       "      <td>0.1015</td>\n",
       "      <td>0.190404</td>\n",
       "      <td>0.026211</td>\n",
       "      <td>91.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2014-02-03 03:52:12</td>\n",
       "      <td>93.0</td>\n",
       "      <td>1636-69-001</td>\n",
       "      <td>59.0</td>\n",
       "      <td>21.7</td>\n",
       "      <td>6.7</td>\n",
       "      <td>109.0</td>\n",
       "      <td>205.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>...</td>\n",
       "      <td>0.147059</td>\n",
       "      <td>0.369748</td>\n",
       "      <td>0.483193</td>\n",
       "      <td>2</td>\n",
       "      <td>176.3</td>\n",
       "      <td>68.0</td>\n",
       "      <td>0.1015</td>\n",
       "      <td>0.190404</td>\n",
       "      <td>0.026211</td>\n",
       "      <td>91.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2014-02-03 03:57:12</td>\n",
       "      <td>95.0</td>\n",
       "      <td>1636-69-001</td>\n",
       "      <td>59.0</td>\n",
       "      <td>21.7</td>\n",
       "      <td>6.7</td>\n",
       "      <td>109.0</td>\n",
       "      <td>205.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>...</td>\n",
       "      <td>0.147059</td>\n",
       "      <td>0.369748</td>\n",
       "      <td>0.483193</td>\n",
       "      <td>2</td>\n",
       "      <td>176.3</td>\n",
       "      <td>68.0</td>\n",
       "      <td>0.1015</td>\n",
       "      <td>0.190404</td>\n",
       "      <td>0.026211</td>\n",
       "      <td>91.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2014-02-03 04:02:12</td>\n",
       "      <td>96.0</td>\n",
       "      <td>1636-69-001</td>\n",
       "      <td>59.0</td>\n",
       "      <td>21.7</td>\n",
       "      <td>6.7</td>\n",
       "      <td>109.0</td>\n",
       "      <td>205.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>...</td>\n",
       "      <td>0.147059</td>\n",
       "      <td>0.369748</td>\n",
       "      <td>0.483193</td>\n",
       "      <td>2</td>\n",
       "      <td>176.3</td>\n",
       "      <td>68.0</td>\n",
       "      <td>0.1015</td>\n",
       "      <td>0.190404</td>\n",
       "      <td>0.026211</td>\n",
       "      <td>91.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>105421</th>\n",
       "      <td>2017-07-11 20:21:32</td>\n",
       "      <td>70.0</td>\n",
       "      <td>2133-041</td>\n",
       "      <td>51.0</td>\n",
       "      <td>27.3</td>\n",
       "      <td>4.9</td>\n",
       "      <td>93.0</td>\n",
       "      <td>74.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.4</td>\n",
       "      <td>...</td>\n",
       "      <td>0.109244</td>\n",
       "      <td>0.264706</td>\n",
       "      <td>0.626050</td>\n",
       "      <td>2</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0000</td>\n",
       "      <td>0.099324</td>\n",
       "      <td>0.001560</td>\n",
       "      <td>58.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>105422</th>\n",
       "      <td>2017-07-11 20:26:32</td>\n",
       "      <td>64.0</td>\n",
       "      <td>2133-041</td>\n",
       "      <td>51.0</td>\n",
       "      <td>27.3</td>\n",
       "      <td>4.9</td>\n",
       "      <td>93.0</td>\n",
       "      <td>74.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.4</td>\n",
       "      <td>...</td>\n",
       "      <td>0.109244</td>\n",
       "      <td>0.264706</td>\n",
       "      <td>0.626050</td>\n",
       "      <td>2</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0000</td>\n",
       "      <td>0.099324</td>\n",
       "      <td>0.001560</td>\n",
       "      <td>58.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>105423</th>\n",
       "      <td>2017-07-11 20:31:32</td>\n",
       "      <td>61.0</td>\n",
       "      <td>2133-041</td>\n",
       "      <td>51.0</td>\n",
       "      <td>27.3</td>\n",
       "      <td>4.9</td>\n",
       "      <td>93.0</td>\n",
       "      <td>74.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.4</td>\n",
       "      <td>...</td>\n",
       "      <td>0.109244</td>\n",
       "      <td>0.264706</td>\n",
       "      <td>0.626050</td>\n",
       "      <td>2</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0000</td>\n",
       "      <td>0.099324</td>\n",
       "      <td>0.001560</td>\n",
       "      <td>58.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>105424</th>\n",
       "      <td>2017-07-11 20:36:32</td>\n",
       "      <td>62.0</td>\n",
       "      <td>2133-041</td>\n",
       "      <td>51.0</td>\n",
       "      <td>27.3</td>\n",
       "      <td>4.9</td>\n",
       "      <td>93.0</td>\n",
       "      <td>74.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.4</td>\n",
       "      <td>...</td>\n",
       "      <td>0.109244</td>\n",
       "      <td>0.264706</td>\n",
       "      <td>0.626050</td>\n",
       "      <td>2</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0000</td>\n",
       "      <td>0.099324</td>\n",
       "      <td>0.001560</td>\n",
       "      <td>58.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>105425</th>\n",
       "      <td>2017-07-11 20:41:32</td>\n",
       "      <td>66.0</td>\n",
       "      <td>2133-041</td>\n",
       "      <td>51.0</td>\n",
       "      <td>27.3</td>\n",
       "      <td>4.9</td>\n",
       "      <td>93.0</td>\n",
       "      <td>74.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.4</td>\n",
       "      <td>...</td>\n",
       "      <td>0.109244</td>\n",
       "      <td>0.264706</td>\n",
       "      <td>0.626050</td>\n",
       "      <td>2</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0000</td>\n",
       "      <td>0.099324</td>\n",
       "      <td>0.001560</td>\n",
       "      <td>58.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>105426 rows × 51 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                       time    gl           id   Age   BMI  A1C    FBG  \\\n",
       "0       2014-02-03 03:42:12  93.0  1636-69-001  59.0  21.7  6.7  109.0   \n",
       "1       2014-02-03 03:47:12  93.0  1636-69-001  59.0  21.7  6.7  109.0   \n",
       "2       2014-02-03 03:52:12  93.0  1636-69-001  59.0  21.7  6.7  109.0   \n",
       "3       2014-02-03 03:57:12  95.0  1636-69-001  59.0  21.7  6.7  109.0   \n",
       "4       2014-02-03 04:02:12  96.0  1636-69-001  59.0  21.7  6.7  109.0   \n",
       "...                     ...   ...          ...   ...   ...  ...    ...   \n",
       "105421  2017-07-11 20:21:32  70.0     2133-041  51.0  27.3  4.9   93.0   \n",
       "105422  2017-07-11 20:26:32  64.0     2133-041  51.0  27.3  4.9   93.0   \n",
       "105423  2017-07-11 20:31:32  61.0     2133-041  51.0  27.3  4.9   93.0   \n",
       "105424  2017-07-11 20:36:32  62.0     2133-041  51.0  27.3  4.9   93.0   \n",
       "105425  2017-07-11 20:41:32  66.0     2133-041  51.0  27.3  4.9   93.0   \n",
       "\n",
       "        ogtt.2hr  insulin  hs.CRP  ...  freq_low  freq_moderate  freq_severe  \\\n",
       "0          205.0      9.0     0.3  ...  0.147059       0.369748     0.483193   \n",
       "1          205.0      9.0     0.3  ...  0.147059       0.369748     0.483193   \n",
       "2          205.0      9.0     0.3  ...  0.147059       0.369748     0.483193   \n",
       "3          205.0      9.0     0.3  ...  0.147059       0.369748     0.483193   \n",
       "4          205.0      9.0     0.3  ...  0.147059       0.369748     0.483193   \n",
       "...          ...      ...     ...  ...       ...            ...          ...   \n",
       "105421      74.0      3.0     0.4  ...  0.109244       0.264706     0.626050   \n",
       "105422      74.0      3.0     0.4  ...  0.109244       0.264706     0.626050   \n",
       "105423      74.0      3.0     0.4  ...  0.109244       0.264706     0.626050   \n",
       "105424      74.0      3.0     0.4  ...  0.109244       0.264706     0.626050   \n",
       "105425      74.0      3.0     0.4  ...  0.109244       0.264706     0.626050   \n",
       "\n",
       "        glucotype  Height  Weight  Insulin_rate_dd  \\\n",
       "0               2   176.3    68.0           0.1015   \n",
       "1               2   176.3    68.0           0.1015   \n",
       "2               2   176.3    68.0           0.1015   \n",
       "3               2   176.3    68.0           0.1015   \n",
       "4               2   176.3    68.0           0.1015   \n",
       "...           ...     ...     ...              ...   \n",
       "105421          2    -1.0    -1.0          -1.0000   \n",
       "105422          2    -1.0    -1.0          -1.0000   \n",
       "105423          2    -1.0    -1.0          -1.0000   \n",
       "105424          2    -1.0    -1.0          -1.0000   \n",
       "105425          2    -1.0    -1.0          -1.0000   \n",
       "\n",
       "        perc_cgm_prediabetic_range  perc_cgm_diabetic_range  SSPG  \n",
       "0                         0.190404                 0.026211  91.0  \n",
       "1                         0.190404                 0.026211  91.0  \n",
       "2                         0.190404                 0.026211  91.0  \n",
       "3                         0.190404                 0.026211  91.0  \n",
       "4                         0.190404                 0.026211  91.0  \n",
       "...                            ...                      ...   ...  \n",
       "105421                    0.099324                 0.001560  58.0  \n",
       "105422                    0.099324                 0.001560  58.0  \n",
       "105423                    0.099324                 0.001560  58.0  \n",
       "105424                    0.099324                 0.001560  58.0  \n",
       "105425                    0.099324                 0.001560  58.0  \n",
       "\n",
       "[105426 rows x 51 columns]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "data_covariates.to_csv(\"./raw_data/hall.csv\", index=False)\n",
    "df = pd.read_csv(\"./raw_data/hall.csv\")\n",
    "display(df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Check statistics of the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# load yaml config file\n",
    "with open('./config/hall.yaml', 'r') as f:\n",
    "    config = yaml.safe_load(f)\n",
    "\n",
    "# set interpolation params for no interpolation\n",
    "new_config = config.copy()\n",
    "new_config['interpolation_params']['gap_threshold'] = 5\n",
    "new_config['interpolation_params']['min_drop_length'] = 0\n",
    "# set split params for no splitting\n",
    "new_config['split_params']['test_percent_subjects'] = 0\n",
    "new_config['split_params']['length_segment'] = 0\n",
    "# set scaling params for no scaling\n",
    "new_config['scaling_params']['scaler'] = 'None'\n",
    "\n",
    "formatter = DataFormatter(new_config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# print min, max, median, mean, std of segment lengths\n",
    "segment_lens = []\n",
    "for group, data in formatter.train_data.groupby('id_segment'):\n",
    "    segment_lens.append(len(data))\n",
    "print('Train segment lengths:')\n",
    "print('\\tMin: ', min(segment_lens))\n",
    "print('\\tMax: ', max(segment_lens))\n",
    "print('\\tMedian: ', np.median(segment_lens))\n",
    "print('\\tMean: ', np.mean(segment_lens))\n",
    "print('\\tStd: ', np.std(segment_lens))\n",
    "\n",
    "# plot each segment\n",
    "num_segments = formatter.train_data['id_segment'].nunique()\n",
    "# fig, axs = plt.subplots(1, num_segments, figsize=(30, 5))\n",
    "# for i, (group, data) in enumerate(formatter.train_data.groupby('id_segment')):\n",
    "#     data.plot(x='time', y='gl', ax=axs[i], title='Segment {}'.format(group))\n",
    "counter = 0\n",
    "for i, (group, data) in enumerate(formatter.train_data.groupby('id_segment')):\n",
    "    if counter == 5:\n",
    "        counter = 0\n",
    "        continue\n",
    "    if counter == 0:\n",
    "        fig, axs = plt.subplots(1, 5, figsize=(30, 5))\n",
    "    if counter < 5:\n",
    "        data.plot(x='time', y='gl', ax=axs[counter], title='Segment {}'.format(group))\n",
    "        counter += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# plot acf of random samples from segments\n",
    "# fig, ax = plt.subplots(2, num_segments, figsize=(30, 5))\n",
    "# lags = 300\n",
    "# for i, (group, data) in enumerate(formatter.train_data.groupby('id_segment')):\n",
    "#     data = data['gl']\n",
    "#     if len(data) < lags:\n",
    "#         print('Segment {} is too short'.format(group))\n",
    "#         continue\n",
    "#     # select 10 random samples from index of data\n",
    "#     sample = np.random.choice(range(len(data))[:-lags], 10, replace=False)\n",
    "#     # plot acf / pacf of each sample\n",
    "#     for j in sample:\n",
    "#         acf, acf_ci = sm.tsa.stattools.acf(data[j:j+lags], nlags=lags, alpha=0.05)\n",
    "#         pacf, pacf_ci = sm.tsa.stattools.pacf(data[j:j+lags], method='ols-adjusted', alpha=0.05)\n",
    "#         ax[0, i].plot(acf)\n",
    "#         ax[1, i].plot(pacf)\n",
    "\n",
    "counter = 0\n",
    "lags = 300\n",
    "\n",
    "for i, (group, data) in enumerate(formatter.train_data.groupby('id_segment')):\n",
    "    if counter == 10:\n",
    "        counter = 0\n",
    "        continue\n",
    "    if len(data) < lags + 10:\n",
    "        print('Segment {} is too short'.format(group))\n",
    "        continue\n",
    "    if counter == 0:\n",
    "        fig, ax = plt.subplots(2, 10, figsize=(30, 5))\n",
    "    if counter < 10:\n",
    "        data = data['gl']\n",
    "        # select 10 random samples from index of data\n",
    "        sample = np.random.choice(range(len(data))[:-lags], 10, replace=False)\n",
    "        # plot acf / pacf of each sample\n",
    "        for j in sample:\n",
    "            acf, acf_ci = sm.tsa.stattools.acf(data[j:j+lags], nlags=lags, alpha=0.05)\n",
    "            pacf, pacf_ci = sm.tsa.stattools.pacf(data[j:j+lags], method='ols-adjusted', alpha=0.05)\n",
    "            ax[0, counter].plot(acf)\n",
    "            ax[1, counter].plot(pacf)        \n",
    "        counter += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# set interpolation params for interpolation\n",
    "new_config['interpolation_params']['gap_threshold'] = 30 # minutes - use as in config file \n",
    "new_config['interpolation_params']['min_drop_length'] = 192\n",
    "\n",
    "formatter = DataFormatter(new_config, study_file = \"./output/arima_hall.txt\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# print min, max, median, mean, std of segment lengths\n",
    "segment_lens = []\n",
    "for group, data in formatter.train_data.groupby('id_segment'):\n",
    "    segment_lens.append(len(data))\n",
    "print('Train segment lengths:')\n",
    "print('\\tMin: ', min(segment_lens))\n",
    "print('\\tMax: ', max(segment_lens))\n",
    "print('\\t1st Quartile: ', np.quantile(segment_lens, 0.25))\n",
    "print('\\tMedian: ', np.median(segment_lens))\n",
    "print('\\t3rd Quartile: ', np.quantile(segment_lens, 0.75))\n",
    "print('\\tMean: ', np.mean(segment_lens))\n",
    "print('\\tStd: ', np.std(segment_lens))\n",
    "\n",
    "num_segments = formatter.train_data['id_segment'].nunique()\n",
    "# fig, axs = plt.subplots(1, num_segments, figsize=(30, 5))\n",
    "# for i, (group, data) in enumerate(formatter.train_data.groupby('id_segment')):\n",
    "#     data.plot(x='time', y='gl', ax=axs[i], title='Segment {}'.format(group))\n",
    "counter = 0\n",
    "for i, (group, data) in enumerate(formatter.train_data.groupby('id_segment')):\n",
    "    if counter == 5:\n",
    "        counter = 0\n",
    "        continue\n",
    "    if counter == 0:\n",
    "        fig, axs = plt.subplots(1, 5, figsize=(30, 5))\n",
    "    if counter < 5:\n",
    "        data.plot(x='time', y='gl', ax=axs[counter], title='Segment {}'.format(group))\n",
    "        counter += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# plot acf of random samples from first 9 segments segments\n",
    "counter = 0\n",
    "lags = 300\n",
    "\n",
    "for i, (group, data) in enumerate(formatter.train_data.groupby('id_segment')):\n",
    "    if counter == 10:\n",
    "        counter = 0\n",
    "        continue\n",
    "    if len(data) < lags + 10:\n",
    "        print('Segment {} is too short'.format(group))\n",
    "        continue\n",
    "    if counter == 0:\n",
    "        fig, ax = plt.subplots(2, 10, figsize=(30, 5))\n",
    "    if counter < 10:\n",
    "        data = data['gl']\n",
    "        # select 10 random samples from index of data\n",
    "        sample = np.random.choice(range(len(data))[:-lags], 10, replace=False)\n",
    "        # plot acf / pacf of each sample\n",
    "        for j in sample:\n",
    "            acf, acf_ci = sm.tsa.stattools.acf(data[j:j+lags], nlags=lags, alpha=0.05)\n",
    "            pacf, pacf_ci = sm.tsa.stattools.pacf(data[j:j+lags], method='ols-adjusted', alpha=0.05)\n",
    "            ax[0, counter].plot(acf)\n",
    "            ax[1, counter].plot(pacf)        \n",
    "        counter += 1"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  },
  "vscode": {
   "interpreter": {
    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
