{
    "cells": [
        {
            "attachments": {},
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "# MIMIC 4 data - dataset construction admissions"
            ]
        },
        {
            "attachments": {},
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "Code adapted from the GRU-ODE-Bayes preprocessing pipeline, simplified and updated for MIMIC-IV v1.0."
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 1,
            "metadata": {},
            "outputs": [],
            "source": [
                "import os\n",
                "import pathlib\n",
                "\n",
                "p_project = str(pathlib.Path(os.getcwd()).parents[1])"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 2,
            "metadata": {},
            "outputs": [],
            "source": [
                "import os\n",
                "import pandas as pd\n",
                "from datetime import datetime\n",
                "import numpy as np"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 3,
            "metadata": {},
            "outputs": [],
            "source": [
                "# Raw MIMIC-IV files are read from path_data; derived files go under path_temp.\n",
                "path_data = f'{p_project}/data/original/mimic4'\n",
                "path_temp = f'{p_project}/data/mimic4'"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 4,
            "metadata": {},
            "outputs": [],
            "source": [
                "# Load the gzip-compressed hospital admissions table.\n",
                "fn = f'{path_data}/core/admissions.csv.gz'\n",
                "adm = pd.read_csv(fn, compression='gzip')"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 5,
            "metadata": {},
            "outputs": [],
            "source": [
                "# Attach each patient's anchor_age to the admissions. The default inner join\n",
                "# drops admissions whose subject_id is missing from the patients table.\n",
                "patients_df = pd.read_csv(path_data + '/core/patients.csv.gz')\n",
                "patient_ages = patients_df[['subject_id', 'anchor_age']]\n",
                "adm_dob = patient_ages.merge(adm, on='subject_id')"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 6,
            "metadata": {},
            "outputs": [],
            "source": [
                "# Inner-join the ICU stays onto the admissions so that only hospital admissions\n",
                "# with a matching ICU stay record remain; the stay's 'los' column is carried\n",
                "# along under the clearer name 'icu_los'.\n",
                "icustay_df = pd.read_csv(path_data + '/icu/icustays.csv.gz', compression='gzip')\n",
                "adm_icu = (\n",
                "    icustay_df[['hadm_id', 'los']]\n",
                "    .merge(adm_dob, on='hadm_id')\n",
                "    .rename(columns={'los': 'icu_los'})\n",
                ")"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 7,
            "metadata": {},
            "outputs": [
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "Number of patients remaining in the dataframe: \n",
                        "60043\n"
                    ]
                }
            ],
            "source": [
                "# Keep rows whose ICU length of stay lies strictly between 1 and 30\n",
                "# (presumably days -- TODO confirm against the icustays schema).\n",
                "stay_above_min = adm_icu['icu_los'] > 1\n",
                "stay_below_max = adm_icu['icu_los'] < 30\n",
                "adm_icu_selected = adm_icu.loc[stay_below_max & stay_above_min]\n",
                "# NOTE(review): the figure printed below is the number of rows in the frame,\n",
                "# not the number of unique subject_id values.\n",
                "print('Number of patients remaining in the dataframe: ')\n",
                "print(len(adm_icu_selected.index))"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 8,
            "metadata": {},
            "outputs": [
                {
                    "data": {
                        "text/html": [
                            "<div>\n",
                            "<style scoped>\n",
                            "    .dataframe tbody tr th:only-of-type {\n",
                            "        vertical-align: middle;\n",
                            "    }\n",
                            "\n",
                            "    .dataframe tbody tr th {\n",
                            "        vertical-align: top;\n",
                            "    }\n",
                            "\n",
                            "    .dataframe thead th {\n",
                            "        text-align: right;\n",
                            "    }\n",
                            "</style>\n",
                            "<table border=\"1\" class=\"dataframe\">\n",
                            "  <thead>\n",
                            "    <tr style=\"text-align: right;\">\n",
                            "      <th></th>\n",
                            "      <th>hadm_id</th>\n",
                            "      <th>icu_los</th>\n",
                            "      <th>subject_id</th>\n",
                            "      <th>anchor_age</th>\n",
                            "      <th>admittime</th>\n",
                            "      <th>dischtime</th>\n",
                            "      <th>deathtime</th>\n",
                            "      <th>admission_type</th>\n",
                            "      <th>admission_location</th>\n",
                            "      <th>discharge_location</th>\n",
                            "      <th>insurance</th>\n",
                            "      <th>language</th>\n",
                            "      <th>marital_status</th>\n",
                            "      <th>ethnicity</th>\n",
                            "      <th>edregtime</th>\n",
                            "      <th>edouttime</th>\n",
                            "      <th>hospital_expire_flag</th>\n",
                            "    </tr>\n",
                            "  </thead>\n",
                            "  <tbody>\n",
                            "    <tr>\n",
                            "      <th>0</th>\n",
                            "      <td>24528534</td>\n",
                            "      <td>1.587454</td>\n",
                            "      <td>17867402</td>\n",
                            "      <td>25</td>\n",
                            "      <td>2154-03-03 03:09:00</td>\n",
                            "      <td>2154-03-04 16:30:00</td>\n",
                            "      <td>NaN</td>\n",
                            "      <td>EW EMER.</td>\n",
                            "      <td>EMERGENCY ROOM</td>\n",
                            "      <td>HOME</td>\n",
                            "      <td>Medicaid</td>\n",
                            "      <td>ENGLISH</td>\n",
                            "      <td>NaN</td>\n",
                            "      <td>UNABLE TO OBTAIN</td>\n",
                            "      <td>2154-03-03 02:49:00</td>\n",
                            "      <td>2154-03-03 04:11:00</td>\n",
                            "      <td>0</td>\n",
                            "    </tr>\n",
                            "    <tr>\n",
                            "      <th>1</th>\n",
                            "      <td>28960964</td>\n",
                            "      <td>3.025625</td>\n",
                            "      <td>14435996</td>\n",
                            "      <td>42</td>\n",
                            "      <td>2150-06-19 13:07:00</td>\n",
                            "      <td>2150-06-25 15:36:00</td>\n",
                            "      <td>NaN</td>\n",
                            "      <td>OBSERVATION ADMIT</td>\n",
                            "      <td>TRANSFER FROM HOSPITAL</td>\n",
                            "      <td>HOME</td>\n",
                            "      <td>Medicaid</td>\n",
                            "      <td>ENGLISH</td>\n",
                            "      <td>SINGLE</td>\n",
                            "      <td>WHITE</td>\n",
                            "      <td>2150-06-19 11:54:00</td>\n",
                            "      <td>2150-06-19 17:57:00</td>\n",
                            "      <td>0</td>\n",
                            "    </tr>\n",
                            "    <tr>\n",
                            "      <th>2</th>\n",
                            "      <td>27385897</td>\n",
                            "      <td>9.741725</td>\n",
                            "      <td>17609946</td>\n",
                            "      <td>70</td>\n",
                            "      <td>2138-02-05 17:42:00</td>\n",
                            "      <td>2138-02-15 11:00:00</td>\n",
                            "      <td>2138-02-15 11:00:00</td>\n",
                            "      <td>OBSERVATION ADMIT</td>\n",
                            "      <td>EMERGENCY ROOM</td>\n",
                            "      <td>DIED</td>\n",
                            "      <td>Medicare</td>\n",
                            "      <td>ENGLISH</td>\n",
                            "      <td>SINGLE</td>\n",
                            "      <td>WHITE</td>\n",
                            "      <td>2138-02-05 15:44:00</td>\n",
                            "      <td>2138-02-05 18:54:00</td>\n",
                            "      <td>1</td>\n",
                            "    </tr>\n",
                            "    <tr>\n",
                            "      <th>4</th>\n",
                            "      <td>20817525</td>\n",
                            "      <td>1.674769</td>\n",
                            "      <td>12776735</td>\n",
                            "      <td>72</td>\n",
                            "      <td>2200-07-11 22:46:00</td>\n",
                            "      <td>2200-07-19 12:00:00</td>\n",
                            "      <td>NaN</td>\n",
                            "      <td>OBSERVATION ADMIT</td>\n",
                            "      <td>EMERGENCY ROOM</td>\n",
                            "      <td>SKILLED NURSING FACILITY</td>\n",
                            "      <td>Medicare</td>\n",
                            "      <td>ENGLISH</td>\n",
                            "      <td>MARRIED</td>\n",
                            "      <td>OTHER</td>\n",
                            "      <td>2200-07-11 15:27:00</td>\n",
                            "      <td>2200-07-12 00:33:00</td>\n",
                            "      <td>0</td>\n",
                            "    </tr>\n",
                            "    <tr>\n",
                            "      <th>5</th>\n",
                            "      <td>24283593</td>\n",
                            "      <td>1.292697</td>\n",
                            "      <td>10215159</td>\n",
                            "      <td>67</td>\n",
                            "      <td>2124-09-20 15:04:00</td>\n",
                            "      <td>2124-09-26 14:30:00</td>\n",
                            "      <td>NaN</td>\n",
                            "      <td>EW EMER.</td>\n",
                            "      <td>EMERGENCY ROOM</td>\n",
                            "      <td>HOME</td>\n",
                            "      <td>Medicare</td>\n",
                            "      <td>ENGLISH</td>\n",
                            "      <td>DIVORCED</td>\n",
                            "      <td>WHITE</td>\n",
                            "      <td>2124-09-20 12:52:00</td>\n",
                            "      <td>2124-09-20 17:21:00</td>\n",
                            "      <td>0</td>\n",
                            "    </tr>\n",
                            "  </tbody>\n",
                            "</table>\n",
                            "</div>"
                        ],
                        "text/plain": [
                            "    hadm_id   icu_los  subject_id  anchor_age            admittime  \\\n",
                            "0  24528534  1.587454    17867402          25  2154-03-03 03:09:00   \n",
                            "1  28960964  3.025625    14435996          42  2150-06-19 13:07:00   \n",
                            "2  27385897  9.741725    17609946          70  2138-02-05 17:42:00   \n",
                            "4  20817525  1.674769    12776735          72  2200-07-11 22:46:00   \n",
                            "5  24283593  1.292697    10215159          67  2124-09-20 15:04:00   \n",
                            "\n",
                            "             dischtime            deathtime     admission_type  \\\n",
                            "0  2154-03-04 16:30:00                  NaN           EW EMER.   \n",
                            "1  2150-06-25 15:36:00                  NaN  OBSERVATION ADMIT   \n",
                            "2  2138-02-15 11:00:00  2138-02-15 11:00:00  OBSERVATION ADMIT   \n",
                            "4  2200-07-19 12:00:00                  NaN  OBSERVATION ADMIT   \n",
                            "5  2124-09-26 14:30:00                  NaN           EW EMER.   \n",
                            "\n",
                            "       admission_location        discharge_location insurance language  \\\n",
                            "0          EMERGENCY ROOM                      HOME  Medicaid  ENGLISH   \n",
                            "1  TRANSFER FROM HOSPITAL                      HOME  Medicaid  ENGLISH   \n",
                            "2          EMERGENCY ROOM                      DIED  Medicare  ENGLISH   \n",
                            "4          EMERGENCY ROOM  SKILLED NURSING FACILITY  Medicare  ENGLISH   \n",
                            "5          EMERGENCY ROOM                      HOME  Medicare  ENGLISH   \n",
                            "\n",
                            "  marital_status         ethnicity            edregtime            edouttime  \\\n",
                            "0            NaN  UNABLE TO OBTAIN  2154-03-03 02:49:00  2154-03-03 04:11:00   \n",
                            "1         SINGLE             WHITE  2150-06-19 11:54:00  2150-06-19 17:57:00   \n",
                            "2         SINGLE             WHITE  2138-02-05 15:44:00  2138-02-05 18:54:00   \n",
                            "4        MARRIED             OTHER  2200-07-11 15:27:00  2200-07-12 00:33:00   \n",
                            "5       DIVORCED             WHITE  2124-09-20 12:52:00  2124-09-20 17:21:00   \n",
                            "\n",
                            "   hospital_expire_flag  \n",
                            "0                     0  \n",
                            "1                     0  \n",
                            "2                     1  \n",
                            "4                     0  \n",
                            "5                     0  "
                        ]
                    },
                    "execution_count": 8,
                    "metadata": {},
                    "output_type": "execute_result"
                }
            ],
            "source": [
                "# Preview the first five rows of the filtered admissions.\n",
                "adm_icu_selected.head(5)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 9,
            "metadata": {},
            "outputs": [],
            "source": [
                "# Directory that receives the processed outputs of this notebook.\n",
                "path_processed = path_temp + '/processed'\n",
                "# makedirs (unlike mkdir) also creates missing parent directories such as\n",
                "# path_temp itself, and exist_ok=True makes the cell safe to re-run.\n",
                "os.makedirs(path_processed, exist_ok=True)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 10,
            "metadata": {},
            "outputs": [],
            "source": [
                "# Persist the filtered admissions. The DataFrame index is written as the first\n",
                "# (unnamed) CSV column because to_csv keeps index=True by default.\n",
                "out_file = f'{path_processed}/admissions_processed.csv'\n",
                "adm_icu_selected.to_csv(out_file)"
            ]
        }
    ],
    "metadata": {
        "kernelspec": {
            "display_name": "Python 3.8.12 ('transfer_ehr')",
            "language": "python",
            "name": "python3"
        },
        "language_info": {
            "codemirror_mode": {
                "name": "ipython",
                "version": 3
            },
            "file_extension": ".py",
            "mimetype": "text/x-python",
            "name": "python",
            "nbconvert_exporter": "python",
            "pygments_lexer": "ipython3",
            "version": "3.8.12"
        },
        "toc-autonumbering": false,
        "toc-showcode": false,
        "toc-showmarkdowntxt": false,
        "toc-showtags": false,
        "vscode": {
            "interpreter": {
                "hash": "5ae865abb88ad0a991db65fb0a3113ce9d7dd5f50b65ef8b203d3356ff36831c"
            }
        }
    },
    "nbformat": 4,
    "nbformat_minor": 4
}
