{
    "cells": [
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "# MIMIC 4 data - dataset construction inputevents"
            ]
        },
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "Code taken from GRU-ODE-Bayes preprocessing; simplified and adapted for MIMIC 4 1.0"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 1,
            "metadata": {},
            "outputs": [],
            "source": [
                "import os\n",
                "import pathlib\n",
                "\n",
                "p_project = str(pathlib.Path(os.getcwd()).parents[1])"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 2,
            "metadata": {},
            "outputs": [],
            "source": [
                "import pandas as pd\n",
                "import numpy as np"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 3,
            "metadata": {},
            "outputs": [],
            "source": [
                "pd.set_option('display.max_rows', 50)\n",
                "pd.set_option('display.max_columns', 300)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 4,
            "metadata": {},
            "outputs": [],
            "source": [
                "path_data = p_project + '/data/original/mimic4'\n",
                "path_temp = p_project + '/data/mimic4'"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 5,
            "metadata": {},
            "outputs": [],
            "source": [
                "adm = pd.read_csv(path_temp + '/processed/admissions_processed.csv')"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 6,
            "metadata": {},
            "outputs": [
                {
                    "data": {
                        "text/html": [
                            "<div>\n",
                            "<style scoped>\n",
                            "    .dataframe tbody tr th:only-of-type {\n",
                            "        vertical-align: middle;\n",
                            "    }\n",
                            "\n",
                            "    .dataframe tbody tr th {\n",
                            "        vertical-align: top;\n",
                            "    }\n",
                            "\n",
                            "    .dataframe thead th {\n",
                            "        text-align: right;\n",
                            "    }\n",
                            "</style>\n",
                            "<table border=\"1\" class=\"dataframe\">\n",
                            "  <thead>\n",
                            "    <tr style=\"text-align: right;\">\n",
                            "      <th></th>\n",
                            "      <th>subject_id</th>\n",
                            "      <th>hadm_id</th>\n",
                            "      <th>stay_id</th>\n",
                            "      <th>charttime</th>\n",
                            "      <th>storetime</th>\n",
                            "      <th>itemid</th>\n",
                            "      <th>value</th>\n",
                            "      <th>valueuom</th>\n",
                            "    </tr>\n",
                            "  </thead>\n",
                            "  <tbody>\n",
                            "    <tr>\n",
                            "      <th>4457376</th>\n",
                            "      <td>19999068</td>\n",
                            "      <td>21606769</td>\n",
                            "      <td>30143796</td>\n",
                            "      <td>2161-08-30 12:00:00</td>\n",
                            "      <td>2161-08-30 12:21:00</td>\n",
                            "      <td>226559</td>\n",
                            "      <td>180.0</td>\n",
                            "      <td>ml</td>\n",
                            "    </tr>\n",
                            "    <tr>\n",
                            "      <th>4457377</th>\n",
                            "      <td>19999068</td>\n",
                            "      <td>21606769</td>\n",
                            "      <td>30143796</td>\n",
                            "      <td>2161-08-30 14:00:00</td>\n",
                            "      <td>2161-08-30 13:49:00</td>\n",
                            "      <td>226559</td>\n",
                            "      <td>320.0</td>\n",
                            "      <td>ml</td>\n",
                            "    </tr>\n",
                            "    <tr>\n",
                            "      <th>4457378</th>\n",
                            "      <td>19999068</td>\n",
                            "      <td>21606769</td>\n",
                            "      <td>30143796</td>\n",
                            "      <td>2161-08-30 15:00:00</td>\n",
                            "      <td>2161-08-30 14:49:00</td>\n",
                            "      <td>226559</td>\n",
                            "      <td>120.0</td>\n",
                            "      <td>ml</td>\n",
                            "    </tr>\n",
                            "    <tr>\n",
                            "      <th>4457379</th>\n",
                            "      <td>19999068</td>\n",
                            "      <td>21606769</td>\n",
                            "      <td>30143796</td>\n",
                            "      <td>2161-08-30 18:00:00</td>\n",
                            "      <td>2161-08-30 17:50:00</td>\n",
                            "      <td>226559</td>\n",
                            "      <td>400.0</td>\n",
                            "      <td>ml</td>\n",
                            "    </tr>\n",
                            "    <tr>\n",
                            "      <th>4457380</th>\n",
                            "      <td>19999068</td>\n",
                            "      <td>21606769</td>\n",
                            "      <td>30143796</td>\n",
                            "      <td>2161-08-30 23:00:00</td>\n",
                            "      <td>2161-08-30 23:35:00</td>\n",
                            "      <td>226559</td>\n",
                            "      <td>250.0</td>\n",
                            "      <td>ml</td>\n",
                            "    </tr>\n",
                            "  </tbody>\n",
                            "</table>\n",
                            "</div>"
                        ],
                        "text/plain": [
                            "         subject_id   hadm_id   stay_id            charttime  \\\n",
                            "4457376    19999068  21606769  30143796  2161-08-30 12:00:00   \n",
                            "4457377    19999068  21606769  30143796  2161-08-30 14:00:00   \n",
                            "4457378    19999068  21606769  30143796  2161-08-30 15:00:00   \n",
                            "4457379    19999068  21606769  30143796  2161-08-30 18:00:00   \n",
                            "4457380    19999068  21606769  30143796  2161-08-30 23:00:00   \n",
                            "\n",
                            "                   storetime  itemid  value valueuom  \n",
                            "4457376  2161-08-30 12:21:00  226559  180.0       ml  \n",
                            "4457377  2161-08-30 13:49:00  226559  320.0       ml  \n",
                            "4457378  2161-08-30 14:49:00  226559  120.0       ml  \n",
                            "4457379  2161-08-30 17:50:00  226559  400.0       ml  \n",
                            "4457380  2161-08-30 23:35:00  226559  250.0       ml  "
                        ]
                    },
                    "execution_count": 6,
                    "metadata": {},
                    "output_type": "execute_result"
                }
            ],
            "source": [
                "outputs = pd.read_csv(path_data + '/icu/outputevents.csv.gz')\n",
                "outputs.tail()"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 7,
            "metadata": {},
            "outputs": [
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "Number of patients remaining in the database: \n",
                        "43736\n"
                    ]
                }
            ],
            "source": [
                "# only choose previously selected admission ids\n",
                "adm_ids=list(adm['hadm_id'])\n",
                "outputs=outputs.loc[outputs['hadm_id'].isin(adm_ids)]\n",
                "\n",
                "print('Number of patients remaining in the database: ')\n",
                "print(outputs['subject_id'].nunique())"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 8,
            "metadata": {},
            "outputs": [
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "Number of patients remaining in the database: \n",
                        "43736\n"
                    ]
                }
            ],
            "source": [
                "# get item names\n",
                "item_id=pd.read_csv(path_data + '/icu/d_items.csv.gz')\n",
                "item_id_1=item_id[['itemid','label']]\n",
                "item_id_1.head()\n",
                "\n",
                "outputs_2=pd.merge(outputs,item_id_1,on='itemid')\n",
                "outputs_2.head()\n",
                "print('Number of patients remaining in the database: ')\n",
                "print(outputs_2['subject_id'].nunique())"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 9,
            "metadata": {},
            "outputs": [
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "Number of patients remaining in the database: \n",
                        "43571\n",
                        "Number of datapoints remaining in the database: \n",
                        "3691000\n"
                    ]
                }
            ],
            "source": [
                "#Select entries\n",
                "outputs_label_list=['Foley', 'Void', 'OR Urine', 'Chest Tube #1', 'Oral Gastric', 'Pre-Admission', 'TF Residual', 'OR EBL', 'Emesis', 'Nasogastric', 'Stool', 'Jackson Pratt #1', 'Straight Cath', 'TF Residual Output', 'Fecal Bag']\n",
                "outputs_bis=outputs_2.loc[outputs_2['label'].isin(outputs_label_list)].copy()\n",
                "\n",
                "print('Number of patients remaining in the database: ')\n",
                "print(outputs_bis['subject_id'].nunique())\n",
                "print('Number of datapoints remaining in the database: ')\n",
                "print(len(outputs_bis.index))\n",
                "\n",
                "outputs_3=outputs_bis.copy()"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 10,
            "metadata": {},
            "outputs": [
                {
                    "data": {
                        "text/plain": [
                            "label               valueuom\n",
                            "Chest Tube #1       ml           261748\n",
                            "Emesis              ml             8247\n",
                            "Fecal Bag           ml            12446\n",
                            "Foley               ml          2896332\n",
                            "Jackson Pratt #1    ml            56642\n",
                            "Nasogastric         ml            28789\n",
                            "OR EBL              ml             9368\n",
                            "OR Urine            ml            17029\n",
                            "Oral Gastric        ml            24373\n",
                            "Pre-Admission       ml            11321\n",
                            "Stool               ml            14247\n",
                            "Straight Cath       ml             9997\n",
                            "TF Residual         ml            80015\n",
                            "TF Residual Output  ml             7854\n",
                            "Void                ml           252592\n",
                            "Name: count, dtype: int64"
                        ]
                    },
                    "execution_count": 10,
                    "metadata": {},
                    "output_type": "execute_result"
                }
            ],
            "source": [
                "# Verification that all input labels have the same amounts units\n",
                "outputs_3.groupby('label')['valueuom'].value_counts() "
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 12,
            "metadata": {},
            "outputs": [],
            "source": [
                "outputs_3.to_csv(path_temp + '/processed/outputs_processed.csv')"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "metadata": {},
            "outputs": [],
            "source": []
        }
    ],
    "metadata": {
        "kernelspec": {
            "display_name": "Python 3 (ipykernel)",
            "language": "python",
            "name": "python3"
        },
        "language_info": {
            "codemirror_mode": {
                "name": "ipython",
                "version": 3
            },
            "file_extension": ".py",
            "mimetype": "text/x-python",
            "name": "python",
            "nbconvert_exporter": "python",
            "pygments_lexer": "ipython3",
            "version": "3.7.3"
        },
        "vscode": {
            "interpreter": {
                "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
            }
        }
    },
    "nbformat": 4,
    "nbformat_minor": 4
}
