{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# autoreload\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from tqdm import tqdm\n",
    "from npj_utils import *\n",
    "from sklearn.multioutput import MultiOutputClassifier\n",
    "import xgboost as xgb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "mimic_iv_path = \"/cis/home/charr165/Documents/physionet.org/mimiciv/2.2\"\n",
    "mm_dir = \"/cis/home/charr165/Documents/multimodal\"\n",
    "\n",
    "output_dir = os.path.join(mm_dir, \"preprocessing\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "restrict_48_hours = False\n",
    "include_notes = True\n",
    "include_cxr = True\n",
    "include_ecg = True\n",
    "include_missing = True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "\n",
    "base_name = \"pheno\" # ihm, los, pheno\n",
    "if restrict_48_hours:\n",
    "    base_name += \"-48\"\n",
    "else:\n",
    "    base_name += \"-all\"\n",
    "\n",
    "if include_cxr:\n",
    "    if include_notes:\n",
    "        base_name += \"-cxr-notes\"\n",
    "    else:\n",
    "        base_name += \"-cxr\"\n",
    "\n",
    "if include_ecg:\n",
    "    base_name += \"-ecg\"\n",
    "\n",
    "if include_missing:\n",
    "    base_name += \"-missingInd\"\n",
    "\n",
    "f_path = os.path.join(output_dir, f\"train_{base_name}_stays.pkl\")\n",
    "\n",
    "with open(f_path, \"rb\") as f:\n",
    "    train_stays = pickle.load(f)\n",
    "\n",
    "f_path = os.path.join(output_dir, f\"val_{base_name}_stays.pkl\")\n",
    "\n",
    "with open(f_path, \"rb\") as f:\n",
    "    val_stays = pickle.load(f)\n",
    "\n",
    "f_path = os.path.join(output_dir, f\"test_{base_name}_stays.pkl\")\n",
    "\n",
    "with open(f_path, \"rb\") as f:\n",
    "    test_stays = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_stays = train_stays + val_stays + test_stays"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['name', 'hadm_id', 'stay_id', 'ts_tt', 'irg_ts', 'irg_ts_mask', 'reg_ts', 'text_data', 'text_time_to_end', 'text_embeddings', 'text_missing', 'cxr_feats', 'cxr_time', 'cxr_missing', 'ecg_feats', 'ecg_time', 'ecg_missing', 'label'])"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_stays[0].keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "missing_keys = ['text_missing', 'cxr_missing', 'ecg_missing']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'text_missing': 56824, 'cxr_missing': 14568, 'ecg_missing': 35925}"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "total_obs = {}\n",
    "for key in missing_keys:\n",
    "    total_obs[key] = 0\n",
    "\n",
    "for stay in all_stays:\n",
    "    for key in missing_keys:\n",
    "        total_obs[key] += stay[key]\n",
    "\n",
    "for key in missing_keys:\n",
    "    total_obs[key] = len(all_stays) - total_obs[key]\n",
    "total_obs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "73173"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(all_stays)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
