{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "\n",
    "mimic_iv_cxr_parent = \"/cis/home/charr165/Documents/physionet.org/files/mimic-cxr-jpg/2.0.0\"\n",
    "\n",
    "mm_dir = \"/cis/home/charr165/Documents/multimodal\"\n",
    "preprocessing_dir = os.path.join(mm_dir, \"preprocessing\")\n",
    "f_path = os.path.join(preprocessing_dir, \"cxr_embeddings.pkl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cxr_embeddings_df = pd.read_pickle(f_path)\n",
    "cxr_embeddings_df['cxrtime'] = pd.to_datetime(cxr_embeddings_df['cxrtime'])\n",
    "\n",
    "mimic_iv_path = \"/cis/home/charr165/Documents/physionet.org/mimiciv/2.2\"\n",
    "icustays_df = pd.read_csv(os.path.join(mimic_iv_path, \"icu\", \"icustays.csv\"), low_memory=False)\n",
    "icustays_df['intime'] = pd.to_datetime(icustays_df['intime'])\n",
    "icustays_df['outtime'] = pd.to_datetime(icustays_df['outtime'])\n",
    "\n",
    "admissions_df = pd.read_csv(os.path.join(mimic_iv_path, \"hosp\", \"admissions.csv\"), low_memory=False)\n",
    "admissions_df['admittime'] = pd.to_datetime(admissions_df['admittime'])\n",
    "admissions_df['dischtime'] = pd.to_datetime(admissions_df['dischtime'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tqdm import tqdm\n",
    "\n",
    "def calc_time_delta_hrs(icu_intime, charttime):\n",
    "    return (charttime - icu_intime).total_seconds() / 3600\n",
    "\n",
    "cxr_embeddings_df['hadm_id'] = None\n",
    "cxr_embeddings_df['stay_id'] = None\n",
    "cxr_embeddings_df['icu_time_delta'] = None\n",
    "cxr_embeddings_df['hosp_time_delta'] = None\n",
    "\n",
    "for index, row in tqdm(cxr_embeddings_df.iterrows(), total=cxr_embeddings_df.shape[0]):\n",
    "    curr_pts_icustays = icustays_df[icustays_df['subject_id'] == row['subject_id']]\n",
    "    \n",
    "    for icu_index, icu_row in curr_pts_icustays.iterrows():\n",
    "        if icu_row['intime'] <= row['cxrtime'] <= icu_row['outtime']:\n",
    "            cxr_embeddings_df.loc[index, 'stay_id'] = icu_row['stay_id']\n",
    "            cxr_embeddings_df.loc[index, 'icu_time_delta'] = calc_time_delta_hrs(icu_row['intime'], row['cxrtime'])\n",
    "    \n",
    "    curr_pts_admissions = admissions_df[admissions_df['subject_id'] == row['subject_id']]\n",
    "\n",
    "    for hosp_index, hosp_row in curr_pts_admissions.iterrows():\n",
    "        if hosp_row['admittime'] <= row['cxrtime'] <= hosp_row['dischtime']:\n",
    "            cxr_embeddings_df.loc[index, 'hadm_id'] = hosp_row['hadm_id']\n",
    "            cxr_embeddings_df.loc[index, 'hosp_time_delta'] = calc_time_delta_hrs(hosp_row['admittime'], row['cxrtime'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cxr_embeddings_df.to_pickle(os.path.join(preprocessing_dir, \"cxr_embeddings_stay.pkl\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.6118373949245578"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cxr_embeddings_df['stay_id'].isna().mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "cxr_embeddings_df = pd.read_pickle(os.path.join(preprocessing_dir, \"cxr_embeddings_stay.pkl\"))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
