{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# autoreload\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-01-15 14:06:59.655761: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
      "2024-01-15 14:06:59.655788: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
      "2024-01-15 14:06:59.657187: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
      "2024-01-15 14:06:59.663433: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
      "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
      "2024-01-15 14:07:01.049577: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'\n",
    "\n",
    "import tensorflow as tf\n",
    "import sys\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Project workspace and the local copy of the MIMIC-IV 2.2 tables.\n",
    "mm_dir = \"/cis/home/charr165/Documents/multimodal\"\n",
    "mimic_iv_path = \"/cis/home/charr165/Documents/physionet.org/mimiciv/2.2\"\n",
    "\n",
    "# Everything this notebook writes goes under <mm_dir>/preprocessing;\n",
    "# create the folder up front so later cells can write into it directly.\n",
    "output_dir = os.path.join(mm_dir, \"preprocessing\")\n",
    "os.makedirs(output_dir, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hospital admissions, with the admit/discharge columns parsed to datetimes.\n",
    "f_path = os.path.join(mimic_iv_path, \"hosp\", \"admissions.csv\")\n",
    "admissions_df = pd.read_csv(f_path, low_memory=False)\n",
    "for time_col in (\"admittime\", \"dischtime\"):\n",
    "    admissions_df[time_col] = pd.to_datetime(admissions_df[time_col])\n",
    "\n",
    "# ICU stays, with the in/out columns parsed the same way.\n",
    "icustays_csv = os.path.join(mimic_iv_path, \"icu\", \"icustays.csv\")\n",
    "icustays_df = pd.read_csv(icustays_csv, low_memory=False)\n",
    "for time_col in (\"intime\", \"outtime\"):\n",
    "    icustays_df[time_col] = pd.to_datetime(icustays_df[time_col])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Root of the local MIMIC-IV-ECG download; the `path` column of the record\n",
    "# list is joined onto this folder when individual recordings are opened.\n",
    "ecg_folder = '/cis/home/charr165/Documents/physionet.org/files/mimic-iv-ecg/1.0'\n",
    "\n",
    "# Record index, with the acquisition time parsed to a datetime column.\n",
    "record_list_csv = os.path.join(ecg_folder, 'record_list.csv')\n",
    "records_list_df = pd.read_csv(record_list_csv).assign(\n",
    "    ecg_time=lambda records: pd.to_datetime(records['ecg_time'])\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def calc_time_delta_hrs(icu_intime, charttime):\n",
    "    \"\"\"Return the signed number of hours from `icu_intime` to `charttime`.\n",
    "\n",
    "    The two arguments are subtracted and the difference's `total_seconds()`\n",
    "    is divided by 3600, so the result is negative when `charttime` is earlier\n",
    "    than `icu_intime`.\n",
    "    \"\"\"\n",
    "    return (charttime - icu_intime).total_seconds() / 3600\n",
    "\n",
    "\n",
    "stay_cols = ['subject_id', 'hadm_id', 'stay_id', 'intime', 'outtime']\n",
    "out_cols = ['subject_id', 'hadm_id', 'stay_id', 'icu_time_delta', 'ecg_time', 'path']\n",
    "\n",
    "# Split the ECG record list by subject once (restricted to subjects that have\n",
    "# an ICU stay), so each stay only inspects that subject's recordings instead\n",
    "# of filtering the whole record list again.\n",
    "icu_subject_ecgs = records_list_df[records_list_df['subject_id'].isin(icustays_df['subject_id'])]\n",
    "ecg_by_subject = {\n",
    "    subject_id: subject_ecgs\n",
    "    for subject_id, subject_ecgs in icu_subject_ecgs.groupby('subject_id', sort=False)\n",
    "}\n",
    "\n",
    "# Collect one dict per (ICU stay, ECG) match and build the frame once at the\n",
    "# end; concatenating inside the loop would re-copy the frame for every row.\n",
    "ecg_records = []\n",
    "for subject_id, hadm_id, stay_id, intime, outtime in icustays_df[stay_cols].itertuples(index=False, name=None):\n",
    "    subject_ecgs = ecg_by_subject.get(subject_id)\n",
    "    if subject_ecgs is None:\n",
    "        continue  # this subject has no ECG recordings\n",
    "\n",
    "    # Keep ECGs recorded while the patient was in this stay (both bounds inclusive).\n",
    "    in_stay = subject_ecgs['ecg_time'].between(intime, outtime)\n",
    "    for ecg_time, path in subject_ecgs.loc[in_stay, ['ecg_time', 'path']].itertuples(index=False, name=None):\n",
    "        ecg_records.append({\n",
    "            'subject_id': subject_id,\n",
    "            'hadm_id': hadm_id,\n",
    "            'stay_id': stay_id,\n",
    "            'icu_time_delta': calc_time_delta_hrs(intime, ecg_time),\n",
    "            'ecg_time': ecg_time,\n",
    "            'path': path,\n",
    "        })\n",
    "\n",
    "# Explicit columns keep the schema stable even when no ECG matched any stay.\n",
    "out_df = pd.DataFrame(ecg_records, columns=out_cols)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import wfdb\n",
    "from tqdm import tqdm\n",
    "\n",
    "# Pretrained encoder used to turn each ECG into an embedding.\n",
    "f_path = '/cis/home/charr165/Documents/PCM/project_files/models/attia_encoder_256.keras'\n",
    "encoder = tf.keras.models.load_model(f_path)\n",
    "\n",
    "\n",
    "def load_ecg(path, stop_index=4096):\n",
    "    \"\"\"Read a WFDB record and return the first `stop_index` rows of its `p_signal`.\n",
    "\n",
    "    Args:\n",
    "        path: Record path as accepted by `wfdb.rdrecord`.\n",
    "        stop_index: Number of leading rows (first axis) to keep.\n",
    "\n",
    "    Returns:\n",
    "        The 2-D `p_signal` array truncated along its first axis.\n",
    "    \"\"\"\n",
    "    return wfdb.rdrecord(path).p_signal[:stop_index, :]\n",
    "\n",
    "\n",
    "def embed_single_ecg(path):\n",
    "    \"\"\"Embed one recording with `encoder`; intended for spot checks.\n",
    "\n",
    "    Args:\n",
    "        path: Record path as accepted by `load_ecg`.\n",
    "\n",
    "    Returns:\n",
    "        The output of `encoder.predict` for a batch holding only this record.\n",
    "    \"\"\"\n",
    "    # NOTE(review): the encoder is fed 12-column input here - confirm every\n",
    "    # record has 12 leads and at least `stop_index` samples.\n",
    "    return encoder.predict(load_ecg(path)[np.newaxis, ...], verbose=0)\n",
    "\n",
    "\n",
    "# This cell deliberately does not loop over `out_df`. Calling `predict` once\n",
    "# per record over the whole table is very slow, and the next cell resets\n",
    "# out_df['embeddings'] and recomputes every row in batches anyway, so any\n",
    "# values written here would be discarded."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|          | 31/72167 [00:00<25:17, 47.53it/s] 2024-01-15 14:11:33.893174: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8902\n",
      "100%|██████████| 72167/72167 [1:12:38<00:00, 16.56it/s]\n"
     ]
    }
   ],
   "source": [
    "import wfdb\n",
    "import tensorflow as tf\n",
    "import os\n",
    "import numpy as np\n",
    "from tqdm import tqdm\n",
    "\n",
    "# Load the pretrained encoder used to embed each ECG.\n",
    "f_path = '/cis/home/charr165/Documents/PCM/project_files/models/attia_encoder_256.keras'\n",
    "encoder = tf.keras.models.load_model(f_path)\n",
    "\n",
    "\n",
    "def load_ecg(path, stop_index=4096):\n",
    "    \"\"\"Read a WFDB record and return the first `stop_index` rows of its `p_signal`.\n",
    "\n",
    "    Args:\n",
    "        path: Record path as accepted by `wfdb.rdrecord`.\n",
    "        stop_index: Number of leading rows (first axis) to keep.\n",
    "\n",
    "    Returns:\n",
    "        The 2-D `p_signal` array truncated along its first axis.\n",
    "    \"\"\"\n",
    "    return wfdb.rdrecord(path).p_signal[:stop_index, :]\n",
    "\n",
    "\n",
    "def embed_batch(signals, row_labels):\n",
    "    \"\"\"Encode `signals` in one `predict` call and store one vector per row of `out_df`.\n",
    "\n",
    "    Args:\n",
    "        signals: List of equally shaped 2-D arrays returned by `load_ecg`.\n",
    "        row_labels: `out_df` index labels aligned one-to-one with `signals`.\n",
    "    \"\"\"\n",
    "    # np.stack only adds the batch axis, so a record whose shape differs from the\n",
    "    # rest of its batch raises here. reshape(1, -1, 12) could instead silently\n",
    "    # re-flow a signal that is not 12 columns wide into the wrong layout.\n",
    "    batch_ecgs = np.stack(signals)\n",
    "    embeddings = encoder.predict(batch_ecgs, verbose=0)\n",
    "    for label, embedding in zip(row_labels, embeddings):\n",
    "        out_df.at[label, 'embeddings'] = embedding\n",
    "\n",
    "\n",
    "batch_size = 32  # records per predict call; adjust to the available GPU memory\n",
    "out_df['embeddings'] = None  # object column, so each cell can hold an array\n",
    "\n",
    "# Accumulate signals until a batch is full, then embed and start a new batch.\n",
    "ecg_batch, batch_indices = [], []\n",
    "for index, row in tqdm(out_df.iterrows(), total=out_df.shape[0]):\n",
    "    ecg_batch.append(load_ecg(os.path.join(ecg_folder, row['path'])))\n",
    "    batch_indices.append(index)\n",
    "\n",
    "    if len(ecg_batch) == batch_size:\n",
    "        embed_batch(ecg_batch, batch_indices)\n",
    "        ecg_batch, batch_indices = [], []\n",
    "\n",
    "# Flush the final, partially filled batch.\n",
    "if ecg_batch:\n",
    "    embed_batch(ecg_batch, batch_indices)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the stay/ECG table together with its embeddings.\n",
    "# `output_dir` is the folder defined (and created) in the path-configuration\n",
    "# cell near the top of the notebook; reusing it keeps the output location\n",
    "# declared in one place instead of repeating the literal path here.\n",
    "out_df.to_pickle(os.path.join(output_dir, \"ecg_embeddings_icu.pkl\"))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
