{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import datetime\n",
    "from datetime import timedelta\n",
    "import numpy as np\n",
    "import os\n",
    "from pathlib import Path\n",
    "from datetime import timedelta\n",
    "# Project root is taken as two levels above the working directory (assumes the\n",
    "# kernel was started in this notebook's folder; a notebook has no __file__).\n",
    "p_project = Path.cwd().parents[1]\n",
    "path_m4 = p_project / 'data/mimic4'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_20166/158989728.py:2: DtypeWarning: Columns (12) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  inputs_df=pd.read_csv(path_m4/\"processed/inputs_processed.csv\")[[\"subject_id\",\"hadm_id\",\"charttime\",\"amount\",\"label\"]]\n",
      "/tmp/ipykernel_20166/158989728.py:4: DtypeWarning: Columns (8,11,14) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  presc_df=pd.read_csv(path_m4/\"processed/prescriptions_processed.csv\")[[\"subject_id\",\"hadm_id\",\"charttime\",\"dose_val_rx\",\"drug\"]]\n"
     ]
    }
   ],
   "source": [
    "# NOTE: the next cell reads these same four files into the same variable names,\n",
    "# so this cell is redundant when the notebook is run top to bottom.\n",
    "# usecols parses only the columns that are kept; low_memory=False infers each\n",
    "# column's dtype from the whole file instead of per chunk (no DtypeWarning).\n",
    "lab_cols = [\"subject_id\", \"hadm_id\", \"charttime\", \"valuenum\", \"label\"]\n",
    "inputs_cols = [\"subject_id\", \"hadm_id\", \"charttime\", \"amount\", \"label\"]\n",
    "outputs_cols = [\"subject_id\", \"hadm_id\", \"charttime\", \"value\", \"label\"]\n",
    "presc_cols = [\"subject_id\", \"hadm_id\", \"charttime\", \"dose_val_rx\", \"drug\"]\n",
    "\n",
    "# Re-selecting with [cols] keeps the column order of the original code,\n",
    "# because usecols alone returns the columns in file order.\n",
    "lab_df = pd.read_csv(path_m4/\"processed/lab_processed.csv\", usecols=lab_cols, low_memory=False)[lab_cols]\n",
    "inputs_df = pd.read_csv(path_m4/\"processed/inputs_processed.csv\", usecols=inputs_cols, low_memory=False)[inputs_cols]\n",
    "outputs_df = pd.read_csv(path_m4/\"processed/outputs_processed.csv\", usecols=outputs_cols, low_memory=False)[outputs_cols]\n",
    "presc_df = pd.read_csv(path_m4/\"processed/prescriptions_processed.csv\", usecols=presc_cols, low_memory=False)[presc_cols]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_20166/511566662.py:3: DtypeWarning: Columns (12) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  inputs_df = pd.read_csv(path_m4/'processed/inputs_processed.csv')[\n",
      "/tmp/ipykernel_20166/511566662.py:7: DtypeWarning: Columns (8,11,14) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  presc_df = pd.read_csv(path_m4/'processed/prescriptions_processed.csv')[\n"
     ]
    }
   ],
   "source": [
    "def load_events(filename, value_col, label_col, origin):\n",
    "    \"\"\"Load one processed event table in the common long format.\n",
    "\n",
    "    Only the needed columns are parsed (usecols) and dtypes are inferred from\n",
    "    the whole file (low_memory=False), which avoids the mixed-type DtypeWarning.\n",
    "\n",
    "    Args:\n",
    "        filename: CSV file name inside path_m4/'processed'.\n",
    "        value_col: source column holding the value; renamed to 'valuenum'.\n",
    "        label_col: source column holding the variable name; renamed to 'label'.\n",
    "        origin: tag written to the 'Origin' column to identify the source table.\n",
    "\n",
    "    Returns:\n",
    "        DataFrame with columns subject_id, hadm_id, charttime, label, valuenum, Origin.\n",
    "    \"\"\"\n",
    "    id_cols = ['subject_id', 'hadm_id', 'charttime']\n",
    "    events = pd.read_csv(\n",
    "        path_m4/'processed'/filename,\n",
    "        usecols=id_cols + [value_col, label_col],\n",
    "        low_memory=False,\n",
    "    )\n",
    "    events = events.rename(columns={value_col: 'valuenum', label_col: 'label'})\n",
    "    # Fixed column order, independent of the order in the CSV file.\n",
    "    events = events[id_cols + ['label', 'valuenum']].copy()\n",
    "    # Tag to distinguish between the source tables after merging.\n",
    "    events['Origin'] = origin\n",
    "    return events\n",
    "\n",
    "\n",
    "# Every table uses 'valuenum' for the value and 'label' for the variable name.\n",
    "lab_df = load_events('lab_processed.csv', 'valuenum', 'label', 'Lab')\n",
    "inputs_df = load_events('inputs_processed.csv', 'amount', 'label', 'Inputs')\n",
    "outputs_df = load_events('outputs_processed.csv', 'value', 'label', 'Outputs')\n",
    "presc_df = load_events('prescriptions_processed.csv', 'dose_val_rx', 'drug', 'Prescriptions')\n",
    "\n",
    "# reset_index() (without drop=True) keeps each row's per-table position as an\n",
    "# 'index' column, exactly as before.\n",
    "merged_df = pd.concat((inputs_df, lab_df, outputs_df, presc_df)).reset_index()\n",
    "\n",
    "# Check that no label name is shared between the source tables.\n",
    "n_labels_per_table = sum(\n",
    "    table['label'].nunique() for table in (inputs_df, lab_df, outputs_df, presc_df)\n",
    ")\n",
    "assert merged_df['label'].nunique() == n_labels_per_table, \\\n",
    "    'label names overlap between source tables'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Express each event time as the offset from the earliest chart time of its admission.\n",
    "merged_df[\"charttime\"] = pd.to_datetime(\n",
    "    merged_df[\"charttime\"], format=\"%Y-%m-%d %H:%M:%S\"\n",
    ")\n",
    "\n",
    "# Earliest chart time per admission, indexed by hadm_id.\n",
    "ref_time = merged_df.groupby(\"hadm_id\")[\"charttime\"].min()\n",
    "\n",
    "merged_df_1 = pd.merge(\n",
    "    ref_time.to_frame(name=\"ref_time\"),\n",
    "    merged_df,\n",
    "    left_index=True,\n",
    "    right_on=\"hadm_id\",\n",
    ")\n",
    "merged_df_1[\"time_stamp\"] = merged_df_1[\"charttime\"] - merged_df_1[\"ref_time\"]\n",
    "\n",
    "# Sanity check: no event may precede the reference time of its admission.\n",
    "precedes_reference = merged_df_1[\"time_stamp\"] < timedelta(hours=0)\n",
    "assert not precedes_reference.any()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Give every distinct label an integer code, numbered in order of first appearance.\n",
    "unique_labels = merged_df_1[\"label\"].unique()\n",
    "label_dict = {label: code for code, label in enumerate(unique_labels)}\n",
    "merged_df_1[\"label_code\"] = merged_df_1[\"label\"].map(label_dict)\n",
    "\n",
    "# .copy() makes merged_df_short an independent frame, so later column assignments\n",
    "# on it do not raise SettingWithCopyWarning.\n",
    "merged_df_short = merged_df_1[\n",
    "    [\"hadm_id\", \"valuenum\", \"time_stamp\", \"label_code\", \"Origin\"]\n",
    "].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the label -> label_code lookup table, one row per distinct label.\n",
    "label_dict_df = pd.DataFrame({\"label\": merged_df_1[\"label\"].unique()})\n",
    "label_dict_df[\"label_code\"] = label_dict_df[\"label\"].map(label_dict)\n",
    "label_dict_df.to_csv(path_m4 / \"processed/label_dict.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_20166/3393732837.py:1: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  merged_df_short[\"valuenum\"] = merged_df_short[\"valuenum\"].astype(float)\n"
     ]
    }
   ],
   "source": [
    "# Rebind to a new frame instead of assigning into what may be a slice of\n",
    "# merged_df_1; the data is the same but no SettingWithCopyWarning is raised.\n",
    "merged_df_short = merged_df_short.assign(\n",
    "    valuenum=merged_df_short[\"valuenum\"].astype(float)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of patients considered: 55181\n"
     ]
    }
   ],
   "source": [
    "# Keep only events charted less than 48 hours after the admission's reference time.\n",
    "within_window = merged_df_short[\"time_stamp\"] < timedelta(hours=48)\n",
    "merged_df_short = merged_df_short.loc[within_window]\n",
    "\n",
    "# Convert the offset to whole minutes (astype(int) drops the fractional part).\n",
    "minutes_from_start = merged_df_short[\"time_stamp\"].dt.total_seconds().div(60).astype(int)\n",
    "merged_df_short[\"time_stamp\"] = minutes_from_start\n",
    "\n",
    "# The count is over distinct hadm_id values.\n",
    "n_considered = merged_df_short[\"hadm_id\"].nunique()\n",
    "print(\"Number of patients considered: \" + str(n_considered))\n",
    "\n",
    "# 2880 minutes = 48 hours.\n",
    "assert not (merged_df_short[\"time_stamp\"] > 2880).any()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Origin is not needed in the final dataset. It is removed in place so that\n",
    "# complete_df and merged_df_short keep referring to the same frame.\n",
    "merged_df_short.drop(columns=[\"Origin\"], inplace=True)\n",
    "complete_df = merged_df_short"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:10: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name] = 0\n",
      "/tmp/ipykernel_20166/1670237262.py:11: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df[name2] = 0\n"
     ]
    }
   ],
   "source": [
    "# Create one value column and one mask column per label code.\n",
    "# All columns are built first and attached with a single concat; inserting them\n",
    "# one at a time fragments the frame and triggers pandas' PerformanceWarning.\n",
    "labels = complete_df[\"label_code\"].unique()\n",
    "value_columns = [\"Value_label_\" + str(num) for num in labels]\n",
    "mask_columns = [\"Mask_label_\" + str(num) for num in labels]\n",
    "# Interleave as Value_label_k, Mask_label_k so the column order stays the same\n",
    "# as with the previous per-label inserts (dicts preserve insertion order).\n",
    "label_columns = {}\n",
    "for name, name2 in zip(value_columns, mask_columns):\n",
    "    label_columns[name] = 0.0  # float placeholder for the value\n",
    "    label_columns[name2] = 0  # integer flag column\n",
    "label_frame = pd.DataFrame(label_columns, index=complete_df.index)\n",
    "# Drop same-named columns first so re-running this cell resets them instead of\n",
    "# producing duplicate column names.\n",
    "complete_df = pd.concat(\n",
    "    [complete_df.drop(columns=list(label_columns), errors=\"ignore\"), label_frame],\n",
    "    axis=1,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Copy each row's measurement into the value column of its label and set the\n",
    "# matching mask column to 1.\n",
    "# Done with one vectorised assignment per label code instead of iterrows(),\n",
    "# which runs a Python-level loop over every row and writes to the frame while\n",
    "# it is being iterated.\n",
    "complete_df = complete_df.dropna()\n",
    "label_codes = complete_df[\"label_code\"].astype(int)\n",
    "for code in label_codes.unique():\n",
    "    # Positional boolean mask: independent of index labels, so it also works\n",
    "    # if the index contains duplicates.\n",
    "    rows = (label_codes == code).to_numpy()\n",
    "    complete_df.loc[rows, \"Value_label_\" + str(code)] = complete_df.loc[rows, \"valuenum\"].to_numpy()\n",
    "    complete_df.loc[rows, \"Mask_label_\" + str(code)] = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_20166/3932639823.py:3: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df = complete_df.groupby([\"hadm_id\", \"time_stamp\"], as_index=False).max()\n",
      "/tmp/ipykernel_20166/3932639823.py:3: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  complete_df = complete_df.groupby([\"hadm_id\", \"time_stamp\"], as_index=False).max()\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>time_stamp</th>\n",
       "      <th>Value_label_0</th>\n",
       "      <th>Mask_label_0</th>\n",
       "      <th>Value_label_1</th>\n",
       "      <th>Mask_label_1</th>\n",
       "      <th>Value_label_2</th>\n",
       "      <th>Mask_label_2</th>\n",
       "      <th>Value_label_3</th>\n",
       "      <th>Mask_label_3</th>\n",
       "      <th>...</th>\n",
       "      <th>Value_label_92</th>\n",
       "      <th>Mask_label_92</th>\n",
       "      <th>Value_label_63</th>\n",
       "      <th>Mask_label_63</th>\n",
       "      <th>Value_label_95</th>\n",
       "      <th>Mask_label_95</th>\n",
       "      <th>Value_label_94</th>\n",
       "      <th>Mask_label_94</th>\n",
       "      <th>Value_label_91</th>\n",
       "      <th>Mask_label_91</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>20000147.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>20000147.0</td>\n",
       "      <td>578</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>20000147.0</td>\n",
       "      <td>599</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>20000147.0</td>\n",
       "      <td>693</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>20000147.0</td>\n",
       "      <td>720</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5686065</th>\n",
       "      <td>29999828.0</td>\n",
       "      <td>2774</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5686066</th>\n",
       "      <td>29999828.0</td>\n",
       "      <td>2804</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5686067</th>\n",
       "      <td>29999828.0</td>\n",
       "      <td>2820</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5686068</th>\n",
       "      <td>29999828.0</td>\n",
       "      <td>2834</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5686069</th>\n",
       "      <td>29999828.0</td>\n",
       "      <td>2864</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5686070 rows × 194 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            hadm_id  time_stamp  Value_label_0  Mask_label_0  Value_label_1  \\\n",
       "0        20000147.0           0            0.0             0            0.0   \n",
       "1        20000147.0         578            0.0             0            0.0   \n",
       "2        20000147.0         599            0.0             0            0.0   \n",
       "3        20000147.0         693            0.0             0            0.0   \n",
       "4        20000147.0         720            0.0             0            0.0   \n",
       "...             ...         ...            ...           ...            ...   \n",
       "5686065  29999828.0        2774            0.0             0            0.0   \n",
       "5686066  29999828.0        2804            0.0             0            0.0   \n",
       "5686067  29999828.0        2820            0.0             0            0.0   \n",
       "5686068  29999828.0        2834            0.0             0            0.0   \n",
       "5686069  29999828.0        2864            0.0             0            0.0   \n",
       "\n",
       "         Mask_label_1  Value_label_2  Mask_label_2  Value_label_3  \\\n",
       "0                   0            0.0             0            0.0   \n",
       "1                   0            0.0             0            0.0   \n",
       "2                   0            0.0             0            0.0   \n",
       "3                   0            0.0             0            0.0   \n",
       "4                   0            0.0             0            0.0   \n",
       "...               ...            ...           ...            ...   \n",
       "5686065             0            0.0             0            0.0   \n",
       "5686066             0            0.0             0            0.0   \n",
       "5686067             0            0.0             0            0.0   \n",
       "5686068             0            0.0             0            0.0   \n",
       "5686069             0            0.0             0            0.0   \n",
       "\n",
       "         Mask_label_3  ...  Value_label_92  Mask_label_92  Value_label_63  \\\n",
       "0                   0  ...             0.0              0             0.0   \n",
       "1                   0  ...             0.0              0             0.0   \n",
       "2                   0  ...             0.0              0             0.0   \n",
       "3                   0  ...             0.0              0             0.0   \n",
       "4                   0  ...             0.0              0             0.0   \n",
       "...               ...  ...             ...            ...             ...   \n",
       "5686065             0  ...             0.0              0             0.0   \n",
       "5686066             0  ...             0.0              0             0.0   \n",
       "5686067             0  ...             0.0              0             0.0   \n",
       "5686068             0  ...             0.0              0             0.0   \n",
       "5686069             0  ...             0.0              0             0.0   \n",
       "\n",
       "         Mask_label_63  Value_label_95  Mask_label_95  Value_label_94  \\\n",
       "0                    0             0.0              0             0.0   \n",
       "1                    0             0.0              0             0.0   \n",
       "2                    0             0.0              0             0.0   \n",
       "3                    0             0.0              0             0.0   \n",
       "4                    0             0.0              0             0.0   \n",
       "...                ...             ...            ...             ...   \n",
       "5686065              0             0.0              0             0.0   \n",
       "5686066              0             0.0              0             0.0   \n",
       "5686067              0             0.0              0             0.0   \n",
       "5686068              0             0.0              0             0.0   \n",
       "5686069              0             0.0              0             0.0   \n",
       "\n",
       "         Mask_label_94  Value_label_91  Mask_label_91  \n",
       "0                    0             0.0              0  \n",
       "1                    0             0.0              0  \n",
       "2                    0             0.0              0  \n",
       "3                    0             0.0              0  \n",
       "4                    0             0.0              0  \n",
       "...                ...             ...            ...  \n",
       "5686065              0             0.0              0  \n",
       "5686066              0             0.0              0  \n",
       "5686067              0             0.0              0  \n",
       "5686068              0             0.0              0  \n",
       "5686069              0             0.0              0  \n",
       "\n",
       "[5686070 rows x 194 columns]"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Drop the long-format columns, collapse to one row per (hadm_id, time_stamp)\n",
    "# and run a sanity check on the mask columns.\n",
    "complete_df = complete_df.drop(columns=[\"valuenum\", \"label_code\"])\n",
    "# Hide the 0.0 placeholders of unobserved entries before aggregating. With the\n",
    "# placeholders in place, max() replaces an observed negative value by 0 whenever\n",
    "# another row of the same group holds the placeholder for that label, while the\n",
    "# mask still reports the value as observed.\n",
    "for value_col, mask_col in zip(value_columns, mask_columns):\n",
    "    complete_df[value_col] = complete_df[value_col].where(complete_df[mask_col] == 1)\n",
    "# groupby max ignores NaN, so each group keeps the largest observed value per label.\n",
    "complete_df = complete_df.groupby([\"hadm_id\", \"time_stamp\"], as_index=False).max()\n",
    "# Restore the 0.0 fill for labels that were not observed in a group.\n",
    "complete_df[value_columns] = complete_df[value_columns].fillna(0.0)\n",
    "# Masks are 0/1 flags, so no entry may exceed 1 after the aggregation.\n",
    "assert not (complete_df[mask_columns] > 1).any().any(), \"mask columns must be 0/1\"\n",
    "complete_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the assembled table as CSV, leaving out the index column.\n",
    "output_csv = path_m4 / \"processed/full_dataset.csv\"\n",
    "complete_df.to_csv(output_csv, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "GNeuralFlow",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
