{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load IPython's autoreload extension. Mode 2 reloads all imported modules\n",
    "# (except those excluded via %aimport) before each cell executes, so edits to\n",
    "# local .py files take effect without restarting the kernel.\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# in_project_parent: root of the local MIMIC-IV 1.0 download; its subdirectories are read.\n",
    "# out_project_parent: folder that receives the truncated copies of those CSV files.\n",
    "# NOTE(review): both are absolute, user-specific paths - adjust them before running elsewhere.\n",
    "in_project_parent = \"/cis/home/charr165/Documents/physionet.org/files/mimiciv/1.0\"\n",
    "out_project_parent = \"/cis/home/charr165/Documents/physionet.org/files/mimiciv/1.0/shortened\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write a truncated copy (first N_ROWS data rows) of every CSV found in each\n",
    "# listed subdirectory of in_project_parent to the same relative location under\n",
    "# out_project_parent.\n",
    "N_ROWS = 5000\n",
    "subdirs = [\"core\", \"hosp\", \"icu\"]\n",
    "\n",
    "for subdir in subdirs:\n",
    "    curr_subdir = os.path.join(in_project_parent, subdir)\n",
    "\n",
    "    # The output directory is identical for every file of this subdirectory, so\n",
    "    # create it once up front; exist_ok replaces the separate exists() check.\n",
    "    curr_out_dir = os.path.join(out_project_parent, subdir)\n",
    "    os.makedirs(curr_out_dir, exist_ok=True)\n",
    "\n",
    "    # List all csv files in the current subdirectory (sorted for a stable order)\n",
    "    csv_files = sorted(f for f in os.listdir(curr_subdir) if f.endswith(\".csv\"))\n",
    "\n",
    "    for csv_file in csv_files:\n",
    "        curr_in_file = os.path.join(curr_subdir, csv_file)\n",
    "        # Read every column as text with NA detection off so values are copied\n",
    "        # verbatim. Default dtype inference would turn integer columns that contain\n",
    "        # blanks into floats (123 -> 123.0), strip leading zeros from\n",
    "        # numeric-looking codes, and blank out tokens such as NA.\n",
    "        curr_df = pd.read_csv(curr_in_file, nrows=N_ROWS, dtype=str, na_filter=False)\n",
    "\n",
    "        curr_out_file = os.path.join(curr_out_dir, csv_file)\n",
    "        curr_df.to_csv(curr_out_file, index=False)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rebind the input/output roots (previously set for the MIMIC-IV 1.0 tables) to the\n",
    "# MIMIC-IV-Note 2.2 download and its folder for truncated copies.\n",
    "# NOTE(review): both are absolute, user-specific paths - adjust them before running elsewhere.\n",
    "in_project_parent = \"/cis/home/charr165/Documents/physionet.org/files/mimic-iv-note/2.2/\"\n",
    "out_project_parent = \"/cis/home/charr165/Documents/physionet.org/files/mimic-iv-note/2.2/shortened\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write a truncated copy (first N_ROWS data rows) of every CSV found in each\n",
    "# listed subdirectory of in_project_parent to the same relative location under\n",
    "# out_project_parent.\n",
    "N_ROWS = 5000\n",
    "subdirs = [\"note\"]\n",
    "\n",
    "for subdir in subdirs:\n",
    "    curr_subdir = os.path.join(in_project_parent, subdir)\n",
    "\n",
    "    # The output directory is identical for every file of this subdirectory, so\n",
    "    # create it once up front; exist_ok replaces the separate exists() check.\n",
    "    curr_out_dir = os.path.join(out_project_parent, subdir)\n",
    "    os.makedirs(curr_out_dir, exist_ok=True)\n",
    "\n",
    "    # List all csv files in the current subdirectory (sorted for a stable order)\n",
    "    csv_files = sorted(f for f in os.listdir(curr_subdir) if f.endswith(\".csv\"))\n",
    "\n",
    "    for csv_file in csv_files:\n",
    "        curr_in_file = os.path.join(curr_subdir, csv_file)\n",
    "        # Read every column as text with NA detection off so values are copied\n",
    "        # verbatim. Default dtype inference would turn integer columns that contain\n",
    "        # blanks into floats (123 -> 123.0), strip leading zeros from\n",
    "        # numeric-looking codes, and blank out tokens such as NA.\n",
    "        curr_df = pd.read_csv(curr_in_file, nrows=N_ROWS, dtype=str, na_filter=False)\n",
    "\n",
    "        curr_out_file = os.path.join(curr_out_dir, csv_file)\n",
    "        curr_df.to_csv(curr_out_file, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
