{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import os\n",
    "import numpy as np\n",
    "import random\n",
    "import sys\n",
    "sys.path.append(os.path.dirname(os.getcwd()))\n",
    "from constants import *\n",
    "\n",
    "seed = 0\n",
    "random.seed(seed)\n",
    "np.random.seed(seed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "BRSET_df = pd.read_csv(os.path.join(BRSET_tgt_root, 'train.csv'))\n",
    "EDDFS_df = pd.read_csv(os.path.join(EDDFS_tgt_root, 'train.csv'))\n",
    "\n",
    "BRSET_df_normal = BRSET_df[BRSET_df['abnormal'] == 0]\n",
    "EDDFS_df_normal = EDDFS_df[EDDFS_df['abnormal'] == 0]\n",
    "print(len(BRSET_df_normal), len(EDDFS_df_normal))\n",
    "\n",
    "BRSET_df_abnormal = BRSET_df[BRSET_df['abnormal'] == 1]\n",
    "EDDFS_df_abnormal = EDDFS_df[EDDFS_df['abnormal'] == 1]\n",
    "print(len(BRSET_df_abnormal), len(EDDFS_df_abnormal))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# randomly select 2500 from BRSET_df_normal and 2500 from EDDFS_df_normal\n",
    "BRSET_df_normal_selected = BRSET_df_normal.sample(n=2500, random_state=0)\n",
    "EDDFS_df_normal_selected = EDDFS_df_normal.sample(n=2500, random_state=0)\n",
    "\n",
    "# randomly select 2500 from BRSET_df_abnormal and 2500 from EDDFS_df_abnormal\n",
    "BRSET_df_abnormal_selected = BRSET_df_abnormal.sample(n=2500, random_state=0)\n",
    "EDDFS_df_abnormal_selected = EDDFS_df_abnormal.sample(n=2500, random_state=0)\n",
    "\n",
    "# concatenate the selected dataframes\n",
    "BRSET_5000_df = pd.concat([BRSET_df_normal_selected, BRSET_df_abnormal_selected], ignore_index=True)\n",
    "EDDFS_5000_df = pd.concat([EDDFS_df_normal_selected, EDDFS_df_abnormal_selected], ignore_index=True)\n",
    "\n",
    "# save the selected dataframes\n",
    "BRSET_5000_df.to_csv(os.path.join(BRSET_tgt_root, 'train_for_5000.csv'), index=False)\n",
    "EDDFS_5000_df.to_csv(os.path.join(EDDFS_tgt_root, 'train_for_5000.csv'), index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "torch2",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
