{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import os\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Root of the raw RIADD data that is read, and the directory the combined CSV is written to.\n",
    "root = '/root/raw_dataset/RIADD/'\n",
    "tgt_root = '/root/dataset/fundus/RIADD'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Every split has its own sub-directory under `root`, which also holds that split's label CSV.\n",
    "train_root, val_root, test_root = (\n",
    "    os.path.join(root, split_dir) for split_dir in ('train_set', 'val_set', 'test_set')\n",
    ")\n",
    "\n",
    "train_csv = os.path.join(train_root, 'RFMiD_Training_Labels.csv')\n",
    "# NOTE(review): this file name ends in 'Label' (singular) while the other two end in 'Labels' -- confirm it matches the file on disk.\n",
    "val_csv = os.path.join(val_root, 'RFMiD_Validation_Label.csv')\n",
    "test_csv = os.path.join(test_root, 'RFMiD_Testing_Labels.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the training labels and split them on Disease_Risk (0 -> normal, 1 -> abnormal).\n",
    "train_information = pd.read_csv(train_csv)\n",
    "normal_train_information = train_information.loc[train_information['Disease_Risk'].eq(0)]\n",
    "abnormal_train_information = train_information.loc[train_information['Disease_Risk'].eq(1)]\n",
    "# No per-category filtering is applied: every abnormal row is kept.\n",
    "abnormal_train_information_selected = abnormal_train_information\n",
    "\n",
    "# Row counts, one per line: all / normal / abnormal / abnormal kept.\n",
    "print(\n",
    "    len(train_information),\n",
    "    len(normal_train_information),\n",
    "    len(abnormal_train_information),\n",
    "    len(abnormal_train_information_selected),\n",
    "    sep='\\n',\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the column names of the training label table.\n",
    "train_information.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the validation labels and split them on Disease_Risk (0 -> normal, 1 -> abnormal).\n",
    "val_information = pd.read_csv(val_csv)\n",
    "normal_val_information = val_information.loc[val_information['Disease_Risk'].eq(0)]\n",
    "abnormal_val_information = val_information.loc[val_information['Disease_Risk'].eq(1)]\n",
    "# An earlier single-category filter (rows whose columns from position 2 onward sum to 1)\n",
    "# is disabled, so every abnormal row is kept.\n",
    "abnormal_val_information_selected = abnormal_val_information\n",
    "\n",
    "# Row counts, one per line: all / normal / abnormal / abnormal kept.\n",
    "print(\n",
    "    len(val_information),\n",
    "    len(normal_val_information),\n",
    "    len(abnormal_val_information),\n",
    "    len(abnormal_val_information_selected),\n",
    "    sep='\\n',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the test labels and split them on Disease_Risk (0 -> normal, 1 -> abnormal).\n",
    "test_information = pd.read_csv(test_csv)\n",
    "normal_test_information = test_information.loc[test_information['Disease_Risk'].eq(0)]\n",
    "abnormal_test_information = test_information.loc[test_information['Disease_Risk'].eq(1)]\n",
    "# No per-category filtering is applied: every abnormal row is kept.\n",
    "abnormal_test_information_selected = abnormal_test_information\n",
    "\n",
    "# Row counts, one per line: all / normal / abnormal / abnormal kept.\n",
    "print(\n",
    "    len(test_information),\n",
    "    len(normal_test_information),\n",
    "    len(abnormal_test_information),\n",
    "    len(abnormal_test_information_selected),\n",
    "    sep='\\n',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tag the normal rows of each split with that split's sub-directory string in a new 'dir' column.\n",
    "# `.assign` returns a new frame; writing through `.loc` into these boolean-filtered frames\n",
    "# would raise pandas' SettingWithCopyWarning. Re-running this cell is idempotent.\n",
    "normal_train_information = normal_train_information.assign(dir='train_set/Training_Preprocessed')\n",
    "normal_val_information = normal_val_information.assign(dir='val_set/Validation_Preprocessed')\n",
    "normal_test_information = normal_test_information.assign(dir='test_set/Test_Preprocessed')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack the normal rows of all three splits and label them with abnormal = 0.\n",
    "normal = pd.concat(\n",
    "    [normal_train_information, normal_val_information, normal_test_information]\n",
    ").assign(abnormal=0)\n",
    "print(len(normal))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the combined normal table.\n",
    "normal"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tag the abnormal rows of each split with that split's sub-directory string in a new 'dir' column.\n",
    "# The *_selected names alias boolean-filtered frames, so writing through `.loc` would raise\n",
    "# SettingWithCopyWarning and also modify the aliased abnormal_*_information frames.\n",
    "# `.assign` returns new frames instead, which keeps this cell idempotent.\n",
    "abnormal_train_information_selected = abnormal_train_information_selected.assign(dir='train_set/Training_Preprocessed')\n",
    "abnormal_val_information_selected = abnormal_val_information_selected.assign(dir='val_set/Validation_Preprocessed')\n",
    "abnormal_test_information_selected = abnormal_test_information_selected.assign(dir='test_set/Test_Preprocessed')\n",
    "\n",
    "# Stack the three splits and label every row with abnormal = 1.\n",
    "abnormal = pd.concat(\n",
    "    [abnormal_train_information_selected, abnormal_val_information_selected, abnormal_test_information_selected]\n",
    ").assign(abnormal=1)\n",
    "print(len(abnormal))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the combined abnormal table.\n",
    "abnormal"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Combine both classes into one table: normal rows first, then abnormal rows.\n",
    "df = pd.concat([normal, abnormal], axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Count the rows for each value of the 'abnormal' label.\n",
    "df.loc[:, 'abnormal'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the output directory first: to_csv does not create missing parent directories\n",
    "# and raises if tgt_root does not exist yet.\n",
    "os.makedirs(tgt_root, exist_ok=True)\n",
    "# NOTE(review): the file is named 'test.csv' although df holds rows from the train, val and test splits -- confirm this is intended.\n",
    "df.to_csv(os.path.join(tgt_root, 'test.csv'), index=False)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "torch2",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
