{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "8aa91736-27ac-4662-b078-6e8b9c0aede2",
   "metadata": {},
   "source": [
    "# Recodiding dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "59dea570-4462-4feb-948f-e4aaab53221c",
   "metadata": {},
   "source": [
    "## Importing Dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "c02241b5-6f3e-437d-af00-acc1d708a575",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "21329732-4ca3-4f7b-978d-034dcd02941c",
   "metadata": {},
   "source": [
    "## Importing Data"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6d7385c0-da09-4e6d-aaae-b19bd5fcdfc8",
   "metadata": {},
   "source": [
    "We import the dataset that we created in the previous notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "9203119b-2f26-40e9-87b5-8fae34e88c06",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_den_expo=pd.read_csv('LBIDD_den_tot.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ae7b828b-c3d8-49c0-95e5-68211b795409",
   "metadata": {},
   "source": [
    "## Selection Recoding Parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "c24af135-d774-4437-84e7-8268120e4df2",
   "metadata": {},
   "outputs": [],
   "source": [
    "convert_binary=True     #  Set to true if we want to convert the binary encoding from [1,2] to [1,0]."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "86c0d7e8-69cd-4f80-b70d-749543d06d1d",
   "metadata": {},
   "outputs": [],
   "source": [
    "c_bin_other=['CSEX','DMAR','PLDEL_recode','DPLURAL_recode','DELMETH5_recode','WEEKDAYB_recode','BIRATTND_recode','MRACE3_recode','BIRMON_recode','MPLBIRR'] # binary covariates that are not a flag in the original data\n",
    "c_flag=['CARDIAC','LUNG','DIABETES','HYDRA','HEMO','CHYPER','PHYYPER', 'ECLAMP','INCERVIX','PRETERM','OTHERMR','AMNIO','MONITOR','INDUCT', 'STIMULA','TOCOL','ULTRAS','OTHEROB','FEBRILE','MECONIUM','RUPTURE','ABRUPTIO','PREPLACE','EXCEBLD', 'SEIZURE','PRECIP','PROLONG','CEPHALO','CORD','OTHERLB', 'NANEMIA', 'HYALINE','MECONSYN','NSEIZ'] # covariates that represent a flag in the original data\n",
    "c_bin=c_bin_other+c_flag # Binary covariates"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "54a242b7-8eb2-4c70-8d98-4a8d2bbd6fee",
   "metadata": {},
   "source": [
    "## Recoding variables"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3596ebb9-f4e5-476f-8246-8724c08ac358",
   "metadata": {},
   "source": [
    "We transform specific variables to achieve a binary encoding for them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "cde8c892-6083-4938-b2d7-da2b2fb327a7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# MRACE3\n",
    "conditions = [\n",
    "    df_den_expo['MRACE3'].eq(3),\n",
    "    df_den_expo['MRACE3'].isin([1, 2])\n",
    "]\n",
    "values = [1, 2]\n",
    "\n",
    "df_den_expo['MRACE3_recode']=np.select(conditions, values, default=np.nan).astype(int)\n",
    "\n",
    "#PLDEL\n",
    "conditions = [\n",
    "    df_den_expo['PLDEL'].eq(1),\n",
    "    df_den_expo['PLDEL'].isin([2, 3, 4, 5])\n",
    "]\n",
    "values = [2, 1]\n",
    "\n",
    "df_den_expo['PLDEL_recode']=np.select(conditions, values, default=np.nan).astype(int)\n",
    "\n",
    "#DPLURAL\n",
    "conditions = [\n",
    "    df_den_expo['DPLURAL'].eq(1),\n",
    "    df_den_expo['DPLURAL'].isin([2, 3, 4, 5])\n",
    "]\n",
    "values = [2, 1]\n",
    "\n",
    "df_den_expo['DPLURAL_recode']=np.select(conditions, values, default=np.nan).astype(int)\n",
    "\n",
    "#DELMETH5\n",
    "conditions = [\n",
    "    df_den_expo['DELMETH5'].isin([1,2]),\n",
    "    df_den_expo['DELMETH5'].isin([3,4])\n",
    "]\n",
    "values = [2, 1]\n",
    "\n",
    "df_den_expo['DELMETH5_recode']=np.select(conditions, values, default=np.nan).astype(int)\n",
    "\n",
    "#BIRMON\n",
    "\n",
    "conditions = [\n",
    "    df_den_expo['BIRMON'].isin([6,7,8,9]),\n",
    "    df_den_expo['BIRMON'].isin([1,2,3,4,5,10,11,12]) # Flu season\n",
    "]\n",
    "values = [2, 1]\n",
    "\n",
    "df_den_expo['BIRMON_recode']=np.select(conditions, values, default=np.nan).astype(int)\n",
    "\n",
    "#WEEKDAYB\n",
    "conditions = [\n",
    "    df_den_expo['WEEKDAYB'].isin([2,3,4,5,6]),\n",
    "    df_den_expo['WEEKDAYB'].isin([1,7])\n",
    "]\n",
    "values = [2, 1]\n",
    "\n",
    "df_den_expo['WEEKDAYB_recode']=np.select(conditions, values, default=np.nan).astype(int)\n",
    "\n",
    "\n",
    "#BIRATTND\n",
    "conditions = [\n",
    "    df_den_expo['BIRATTND'].eq(1),\n",
    "    df_den_expo['BIRATTND'].ne(1)\n",
    "    \n",
    "]\n",
    "values = [2, 1]\n",
    "\n",
    "df_den_expo['BIRATTND_recode']=np.select(conditions, values, default=np.nan).astype(int)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1917713e-509a-496b-b101-1a57c3aaab38",
   "metadata": {},
   "source": [
    "We eliminate the covariates for which we have a new encoding."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "df2095c5-3361-4e26-9117-dae04795748d",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_den_expo=df_den_expo.drop('MRACE3',axis=1)\n",
    "df_den_expo=df_den_expo.drop('PLDEL',axis=1)\n",
    "df_den_expo=df_den_expo.drop('DELMETH5',axis=1)\n",
    "df_den_expo=df_den_expo.drop('BIRMON',axis=1)\n",
    "df_den_expo=df_den_expo.drop('WEEKDAYB',axis=1)\n",
    "df_den_expo=df_den_expo.drop('BIRATTND',axis=1)\n",
    "df_den_expo=df_den_expo.drop('DPLURAL',axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "aa958979-61fd-4588-a64d-3265a1144ae8",
   "metadata": {},
   "source": [
    "<hr style=\"border: 2px solid black\">"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "206e3287-0a75-40d8-839f-c9fddc4f2d9d",
   "metadata": {},
   "source": [
    "We modify the enconding of the binary variables."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "7790e3e8",
   "metadata": {},
   "outputs": [],
   "source": [
    "if convert_binary==True:\n",
    "    for column in c_bin:\n",
    "       df_den_expo[column].replace(2,0, inplace=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "32c91a71-c7a8-4305-af1a-b5622e6763a6",
   "metadata": {},
   "source": [
    "<hr style=\"border: 2px solid black\">"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "b0949a39-b530-4c1a-84ca-0aa0e653af5b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CSEX</th>\n",
       "      <th>DMAGE</th>\n",
       "      <th>DMAR</th>\n",
       "      <th>MONPRE</th>\n",
       "      <th>GESTAT</th>\n",
       "      <th>DBIRWT</th>\n",
       "      <th>DTOTORD</th>\n",
       "      <th>MPLBIRR</th>\n",
       "      <th>CARDIAC</th>\n",
       "      <th>LUNG</th>\n",
       "      <th>...</th>\n",
       "      <th>MECONSYN</th>\n",
       "      <th>NSEIZ</th>\n",
       "      <th>FLGND</th>\n",
       "      <th>MRACE3_recode</th>\n",
       "      <th>PLDEL_recode</th>\n",
       "      <th>DPLURAL_recode</th>\n",
       "      <th>DELMETH5_recode</th>\n",
       "      <th>BIRMON_recode</th>\n",
       "      <th>WEEKDAYB_recode</th>\n",
       "      <th>BIRATTND_recode</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>34</td>\n",
       "      <td>2702</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>34</td>\n",
       "      <td>2702</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>14</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>37</td>\n",
       "      <td>3642</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>29</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>3489</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>32</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>38</td>\n",
       "      <td>3351</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3733673</th>\n",
       "      <td>1</td>\n",
       "      <td>23</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>37</td>\n",
       "      <td>2948</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3733674</th>\n",
       "      <td>0</td>\n",
       "      <td>26</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>39</td>\n",
       "      <td>3515</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3733675</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>40</td>\n",
       "      <td>3119</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3733676</th>\n",
       "      <td>1</td>\n",
       "      <td>25</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>39</td>\n",
       "      <td>2892</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3733677</th>\n",
       "      <td>0</td>\n",
       "      <td>19</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>35</td>\n",
       "      <td>2552</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3733678 rows × 50 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         CSEX  DMAGE  DMAR  MONPRE  GESTAT  DBIRWT  DTOTORD  MPLBIRR  CARDIAC  \\\n",
       "0           0     25     1       1      34    2702        1        1        0   \n",
       "1           0     25     1       1      34    2702        2        1        0   \n",
       "2           1     14     0       2      37    3642        1        1        0   \n",
       "3           1     29     1       1      40    3489        1        1        0   \n",
       "4           1     32     1       1      38    3351        5        1        0   \n",
       "...       ...    ...   ...     ...     ...     ...      ...      ...      ...   \n",
       "3733673     1     23     1       1      37    2948        2        1        0   \n",
       "3733674     0     26     1       1      39    3515        3        1        0   \n",
       "3733675     0     21     0       2      40    3119        2        1        0   \n",
       "3733676     1     25     1       1      39    2892        4        0        0   \n",
       "3733677     0     19     1       1      35    2552        1        1        0   \n",
       "\n",
       "         LUNG  ...  MECONSYN  NSEIZ  FLGND  MRACE3_recode  PLDEL_recode  \\\n",
       "0           0  ...         0      0      0              0             0   \n",
       "1           0  ...         0      0      0              0             0   \n",
       "2           0  ...         0      0      0              1             0   \n",
       "3           0  ...         0      0      0              0             0   \n",
       "4           0  ...         0      0      0              0             0   \n",
       "...       ...  ...       ...    ...    ...            ...           ...   \n",
       "3733673     0  ...         0      0      0              0             0   \n",
       "3733674     0  ...         0      0      0              0             1   \n",
       "3733675     0  ...         0      0      0              0             0   \n",
       "3733676     0  ...         0      0      0              0             0   \n",
       "3733677     0  ...         0      0      0              0             1   \n",
       "\n",
       "         DPLURAL_recode  DELMETH5_recode  BIRMON_recode  WEEKDAYB_recode  \\\n",
       "0                     1                1              1                1   \n",
       "1                     1                1              1                1   \n",
       "2                     0                0              1                1   \n",
       "3                     0                0              1                1   \n",
       "4                     0                0              1                1   \n",
       "...                 ...              ...            ...              ...   \n",
       "3733673               0                0              1                0   \n",
       "3733674               0                0              1                1   \n",
       "3733675               0                0              1                1   \n",
       "3733676               0                0              1                0   \n",
       "3733677               0                0              1                0   \n",
       "\n",
       "         BIRATTND_recode  \n",
       "0                      0  \n",
       "1                      0  \n",
       "2                      0  \n",
       "3                      0  \n",
       "4                      0  \n",
       "...                  ...  \n",
       "3733673                0  \n",
       "3733674                1  \n",
       "3733675                0  \n",
       "3733676                0  \n",
       "3733677                1  \n",
       "\n",
       "[3733678 rows x 50 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_den_expo"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d848d707-9f7a-4876-96e7-ff460d0f8254",
   "metadata": {},
   "source": [
    "We display the covariate values to verify our variable encoding."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "6d592bc9-ac6b-4e9f-aeb6-e0387ce9d035",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Unique Values in CSEX: [0 1]\n",
      "Unique Values in DMAGE: [25 14 29 32 31 22 19 21 23 20 24 17 18 35 40 16 27 36 28 30 26 38 41 33\n",
      " 37 15 34 42 39 43 13 46 45 44 12 11 47 10 49 48]\n",
      "Unique Values in DMAR: [1 0]\n",
      "Unique Values in MONPRE: [1 2 3 4 5]\n",
      "Unique Values in GESTAT: [34 37 40 38 43 41 42 39 31 36 35 44 25 32 33 45 29 46 30 24 47 27 23 28\n",
      " 20 21 26 22 19 17 18]\n",
      "Unique Values in DBIRWT: [2702 3642 3489 ... 7172 5319 5062]\n",
      "Unique Values in DTOTORD: [ 1  2  5  3  4  8  7  6 10  9 11 12 13 15 16 14 18 17 19 24 20 22 21 27\n",
      " 26 23 28]\n",
      "Unique Values in MPLBIRR: [1 0]\n",
      "Unique Values in CARDIAC: [0 1]\n",
      "Unique Values in LUNG: [0 1]\n",
      "Unique Values in DIABETES: [0 1]\n",
      "Unique Values in HYDRA: [0 1]\n",
      "Unique Values in HEMO: [0 1]\n",
      "Unique Values in CHYPER: [0 1]\n",
      "Unique Values in PHYYPER: [0 1]\n",
      "Unique Values in ECLAMP: [0 1]\n",
      "Unique Values in INCERVIX: [0 1]\n",
      "Unique Values in PRETERM: [0 1]\n",
      "Unique Values in OTHERMR: [1 0]\n",
      "Unique Values in AMNIO: [0 1]\n",
      "Unique Values in MONITOR: [1 0]\n",
      "Unique Values in INDUCT: [0 1]\n",
      "Unique Values in STIMULA: [0 1]\n",
      "Unique Values in TOCOL: [0 1]\n",
      "Unique Values in ULTRAS: [1 0]\n",
      "Unique Values in OTHEROB: [0 1]\n",
      "Unique Values in FEBRILE: [0 1]\n",
      "Unique Values in MECONIUM: [0 1]\n",
      "Unique Values in RUPTURE: [0 1]\n",
      "Unique Values in ABRUPTIO: [0 1]\n",
      "Unique Values in PREPLACE: [0 1]\n",
      "Unique Values in EXCEBLD: [0 1]\n",
      "Unique Values in SEIZURE: [0 1]\n",
      "Unique Values in PRECIP: [0 1]\n",
      "Unique Values in PROLONG: [0 1]\n",
      "Unique Values in CEPHALO: [0 1]\n",
      "Unique Values in CORD: [0 1]\n",
      "Unique Values in OTHERLB: [1 0]\n",
      "Unique Values in NANEMIA: [0 1]\n",
      "Unique Values in HYALINE: [0 1]\n",
      "Unique Values in MECONSYN: [0 1]\n",
      "Unique Values in NSEIZ: [0 1]\n",
      "Unique Values in FLGND: [0 1]\n",
      "Unique Values in MRACE3_recode: [0 1]\n",
      "Unique Values in PLDEL_recode: [0 1]\n",
      "Unique Values in DPLURAL_recode: [1 0]\n",
      "Unique Values in DELMETH5_recode: [1 0]\n",
      "Unique Values in BIRMON_recode: [1 0]\n",
      "Unique Values in WEEKDAYB_recode: [1 0]\n",
      "Unique Values in BIRATTND_recode: [0 1]\n"
     ]
    }
   ],
   "source": [
    "for column in df_den_expo.columns:\n",
    "    unique_values = df_den_expo[column].unique()\n",
    "    print(f\"Unique Values in {column}: {unique_values}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9bdf5b25-89ea-40d2-a6e3-025bea22c28b",
   "metadata": {},
   "source": [
    "## Export Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "a2c578ac-95d9-450e-a07d-123eea52d95b",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_den_expo.to_csv('LBIDD_den_final.csv',index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fb85e159-b39a-4cfc-9866-f85263f66a39",
   "metadata": {},
   "source": [
    "To export a random subset of the data:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "2bfeb11e-2e4f-4d2e-ad3f-93b4ca41e556",
   "metadata": {},
   "outputs": [],
   "source": [
    "random_seed = 42\n",
    "n=200000\n",
    "subset_df = df_den_expo.sample(n=n, random_state=random_seed)\n",
    "subset_df.to_csv(f'LBIDD_den_final_{n}.csv',index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "LBIDD_env",
   "language": "python",
   "name": "lbidd_env"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
