{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>name</th>\n",
       "      <th>first</th>\n",
       "      <th>last</th>\n",
       "      <th>compas_screening_date</th>\n",
       "      <th>sex</th>\n",
       "      <th>dob</th>\n",
       "      <th>age</th>\n",
       "      <th>age_cat</th>\n",
       "      <th>race</th>\n",
       "      <th>...</th>\n",
       "      <th>v_decile_score</th>\n",
       "      <th>v_score_text</th>\n",
       "      <th>v_screening_date</th>\n",
       "      <th>in_custody</th>\n",
       "      <th>out_custody</th>\n",
       "      <th>priors_count.1</th>\n",
       "      <th>start</th>\n",
       "      <th>end</th>\n",
       "      <th>event</th>\n",
       "      <th>two_year_recid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>miguel hernandez</td>\n",
       "      <td>miguel</td>\n",
       "      <td>hernandez</td>\n",
       "      <td>2013-08-14</td>\n",
       "      <td>Male</td>\n",
       "      <td>1947-04-18</td>\n",
       "      <td>69</td>\n",
       "      <td>Greater than 45</td>\n",
       "      <td>Other</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>Low</td>\n",
       "      <td>2013-08-14</td>\n",
       "      <td>2014-07-07</td>\n",
       "      <td>2014-07-14</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>327</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "      <td>kevon dixon</td>\n",
       "      <td>kevon</td>\n",
       "      <td>dixon</td>\n",
       "      <td>2013-01-27</td>\n",
       "      <td>Male</td>\n",
       "      <td>1982-01-22</td>\n",
       "      <td>34</td>\n",
       "      <td>25 - 45</td>\n",
       "      <td>African-American</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>Low</td>\n",
       "      <td>2013-01-27</td>\n",
       "      <td>2013-01-26</td>\n",
       "      <td>2013-02-05</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>159</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4</td>\n",
       "      <td>ed philo</td>\n",
       "      <td>ed</td>\n",
       "      <td>philo</td>\n",
       "      <td>2013-04-14</td>\n",
       "      <td>Male</td>\n",
       "      <td>1991-05-14</td>\n",
       "      <td>24</td>\n",
       "      <td>Less than 25</td>\n",
       "      <td>African-American</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>Low</td>\n",
       "      <td>2013-04-14</td>\n",
       "      <td>2013-06-16</td>\n",
       "      <td>2013-06-16</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>63</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5</td>\n",
       "      <td>marcu brown</td>\n",
       "      <td>marcu</td>\n",
       "      <td>brown</td>\n",
       "      <td>2013-01-13</td>\n",
       "      <td>Male</td>\n",
       "      <td>1993-01-21</td>\n",
       "      <td>23</td>\n",
       "      <td>Less than 25</td>\n",
       "      <td>African-American</td>\n",
       "      <td>...</td>\n",
       "      <td>6</td>\n",
       "      <td>Medium</td>\n",
       "      <td>2013-01-13</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1174</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>6</td>\n",
       "      <td>bouthy pierrelouis</td>\n",
       "      <td>bouthy</td>\n",
       "      <td>pierrelouis</td>\n",
       "      <td>2013-03-26</td>\n",
       "      <td>Male</td>\n",
       "      <td>1973-01-22</td>\n",
       "      <td>43</td>\n",
       "      <td>25 - 45</td>\n",
       "      <td>Other</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>Low</td>\n",
       "      <td>2013-03-26</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1102</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7209</th>\n",
       "      <td>10996</td>\n",
       "      <td>steven butler</td>\n",
       "      <td>steven</td>\n",
       "      <td>butler</td>\n",
       "      <td>2013-11-23</td>\n",
       "      <td>Male</td>\n",
       "      <td>1992-07-17</td>\n",
       "      <td>23</td>\n",
       "      <td>Less than 25</td>\n",
       "      <td>African-American</td>\n",
       "      <td>...</td>\n",
       "      <td>5</td>\n",
       "      <td>Medium</td>\n",
       "      <td>2013-11-23</td>\n",
       "      <td>2013-11-22</td>\n",
       "      <td>2013-11-24</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>860</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7210</th>\n",
       "      <td>10997</td>\n",
       "      <td>malcolm simmons</td>\n",
       "      <td>malcolm</td>\n",
       "      <td>simmons</td>\n",
       "      <td>2014-02-01</td>\n",
       "      <td>Male</td>\n",
       "      <td>1993-03-25</td>\n",
       "      <td>23</td>\n",
       "      <td>Less than 25</td>\n",
       "      <td>African-American</td>\n",
       "      <td>...</td>\n",
       "      <td>5</td>\n",
       "      <td>Medium</td>\n",
       "      <td>2014-02-01</td>\n",
       "      <td>2014-01-31</td>\n",
       "      <td>2014-02-02</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>790</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7211</th>\n",
       "      <td>10999</td>\n",
       "      <td>winston gregory</td>\n",
       "      <td>winston</td>\n",
       "      <td>gregory</td>\n",
       "      <td>2014-01-14</td>\n",
       "      <td>Male</td>\n",
       "      <td>1958-10-01</td>\n",
       "      <td>57</td>\n",
       "      <td>Greater than 45</td>\n",
       "      <td>Other</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>Low</td>\n",
       "      <td>2014-01-14</td>\n",
       "      <td>2014-01-13</td>\n",
       "      <td>2014-01-14</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>808</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7212</th>\n",
       "      <td>11000</td>\n",
       "      <td>farrah jean</td>\n",
       "      <td>farrah</td>\n",
       "      <td>jean</td>\n",
       "      <td>2014-03-09</td>\n",
       "      <td>Female</td>\n",
       "      <td>1982-11-17</td>\n",
       "      <td>33</td>\n",
       "      <td>25 - 45</td>\n",
       "      <td>African-American</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>Low</td>\n",
       "      <td>2014-03-09</td>\n",
       "      <td>2014-03-08</td>\n",
       "      <td>2014-03-09</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>754</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7213</th>\n",
       "      <td>11001</td>\n",
       "      <td>florencia sanmartin</td>\n",
       "      <td>florencia</td>\n",
       "      <td>sanmartin</td>\n",
       "      <td>2014-06-30</td>\n",
       "      <td>Female</td>\n",
       "      <td>1992-12-18</td>\n",
       "      <td>23</td>\n",
       "      <td>Less than 25</td>\n",
       "      <td>Hispanic</td>\n",
       "      <td>...</td>\n",
       "      <td>4</td>\n",
       "      <td>Low</td>\n",
       "      <td>2014-06-30</td>\n",
       "      <td>2015-03-15</td>\n",
       "      <td>2015-03-15</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>258</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>7214 rows × 53 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         id                 name      first         last  \\\n",
       "0         1     miguel hernandez     miguel    hernandez   \n",
       "1         3          kevon dixon      kevon        dixon   \n",
       "2         4             ed philo         ed        philo   \n",
       "3         5          marcu brown      marcu        brown   \n",
       "4         6   bouthy pierrelouis     bouthy  pierrelouis   \n",
       "...     ...                  ...        ...          ...   \n",
       "7209  10996        steven butler     steven       butler   \n",
       "7210  10997      malcolm simmons    malcolm      simmons   \n",
       "7211  10999      winston gregory    winston      gregory   \n",
       "7212  11000          farrah jean     farrah         jean   \n",
       "7213  11001  florencia sanmartin  florencia    sanmartin   \n",
       "\n",
       "     compas_screening_date     sex         dob  age          age_cat  \\\n",
       "0               2013-08-14    Male  1947-04-18   69  Greater than 45   \n",
       "1               2013-01-27    Male  1982-01-22   34          25 - 45   \n",
       "2               2013-04-14    Male  1991-05-14   24     Less than 25   \n",
       "3               2013-01-13    Male  1993-01-21   23     Less than 25   \n",
       "4               2013-03-26    Male  1973-01-22   43          25 - 45   \n",
       "...                    ...     ...         ...  ...              ...   \n",
       "7209            2013-11-23    Male  1992-07-17   23     Less than 25   \n",
       "7210            2014-02-01    Male  1993-03-25   23     Less than 25   \n",
       "7211            2014-01-14    Male  1958-10-01   57  Greater than 45   \n",
       "7212            2014-03-09  Female  1982-11-17   33          25 - 45   \n",
       "7213            2014-06-30  Female  1992-12-18   23     Less than 25   \n",
       "\n",
       "                  race  ...  v_decile_score  v_score_text  v_screening_date  \\\n",
       "0                Other  ...               1           Low        2013-08-14   \n",
       "1     African-American  ...               1           Low        2013-01-27   \n",
       "2     African-American  ...               3           Low        2013-04-14   \n",
       "3     African-American  ...               6        Medium        2013-01-13   \n",
       "4                Other  ...               1           Low        2013-03-26   \n",
       "...                ...  ...             ...           ...               ...   \n",
       "7209  African-American  ...               5        Medium        2013-11-23   \n",
       "7210  African-American  ...               5        Medium        2014-02-01   \n",
       "7211             Other  ...               1           Low        2014-01-14   \n",
       "7212  African-American  ...               2           Low        2014-03-09   \n",
       "7213          Hispanic  ...               4           Low        2014-06-30   \n",
       "\n",
       "      in_custody  out_custody  priors_count.1 start   end event two_year_recid  \n",
       "0     2014-07-07   2014-07-14               0     0   327     0              0  \n",
       "1     2013-01-26   2013-02-05               0     9   159     1              1  \n",
       "2     2013-06-16   2013-06-16               4     0    63     0              1  \n",
       "3            NaN          NaN               1     0  1174     0              0  \n",
       "4            NaN          NaN               2     0  1102     0              0  \n",
       "...          ...          ...             ...   ...   ...   ...            ...  \n",
       "7209  2013-11-22   2013-11-24               0     1   860     0              0  \n",
       "7210  2014-01-31   2014-02-02               0     1   790     0              0  \n",
       "7211  2014-01-13   2014-01-14               0     0   808     0              0  \n",
       "7212  2014-03-08   2014-03-09               3     0   754     0              0  \n",
       "7213  2015-03-15   2015-03-15               2     0   258     0              1  \n",
       "\n",
       "[7214 rows x 53 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Import libraries\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from itertools import combinations, chain\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "\n",
    "# Load in the 2 year COMPAS Recidivism dataset\n",
    "df = pd.read_csv(\"https://raw.githubusercontent.com/propublica/compas-analysis/master/compas-scores-two-years.csv\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "sex\n",
       "Male      5819\n",
       "Female    1395\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['sex'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "race\n",
       "African-American    3696\n",
       "Caucasian           2454\n",
       "Hispanic             637\n",
       "Other                377\n",
       "Asian                 32\n",
       "Native American       18\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['race'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "compas = {\n",
    "    'sex': {\n",
    "        1: 'Male',\n",
    "        0: 'Female',\n",
    "    },\n",
    "    'race': {\n",
    "        0: 'African-American',\n",
    "        2: 'Caucasian',\n",
    "        3: 'Hispanic',\n",
    "        5: 'Other',\n",
    "        1: 'Asian',\n",
    "        4: 'Native American',\n",
    "    }\n",
    "}"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ai",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
