{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "cb951daf-6ad5-49d0-911f-c9e915750504",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn import metrics\n",
    "from sklearn import ensemble\n",
    "\n",
    "import statsmodels.api as sm\n",
    "\n",
    "from scipy.stats import norm\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import math\n",
    "from bisect import bisect"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "28a3e223-d17b-4f84-9d77-55d7593ddb5f",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# for lawschool data\n",
    "xfeatures = ['race', 'cluster', 'lsat', 'zfygpa', 'zgpa', 'fulltime',\n",
    "       'fam_inc', 'age', 'gender']\n",
    "yfeatures = 'ugpa'\n",
    "protected_features = 'gender'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f33e8b57-b99b-4c23-a29d-d49d4482d222",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# for community data\n",
    "xfeatures = df.columns[0:-2].to_list() + ['race']\n",
    "yfeatures = 'ViolentCrimesPerPop'\n",
    "protected_features = 'race'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c0081b07-7d77-4622-affd-fac2910bc226",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# for gov data\n",
    "import pyreadr\n",
    "\n",
    "result = pyreadr.read_r('./gov_census.rda') # also works for Rds, rda\n",
    "\n",
    "# result is a dictionary where keys are the name of objects and the values python\n",
    "# objects\n",
    "print(result.keys()) # let's check what objects we got\n",
    "gov_census = result['gov_census'] # extract the pandas data frame\n",
    "\n",
    "gov_census.columns\n",
    "df=gov_census\n",
    "df=df.drop('occupation',axis=1)\n",
    "df.loc[(df['race']=='AIAN') | (df['race']=='NHOPI'),'race'] = 'other' \n",
    "\n",
    "\n",
    "xfeatures=[ 'sex', 'age', 'race', 'hispanic_origin', 'citizenship', 'nativity',\n",
    "       'marital', 'family_size', 'children', 'education_level',\n",
    "       'english_level', 'hours_worked', 'weeks_worked', 'industry',\n",
    "       'economic_region']\n",
    "\n",
    "yfeatures='salary'\n",
    "protected_features='race'\n",
    "\n",
    "for col in df.columns:\n",
    "    if df[col].dtype.name == 'category':\n",
    "        df.loc[:,col] = df[col].astype(\"category\").cat.codes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "9032344c-f650-4806-b54a-a4b35e7787c8",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# for MEPS data\n",
    "df = pd.read_csv('./MEPS/meps_21_reg.csv')\n",
    "\n",
    "import re\n",
    "cc = []\n",
    "gg = []\n",
    "tmp = ''\n",
    "for col in df.columns:\n",
    "    aa = re.findall('(\\w*)=(\\d)', col)\n",
    "    if len(aa) != 0:\n",
    "        #print(aa)\n",
    "        if aa[0][0] == tmp:\n",
    "            gg.append(col)\n",
    "        else:\n",
    "            if len(gg) < 1:\n",
    "                tmp = aa[0][0]\n",
    "                gg.append(col)\n",
    "                continue\n",
    "        \n",
    "            bb = re.findall('(\\w*)=(\\d)', gg[0])\n",
    "            \n",
    "            dd = df[gg]*list(range(1,len(gg)+1))\n",
    "            df[bb[0][0]] = dd.sum(axis=1)\n",
    "            gg = []\n",
    "        tmp = aa[0][0]\n",
    "\n",
    "\n",
    "\n",
    "xfeatures = ['AGE', 'PCS42', 'MCS42', 'K6SUM42', 'PERWT16F', 'REGION', 'SEX', 'MARRY', 'FTSTU',\n",
    "       'ACTDTY', 'HONRDC', 'RTHLTH', 'MNHLTH', 'HIBPDX', 'CHDDX', 'ANGIDX',\n",
    "       'MIDX', 'OHRTDX', 'STRKDX', 'EMPHDX', 'CHBRON', 'CHOLDX', 'CANCERDX',\n",
    "       'DIABDX', 'JTPAIN', 'ARTHDX', 'ARTHTYPE', 'ASTHDX', 'ADHDADDX',\n",
    "       'PREGNT', 'WLKLIM', 'ACTLIM', 'SOCLIM', 'COGLIM', 'DFHEAR42', 'DFSEE42',\n",
    "       'ADSMOK42', 'PHQ242', 'EMPST', 'POVCAT', 'RACE']\n",
    "yfeatures = \"UTILIZATION_reg\"\n",
    "protected_features = 'RACE'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "3ac4f536-6bdb-4751-ad74-a3601670c99a",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "## scale the response to [0,1]\n",
    "\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "scaler = MinMaxScaler()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "166c7c59-7d67-4276-8125-97b8d73b65cf",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>AGE</th>\n",
       "      <th>RACE</th>\n",
       "      <th>PCS42</th>\n",
       "      <th>MCS42</th>\n",
       "      <th>K6SUM42</th>\n",
       "      <th>UTILIZATION_reg</th>\n",
       "      <th>PERWT16F</th>\n",
       "      <th>REGION=1</th>\n",
       "      <th>REGION=2</th>\n",
       "      <th>...</th>\n",
       "      <th>WLKLIM</th>\n",
       "      <th>ACTLIM</th>\n",
       "      <th>SOCLIM</th>\n",
       "      <th>COGLIM</th>\n",
       "      <th>DFHEAR42</th>\n",
       "      <th>DFSEE42</th>\n",
       "      <th>ADSMOK42</th>\n",
       "      <th>PHQ242</th>\n",
       "      <th>EMPST</th>\n",
       "      <th>POVCAT</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>44</td>\n",
       "      <td>1.0</td>\n",
       "      <td>57.76</td>\n",
       "      <td>57.06</td>\n",
       "      <td>1</td>\n",
       "      <td>14</td>\n",
       "      <td>12999.552725</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>43</td>\n",
       "      <td>1.0</td>\n",
       "      <td>59.11</td>\n",
       "      <td>54.10</td>\n",
       "      <td>3</td>\n",
       "      <td>7</td>\n",
       "      <td>11361.661447</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>16</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-1.00</td>\n",
       "      <td>-1.00</td>\n",
       "      <td>-1</td>\n",
       "      <td>4</td>\n",
       "      <td>13263.220829</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>13</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-1.00</td>\n",
       "      <td>-1.00</td>\n",
       "      <td>-1</td>\n",
       "      <td>5</td>\n",
       "      <td>11616.068609</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>66</td>\n",
       "      <td>0.0</td>\n",
       "      <td>22.45</td>\n",
       "      <td>53.13</td>\n",
       "      <td>0</td>\n",
       "      <td>266</td>\n",
       "      <td>4446.485068</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15651</th>\n",
       "      <td>17046</td>\n",
       "      <td>45</td>\n",
       "      <td>0.0</td>\n",
       "      <td>29.68</td>\n",
       "      <td>59.35</td>\n",
       "      <td>0</td>\n",
       "      <td>17</td>\n",
       "      <td>9699.333659</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15652</th>\n",
       "      <td>17048</td>\n",
       "      <td>16</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.00</td>\n",
       "      <td>-1.00</td>\n",
       "      <td>-1</td>\n",
       "      <td>0</td>\n",
       "      <td>10294.600896</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15653</th>\n",
       "      <td>17049</td>\n",
       "      <td>34</td>\n",
       "      <td>0.0</td>\n",
       "      <td>51.55</td>\n",
       "      <td>55.53</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>4905.602112</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15654</th>\n",
       "      <td>17050</td>\n",
       "      <td>29</td>\n",
       "      <td>0.0</td>\n",
       "      <td>57.49</td>\n",
       "      <td>54.20</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>4730.857685</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15655</th>\n",
       "      <td>17051</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.00</td>\n",
       "      <td>-1.00</td>\n",
       "      <td>-1</td>\n",
       "      <td>1</td>\n",
       "      <td>5046.199247</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>15656 rows × 176 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       Unnamed: 0  AGE  RACE  PCS42  MCS42  K6SUM42  UTILIZATION_reg  \\\n",
       "0               0   44   1.0  57.76  57.06        1               14   \n",
       "1               1   43   1.0  59.11  54.10        3                7   \n",
       "2               2   16   1.0  -1.00  -1.00       -1                4   \n",
       "3               3   13   1.0  -1.00  -1.00       -1                5   \n",
       "4               4   66   0.0  22.45  53.13        0              266   \n",
       "...           ...  ...   ...    ...    ...      ...              ...   \n",
       "15651       17046   45   0.0  29.68  59.35        0               17   \n",
       "15652       17048   16   0.0  -1.00  -1.00       -1                0   \n",
       "15653       17049   34   0.0  51.55  55.53        3                0   \n",
       "15654       17050   29   0.0  57.49  54.20        2                0   \n",
       "15655       17051    2   0.0  -1.00  -1.00       -1                1   \n",
       "\n",
       "           PERWT16F  REGION=1  REGION=2  ...  WLKLIM  ACTLIM  SOCLIM  COGLIM  \\\n",
       "0      12999.552725         1         0  ...       1       1       1       1   \n",
       "1      11361.661447         1         0  ...       1       1       1       1   \n",
       "2      13263.220829         1         0  ...       1       1       1       0   \n",
       "3      11616.068609         1         0  ...       1       1       1       0   \n",
       "4       4446.485068         1         0  ...       0       0       0       0   \n",
       "...             ...       ...       ...  ...     ...     ...     ...     ...   \n",
       "15651   9699.333659         0         0  ...       1       1       1       1   \n",
       "15652  10294.600896         0         0  ...       1       1       1       0   \n",
       "15653   4905.602112         0         0  ...       1       1       1       1   \n",
       "15654   4730.857685         0         0  ...       1       1       1       1   \n",
       "15655   5046.199247         0         0  ...       1       0       1       0   \n",
       "\n",
       "       DFHEAR42  DFSEE42  ADSMOK42  PHQ242  EMPST  POVCAT  \n",
       "0             1        1         1       0      0       4  \n",
       "1             1        1         1       0      0       4  \n",
       "2             1        1         0       0      0       4  \n",
       "3             1        1         0       0      0       4  \n",
       "4             1        0         0       0      3       0  \n",
       "...         ...      ...       ...     ...    ...     ...  \n",
       "15651         1        1         1       0      0       4  \n",
       "15652         1        1         0       0      3       4  \n",
       "15653         1        1         1       0      3       2  \n",
       "15654         1        1         1       0      3       2  \n",
       "15655         1        1         0       0      0       2  \n",
       "\n",
       "[15656 rows x 176 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "785e8924-6fbe-4f2d-82bc-78d10f6c683b",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.00000000e+00, 5.17647059e-01, 1.00000000e+00, ...,\n",
       "        0.00000000e+00, 0.00000000e+00, 1.00000000e+00],\n",
       "       [5.86475867e-05, 5.05882353e-01, 1.00000000e+00, ...,\n",
       "        0.00000000e+00, 0.00000000e+00, 1.00000000e+00],\n",
       "       [1.17295173e-04, 1.88235294e-01, 1.00000000e+00, ...,\n",
       "        0.00000000e+00, 0.00000000e+00, 1.00000000e+00],\n",
       "       ...,\n",
       "       [9.99882705e-01, 4.00000000e-01, 0.00000000e+00, ...,\n",
       "        0.00000000e+00, 1.00000000e+00, 5.00000000e-01],\n",
       "       [9.99941352e-01, 3.41176471e-01, 0.00000000e+00, ...,\n",
       "        0.00000000e+00, 1.00000000e+00, 5.00000000e-01],\n",
       "       [1.00000000e+00, 2.35294118e-02, 0.00000000e+00, ...,\n",
       "        0.00000000e+00, 0.00000000e+00, 5.00000000e-01]])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "scaler.fit_transform(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "49da6e34-0408-40ad-b7ac-dca3ac36eaff",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "df = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "d7a7aff5-cf27-4433-a685-b5a29ccdcef2",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>AGE</th>\n",
       "      <th>RACE</th>\n",
       "      <th>PCS42</th>\n",
       "      <th>MCS42</th>\n",
       "      <th>K6SUM42</th>\n",
       "      <th>UTILIZATION_reg</th>\n",
       "      <th>PERWT16F</th>\n",
       "      <th>REGION=1</th>\n",
       "      <th>REGION=2</th>\n",
       "      <th>...</th>\n",
       "      <th>WLKLIM</th>\n",
       "      <th>ACTLIM</th>\n",
       "      <th>SOCLIM</th>\n",
       "      <th>COGLIM</th>\n",
       "      <th>DFHEAR42</th>\n",
       "      <th>DFSEE42</th>\n",
       "      <th>ADSMOK42</th>\n",
       "      <th>PHQ242</th>\n",
       "      <th>EMPST</th>\n",
       "      <th>POVCAT</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.517647</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.839643</td>\n",
       "      <td>0.780482</td>\n",
       "      <td>0.303030</td>\n",
       "      <td>0.026316</td>\n",
       "      <td>0.165587</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.000059</td>\n",
       "      <td>0.505882</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.856622</td>\n",
       "      <td>0.745510</td>\n",
       "      <td>0.363636</td>\n",
       "      <td>0.013158</td>\n",
       "      <td>0.144724</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.000117</td>\n",
       "      <td>0.188235</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.100616</td>\n",
       "      <td>0.094518</td>\n",
       "      <td>0.242424</td>\n",
       "      <td>0.007519</td>\n",
       "      <td>0.168946</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.000176</td>\n",
       "      <td>0.152941</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.100616</td>\n",
       "      <td>0.094518</td>\n",
       "      <td>0.242424</td>\n",
       "      <td>0.009398</td>\n",
       "      <td>0.147964</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.000235</td>\n",
       "      <td>0.776471</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.395548</td>\n",
       "      <td>0.734050</td>\n",
       "      <td>0.272727</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>0.056639</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15651</th>\n",
       "      <td>0.999707</td>\n",
       "      <td>0.529412</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.486480</td>\n",
       "      <td>0.807538</td>\n",
       "      <td>0.272727</td>\n",
       "      <td>0.031955</td>\n",
       "      <td>0.123549</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15652</th>\n",
       "      <td>0.999824</td>\n",
       "      <td>0.188235</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.100616</td>\n",
       "      <td>0.094518</td>\n",
       "      <td>0.242424</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.131132</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15653</th>\n",
       "      <td>0.999883</td>\n",
       "      <td>0.400000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.761539</td>\n",
       "      <td>0.762405</td>\n",
       "      <td>0.363636</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.062487</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15654</th>\n",
       "      <td>0.999941</td>\n",
       "      <td>0.341176</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.836247</td>\n",
       "      <td>0.746692</td>\n",
       "      <td>0.333333</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.060261</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15655</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.023529</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.100616</td>\n",
       "      <td>0.094518</td>\n",
       "      <td>0.242424</td>\n",
       "      <td>0.001880</td>\n",
       "      <td>0.064278</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>15656 rows × 176 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       Unnamed: 0       AGE  RACE     PCS42     MCS42   K6SUM42  \\\n",
       "0        0.000000  0.517647   1.0  0.839643  0.780482  0.303030   \n",
       "1        0.000059  0.505882   1.0  0.856622  0.745510  0.363636   \n",
       "2        0.000117  0.188235   1.0  0.100616  0.094518  0.242424   \n",
       "3        0.000176  0.152941   1.0  0.100616  0.094518  0.242424   \n",
       "4        0.000235  0.776471   0.0  0.395548  0.734050  0.272727   \n",
       "...           ...       ...   ...       ...       ...       ...   \n",
       "15651    0.999707  0.529412   0.0  0.486480  0.807538  0.272727   \n",
       "15652    0.999824  0.188235   0.0  0.100616  0.094518  0.242424   \n",
       "15653    0.999883  0.400000   0.0  0.761539  0.762405  0.363636   \n",
       "15654    0.999941  0.341176   0.0  0.836247  0.746692  0.333333   \n",
       "15655    1.000000  0.023529   0.0  0.100616  0.094518  0.242424   \n",
       "\n",
       "       UTILIZATION_reg  PERWT16F  REGION=1  REGION=2  ...  WLKLIM  ACTLIM  \\\n",
       "0             0.026316  0.165587       1.0       0.0  ...     1.0     1.0   \n",
       "1             0.013158  0.144724       1.0       0.0  ...     1.0     1.0   \n",
       "2             0.007519  0.168946       1.0       0.0  ...     1.0     1.0   \n",
       "3             0.009398  0.147964       1.0       0.0  ...     1.0     1.0   \n",
       "4             0.500000  0.056639       1.0       0.0  ...     0.0     0.0   \n",
       "...                ...       ...       ...       ...  ...     ...     ...   \n",
       "15651         0.031955  0.123549       0.0       0.0  ...     1.0     1.0   \n",
       "15652         0.000000  0.131132       0.0       0.0  ...     1.0     1.0   \n",
       "15653         0.000000  0.062487       0.0       0.0  ...     1.0     1.0   \n",
       "15654         0.000000  0.060261       0.0       0.0  ...     1.0     1.0   \n",
       "15655         0.001880  0.064278       0.0       0.0  ...     1.0     0.0   \n",
       "\n",
       "       SOCLIM  COGLIM  DFHEAR42  DFSEE42  ADSMOK42  PHQ242  EMPST  POVCAT  \n",
       "0         1.0     1.0       1.0      1.0       1.0     0.0    0.0     1.0  \n",
       "1         1.0     1.0       1.0      1.0       1.0     0.0    0.0     1.0  \n",
       "2         1.0     0.0       1.0      1.0       0.0     0.0    0.0     1.0  \n",
       "3         1.0     0.0       1.0      1.0       0.0     0.0    0.0     1.0  \n",
       "4         0.0     0.0       1.0      0.0       0.0     0.0    1.0     0.0  \n",
       "...       ...     ...       ...      ...       ...     ...    ...     ...  \n",
       "15651     1.0     1.0       1.0      1.0       1.0     0.0    0.0     1.0  \n",
       "15652     1.0     0.0       1.0      1.0       0.0     0.0    1.0     1.0  \n",
       "15653     1.0     1.0       1.0      1.0       1.0     0.0    1.0     0.5  \n",
       "15654     1.0     1.0       1.0      1.0       1.0     0.0    1.0     0.5  \n",
       "15655     1.0     0.0       1.0      1.0       0.0     0.0    0.0     0.5  \n",
       "\n",
       "[15656 rows x 176 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "59a8e427-5193-4eb1-a472-ad41c9bebd3c",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "lower_quantile = 0.05\n",
    "upper_quantile = 0.95\n",
    "\n",
    "lower_quantile_name = 'qt_pred_' + str(lower_quantile)\n",
    "upper_quantile_name = 'qt_pred_' + str(upper_quantile)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "9dceaf58-098b-495b-97d5-7f19324b3981",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "import statsmodels.api as sm\n",
    "\n",
    "## quantile regression\n",
    "def linear_quantile(X_train, y_train, X_calib, X_test, quantiles = [0.05, 0.95]):\n",
    "    X_calib = X_calib.copy()\n",
    "    X_test = X_test.copy()\n",
    "    \n",
    "    quantreg = sm.QuantReg(y_train, X_train)  # fit linear quantile model\n",
    "    \n",
    "    \n",
    "    for q in quantiles:\n",
    "        X_calib.loc[:,'qt_pred_' +str(q)] = quantreg.fit(q=q).predict(X_calib.loc[:,xfeatures])\n",
    "        X_test.loc[:,'qt_pred_' +str(q)] = quantreg.fit(q=q).predict(X_test.loc[:,xfeatures])\n",
    "    \n",
    "    return X_calib, X_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "dc221de6-6104-4d35-80c5-f90995a34ed4",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "from skgarden import RandomForestQuantileRegressor\n",
    "from datetime import datetime\n",
    "import random\n",
    "# quantile random forest\n",
    "def rf_quantile(X_train, y_train, X_calib, X_test, quantiles = [0.05, 0.95]):\n",
    "    calib = X_calib.copy()\n",
    "    test = X_test.copy()\n",
    "    N_ESTIMATORS = 15\n",
    "    rfqr = RandomForestQuantileRegressor(n_estimators=N_ESTIMATORS,random_state=random.seed(int(datetime.now().microsecond)),n_jobs=-1)\n",
    "    rfqr.fit(X_train, y_train)\n",
    "    \n",
    "    for q in quantiles:\n",
    "        calib.loc[:,'qt_pred_' +str(q)] =rfqr.predict(calib[xfeatures], int(100*q))\n",
    "        test.loc[:,'qt_pred_' +str(q)] = rfqr.predict(test[xfeatures], int(100*q))\n",
    "    return calib, test\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "39415c8f-a9ed-44d3-b6df-8176b5cad9a2",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "## quantile neural network\n",
    "\n",
    "import tensorflow as tf\n",
    "from tensorflow import keras\n",
    "\n",
    "EPOCHS = 200\n",
    "BATCH_SIZE = 32\n",
    "UNITS = 512\n",
    "####################\n",
    "\n",
    "def nn_quantile(X_train, y_train, X_calib, X_test, quantiles = [0.05, 0.95]):\n",
    "    X_calib = X_calib.copy()\n",
    "    X_test = X_test.copy()\n",
    "    def tilted_loss(q, y, f):\n",
    "        e = (y - f)\n",
    "        return keras.backend.mean(keras.backend.maximum(q * e, (q - 1) * e),\n",
    "                                  axis=-1)\n",
    "\n",
    "    optimizer = tf.optimizers.Adam(0.001)#tf.train.AdamOptimizer(0.001)\n",
    "    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)\n",
    "\n",
    "    def keras_pred(x_train, train_labels, q):\n",
    "        print(q)\n",
    "        # Set input_dim for the number of features.\n",
    "        if len(x_train.shape) == 1:\n",
    "            input_dim = 1\n",
    "        else:\n",
    "            input_dim = x_train.shape[1]\n",
    "        model = keras.Sequential([\n",
    "          keras.layers.Dense(UNITS, activation=tf.nn.relu,\n",
    "                             input_dim=input_dim),\n",
    "          keras.layers.Dense(UNITS, activation=tf.nn.relu),\n",
    "          keras.layers.Dense(1)\n",
    "        ])\n",
    "\n",
    "        model.compile(loss=lambda y, f: tilted_loss(q, y, f), optimizer=optimizer)\n",
    "        model.fit(x_train, train_labels, epochs=EPOCHS, batch_size=BATCH_SIZE,\n",
    "                  verbose=0, validation_split=0.2, callbacks=[early_stop])\n",
    "\n",
    "        # Predict the quantile\n",
    "        \n",
    "        return model\n",
    "    \n",
    "    for q in quantiles:\n",
    "        model = keras_pred(X_train, y_train, q)\n",
    "        X_calib.loc[:,'qt_pred_' +str(q)] = model.predict(X_calib.loc[:,xfeatures])\n",
    "        X_test.loc[:,'qt_pred_' +str(q)] = model.predict(X_test.loc[:,xfeatures])\n",
    "        \n",
    "    return X_calib, X_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "7cc98944-705c-460d-a961-b4b1d81d6790",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "def quantl_syn(calib_new, pred_col = 'qt_pred_0.05', protected_col = 'gender', interpolation='linear', sigma=-1e-4):\n",
    "## generate sorted fair calibration set\n",
    "\n",
    "    calib_sorted = calib_new.sort_values(by=[pred_col])\n",
    "    groups = calib_sorted.groupby(protected_col)\n",
    "    n_calib = calib_new.shape[0]  # sample size of calibration\n",
    "    taus = np.linspace(0,1,n_calib)  # number of quantiles, here we choose n_calib\n",
    "\n",
    "    Ns = groups.size()\n",
    "    ps = groups.size() / n_calib\n",
    "\n",
    "    y_fair = np.empty(n_calib)\n",
    "    for group in groups:\n",
    "        original_subgroup_sorted= group[1][pred_col].values + np.random.uniform(-1*sigma,1*sigma)\n",
    "        fair_subgroup_sorted = np.empty(n_calib)  # equal length\n",
    "        \n",
    "        for k in range(n_calib):\n",
    "            fair_subgroup_sorted[k] = np.quantile(original_subgroup_sorted, taus[k], interpolation=interpolation)\n",
    "        \n",
    "        y_fair += ps[group[0]] * fair_subgroup_sorted\n",
    "        \n",
    "    for group in groups:\n",
    "        index = np.floor(np.linspace(0, n_calib, Ns[group[0]], endpoint=False)).astype('int')\n",
    "        calib_sorted.loc[calib_sorted[protected_col] == group[0], pred_col + '_fair'] = y_fair[index] #+ np.random.uniform(-1e-5,1e-5)\n",
    "        # change by data magnitude\n",
    "    return calib_sorted"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "13eb3a33-bd09-448a-a15c-52dcc4d8ca9e",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "def compute_coverage_len(y_test, y_lower, y_upper):\n",
    "    \"\"\" Compute average coverage and length of prediction intervals\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "\n",
    "    y_test : numpy array, true labels (n)\n",
    "    y_lower : numpy array, estimated lower bound for the labels (n)\n",
    "    y_upper : numpy array, estimated upper bound for the labels (n)\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    coverage : float, average coverage\n",
    "    avg_length : float, average length\n",
    "\n",
    "    \"\"\"\n",
    "    in_the_range = np.sum((y_test >= y_lower) & (y_test <= y_upper))\n",
    "    coverage = in_the_range / len(y_test) * 100\n",
    "    avg_length = np.mean(abs(y_upper - y_lower))\n",
    "    return coverage, avg_length\n",
    "\n",
    "def get_coverage(calib_new):\n",
    "    y_calib = np.log(calib_new[yfeatures]+0.1).values\n",
    "    y_calib_lower = calib_new['qt_pred_0.05_fair'].values\n",
    "    y_calib_upper = calib_new['qt_pred_0.95_fair'].values\n",
    "    \n",
    "    calib_coverage, calib_avg_length=compute_coverage_len(y_calib, y_calib_lower, y_calib_upper)\n",
    "    return calib_coverage, calib_avg_length"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "dca376ec-6542-4aba-9928-3eefd9b37066",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "def get_Q_calib(calib_new, fair_flag):\n",
    "\n",
    "## give calibration Q value to conformalize the test points. \\alpha=0.1\n",
    "    if fair_flag == True:\n",
    "        df1 = calib_new[lower_quantile_name+'_fair'].values - np.log(calib_new[yfeatures]+0.1).values\n",
    "        df1 = pd.DataFrame(df1)\n",
    "        df2 = np.log(calib_new[yfeatures]+0.1).values - calib_new[upper_quantile_name+'_fair'].values\n",
    "        df2 = pd.DataFrame(df2)\n",
    "    else:\n",
    "        df1 = calib_new[lower_quantile_name].values - np.log(calib_new[yfeatures]+0.1).values\n",
    "        df1 = pd.DataFrame(df1)\n",
    "        df2 = np.log(calib_new[yfeatures]+0.1).values - calib_new[upper_quantile_name].values\n",
    "        df2 = pd.DataFrame(df2)\n",
    "\n",
    "    E_calib = pd.concat([df1,df2],axis=1).max(axis=1)\n",
    "    Q_09_calib=np.quantile(E_calib,.9*(1+1/E_calib.shape[0]))\n",
    "    return Q_09_calib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "8bc859fc-3529-41a0-9bfe-f9300c73f06d",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "def test_fair_interval(calib_new, test_new, Q_09_calib):\n",
    "\n",
    "# give fair prediction interval for the test sets\n",
    "    calib_group = calib_new.groupby(by=protected_features)\n",
    "    test_pred_fair = pd.DataFrame(columns=[lower_quantile_name+'_fair',upper_quantile_name+'_fair'])\n",
    "    for index, row in test_new.iterrows():\n",
    "        #row=test_new.iloc[index,:]\n",
    "\n",
    "        calib_subgroup = calib_group.get_group(row[protected_features])\n",
    "\n",
    "        flag = 1\n",
    "        for quantile_name in [lower_quantile_name, upper_quantile_name]:\n",
    "            rank1 = calib_subgroup[calib_subgroup[quantile_name]<row[quantile_name]].shape[0] \n",
    "            rank2 = np.random.uniform()*calib_subgroup[calib_subgroup[quantile_name]==row[quantile_name]].shape[0] + 1 \n",
    "            rank = (rank1 + rank2)/(calib_subgroup.shape[0] + 1)\n",
    "\n",
    "            ks = np.quantile(calib_subgroup[quantile_name+'_fair'],rank)\n",
    "\n",
    "            test_pred_fair.loc[index,quantile_name+'_fair']= ks-flag*Q_09_calib\n",
    "            flag = -1\n",
    "            \n",
    "    return test_pred_fair"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "7cc5e851-6b3f-4291-b2b2-366c99bb3d9d",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "from scipy import stats\n",
    "def emp_KS_dist(y_vec, sensitv_vec):\n",
    "\n",
    "# compute KS distance of give prediction: y_vec\n",
    "    uniq_sens = np.unique(sensitv_vec)\n",
    "    sens_num = len(uniq_sens)\n",
    "    ks_dist_vec = []\n",
    "    for s in range(sens_num):\n",
    "        for s_prime in range(s):\n",
    "            indices = [i for i, x in enumerate(sensitv_vec) if x == uniq_sens[s]]\n",
    "            indices_prime = [i for i, x in enumerate(sensitv_vec) if x == uniq_sens[s_prime]]\n",
    "            ks_dist_vec.append(stats.ks_2samp(y_vec[indices], y_vec[indices_prime])[0])\n",
    "    #print(ks_dist_vec)\n",
    "    return max(ks_dist_vec)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "7c2bbbaf-e05d-4f7a-bbf5-54bd172c2871",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "def full_process(df, seed):\n",
    "\n",
    "# CFQP\n",
    "\n",
    "    tmp, test = train_test_split(df,train_size=0.8, shuffle=True, random_state=seed)\n",
    "    train, calib = train_test_split(tmp,train_size=0.5, shuffle=True, random_state=seed)\n",
    "    \n",
    "    \n",
    "    y_train = np.log(train[yfeatures]+0.1)\n",
    "    #y_train = train[yfeatures]\n",
    "    X_train = train.loc[:,xfeatures]\n",
    "    X_test = test.loc[:,xfeatures]\n",
    "    y_test = np.log(test[yfeatures]+0.1)\n",
    "    \n",
    "    calib_linear, test_linear = rf_quantile(X_train,y_train, calib, test)\n",
    "    calib_new = quantl_syn(calib_new=calib_linear,pred_col=lower_quantile_name,protected_col=protected_features)\n",
    "    calib_new = quantl_syn(calib_new=calib_new,pred_col=upper_quantile_name,protected_col=protected_features)\n",
    "    \n",
    "    Q_09_calib_fair = get_Q_calib(calib_new, fair_flag=True)\n",
    "    Q_09_calib_unfair = get_Q_calib(calib_new, fair_flag=False)\n",
    "#     print(Q_09_calib_fair, Q_09_calib_unfair)\n",
    "    \n",
    "    test_pred_fair = test_fair_interval(calib_new, test_linear, Q_09_calib_fair)\n",
    "    test_pred_unfair = pd.DataFrame(columns=[lower_quantile_name+'_fair', upper_quantile_name+'_fair'])\n",
    "    test_pred_unfair[lower_quantile_name+'_fair'] = test_linear[lower_quantile_name] - Q_09_calib_unfair\n",
    "    test_pred_unfair[upper_quantile_name+'_fair'] = test_linear[upper_quantile_name] + Q_09_calib_unfair\n",
    "    \n",
    "    \n",
    "    test_linear_fair = pd.concat([test_linear,test_pred_fair],axis=1)\n",
    "    test_linear_unfair = pd.concat([test_linear,test_pred_unfair],axis=1)\n",
    "    \n",
    "    coverage_test_fair, length_test_fair = get_coverage(test_linear_fair)\n",
    "    coverage_test_unfair, length_test_unfair = get_coverage(test_linear_unfair)\n",
    "    \n",
    "    \n",
    "    KS_fair_lower = emp_KS_dist(test_pred_fair['qt_pred_0.05_fair'].values,test[protected_features].values)\n",
    "    KS_fair_upper = emp_KS_dist(test_pred_fair['qt_pred_0.95_fair'].values,test[protected_features].values)\n",
    "    \n",
    "    \n",
    "    KS_unfair_lower = emp_KS_dist(test_pred_unfair['qt_pred_0.05_fair'].values,test[protected_features].values)\n",
    "    KS_unfair_upper = emp_KS_dist(test_pred_unfair['qt_pred_0.95_fair'].values,test[protected_features].values)\n",
    "    \n",
    "    return ([coverage_test_fair,length_test_fair, KS_fair_lower, KS_fair_upper],[coverage_test_unfair, length_test_unfair, KS_unfair_lower, KS_unfair_upper] )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "400ce3d4-aec7-4aab-8514-d2a845edd954",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "from tqdm.notebook import trange, tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "916aedb8-52ea-4ada-ac7b-6ae0318c1f28",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c85eac2dff4345ea9be4d5d6297099d5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/200 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.0072786739395618305 9.272160550466424e-08\n",
      "0.007021883680351504 0.0038867495429987225\n",
      "0.006986235734986135 9.272160550466424e-08\n",
      "0.007007291205934241 9.272160550466424e-08\n",
      "0.007051305376685857 9.272160550466424e-08\n",
      "0.007124593416632852 9.272160550466424e-08\n",
      "0.007087005648882094 9.272160550466424e-08\n",
      "0.006910891085579566 0.0037244961294121766\n",
      "0.007114395753283542 0.0017627059262887354\n",
      "0.010045913721060895 0.008419307261223415\n",
      "0.0073926663568464335 9.272160550466424e-08\n",
      "0.007160377431541409 0.002660346014380366\n",
      "0.007072012449442777 0.0010249186682283922\n",
      "0.00729431677104575 0.004244716760610062\n",
      "0.007045309893571439 9.272160550466424e-08\n",
      "0.007238313680260067 9.272160550466424e-08\n",
      "0.006942017170989523 0.004488409439787233\n",
      "0.007125717476538007 9.272160550466424e-08\n",
      "0.007200099467458365 9.272160550466424e-08\n",
      "0.007166626628844153 0.00031426042158095837\n",
      "0.007153433079710947 9.272160550466424e-08\n",
      "0.007123716289075155 9.272160550466424e-08\n",
      "0.007372938058011402 0.0046556272233240215\n",
      "0.008500534464257883 0.0046556272233240215\n",
      "0.008641020567422823 0.0046556272233240215\n",
      "0.007094233014886875 9.272160550466424e-08\n",
      "0.008959211126006394 0.00560578515050208\n",
      "0.008842693305812431 0.0042518215669126915\n",
      "0.007104299689189464 0.004488409439787233\n",
      "0.0070623268089238245 0.003650946849680817\n",
      "0.007284437266082744 0.002933604164783409\n",
      "0.007279077482474694 0.004570576882982458\n",
      "0.00707982164950538 9.272160550466424e-08\n",
      "0.0070902560488224076 0.0019792707273808777\n",
      "0.007135721665592065 0.0003914876093959162\n",
      "0.007128557333993868 9.272160550466424e-08\n",
      "0.006990732823498824 0.0018167684358838324\n",
      "0.007261195654042041 9.272160550466424e-08\n",
      "0.0070468018474398875 0.002155200365187658\n",
      "0.007133722810034104 0.005424522366447424\n",
      "0.008732847868148763 0.003762084661206099\n",
      "0.010037706778806641 0.006983457621757733\n",
      "0.006996420300165784 4.926696637438206e-08\n",
      "0.006960066175539126 0.0046556272233240215\n",
      "0.007107650506799512 9.272160550466424e-08\n",
      "0.009609463612421619 0.004570576882982458\n",
      "0.007217913282408617 0.0018622325032642273\n",
      "0.00963617518334443 0.004655628288728549\n",
      "0.007012909195838635 9.272160550466424e-08\n",
      "0.007214551279698167 0.0017861244465986572\n",
      "0.007215849155122189 0.003482673943365907\n",
      "0.0073073156973921805 0.00039580897912790196\n",
      "0.008939336701228484 0.0046556272233240215\n",
      "0.007155682336146718 9.272160550466424e-08\n",
      "0.007669122283622673 0.004570576882982458\n",
      "0.007233247606210913 0.004488409439787233\n",
      "0.007003541294057225 9.272160550466424e-08\n",
      "0.007728272055719494 0.004570576882982458\n",
      "0.006968007637201179 0.0030391697076260333\n",
      "0.007098282306643444 0.004488409439787233\n",
      "0.007178653750643971 9.272160550466424e-08\n",
      "0.007031175193195072 0.002177567868784639\n",
      "0.007250420099147004 9.272160550466424e-08\n",
      "0.007030151945297991 0.001110776232350227\n",
      "0.006996114222092231 9.272160550466424e-08\n",
      "0.007197242736887599 9.272160550466424e-08\n",
      "0.010401726972348972 0.005104145686259555\n",
      "0.007286328374554341 0.0002865621536457878\n",
      "0.008783340574345377 0.0046556272233240215\n",
      "0.007209428948450114 0.0025780211568925487\n",
      "0.007130198796535758 0.0011515767956680435\n",
      "0.006941754026536628 9.272160550466424e-08\n",
      "0.008930516780894848 0.004865945616810529\n",
      "0.008372899881195602 0.004411645758777457\n",
      "0.007199513250910616 9.272160550466424e-08\n",
      "0.008647024133712655 0.004655626890325504\n",
      "0.007247388435123181 0.0036983389579987257\n",
      "0.008752749073726165 0.006268097329263193\n",
      "0.0071845184452068445 9.272160550466424e-08\n",
      "0.007097518201004682 0.0046556272233240215\n",
      "0.006925842000556681 9.272160550466424e-08\n",
      "0.007072628212308807 0.004488444041603852\n",
      "0.007165418868015028 0.0009011137175852947\n",
      "0.007104973625150723 0.0021995271631657072\n",
      "0.007070099195052304 9.272160550466424e-08\n",
      "0.007042480802148088 9.272160550466424e-08\n",
      "0.008339797683392706 0.004017419333099273\n",
      "0.007289145043624767 0.0046556272233240215\n",
      "0.007194930143452627 5.359373256297317e-08\n",
      "0.007245200765023399 0.0022527961141325906\n",
      "0.007185233444964467 0.004303761629836171\n",
      "0.007792036841928446 0.005241811834941524\n",
      "0.007026099477117853 5.359373256297317e-08\n",
      "0.0071755532923512 0.0030301201593697243\n",
      "0.007001619679303683 9.272160550466424e-08\n",
      "0.007118894757439431 9.272160550466424e-08\n",
      "0.006908358550831828 9.272160550466424e-08\n",
      "0.007716410780353273 0.004394000069478613\n",
      "0.007144126947155716 0.0046556272233240215\n",
      "0.007115913688279463 9.272160550466424e-08\n",
      "0.008681087768856857 0.004652562283285796\n",
      "0.007074888795662648 0.0029847707697872178\n",
      "0.0071436036719028095 9.272160550466424e-08\n",
      "0.007212731118550142 9.272160550466424e-08\n",
      "0.007099867450512054 0.0026625582276134704\n",
      "0.0072119780535246925 0.004488452699829892\n",
      "0.0071259970931882854 9.272160550466424e-08\n",
      "0.007114596069275336 0.004640511029335145\n",
      "0.0071198760424477925 9.272160550466424e-08\n",
      "0.0072378009071090155 9.272160550466424e-08\n",
      "0.0070234123227796985 0.004115062396788114\n",
      "0.00708812546875448 1.5770689674607327e-08\n",
      "0.007274628890986623 9.272160550466424e-08\n",
      "0.011640870971167037 0.007618151634621731\n",
      "0.0072825080051655 0.002586018071906053\n",
      "0.007069677045839384 9.272160550466424e-08\n",
      "0.007148677856760877 9.272160550466424e-08\n",
      "0.0070643130895935435 9.272160550466424e-08\n",
      "0.007015800459538202 9.272160550466424e-08\n",
      "0.007250419520577367 9.272160550466424e-08\n",
      "0.007126434829002992 0.0001996421757933668\n",
      "0.0071764545163715 0.003945422883650721\n",
      "0.007672454723031216 0.004655626890325504\n",
      "0.0072962667592277874 0.0003813474899676145\n",
      "0.007139284518795286 0.0033722586516737344\n",
      "0.007162073276839376 7.350333630640193e-08\n",
      "0.007258413158943631 0.006207513918909946\n",
      "0.008717994055903233 0.0036643739234777724\n",
      "0.007226392155506911 9.272160550466424e-08\n",
      "0.007270758578331371 0.004488452699829892\n",
      "0.00771417339268994 0.004332840619166332\n",
      "0.008938790951560804 0.004638604665125492\n",
      "0.007100596930292635 0.0044093071909920845\n",
      "0.0073404753713588455 0.0046556272233240215\n",
      "0.007266651185755979 0.00039963735819228946\n",
      "0.007208970635038003 9.272160550466424e-08\n",
      "0.007110322249380019 0.0013413323229236615\n",
      "0.007084771114409083 0.0046556272233240215\n",
      "0.009609263443829747 0.004885038952546407\n",
      "0.0077621062080100636 0.006207513918909946\n",
      "0.009184026898169129 0.0046556272233240215\n",
      "0.007092123257072203 0.004488409439787233\n",
      "0.007185609962383044 9.272160550466424e-08\n",
      "0.007709347414281152 0.0018554385579971143\n",
      "0.007447210548638061 0.002713096722217166\n",
      "0.007048085545093841 9.272160550466424e-08\n",
      "0.007075652111524722 0.004409319079745166\n",
      "0.007209015201731006 0.0028906969974640487\n",
      "0.0073463089248773095 0.0032099736904286236\n",
      "0.007188017290482573 0.004253643056393046\n",
      "0.009133656729715778 0.0046556272233240215\n",
      "0.007926682484200153 0.004570576882982458\n",
      "0.009229046705230438 0.004655627156676458\n",
      "0.007161490411019322 0.0030360524289327843\n",
      "0.008448134946131614 0.001456383832539215\n",
      "0.007242746933701749 9.272160550466424e-08\n",
      "0.007091738070072129 9.272160550466424e-08\n",
      "0.00921212541381699 0.006494033375790602\n",
      "0.00711884756684622 9.272160550466424e-08\n",
      "0.007143261565457859 0.0018216282965730544\n",
      "0.00971177264694172 0.005586758583558282\n",
      "0.007296521845716608 0.004258962266960875\n",
      "0.00950542154953579 0.009226111384700975\n",
      "0.009648793114356347 0.006184809616695257\n",
      "0.007301033395427758 0.00034138149934743317\n",
      "0.00794505791233796 0.0046556272233240215\n",
      "0.008755425165074104 0.0046556272233240215\n",
      "0.007277907613401968 0.0018608178818750238\n",
      "0.007181872521513366 0.004118704771039372\n",
      "0.007093884876631762 9.272160550466424e-08\n",
      "0.007090774033905234 0.0009151598604261082\n",
      "0.008564553514736062 0.004488452699829892\n",
      "0.007239705612545944 0.003718163491446517\n",
      "0.007222643130522677 9.272160550466424e-08\n",
      "0.007282321100984479 0.004638604665125492\n",
      "0.007183965215195265 0.004488445068748265\n",
      "0.007299415773089123 0.0045548239852913975\n",
      "0.007317560205847062 0.003266326913141001\n",
      "0.007247492788519327 0.003345943045303902\n",
      "0.008692903502941501 0.004253643287731763\n",
      "0.007089639146968807 0.004488452699829892\n",
      "0.008297315601439903 0.003791774264323285\n",
      "0.007227101827055993 0.004441000118833143\n",
      "0.007142612921853253 9.272160550466424e-08\n",
      "0.007334631315531137 0.0046386049314764465\n",
      "0.011382202006519937 0.0066205622882859255\n",
      "0.007350184981460561 0.0046556272233240215\n",
      "0.007191053549587156 9.272160550466424e-08\n",
      "0.0072083723393179255 0.004570576882982458\n",
      "0.008149899209347135 0.004570576882982458\n",
      "0.007137252238456515 0.005914654100650273\n",
      "0.007015727974223562 0.0031037408828728207\n",
      "0.007301040588735042 9.272160550466424e-08\n",
      "0.007123425830809271 9.272160550466424e-08\n",
      "0.007563452920708481 0.0045705601132945885\n",
      "0.007542214987485643 0.0046556272233240215\n",
      "0.007194715778785277 0.0046556272233240215\n",
      "0.007636550863010354 0.0019634606916429185\n",
      "0.007174266540424412 9.272160550466424e-08\n",
      "0.007099908806043498 9.272160550466424e-08\n"
     ]
    }
   ],
   "source": [
    "## replicate full process for 200 times\n",
    "fair_result = []\n",
    "unfair_result = []\n",
    "with open('meps_rf_progress.txt', 'w') as f:\n",
    "    for i in trange(200):\n",
    "        print(i, file=f,flush=True)\n",
    "        a,b = full_process(df,i)\n",
    "        fair_result.append(a)\n",
    "        unfair_result.append(b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "d2539ac3-ca75-4573-85e9-65fdde812d2d",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "fair_df = pd.DataFrame(fair_result)\n",
    "unfair_df = pd.DataFrame(unfair_result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "594021c7-e578-4bd6-80f5-6239924a9eda",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>200.000000</td>\n",
       "      <td>200.000000</td>\n",
       "      <td>200.000000</td>\n",
       "      <td>200.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>90.378831</td>\n",
       "      <td>0.390673</td>\n",
       "      <td>0.027440</td>\n",
       "      <td>0.039661</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>0.604041</td>\n",
       "      <td>0.010615</td>\n",
       "      <td>0.010884</td>\n",
       "      <td>0.012278</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>88.729246</td>\n",
       "      <td>0.360387</td>\n",
       "      <td>0.009088</td>\n",
       "      <td>0.017937</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>90.006386</td>\n",
       "      <td>0.384227</td>\n",
       "      <td>0.019595</td>\n",
       "      <td>0.030247</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>90.421456</td>\n",
       "      <td>0.390550</td>\n",
       "      <td>0.024794</td>\n",
       "      <td>0.037759</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>90.804598</td>\n",
       "      <td>0.398019</td>\n",
       "      <td>0.033397</td>\n",
       "      <td>0.045582</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>91.730524</td>\n",
       "      <td>0.421139</td>\n",
       "      <td>0.065694</td>\n",
       "      <td>0.085602</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                0           1           2           3\n",
       "count  200.000000  200.000000  200.000000  200.000000\n",
       "mean    90.378831    0.390673    0.027440    0.039661\n",
       "std      0.604041    0.010615    0.010884    0.012278\n",
       "min     88.729246    0.360387    0.009088    0.017937\n",
       "25%     90.006386    0.384227    0.019595    0.030247\n",
       "50%     90.421456    0.390550    0.024794    0.037759\n",
       "75%     90.804598    0.398019    0.033397    0.045582\n",
       "max     91.730524    0.421139    0.065694    0.085602"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fair_df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "bc2d4135-b2c0-4d7f-8090-efbc05513924",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>200.000000</td>\n",
       "      <td>200.000000</td>\n",
       "      <td>200.000000</td>\n",
       "      <td>200.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>90.070083</td>\n",
       "      <td>0.381498</td>\n",
       "      <td>0.189097</td>\n",
       "      <td>0.300498</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>0.647029</td>\n",
       "      <td>0.009917</td>\n",
       "      <td>0.020065</td>\n",
       "      <td>0.028250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>88.250319</td>\n",
       "      <td>0.349214</td>\n",
       "      <td>0.139599</td>\n",
       "      <td>0.232256</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>89.591315</td>\n",
       "      <td>0.375069</td>\n",
       "      <td>0.176403</td>\n",
       "      <td>0.282440</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>90.070243</td>\n",
       "      <td>0.381436</td>\n",
       "      <td>0.188149</td>\n",
       "      <td>0.298272</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>90.517241</td>\n",
       "      <td>0.388147</td>\n",
       "      <td>0.201378</td>\n",
       "      <td>0.316819</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>91.762452</td>\n",
       "      <td>0.406162</td>\n",
       "      <td>0.263936</td>\n",
       "      <td>0.389650</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                0           1           2           3\n",
       "count  200.000000  200.000000  200.000000  200.000000\n",
       "mean    90.070083    0.381498    0.189097    0.300498\n",
       "std      0.647029    0.009917    0.020065    0.028250\n",
       "min     88.250319    0.349214    0.139599    0.232256\n",
       "25%     89.591315    0.375069    0.176403    0.282440\n",
       "50%     90.070243    0.381436    0.188149    0.298272\n",
       "75%     90.517241    0.388147    0.201378    0.316819\n",
       "max     91.762452    0.406162    0.263936    0.389650"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "unfair_df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4a8593b7-73a5-4710-8e82-4ce2f967f368",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}