{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Summary table for **COMPAS** data\n",
    "\n",
    "Here we extract the summary measures for *COMPAS* data for four classifiers: (1) baseline, (2) project, (3) reduction, and (4) SenSR.\n",
    "For each of the classifiers we perform gradient flow attack with step size = 5e-3 where $i$ is the current step number. We perform 200 steps of attacks on each data points. \n",
    "\n",
    "First, we load the summary data for all experiments, and compile it in a panda dataframe. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import re\n",
    "import pandas as pd\n",
    "with open('summary.out', 'r') as f:\n",
    "    data = f.read()\n",
    "entries = re.split('\\n', data)[:-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "entries_dict = []\n",
    "for e in entries:\n",
    "    try:\n",
    "        entries_dict.append(eval(e))\n",
    "    except:\n",
    "        continue\n",
    "data = pd.DataFrame(entries_dict)\n",
    "data['reject'] = data['pval'] < 0.05\n",
    "lr = 5e-3\n",
    "data_lr = data.loc[data['lr'] == lr]\n",
    "data_lr = data_lr.rename(columns= {'average_odds_difference_gen': 'AOD-gen', 'average_odds_difference_race': 'AOD-race', 'statistical_parity_difference_gen': 'SPD-gen', 'statistical_parity_difference_race': 'SPD-race',\\\n",
    "                                   'equal_opportunity_difference_race': 'EOD-race', 'equal_opportunity_difference_gen': 'EOD-gen'})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We only extract the following measures: balanced accuracy, average odds difference for gender and race and lower bound and proportion of times the test being rejected. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "measure = ['bal_acc', 'AOD-gen', 'AOD-race', 'lb', 'reject']\n",
    "agg_dict = dict()\n",
    "for key in measure:\n",
    "    agg_dict[key] = ['mean', 'std']\n",
    "result = data_lr.groupby(['algo'], as_index=False).agg(agg_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "rows = ['sensr', 'reduction', 'baseline', 'project']\n",
    "row_names = ['Baseline', 'Project', 'Reduction','SenSR']\n",
    "colnames=['Balanced Acc', '$\\\\text{AOD}_{\\\\text{gen}}$', '$\\\\text{AOD}_{\\\\text{race}}$', '$T_n$', 'Rejection Prop']\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i, c in enumerate(measure):\n",
    "    if c == 'reject':\n",
    "        idx_best = (1-result[c]['mean']).idxmin()\n",
    "        result[colnames[i]] = (result[c]['mean']).apply(\"{:.1f}\".format)\n",
    "        #result.at[idx_best, colnames[i]] = '\\\\textbf{' + result[colnames[i]][idx_best] + '}'\n",
    "        \n",
    "    elif c == 'bal_acc':\n",
    "        idx_best = (result[c]['mean']).idxmax()\n",
    "        result[colnames[i]] = result[c]['mean'].apply(\"{:.3f}\".format)+ '$\\pm$' + result[c]['std'].apply(\"{:.3f}\".format)\n",
    "        best_val = result[colnames[i]][idx_best].split('$\\pm$')\n",
    "        best_val = '$\\pm$'.join(['\\\\textbf{' + best_val[0] + '}', best_val[1]])\n",
    "        #result.at[idx_best, colnames[i]] = best_val\n",
    "    elif c == 'lb':\n",
    "        idx_best = (result[c]['mean']).idxmin()\n",
    "        result[colnames[i]] = result[c]['mean'].apply(\"{:.3f}\".format)+ '$\\pm$' + result[c]['std'].apply(\"{:.3f}\".format)\n",
    "        best_val = result[colnames[i]][idx_best].split('$\\pm$')\n",
    "        best_val = '$\\pm$'.join(['\\\\textbf{' + best_val[0] + '}', best_val[1]])\n",
    "        #result.at[idx_best, colnames[i]] = best_val\n",
    "    else:\n",
    "        idx_best = (result[c]['mean']).abs().idxmin()\n",
    "        result[colnames[i]] = result[c]['mean'].apply(\"{:.3f}\".format)+ '$\\pm$' + result[c]['std'].apply(\"{:.3f}\".format)\n",
    "        best_val = result[colnames[i]][idx_best].split('$\\pm$')\n",
    "        best_val = '$\\pm$'.join(['\\\\textbf{' + best_val[0] + '}', best_val[1]])\n",
    "        #result.at[idx_best, colnames[i]] = best_val\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>Balanced Acc</th>\n",
       "      <th>$\\text{AOD}_{\\text{gen}}$</th>\n",
       "      <th>$\\text{AOD}_{\\text{race}}$</th>\n",
       "      <th>$T_n$</th>\n",
       "      <th>Rejection Prop</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Baseline</th>\n",
       "      <td>0.675$\\pm$0.013</td>\n",
       "      <td>0.218$\\pm$0.041</td>\n",
       "      <td>0.260$\\pm$0.026</td>\n",
       "      <td>2.385$\\pm$0.262</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Project</th>\n",
       "      <td>0.641$\\pm$0.017</td>\n",
       "      <td>0.039$\\pm$0.029</td>\n",
       "      <td>0.227$\\pm$0.021</td>\n",
       "      <td>1.161$\\pm$0.145</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Reduction</th>\n",
       "      <td>0.652$\\pm$0.012</td>\n",
       "      <td>-0.014$\\pm$0.054</td>\n",
       "      <td>0.037$\\pm$0.039</td>\n",
       "      <td>1.763$\\pm$0.069</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SenSR</th>\n",
       "      <td>0.640$\\pm$0.022</td>\n",
       "      <td>0.046$\\pm$0.031</td>\n",
       "      <td>0.237$\\pm$0.018</td>\n",
       "      <td>1.098$\\pm$0.061</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              Balanced Acc $\\text{AOD}_{\\text{gen}}$  \\\n",
       "                                                       \n",
       "Baseline   0.675$\\pm$0.013           0.218$\\pm$0.041   \n",
       "Project    0.641$\\pm$0.017           0.039$\\pm$0.029   \n",
       "Reduction  0.652$\\pm$0.012          -0.014$\\pm$0.054   \n",
       "SenSR      0.640$\\pm$0.022           0.046$\\pm$0.031   \n",
       "\n",
       "          $\\text{AOD}_{\\text{race}}$            $T_n$ Rejection Prop  \n",
       "                                                                      \n",
       "Baseline             0.260$\\pm$0.026  2.385$\\pm$0.262            1.0  \n",
       "Project              0.227$\\pm$0.021  1.161$\\pm$0.145            0.2  \n",
       "Reduction            0.037$\\pm$0.039  1.763$\\pm$0.069            1.0  \n",
       "SenSR                0.237$\\pm$0.018  1.098$\\pm$0.061            0.0  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ind = dict()\n",
    "for i, expt in enumerate(row_names):\n",
    "    ind[i] = expt\n",
    "    res = result[colnames].rename(index=ind)\n",
    "res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\\begin{tabular}{lccccc}\n",
      "\\toprule\n",
      "{} &     Balanced Acc & $\\text{AOD}_{\\text{gen}}$ & $\\text{AOD}_{\\text{race}}$ &            $T_n$ & Rejection Prop \\\\\n",
      "{} \\\\\n",
      "\\midrule\n",
      "Baseline  &  0.675$\\pm$0.013 &           0.218$\\pm$0.041 &            0.260$\\pm$0.026 &  2.385$\\pm$0.262 &            1.0 \\\\\n",
      "Project   &  0.641$\\pm$0.017 &           0.039$\\pm$0.029 &            0.227$\\pm$0.021 &  1.161$\\pm$0.145 &            0.2 \\\\\n",
      "Reduction &  0.652$\\pm$0.012 &          -0.014$\\pm$0.054 &            0.037$\\pm$0.039 &  1.763$\\pm$0.069 &            1.0 \\\\\n",
      "SenSR     &  0.640$\\pm$0.022 &           0.046$\\pm$0.031 &            0.237$\\pm$0.018 &  1.098$\\pm$0.061 &            0.0 \\\\\n",
      "\\bottomrule\n",
      "\\end{tabular}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(res.to_latex(escape=False, column_format='l' + 'c'*len(colnames)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.7.4 64-bit",
   "language": "python",
   "name": "python37464bit48aa32fa6dba4f1bbd692e320b15fd93"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
