{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "qcBlL7yWQj4u"
   },
   "source": [
    "Data PreProcessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "1I6Ghx8dLUxZ"
   },
   "outputs": [],
   "source": [
    "# Data extraction and one-hot vector generation are done using the following repository\n",
    "# https://github.com/itdxer/adult-dataset-analysis/blob/master/Classification.ipynb\n",
    "\n",
    "import os\n",
    "import sys\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "%matplotlib inline\n",
    "\n",
    "prop_cycle = plt.rcParams['axes.prop_cycle']\n",
    "colors = prop_cycle.by_key()['color']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "vtBGhAVXLZur"
   },
   "outputs": [],
   "source": [
    "# Absolute directory derived from __name__: when __name__ holds no path separator,\n",
    "# dirname() yields '' and abspath('') resolves to the current working directory.\n",
    "CURRENT_DIR = os.path.abspath(os.path.dirname(__name__))\n",
    "# NOTE(review): DATA_DIR is never joined into the file paths below, so both dataset\n",
    "# files are looked up relative to the working directory -- confirm this is intended.\n",
    "DATA_DIR = os.path.join('data')\n",
    "TRAIN_DATA_FILE, TEST_DATA_FILE = (\n",
    "    os.path.join(file_name) for file_name in ('adult.data', 'adult.test')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "zSMBfEVILdZx"
   },
   "outputs": [],
   "source": [
    "from collections import OrderedDict\n",
    "\n",
    "data_types = OrderedDict([\n",
    "    (\"age\", \"int\"),\n",
    "    (\"workclass\", \"category\"),\n",
    "    (\"final_weight\", \"int\"),  # originally it was called fnlwgt\n",
    "    (\"education\", \"category\"),\n",
    "    (\"education_num\", \"int\"),\n",
    "    (\"marital_status\", \"category\"),\n",
    "    (\"occupation\", \"category\"),\n",
    "    (\"relationship\", \"category\"),\n",
    "    (\"race\", \"category\"),\n",
    "    (\"sex\", \"category\"),\n",
    "    (\"capital_gain\", \"float\"),  # required because of NaN values\n",
    "    (\"capital_loss\", \"int\"),\n",
    "    (\"hours_per_week\", \"int\"),\n",
    "    (\"native_country\", \"category\"),\n",
    "    (\"income_class\", \"category\"),\n",
    "])\n",
    "target_column = \"income_class\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "0QIOzhEeLiA4",
    "outputId": "58e494a5-590c-4b07-a8c2-ae230d1dfd4e"
   },
   "outputs": [],
   "source": [
    "def read_dataset(path):\n",
    "    \"\"\"Read one CSV file into a DataFrame whose columns follow ``data_types``.\n",
    "\n",
    "    The keys of the module-level ``data_types`` mapping are used as column names and\n",
    "    its values as dtypes.\n",
    "\n",
    "    Args:\n",
    "        path: location of the CSV file, passed straight to ``pd.read_csv``.\n",
    "\n",
    "    Returns:\n",
    "        The parsed DataFrame.\n",
    "    \"\"\"\n",
    "    # Values that are parsed as missing (NaN), per column.\n",
    "    missing_markers = {\n",
    "        'capital_gain': 99999,\n",
    "        'workclass': '?',\n",
    "        'native_country': '?',\n",
    "        'occupation': '?',\n",
    "    }\n",
    "    reader_options = dict(\n",
    "        names=data_types,\n",
    "        index_col=None,\n",
    "        comment='|',  # the test file contains a '|' comment\n",
    "        skipinitialspace=True,  # ignore the blanks that follow each delimiter\n",
    "        na_values=missing_markers,\n",
    "        dtype=data_types,\n",
    "    )\n",
    "    return pd.read_csv(path, **reader_options)\n",
    "\n",
    "def clean_dataset(data):\n",
    "    \"\"\"Return a cleaned copy of ``data``; the frame passed in is left untouched.\n",
    "\n",
    "    Steps, in order:\n",
    "      1. strip the trailing dot from the target labels (the test file has one),\n",
    "         so both files use the same label names;\n",
    "      2. drop the ``final_weight`` column, which is not used for classification;\n",
    "      3. drop duplicated rows;\n",
    "      4. binarize the target: ``'>50K'`` becomes 1, everything else 0.\n",
    "\n",
    "    Args:\n",
    "        data: DataFrame holding a ``final_weight`` column and the column named by\n",
    "            the module-level ``target_column``.\n",
    "\n",
    "    Returns:\n",
    "        A new DataFrame with an integer target column.\n",
    "    \"\"\"\n",
    "    unified_labels = data[target_column].str.rstrip('.').astype('category')\n",
    "\n",
    "    # assign() builds new frames instead of writing into the caller's frame or into\n",
    "    # the filtered result of drop_duplicates().\n",
    "    deduplicated = (\n",
    "        data\n",
    "        .assign(**{target_column: unified_labels})\n",
    "        .drop('final_weight', axis=1)\n",
    "        .drop_duplicates()\n",
    "    )\n",
    "\n",
    "    is_positive = (deduplicated[target_column] == '>50K').astype(int)\n",
    "    return deduplicated.assign(**{target_column: is_positive})\n",
    "\n",
    "def deduplicate(train_data, test_data):\n",
    "    \"\"\"Remove rows that occur in both the training and the test frame.\n",
    "\n",
    "    The two frames are stacked (training rows first) and duplicated rows are\n",
    "    dropped, keeping the first occurrence, so a row present in both sets survives\n",
    "    only in the training part. The helper ``is_test`` marker is excluded from the\n",
    "    comparison; if it were included, a training row and its test twin would always\n",
    "    differ and nothing would be removed across the two sets.\n",
    "\n",
    "    Neither input frame is modified.\n",
    "\n",
    "    Args:\n",
    "        train_data: training DataFrame with a ``native_country`` column.\n",
    "        test_data: test DataFrame with the same columns.\n",
    "\n",
    "    Returns:\n",
    "        ``(train_data, test_data)`` as new DataFrames without the marker column.\n",
    "    \"\"\"\n",
    "    data = pd.concat([train_data.assign(is_test=0), test_data.assign(is_test=1)])\n",
    "    # Concatenation can turn this column into object dtype (presumably because the\n",
    "    # category sets of the two frames differ), so it is cast back to category.\n",
    "    data['native_country'] = data.native_country.astype('category')\n",
    "\n",
    "    compared_columns = [column for column in data.columns if column != 'is_test']\n",
    "    data = data.drop_duplicates(subset=compared_columns)\n",
    "\n",
    "    train_part = data[data.is_test == 0].drop('is_test', axis=1)\n",
    "    test_part = data[data.is_test == 1].drop('is_test', axis=1)\n",
    "    return train_part, test_part\n",
    "\n",
    "# clean_dataset() drops duplicated rows inside each file separately; rows shared by\n",
    "# the training and the test file are handed to deduplicate(), because duplicates\n",
    "# between the two datasets might give overconfident results.\n",
    "train_data, test_data = deduplicate(\n",
    "    clean_dataset(read_dataset(TRAIN_DATA_FILE)),\n",
    "    clean_dataset(read_dataset(TEST_DATA_FILE)),\n",
    ")\n",
    "positive_share = np.mean(train_data.income_class)\n",
    "print(\"Percent of the positive classes in the training data: {:.2%}\".format(positive_share))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Fdt82QIWLlDK"
   },
   "outputs": [],
   "source": [
    "def get_categorical_columns(data, cat_columns=None, fillna=True):\n",
    "    \"\"\"Return a copy of the categorical columns of ``data``.\n",
    "\n",
    "    Args:\n",
    "        data: source DataFrame; it is never modified.\n",
    "        cat_columns: column names to return. When ``None``, every column with\n",
    "            ``category`` dtype is selected.\n",
    "        fillna: when true, missing values are replaced by the category ``'Other'``\n",
    "            (added to a column's categories if it is not there yet).\n",
    "\n",
    "    Returns:\n",
    "        A new DataFrame holding only the selected columns.\n",
    "    \"\"\"\n",
    "    if cat_columns is None:\n",
    "        selected = data.select_dtypes('category')\n",
    "    else:\n",
    "        selected = data[cat_columns]\n",
    "    # Explicit copy: the column assignments below must not reach the caller's frame.\n",
    "    cat_data = selected.copy()\n",
    "\n",
    "    if fillna:\n",
    "        # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.\n",
    "        for colname, series in cat_data.items():\n",
    "            if 'Other' not in series.cat.categories:\n",
    "                series = series.cat.add_categories(['Other'])\n",
    "\n",
    "            cat_data[colname] = series.fillna('Other')\n",
    "\n",
    "    return cat_data\n",
    "\n",
    "def features_with_one_hot_encoded_categories(data, cat_columns=None, fillna=True):\n",
    "    \"\"\"Build the feature matrix ``X`` and the target vector ``y`` from ``data``.\n",
    "\n",
    "    The columns returned by ``get_categorical_columns`` are one-hot encoded with\n",
    "    ``pd.get_dummies`` and placed after five numeric columns, so the one-hot part\n",
    "    of ``X`` starts at column index 5.\n",
    "\n",
    "    Args:\n",
    "        data: DataFrame holding the numeric columns listed below and the column\n",
    "            named by the module-level ``target_column``.\n",
    "        cat_columns: forwarded to ``get_categorical_columns``.\n",
    "        fillna: forwarded to ``get_categorical_columns``.\n",
    "\n",
    "    Returns:\n",
    "        ``(X, y)``: ``X`` is a float array in which missing values are replaced by\n",
    "        0, ``y`` holds the values of the target column.\n",
    "    \"\"\"\n",
    "    cat_data = get_categorical_columns(data, cat_columns, fillna)\n",
    "    one_hot_data = pd.get_dummies(cat_data)\n",
    "    df = pd.concat([data, one_hot_data], axis=1)\n",
    "\n",
    "    numeric_features = [\n",
    "        'age',\n",
    "        'education_num',\n",
    "        'hours_per_week',\n",
    "        'capital_gain',\n",
    "        'capital_loss',\n",
    "    ]\n",
    "    # 'sex_Other' is left out of the features (presumably because 'sex' has no\n",
    "    # missing values -- TODO confirm). Filtering instead of list.remove() keeps the\n",
    "    # call from failing when that column was never created, e.g. with fillna=False.\n",
    "    one_hot_features = [name for name in one_hot_data.columns if name != 'sex_Other']\n",
    "    features = numeric_features + one_hot_features\n",
    "\n",
    "    X = df[features].fillna(0).values.astype(float)\n",
    "    y = df[target_column].values\n",
    "\n",
    "    return X, y"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "9R21UmFkRD_q"
   },
   "source": [
    "Removing the data points that do not belong to Black or White people"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "97CoHlIsM5ce",
    "outputId": "09d71c80-961c-4ac9-9d8b-b3a1b74a54a0"
   },
   "outputs": [],
   "source": [
    "# Generating training and test data. Column 65 corresponds to the White race\n",
    "# indicator and column 63 to the Black race indicator.\n",
    "X_train, y_train = features_with_one_hot_encoded_categories(train_data)\n",
    "X_test, y_test = features_with_one_hot_encoded_categories(test_data)\n",
    "\n",
    "# Keep only the rows whose White or Black indicator is set.\n",
    "keep_train = (X_train[:, 65] == 1) | (X_train[:, 63] == 1)\n",
    "keep_test = (X_test[:, 65] == 1) | (X_test[:, 63] == 1)\n",
    "X_train, y_train = X_train[keep_train, :], y_train[keep_train]\n",
    "X_test, y_test = X_test[keep_test, :], y_test[keep_test]\n",
    "\n",
    "# From here on X_train / y_train hold the filtered training AND test rows.\n",
    "X_train = np.vstack((X_train, X_test))\n",
    "y_train = np.concatenate([y_train, y_test])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "pDSOGETERDAA"
   },
   "source": [
    "Training a logistic regression classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "a2adK2w6qQmn",
    "outputId": "dbafbf4f-c6ba-4249-b83d-f2ec55fa8dcf"
   },
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "\n",
    "# Logistic regression with default settings, fitted on the stacked feature matrix.\n",
    "logisticRegr = LogisticRegression()\n",
    "logisticRegr.fit(X=X_train, y=y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "XEYbwJGdSAoP"
   },
   "source": [
    "Calculating $\\Pr\\{A\\}$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "7EfM1vr2XH1t"
   },
   "outputs": [],
   "source": [
    "# Share of rows with the Black indicator (column 63) among the rows with either the\n",
    "# Black or the White (column 65) indicator set.\n",
    "n_black = np.sum(X_train[:, 63] == 1)\n",
    "n_white = np.sum(X_train[:, 65] == 1)\n",
    "p_black = n_black / (n_black + n_white)\n",
    "p_white = 1 - p_black"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "JdY871O7SQow"
   },
   "source": [
    "The next two blocks calculate $\\Pr\\{r(X,\\tilde{a}) = \\hat{y}, Y=1, A = a\\}$ for different values of $\\tilde{a}$, $\\hat{y}$ and $a$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "2r--XlYXn_dm"
   },
   "outputs": [],
   "source": [
    "# Two counterfactual copies of X_train: in X_train_all_black every row has the Black\n",
    "# indicator (column 63) set and the White indicator (column 65) cleared, i.e. every\n",
    "# data point is assumed to come from the Black community; X_train_all_white is the\n",
    "# mirror image.\n",
    "X_train_all_black = X_train.copy()\n",
    "X_train_all_black[:, [63, 65]] = (1, 0)\n",
    "\n",
    "X_train_all_white = X_train.copy()\n",
    "X_train_all_white[:, [63, 65]] = (0, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "P6HhEJH4p5xn"
   },
   "outputs": [],
   "source": [
    "# Empirical Pr{r(X, a~) = y_hat, Y = 1, A = a} on the training data, stored as\n",
    "# Pr<a~><y_hat><a> where 0 stands for White (column 65) and 1 for Black (column 63).\n",
    "def _positive_joint_rate(pred, y_hat, race_col):\n",
    "    \"\"\"Fraction of all rows with prediction ``y_hat``, label 1 and ``race_col`` set.\"\"\"\n",
    "    hits = (pred == y_hat) & (X_train[:, race_col] == 1.0) & (y_train == 1)\n",
    "    return hits.sum() / len(y_train)\n",
    "\n",
    "# a~ = 0: every row is presented to the classifier as White.\n",
    "predict = logisticRegr.predict(X_train_all_white)\n",
    "Pr000 = _positive_joint_rate(predict, 0, 65)\n",
    "Pr010 = _positive_joint_rate(predict, 1, 65)\n",
    "Pr001 = _positive_joint_rate(predict, 0, 63)\n",
    "Pr011 = _positive_joint_rate(predict, 1, 63)\n",
    "\n",
    "# a~ = 1: every row is presented to the classifier as Black.\n",
    "predict = logisticRegr.predict(X_train_all_black)\n",
    "Pr100 = _positive_joint_rate(predict, 0, 65)\n",
    "Pr110 = _positive_joint_rate(predict, 1, 65)\n",
    "Pr101 = _positive_joint_rate(predict, 0, 63)\n",
    "Pr111 = _positive_joint_rate(predict, 1, 63)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "Ux9Rzje9S1Iq"
   },
   "source": [
    "The next block calculates $\\Pr\\{r(X,\\tilde{a}) = \\hat{y}, A = a\\}$ for different values of $\\tilde{a}$, $\\hat{y}$ and $a$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "NHkBj3CJqDbq"
   },
   "outputs": [],
   "source": [
    "# Empirical Pr{r(X, a~) = y_hat, A = a} on the training data, stored as\n",
    "# Pr<a~><y_hat><a>NoY where 0 stands for White (column 65) and 1 for Black (column 63).\n",
    "def _group_joint_rate(pred, y_hat, race_col):\n",
    "    \"\"\"Fraction of all rows with prediction ``y_hat`` and ``race_col`` set.\"\"\"\n",
    "    hits = (pred == y_hat) & (X_train[:, race_col] == 1.0)\n",
    "    return hits.sum() / len(y_train)\n",
    "\n",
    "# a~ = 0: every row is presented to the classifier as White.\n",
    "predict = logisticRegr.predict(X_train_all_white)\n",
    "Pr000NoY = _group_joint_rate(predict, 0, 65)\n",
    "Pr010NoY = _group_joint_rate(predict, 1, 65)\n",
    "Pr001NoY = _group_joint_rate(predict, 0, 63)\n",
    "Pr011NoY = _group_joint_rate(predict, 1, 63)\n",
    "\n",
    "# a~ = 1: every row is presented to the classifier as Black.\n",
    "predict = logisticRegr.predict(X_train_all_black)\n",
    "Pr100NoY = _group_joint_rate(predict, 0, 65)\n",
    "Pr110NoY = _group_joint_rate(predict, 1, 65)\n",
    "Pr101NoY = _group_joint_rate(predict, 0, 63)\n",
    "Pr111NoY = _group_joint_rate(predict, 1, 63)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "JDAud7bxTAWX"
   },
   "source": [
    "We solve optimization problem (9) under ESR fairness notion"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Bxxxu9skxyLZ"
   },
   "outputs": [],
   "source": [
    "from scipy.optimize import linprog\n",
    "\n",
    "# Results collected per epsilon: normalized objective (o), LP solution (beta), the two\n",
    "# group-wise terms plotted later as selection rates (f0, f1) and the coefficients of\n",
    "# the first equality constraint (c).\n",
    "o, beta, f0, f1, c = [], [], [], [], []\n",
    "epsilon = np.linspace(0.01, 7, 100)\n",
    "bnd = [(0, 1)] * 4  # each of the four decision variables lies in [0, 1]\n",
    "for e in epsilon:\n",
    "    w = np.exp(e)\n",
    "    # linprog minimizes, so the objective is negated here and un-negated when stored.\n",
    "    obj_coe = [-w*Pr000 - Pr001, -w*Pr010 - Pr011, -w*Pr101 - Pr100, -w*Pr111 - Pr110]\n",
    "    # First equality constraint (right-hand side 0).\n",
    "    condition1_coe = [w*Pr000 - Pr001, w*Pr010 - Pr011, -w*Pr101 + Pr100, -w*Pr111 + Pr110]\n",
    "    # Second equality constraint; its right-hand side is its own smallest coefficient.\n",
    "    condition2_coe = [w*Pr000NoY + Pr001NoY, w*Pr010NoY + Pr011NoY,\n",
    "                      w*Pr101NoY + Pr100NoY, w*Pr111NoY + Pr110NoY]\n",
    "    condition = [condition1_coe, condition2_coe]\n",
    "    constant = [0, min(condition2_coe)]\n",
    "\n",
    "    # NOTE(review): SciPy deprecated method=\"revised simplex\" in 1.9 and removed it in\n",
    "    # 1.11 (replacement: \"highs\"). opt.success is not checked, so the values stored\n",
    "    # below are used even if the solver reports failure -- confirm before switching.\n",
    "    opt = linprog(c=obj_coe, A_eq=condition, b_eq=constant, bounds=bnd, method=\"revised simplex\")\n",
    "    solution = opt['x']\n",
    "    o.append(-solution.dot(obj_coe)/constant[1])\n",
    "    f0.append(np.abs(np.array([w*Pr000, w*Pr010, Pr100, Pr110]).dot(solution)/constant[1]))\n",
    "    f1.append(np.abs(np.array([Pr001, Pr011, w*Pr101, w*Pr111]).dot(solution)/constant[1]))\n",
    "    beta.append(solution)\n",
    "    c.append(condition1_coe)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "b7q65rvGTSKM"
   },
   "source": [
    "We solve optimization problem (9) without any fairness notion"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "wMwOo3mmh4J9"
   },
   "outputs": [],
   "source": [
    "#solving optimization problem (14)\n",
    "# NOTE(review): the markdown cell above refers to problem (9) -- confirm the number.\n",
    "from scipy.optimize import linprog\n",
    "\n",
    "# Results collected per epsilon: normalized objective (o_nf), LP solution (beta) and\n",
    "# the two group-wise terms plotted later as selection rates (f0_nf, f1_nf).\n",
    "o_nf, beta, f0_nf, f1_nf = [], [], [], []\n",
    "# This problem has no fairness constraint, so there are no such coefficients to\n",
    "# record: c stays empty. (The previous code appended `condition1_coe`, a variable this\n",
    "# cell never defines; it only ran because of a value left over from another cell.)\n",
    "c = []\n",
    "epsilon = np.linspace(0.01, 7, 100)\n",
    "bnd = [(0, 1)] * 4  # each of the four decision variables lies in [0, 1]\n",
    "for e in epsilon:\n",
    "    w = np.exp(e)\n",
    "    # linprog minimizes, so the objective is negated here and un-negated when stored.\n",
    "    obj_coe = [-w*Pr000 - Pr001, -w*Pr010 - Pr011, -w*Pr101 - Pr100, -w*Pr111 - Pr110]\n",
    "    # Only equality constraint; its right-hand side is its own smallest coefficient.\n",
    "    condition2_coe = [w*Pr000NoY + Pr001NoY, w*Pr010NoY + Pr011NoY,\n",
    "                      w*Pr101NoY + Pr100NoY, w*Pr111NoY + Pr110NoY]\n",
    "    condition = [condition2_coe]\n",
    "    constant = [min(condition2_coe)]\n",
    "\n",
    "    # NOTE(review): SciPy deprecated method=\"revised simplex\" in 1.9 and removed it in\n",
    "    # 1.11 (replacement: \"highs\"). opt.success is not checked, so the values stored\n",
    "    # below are used even if the solver reports failure -- confirm before switching.\n",
    "    opt = linprog(c=obj_coe, A_eq=condition, b_eq=constant, bounds=bnd, method=\"revised simplex\")\n",
    "    solution = opt['x']\n",
    "    o_nf.append(-solution.dot(obj_coe)/constant[0])\n",
    "    f0_nf.append(np.abs(np.array([w*Pr000, w*Pr010, Pr100, Pr110]).dot(solution)/constant[0]))\n",
    "    f1_nf.append(np.abs(np.array([Pr001, Pr011, w*Pr101, w*Pr111]).dot(solution)/constant[0]))\n",
    "    beta.append(solution)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "10Bx7yxvTWEa"
   },
   "source": [
    "We will solve optimization problem (9) under EO fairness notion.\n",
    "In order to do that, we need to find $\\Pr\\{Y=1,A=a,R=y\\}$ for different values of $a$ and $y$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "TBW6jxdPHRzI"
   },
   "outputs": [],
   "source": [
    "#effect of equal opportunity\n",
    "predict = logisticRegr.predict(X_train)\n",
    "temp = np.logical_and((X_train[:,65]==1.0),y_train==1)\n",
    "temp = np.logical_and(temp,predict==1)\n",
    "n = X_train.shape[0]\n",
    "PrY1A0R1 = np.sum(temp)/n\n",
    "###\n",
    "predict = logisticRegr.predict(X_train)\n",
    "temp = np.logical_and((X_train[:,63]==1.0),y_train==1)\n",
    "temp = np.logical_and(temp,predict==1)\n",
    "n = X_train.shape[0]\n",
    "PrY1A1R1 = np.sum(temp)/n\n",
    "###\n",
    "predict = logisticRegr.predict(X_train)\n",
    "temp = np.logical_and((X_train[:,65]==1.0),y_train==1)\n",
    "temp = np.logical_and(temp,predict==0)\n",
    "n = X_train.shape[0]\n",
    "PrY1A0R0 = np.sum(temp)/n\n",
    "###\n",
    "predict = logisticRegr.predict(X_train)\n",
    "temp = np.logical_and((X_train[:,63]==1.0),y_train==1)\n",
    "temp = np.logical_and(temp,predict==0)\n",
    "n = X_train.shape[0]\n",
    "PrY1A1R0 = np.sum(temp)/n\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "27UpoN3PTyJP"
   },
   "source": [
    "We will solve optimization problem (9) under EO fairness notion.\n",
    "In order to do that, we need to find $\\Pr\\{r(X,\\tilde{a}) = y \\mid Y=1, A=a\\}$ for different values of $\\tilde{a}$, $a$ and $y$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "0hNsGOR7a8hT"
   },
   "outputs": [],
   "source": [
    "# Effect of equal opportunity: Pr{r(X, a~) = y | Y=1, A=a}, stored as\n",
    "# PrAtild<a~>R<y>_A<a>Y1 where 0 stands for White (column 65) and 1 for Black (column 63).\n",
    "def _conditional_rates(X_counterfactual, race_col):\n",
    "    \"\"\"Return the shares of predictions 1 and 0 among rows with label 1 and ``race_col`` set.\n",
    "\n",
    "    Rows are selected with the real indicators in ``X_train`` but predicted from the\n",
    "    matching rows of ``X_counterfactual``.\n",
    "    \"\"\"\n",
    "    subset = X_counterfactual[(X_train[:, race_col] == 1.0) & (y_train == 1)]\n",
    "    size = subset.shape[0]\n",
    "    subset_pred = logisticRegr.predict(subset)\n",
    "    return np.sum(subset_pred == 1) / size, np.sum(subset_pred == 0) / size\n",
    "\n",
    "PrAtild0R1_A0Y1, PrAtild0R0_A0Y1 = _conditional_rates(X_train_all_white, 65)\n",
    "PrAtild1R1_A0Y1, PrAtild1R0_A0Y1 = _conditional_rates(X_train_all_black, 65)\n",
    "PrAtild0R1_A1Y1, PrAtild0R0_A1Y1 = _conditional_rates(X_train_all_white, 63)\n",
    "PrAtild1R1_A1Y1, PrAtild1R0_A1Y1 = _conditional_rates(X_train_all_black, 63)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "BMw-u15v6yfQ"
   },
   "source": [
    "We will solve optimization problem (9) under EO fairness notion.\n",
    "In order to do that, we need to find $\\Pr\\{R=y, A=a\\}$ for different values of $a$ and $y$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "zpRA8ej3n2c8"
   },
   "outputs": [],
   "source": [
    "# Effect of equal opportunity: joint rates Pr{R=y, A=a} of the classifier on the\n",
    "# unmodified features. A=0 is the White indicator (column 65), A=1 the Black indicator\n",
    "# (column 63); R is the prediction.\n",
    "predict = logisticRegr.predict(X_train)\n",
    "n = X_train.shape[0]\n",
    "is_white = X_train[:, 65] == 1.0\n",
    "is_black = X_train[:, 63] == 1.0\n",
    "\n",
    "PrA0R1 = np.sum(is_white & (predict == 1)) / n\n",
    "PrA1R1 = np.sum(is_black & (predict == 1)) / n\n",
    "PrA0R0 = np.sum(is_white & (predict == 0)) / n\n",
    "PrA1R0 = np.sum(is_black & (predict == 0)) / n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "POp4Uucaaftl"
   },
   "source": [
    "We  solve optimization problem (9) under EO fairness notion.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Jfn3cXXUc1Ar"
   },
   "outputs": [],
   "source": [
    "#solving optimization problem (14)\n",
    "from scipy.optimize import linprog\n",
    "\n",
    "# Results collected per epsilon: normalized objective (o_eq), LP solution (beta), the\n",
    "# two group-wise terms plotted later as selection rates (f0_eq, f1_eq) and the\n",
    "# coefficients of the first equality constraint (c).\n",
    "o_eq, beta, f0_eq, f1_eq, c = [], [], [], [], []\n",
    "epsilon = np.linspace(0.01, 7, 100)\n",
    "bnd = [(0, 1)] * 4  # each of the four decision variables lies in [0, 1]\n",
    "for e in epsilon:\n",
    "    w = np.exp(e)\n",
    "    # linprog minimizes, so the objective is negated here and un-negated when stored.\n",
    "    obj_coe = [-w*Pr000 - Pr001, -w*Pr010 - Pr011, -w*Pr101 - Pr100, -w*Pr111 - Pr110]\n",
    "    # First equality constraint (right-hand side 0), built from the rates conditioned\n",
    "    # on Y=1.\n",
    "    condition1_coe = [w*PrAtild0R0_A0Y1 - PrAtild0R0_A1Y1, w*PrAtild0R1_A0Y1 - PrAtild0R1_A1Y1,\n",
    "                      -w*PrAtild1R0_A1Y1 + PrAtild1R0_A0Y1, -w*PrAtild1R1_A1Y1 + PrAtild1R1_A0Y1]\n",
    "    # Second equality constraint; its right-hand side is its own smallest coefficient.\n",
    "    condition2_coe = [w*Pr000NoY + Pr001NoY, w*Pr010NoY + Pr011NoY,\n",
    "                      w*Pr101NoY + Pr100NoY, w*Pr111NoY + Pr110NoY]\n",
    "    condition = [condition1_coe, condition2_coe]\n",
    "    constant = [0, min(condition2_coe)]\n",
    "\n",
    "    # NOTE(review): SciPy deprecated method=\"revised simplex\" in 1.9 and removed it in\n",
    "    # 1.11 (replacement: \"highs\"). opt.success is not checked, so the values stored\n",
    "    # below are used even if the solver reports failure -- confirm before switching.\n",
    "    opt = linprog(c=obj_coe, A_eq=condition, b_eq=constant, bounds=bnd, method=\"revised simplex\")\n",
    "    solution = opt['x']\n",
    "    o_eq.append(-solution.dot(obj_coe)/constant[1])\n",
    "    f0_eq.append(np.abs(np.array([w*Pr000, w*Pr010, Pr100, Pr110]).dot(solution)/constant[1]))\n",
    "    f1_eq.append(np.abs(np.array([Pr001, Pr011, w*Pr101, w*Pr111]).dot(solution)/constant[1]))\n",
    "    beta.append(solution)\n",
    "    c.append(condition1_coe)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "mBTT2GxtayZI"
   },
   "source": [
    "We plot Figures 2, 3 and 4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 981
    },
    "id": "KH63SD8_PFb7",
    "outputId": "5898f694-0011-47e5-d91b-8ae47ee1988c"
   },
   "outputs": [],
   "source": [
    "import matplotlib\n",
    "\n",
    "# 'normal' is a font weight/style, not a font family: matplotlib cannot find it, warns\n",
    "# and falls back to its default font. The generic 'sans-serif' family is requested\n",
    "# instead.\n",
    "font = {'family' : 'sans-serif',\n",
    "        'size'   : 15}\n",
    "matplotlib.rc('font', **font)\n",
    "\n",
    "# Accuracy against privacy loss for the three settings.\n",
    "plt.figure(figsize=(5,4))\n",
    "plt.plot(epsilon,o,'b',label=r'ESR',linewidth=4)\n",
    "plt.plot(epsilon,o_eq,'r',label=r'EO',linewidth=4)\n",
    "plt.plot(epsilon,o_nf,'--g',label=r'None',linewidth=4)\n",
    "plt.legend(loc='lower right' )\n",
    "plt.xlabel(r'Privacy Loss $\\epsilon$')\n",
    "plt.ylabel(r'Accuracy')\n",
    "plt.grid()\n",
    "plt.savefig('acc.eps', format='eps',bbox_inches='tight')\n",
    "\n",
    "# Selection rate of each group against privacy loss for the three settings.\n",
    "plt.figure(figsize=(5,4))\n",
    "l1, = plt.plot(epsilon,f0,'r',label=r'White: ESR',linewidth=4)\n",
    "l2, = plt.plot(epsilon,f1,'b:' ,label=r'Black: ESR',linewidth=4)\n",
    "l3, = plt.plot(epsilon,f0_eq,'#C4DF76',label=r'White: EO',linewidth=4)\n",
    "l4, = plt.plot(epsilon,f1_eq,'c' ,label=r'Black: EO',linewidth=4)\n",
    "l5, = plt.plot(epsilon,f0_nf,'g--',label=r'White: None',linewidth=4)\n",
    "l6, = plt.plot(epsilon,f1_nf,'y--', label=r'Black: None',linewidth=4)\n",
    "\n",
    "# The six curves are split over two legends. plt.legend() replaces the legend of the\n",
    "# current Axes, so the first one is re-attached as a plain artist before the second\n",
    "# one is created.\n",
    "first_legend = plt.legend(handles=[l1,l2,l3], loc='upper right', bbox_to_anchor=(1, 0.9))\n",
    "plt.gca().add_artist(first_legend)\n",
    "plt.legend(handles=[l4,l5,l6], loc='lower right',bbox_to_anchor=(1,  0.1))\n",
    "\n",
    "plt.xlabel(r'Privacy Loss $\\epsilon$')\n",
    "plt.ylabel(r'Selection Rate')\n",
    "plt.grid()\n",
    "\n",
    "plt.savefig('fair.eps', format='eps',bbox_inches='tight')\n",
    "\n",
    "# Absolute gap between the two groups' selection rates for the three settings.\n",
    "plt.figure(figsize=(5,4))\n",
    "plt.plot(epsilon,abs(np.array(f0)-np.array(f1)),'r',label=r'ESR',linewidth=4)\n",
    "plt.plot(epsilon,abs(np.array(f0_eq)-np.array(f1_eq)),'#C4DF76',label=r'EO',linewidth=4)\n",
    "plt.plot(epsilon,abs(np.array(f0_nf)-np.array(f1_nf)),'g--',label=r'None',linewidth=4)\n",
    "plt.legend(loc='right')\n",
    "plt.xlabel(r'Privacy Loss $\\epsilon$')\n",
    "plt.ylabel(r'disparity ($\\gamma$)')\n",
    "\n",
    "plt.grid()\n",
    "\n",
    "plt.savefig('disparity.eps', format='eps',bbox_inches='tight')"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "figure2-3-4.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
