{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from glob import glob\n",
    "import re\n",
    "import scipy.stats\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Please change this variable to switch to a different dataset\n",
    "dataset = 'utkface'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Game parameters\n",
    "builder_lambda = 0.7\n",
    "discount = 1.5\n",
    "# Results Path\n",
    "repo_path = ''"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Loading PF"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def is_pareto_efficient(costs, return_mask = False):\n",
    "    \"\"\"\n",
    "        Find the pareto-efficient points\n",
    "        :param costs: An (n_points, n_costs) array\n",
    "        :param return_mask: True to return a mask\n",
    "        :return: An array of indices of pareto-efficient points.\n",
    "            If return_mask is True, this will be an (n_points, ) boolean array\n",
    "            Otherwise it will be a (n_efficient_points, ) integer array of indices.\n",
    "    \"\"\"\n",
    "    is_efficient = np.arange(costs.shape[0])\n",
    "    n_points = costs.shape[0]\n",
    "    next_point_index = 0  # Next index in the is_efficient array to search for\n",
    "    while next_point_index<len(costs):\n",
    "        nondominated_point_mask = np.any(costs<costs[next_point_index], axis=1)\n",
    "        nondominated_point_mask[next_point_index] = True\n",
    "        is_efficient = is_efficient[nondominated_point_mask]  # Remove dominated points\n",
    "        costs = costs[nondominated_point_mask]\n",
    "        next_point_index = np.sum(nondominated_point_mask[:next_point_index])+1\n",
    "    if return_mask:\n",
    "        is_efficient_mask = np.zeros(n_points, dtype = bool)\n",
    "        is_efficient_mask[is_efficient] = True\n",
    "        return is_efficient_mask\n",
    "    else:\n",
    "        return is_efficient\n",
    "    \n",
    "def get_pf(losses, priv_values, fair_values):\n",
    "    # select points on the PF\n",
    "    pf_indices = is_pareto_efficient(losses)\n",
    "    pf_losses = losses[pf_indices, :]\n",
    "    pf_priv = priv_values[pf_indices]\n",
    "    pf_fair = fair_values[pf_indices]\n",
    "    \n",
    "    return pf_losses, pf_priv, pf_fair, pf_indices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load the losses\n",
    "loss_dir = f\"{repo_path}previous_results/loss_functions/{dataset}/fairPATE\"\n",
    "\n",
    "# if pareto.npz exists, load it\n",
    "if glob(loss_dir+'/pareto.npz'):\n",
    "    pareto_arrays = np.load(loss_dir+'/pareto.npz', allow_pickle=True)\n",
    "    loss_builder_acc = pareto_arrays['loss_builder_acc']\n",
    "    loss_privacy = pareto_arrays['loss_privacy']\n",
    "    loss_fairness = pareto_arrays['loss_fairness']\n",
    "    priv_fair_values = pareto_arrays['priv_fair_values']\n",
    "    loss_builder_cov = pareto_arrays['loss_builder_cov']\n",
    "    metadata = pareto_arrays['metadata'].item()\n",
    "    print(f\"Loaded dataset {metadata['dataset']}\")\n",
    "else:\n",
    "    loss_builder_acc = np.load(loss_dir+'/builder_loss_acc.npy')\n",
    "    loss_privacy = np.load(loss_dir+'/privacy_loss.npy')\n",
    "    loss_fairness = np.load(loss_dir+'/fairness_loss.npy')\n",
    "    priv_fair_values = np.load(loss_dir+'/priv_fair_values.npy')\n",
    "    loss_builder_cov = np.load(loss_dir+'/builder_loss_cov.npy')\n",
    "    print('Loaded dataset from npy')\n",
    "\n",
    "\n",
    "\n",
    "priv_values = priv_fair_values[:,0]\n",
    "fair_values = priv_fair_values[:,1]\n",
    "losses = np.squeeze(np.stack((-1 * loss_builder_acc, loss_privacy, loss_fairness, -1 * loss_builder_cov), axis=-1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# get points on pf\n",
    "pf_losses, pf_priv, pf_fair, pf_indices = get_pf(losses, priv_values, fair_values)\n",
    "loss_privacy = pf_losses[:, 1]\n",
    "loss_fairness = pf_losses[:, 2]\n",
    "loss_builder_weighted = builder_lambda *0.01 * pf_losses[:, 0] + (1-builder_lambda) * pf_losses[:, 3]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Price of Anarchy"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Calculation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# find max\n",
    "error_max = max(1+loss_builder_weighted)\n",
    "fair_loss_max = max(loss_fairness)\n",
    "priv_loss_max = max(loss_privacy)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.30130807453416153"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "error_max"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_convergence(df):\n",
    "    for i in range(1, 21):\n",
    "        # Go from end forward\n",
    "        priv_change = abs((df['epsilon'].loc[i] - df['epsilon'].loc[i-1])/df['epsilon'].loc[i-1])\n",
    "        fair_change = abs((df['gamma'].loc[i] - df['gamma'].loc[i-1])/df['gamma'].loc[i-1])\n",
    "        if priv_change > 0.01 or fair_change > 0.01:\n",
    "            return i"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "def calculate_PoA(df, end):\n",
    "    # Numerator\n",
    "    numerator = 0\n",
    "    for i in range(end+1):\n",
    "        error_normalized = (1+df['loss_build_combined'].loc[i])/error_max\n",
    "        fairness_normalized = (df['gamma'].loc[i] - df['gamma'].loc[0])/fair_loss_max\n",
    "        privacy_normalized = (df['epsilon'].loc[i] - df['epsilon'].loc[0])/priv_loss_max\n",
    "        numerator += (error_normalized + fairness_normalized + privacy_normalized) * (1/1.5)**i\n",
    "    # Denominator\n",
    "    denominator = (1 + df['loss_build'].loc[0])/error_max\n",
    "    \n",
    "    return numerator, denominator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read in game data\n",
    "prices = []\n",
    "folder = 'utkface_RQ1_new'\n",
    "for path in filter(lambda _str: re.match(r'\\d*', _str.split(\"/\")[-1]), glob(repo_path+f\"results/{folder}/*\")):\n",
    "    df = pd.read_parquet(f\"{path}/df.parquet.gzip\")\n",
    "    df = df[df['agent'] != 'calibration'].copy()\n",
    "    \n",
    "    # Find where the game has converged\n",
    "    end = find_convergence(df)\n",
    "    print(end)\n",
    "    \n",
    "    # Calculate PoA\n",
    "    n, d = calculate_PoA(df, end)\n",
    "    print(n, d)\n",
    "    poa = n/d\n",
    "    prices.append(poa)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "prices"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def mean_confidence_interval(data, confidence=0.95):\n",
    "    a = 1.0 * np.array(data)\n",
    "    n = len(a)\n",
    "    m, se = np.mean(a), scipy.stats.sem(a)\n",
    "    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n-1)\n",
    "    return m, h"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mean_confidence_interval(prices, confidence=0.95)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
