{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "dNtJtZkL34Hg"
      },
      "source": [
        "# **Stochastic Bandits for Egalitarian Assignment - Synthetic Experiments**\n",
        "---\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "id": "KXRC_YXSfeO0"
      },
      "outputs": [],
      "source": [
        "# Output directory (relative to the working directory) for saved arrays and figures.\n",
        "rootdir = 'drive/MyDrive/egalucb/synthetic' # @param {type: \"string\"}\n",
        "\n",
        "from google.colab import drive\n",
        "\n",
        "# Mount Google Drive at ./drive, which is the prefix `rootdir` starts with.\n",
        "drive.mount('drive', force_remount=True)\n",
        "\n",
        "# Quote the interpolated path so a folder name containing spaces stays a single argument.\n",
        "! mkdir -p \"{rootdir}\""
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "KZIfDThxMJs9"
      },
      "source": [
        "## Setup"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# LaTeX toolchain for the figures, which are rendered with text.usetex enabled.\n",
        "# -y answers apt's confirmation prompt so the cell also completes when run unattended.\n",
        "! sudo apt-get install -y cm-super dvipng texlive-latex-extra texlive-latex-recommended"
      ],
      "metadata": {
        "id": "R8fI9vx4hpOx"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "M4BUF4p2styx"
      },
      "outputs": [],
      "source": [
        "import numpy as np\n",
        "import matplotlib\n",
        "import matplotlib.pyplot as plt\n",
        "\n",
        "from collections import defaultdict\n",
        "from scipy.optimize import curve_fit\n",
        "from sklearn.metrics import r2_score\n",
        "from tqdm import tqdm"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "h6LFbGr5xZj2"
      },
      "outputs": [],
      "source": [
        "# Silence floating-point warnings globally; the UCB index computation divides by zero play counts.\n",
        "olderr = np.seterr(all='ignore')\n",
        "\n",
        "# Seed NumPy's global generator so a fresh Restart & Run All reproduces the same arm means and rewards.\n",
        "SEED = 0\n",
        "np.random.seed(SEED)\n",
        "\n",
        "# Figure typography: LaTeX text rendering, 7pt base font size, STIX fonts.\n",
        "matplotlib.rcParams[\"text.usetex\"] = True\n",
        "matplotlib.rcParams[\"font.size\"] = \"7\"\n",
        "matplotlib.rcParams['mathtext.fontset'] = 'stix'\n",
        "matplotlib.rcParams['font.family'] = 'STIXGeneral'\n",
        "\n",
        "def pt2inches(width, height):\n",
        "    \"\"\"Convert a (width, height) pair from points to inches, using 72.27 points per inch.\"\"\"\n",
        "    points_per_inch = 72.27\n",
        "    width_in = width / points_per_inch\n",
        "    height_in = height / points_per_inch\n",
        "    return (width_in, height_in)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "uwFYjlEBho0a"
      },
      "outputs": [],
      "source": [
        "def sample_normal(scale):\n",
        "    \"\"\"Build a Gaussian reward sampler with a fixed spread.\n",
        "\n",
        "    `scale` is forwarded to `np.random.normal`, which interprets it as the\n",
        "    standard deviation (not the variance). The returned callable takes a mean\n",
        "    `mu` and returns one draw centred at `mu`.\n",
        "    \"\"\"\n",
        "    def draw(mu):\n",
        "        return np.random.normal(loc=mu, scale=scale)\n",
        "    return draw\n",
        "\n",
        "def sample_bernoulli(mu):\n",
        "    \"\"\"Draw a single 0/1 reward that equals 1 with probability `mu`.\"\"\"\n",
        "    return np.random.binomial(1, mu)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "gUUbPeA_spsy"
      },
      "outputs": [],
      "source": [
        "class MultiUserBanditInstance:\n",
        "    \"\"\"Stochastic K-armed bandit in which U users each pull one arm per timestep.\n",
        "\n",
        "    `mustar` is the sum of the U largest arm means, so `mustar / U` is the\n",
        "    per-user benchmark against which expected regret is measured.\n",
        "    \"\"\"\n",
        "\n",
        "    def __init__(self, mus, U, sampler=sample_bernoulli):\n",
        "        \"\"\"Store the problem instance.\n",
        "\n",
        "        Args:\n",
        "            mus: Sequence of K arm means.\n",
        "            U: Number of users; must satisfy 1 <= U <= K.\n",
        "            sampler: Callable that takes an arm mean and returns one random reward.\n",
        "\n",
        "        Raises:\n",
        "            ValueError: If U is not in [1, K].\n",
        "        \"\"\"\n",
        "        # Coerce to an array so that `self.mus[arms]` also works when a list is passed.\n",
        "        mus = np.asarray(mus, dtype=float)\n",
        "        # With U > K the block selection would contain duplicate arms; with U < 1 there is nothing to play.\n",
        "        if not 1 <= U <= len(mus):\n",
        "            raise ValueError(f'U must be between 1 and K={len(mus)}, got {U}')\n",
        "        self.K = len(mus)\n",
        "        self.U = U\n",
        "        self.mus = mus\n",
        "        self.mustar = np.sum(np.sort(mus)[-U:])\n",
        "        self.sampler = sampler\n",
        "\n",
        "    def pull(self, arms):\n",
        "        \"\"\"Draw one reward per user, where user u pulls arm `arms[u]`; returns a length-U array.\"\"\"\n",
        "        rewards = np.zeros(self.U)\n",
        "        for u in range(self.U):\n",
        "            rewards[u] = self.sampler(self.mus[arms[u]])\n",
        "        return rewards\n",
        "\n",
        "    def simulate_egalucb(self, T):\n",
        "        \"\"\"Simulate the block-wise UCB policy with cyclic arm rotation for T timesteps.\n",
        "\n",
        "        Each block recomputes the UCB indices, keeps the U arms with the largest\n",
        "        index, and plays them for up to U timesteps while shifting the\n",
        "        user-to-arm assignment by one position per timestep.\n",
        "\n",
        "        Args:\n",
        "            T: Number of timesteps to simulate.\n",
        "\n",
        "        Returns:\n",
        "            cumrewards: Length-K array with the total reward collected on each arm.\n",
        "            numplays: Length-K integer array with the number of pulls of each arm.\n",
        "            expregrets: (U, T) array; entry [u, t] is user u's cumulative expected\n",
        "                regret after t + 1 timesteps.\n",
        "        \"\"\"\n",
        "        cumrewards = np.zeros(self.K)\n",
        "        numplays = np.zeros(self.K, dtype=int)\n",
        "        expregrets = np.zeros((self.U, T + 1))\n",
        "        # Ceiling division: when U does not divide T the last block is cut short,\n",
        "        # instead of leaving the trailing columns of `expregrets` at zero.\n",
        "        num_blocks = -(-T // self.U)\n",
        "        t = 0\n",
        "        for b in tqdm(range(num_blocks)):\n",
        "            # Unplayed arms produce 0/0 (and log(0) appears in block 0); keep those\n",
        "            # operations quiet locally rather than relying on a global np.seterr.\n",
        "            with np.errstate(divide='ignore', invalid='ignore'):\n",
        "                muhats = cumrewards / numplays\n",
        "                ucbs = muhats + np.sqrt(6 * np.log(b) / numplays)\n",
        "            # NaN marks an arm that has never been played: give it an infinite index.\n",
        "            ucbs = np.nan_to_num(ucbs, nan=np.inf)\n",
        "            blockarms = ucbs.argsort()[-self.U:][::-1]\n",
        "            # Duplicate the selection so a length-U window can wrap around its end.\n",
        "            blockarms = np.concatenate([blockarms, blockarms])\n",
        "            for i in range(min(self.U, T - t)):\n",
        "                t += 1\n",
        "                arms = blockarms[i:self.U+i]\n",
        "                rewards = self.pull(arms)\n",
        "                cumrewards[arms] += rewards\n",
        "                numplays[arms] += 1\n",
        "                expregrets[:,t] = expregrets[:,t-1] + (self.mustar / self.U - self.mus[arms])\n",
        "        # Column 0 only seeded the running sum; drop it so column t - 1 holds timestep t.\n",
        "        expregrets = expregrets[:, 1:]\n",
        "        return cumrewards, numplays, expregrets"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "aZ7n1ogw_uSj"
      },
      "source": [
        "## Experiment 1: How does regret evolve over time?"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "U00eLiiy_y3D"
      },
      "source": [
        "#### Gaussian bandits with variance 1.0"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "tKu14vnD3oJ8"
      },
      "outputs": [],
      "source": [
        "# Problem size for this section: K arms and a horizon of T timesteps.\n",
        "# U is assigned here, but the run cell below sweeps 1-5 users instead of reading it.\n",
        "K, U, T = 100, 20, 150_000"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "klkqNE6ECi6p"
      },
      "outputs": [],
      "source": [
        "mus = np.random.uniform(low=0.01, high=0.99, size=K)\n",
        "\n",
        "# instances[n][r] is repetition r of the problem with n + 1 users; all share the same arm means.\n",
        "instances = [\n",
        "    [MultiUserBanditInstance(mus, num_users, sampler=sample_normal(1.0)) for _ in range(30)]\n",
        "    for num_users in range(1, 6)\n",
        "]\n",
        "\n",
        "# expregrets_list mirrors the layout of `instances` and collects each run's regret array.\n",
        "expregrets_list = []\n",
        "for repetitions in instances:\n",
        "    block_regrets = []\n",
        "    expregrets_list.append(block_regrets)\n",
        "    for instance in repetitions:\n",
        "        _, _, expregrets = instance.simulate_egalucb(T)\n",
        "        block_regrets.append(expregrets)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "lIjX9cbBKAhz"
      },
      "outputs": [],
      "source": [
        "# The per-run arrays differ in shape across user counts, so they are stored as an object array.\n",
        "np.save(\n",
        "    file=f'{rootdir}/ex1-gaussian-1.0.npy',\n",
        "    arr=np.array(expregrets_list, dtype=object),\n",
        ")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "a67qFnhK_38D"
      },
      "source": [
        "#### Gaussian bandits with variance 0.1"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "eJ37tyF29la0"
      },
      "outputs": [],
      "source": [
        "# Problem size for this section: K arms and a horizon of T timesteps.\n",
        "# U is assigned here, but the run cell below sweeps 1-5 users instead of reading it.\n",
        "K, U, T = 100, 20, 150_000"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "tF_SFJdFMmEx"
      },
      "outputs": [],
      "source": [
        "mus = np.random.uniform(low=0.01, high=0.99, size=K)\n",
        "\n",
        "# `sample_normal` forwards its argument to np.random.normal as the standard deviation,\n",
        "# so a reward variance of 0.1 (as in this section's heading and plot title) needs sqrt(0.1).\n",
        "reward_std = np.sqrt(0.1)\n",
        "\n",
        "# instances[n][r] is repetition r of the problem with n + 1 users; all share the same arm means.\n",
        "instances = [\n",
        "    [MultiUserBanditInstance(mus, num_users, sampler=sample_normal(reward_std)) for _ in range(30)]\n",
        "    for num_users in range(1, 6)\n",
        "]\n",
        "\n",
        "# expregrets_list mirrors the layout of `instances` and collects each run's regret array.\n",
        "expregrets_list = []\n",
        "for repetitions in instances:\n",
        "    block_regrets = []\n",
        "    expregrets_list.append(block_regrets)\n",
        "    for instance in repetitions:\n",
        "        _, _, expregrets = instance.simulate_egalucb(T)\n",
        "        block_regrets.append(expregrets)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "YIGDa63_L7uA"
      },
      "outputs": [],
      "source": [
        "# The per-run arrays differ in shape across user counts, so they are stored as an object array.\n",
        "np.save(\n",
        "    file=f'{rootdir}/ex1-gaussian-0.1.npy',\n",
        "    arr=np.array(expregrets_list, dtype=object),\n",
        ")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "p6xHjPCW_-UU"
      },
      "source": [
        "#### Bernoulli bandits"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "lbAJ2HRb9VeB"
      },
      "outputs": [],
      "source": [
        "# Problem size for this section: K arms and a horizon of T timesteps.\n",
        "# U is assigned here, but the run cell below sweeps 1-5 users instead of reading it.\n",
        "K, U, T = 100, 20, 150_000"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "1YG11XKjMqgH"
      },
      "outputs": [],
      "source": [
        "mus = np.random.uniform(low=0.01, high=0.99, size=K)\n",
        "\n",
        "# instances[n][r] is repetition r of the problem with n + 1 users; all share the same arm means.\n",
        "instances = [\n",
        "    [MultiUserBanditInstance(mus, num_users, sampler=sample_bernoulli) for _ in range(30)]\n",
        "    for num_users in range(1, 6)\n",
        "]\n",
        "\n",
        "# expregrets_list mirrors the layout of `instances` and collects each run's regret array.\n",
        "expregrets_list = []\n",
        "for repetitions in instances:\n",
        "    block_regrets = []\n",
        "    expregrets_list.append(block_regrets)\n",
        "    for instance in repetitions:\n",
        "        _, _, expregrets = instance.simulate_egalucb(T)\n",
        "        block_regrets.append(expregrets)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "y85trpn7MqgH"
      },
      "outputs": [],
      "source": [
        "# The per-run arrays differ in shape across user counts, so they are stored as an object array.\n",
        "np.save(\n",
        "    file=f'{rootdir}/ex1-bernoulli.npy',\n",
        "    arr=np.array(expregrets_list, dtype=object),\n",
        ")"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "#### Plot of regret over time"
      ],
      "metadata": {
        "id": "DDTKdVf2x_gF"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Reload the saved Experiment 1 results; the files hold object arrays, hence allow_pickle=True.\n",
        "data_for_plots = {\n",
        "    'ex1-gaussian-1.0': np.load(f'{rootdir}/ex1-gaussian-1.0.npy', allow_pickle=True),\n",
        "    'ex1-gaussian-0.1': np.load(f'{rootdir}/ex1-gaussian-0.1.npy', allow_pickle=True),\n",
        "    'ex1-bernoulli': np.load(f'{rootdir}/ex1-bernoulli.npy', allow_pickle=True),\n",
        "}"
      ],
      "metadata": {
        "id": "7drvA9f0iw-J"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "fig, axes = plt.subplots(1, 3, figsize=(pt2inches(600, 120)), dpi=120)\n",
        "linestyles = [(0, ()), (0, (1, 1)), (0, (5, 1)), (0, (5, 5)), (0, (3, 1, 1, 1))]\n",
        "colors = ['#EE6677', '#228833', '#4477AA', '#AA3377', '#CCBB44']\n",
        "keys = ['ex1-gaussian-1.0', 'ex1-gaussian-0.1', 'ex1-bernoulli']\n",
        "titles = [r'Gaussian $(\\sigma^2=1.0)$', r'Gaussian $(\\sigma^2=0.1)$', 'Bernoulli']\n",
        "\n",
        "# Plot only every `res`-th timestep of each curve.\n",
        "res = 500\n",
        "\n",
        "for a in range(3):\n",
        "\n",
        "    expregrets_list = data_for_plots[keys[a]]\n",
        "\n",
        "    for i in range(5):\n",
        "\n",
        "        # Stack the repetitions (runs, U, T) and keep user 0's curve from each: (runs, T).\n",
        "        regrets = np.stack(expregrets_list[i])[:, 0, :]\n",
        "        regrets_mean = regrets.mean(axis=0)\n",
        "\n",
        "        # Take the horizon from the loaded data instead of the global `T`, which the\n",
        "        # Experiment 2 cells overwrite; otherwise re-running this cell later fails on a length mismatch.\n",
        "        time = np.arange(1, regrets.shape[-1] + 1)\n",
        "\n",
        "        axes[a].plot(time[::res], regrets_mean[::res], color=colors[i], label=f'{i+1:d} users', linestyle=linestyles[i], linewidth=0.5)\n",
        "        # Shaded band spans the minimum and maximum over repetitions.\n",
        "        axes[a].fill_between(time[::res], regrets.min(axis=0)[::res], regrets.max(axis=0)[::res], alpha=0.15, color=colors[i])\n",
        "\n",
        "    # Both axes are labelled in thousands.\n",
        "    xticks = matplotlib.ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(x / 1000))\n",
        "    yticks = matplotlib.ticker.FuncFormatter(lambda y, pos: '{0:g}'.format(y / 1000))\n",
        "\n",
        "    axes[a].xaxis.set_major_formatter(xticks)\n",
        "    axes[a].yaxis.set_major_formatter(yticks)\n",
        "    axes[a].set_xticks(np.arange(0, 150001, 50000))\n",
        "    axes[a].set_yticks(np.arange(0, 18001, 3000))\n",
        "    axes[a].tick_params(axis='both', which='both', length=0)\n",
        "\n",
        "    for side in ('top', 'right', 'bottom', 'left'):\n",
        "        axes[a].spines[side].set_visible(False)\n",
        "\n",
        "    axes[a].set_xlabel('Timestep (in thousands)')\n",
        "    axes[a].set_title(titles[a])\n",
        "    axes[a].grid(alpha=0.25, axis='y', color='#BBBBBB', linewidth=0.5)\n",
        "\n",
        "# One shared legend below the panels, built from the first panel's handles.\n",
        "handles, labels = axes[0].get_legend_handles_labels()\n",
        "legend = fig.legend(handles, labels, bbox_to_anchor=(0.5, -0.3), loc='lower center', ncol=5)\n",
        "legend.get_frame().set(alpha=0.15, boxstyle='round,pad=0.2,rounding_size=0.8', facecolor='#BBBBBB', linewidth=0)\n",
        "axes[0].set_ylabel('Regret (in thousands)')\n",
        "\n",
        "fig.show()\n",
        "fig.savefig(f'{rootdir}/ex1-synthetic.pdf', bbox_inches='tight')"
      ],
      "metadata": {
        "id": "cevYGjNIx95v"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "OFsx5yTh2mZy"
      },
      "source": [
        "## Experiment 2: Fixing K and T, what is the regret as a function of U?"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Dnew9fY04O9f"
      },
      "source": [
        "#### Bernoulli bandits v1"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "pCtugiKKSY6c"
      },
      "outputs": [],
      "source": [
        "K = 1024\n",
        "T = 262144 # 2**18\n",
        "Us = [2, 4, 8, 16, 32, 64, 128, 256]\n",
        "\n",
        "# One instance per user count: the first U arms have mean 0.8 and the other K - U arms have mean 0.5.\n",
        "instances = []\n",
        "for U in Us:\n",
        "    mu = np.concatenate([np.full(U, 0.8), np.full(K - U, 0.5)])\n",
        "    instances.append(MultiUserBanditInstance(mu, U, sampler=sample_bernoulli))\n",
        "\n",
        "# A single run per user count; expregrets_list[k] belongs to Us[k].\n",
        "expregrets_list = []\n",
        "for instance in instances:\n",
        "    _, _, expregrets = instance.simulate_egalucb(T)\n",
        "    expregrets_list.append(expregrets)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "FHnkMT2brkpX"
      },
      "outputs": [],
      "source": [
        "# The per-run arrays differ in shape across user counts, so they are stored as an object array.\n",
        "np.save(\n",
        "    file=f'{rootdir}/ex2-bernoulli-v1.npy',\n",
        "    arr=np.array(expregrets_list, dtype=object),\n",
        ")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "V76aP0N_89jF"
      },
      "source": [
        "#### Bernoulli bandits v2"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "cNZLTims89jO"
      },
      "outputs": [],
      "source": [
        "K = 20\n",
        "T = 126000\n",
        "Us = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]\n",
        "\n",
        "# instances[k][r] is repetition r for Us[k] users: U arms with mean 0.8, the other K - U with mean 0.5.\n",
        "instances = []\n",
        "for U in Us:\n",
        "    mu = np.concatenate([np.full(U, 0.8), np.full(K - U, 0.5)])\n",
        "    instances.append([MultiUserBanditInstance(mu, U, sampler=sample_bernoulli) for _ in range(30)])\n",
        "\n",
        "# expregrets_list mirrors the layout of `instances` and collects each run's regret array.\n",
        "expregrets_list = []\n",
        "for repetitions in instances:\n",
        "    block_regrets = []\n",
        "    expregrets_list.append(block_regrets)\n",
        "    for instance in repetitions:\n",
        "        _, _, expregrets = instance.simulate_egalucb(T)\n",
        "        block_regrets.append(expregrets)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "tYTbx90c89jP"
      },
      "outputs": [],
      "source": [
        "# The per-run arrays differ in shape across user counts, so they are stored as an object array.\n",
        "np.save(\n",
        "    file=f'{rootdir}/ex2-bernoulli-v2.npy',\n",
        "    arr=np.array(expregrets_list, dtype=object),\n",
        ")"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "#### Plot of regret over users"
      ],
      "metadata": {
        "id": "-k_Wc4u10jUx"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Reload the saved Experiment 2 results; the files hold object arrays, hence allow_pickle=True.\n",
        "data_for_plots = {\n",
        "    'ex2-bernoulli-v1': np.load(f'{rootdir}/ex2-bernoulli-v1.npy', allow_pickle=True),\n",
        "    'ex2-bernoulli-v2': np.load(f'{rootdir}/ex2-bernoulli-v2.npy', allow_pickle=True),\n",
        "}"
      ],
      "metadata": {
        "id": "qFPjSRCSLmXL"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "fig, ax = plt.subplots(figsize=(pt2inches(200, 100)), dpi=120)\n",
        "\n",
        "Us = [2, 4, 8, 16, 32, 64, 128, 256]\n",
        "\n",
        "# Last-timestep cumulative regret of user 0, one value per entry of Us.\n",
        "runs = data_for_plots['ex2-bernoulli-v1']\n",
        "regrets = np.array([runs[idx][0, -1] for idx in range(len(Us))], dtype=float)\n",
        "\n",
        "marker_style = dict(marker='.', markeredgecolor='#EE6677', markeredgewidth=0.5, markerfacecolor='#FFFFFF', markersize=3)\n",
        "ax.plot(Us, regrets, color='#BBBBBB', linestyle='--', linewidth=0.5, **marker_style)\n",
        "\n",
        "# Base-2 log scale on both axes, with a tick at each listed power of two.\n",
        "for set_scale in (ax.set_xscale, ax.set_yscale):\n",
        "    set_scale('log', base=2)\n",
        "\n",
        "ax.set_xticks([2 ** exponent for exponent in range(1, 9)])\n",
        "ax.set_yticks([2 ** exponent for exponent in range(9, 17)])\n",
        "ax.tick_params(axis='both', which='both', length=0)\n",
        "\n",
        "# Blank every other y tick label, starting with the first one.\n",
        "yticklabels = ax.get_yticklabels()\n",
        "for pos in range(0, len(yticklabels), 2):\n",
        "    yticklabels[pos] = ''\n",
        "ax.set_yticklabels(yticklabels)\n",
        "\n",
        "for side in ('top', 'right', 'bottom', 'left'):\n",
        "    ax.spines[side].set_visible(False)\n",
        "\n",
        "ax.set_xlabel('Number of users')\n",
        "ax.set_ylabel('Regret')\n",
        "\n",
        "for axis_name in ('x', 'y'):\n",
        "    ax.grid(alpha=0.25, axis=axis_name, color='#BBBBBB', linewidth=0.5)\n",
        "\n",
        "fig.show()\n",
        "fig.savefig(f'{rootdir}/ex2-bernoulli-v1.pdf', bbox_inches='tight')"
      ],
      "metadata": {
        "id": "lx3wBEyid2ET"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "0LqDky9789jP"
      },
      "outputs": [],
      "source": [
        "fig, ax = plt.subplots(figsize=(pt2inches(200, 100)), dpi=120)\n",
        "\n",
        "Us = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]\n",
        "\n",
        "# For each user count: mean and standard deviation, over repetitions, of user 0's final cumulative regret.\n",
        "regrets_mean = np.zeros(len(Us))\n",
        "regrets_std = np.zeros(len(Us))\n",
        "\n",
        "for idx, repetitions in enumerate(data_for_plots['ex2-bernoulli-v2'][:len(Us)]):\n",
        "    regrets = np.stack(repetitions)[:, 0, -1]\n",
        "    regrets_mean[idx] = np.mean(regrets)\n",
        "    regrets_std[idx] = np.std(regrets)\n",
        "\n",
        "# Error bars extend three standard deviations on either side of the mean.\n",
        "marker_style = dict(marker='.', markeredgecolor='#EE6677', markeredgewidth=0.5, markerfacecolor='#FFFFFF', markersize=3)\n",
        "ax.errorbar(Us, regrets_mean, yerr=regrets_std * 3, color='#BBBBBB', ecolor='#EE6677', elinewidth=0.5, linestyle='--', linewidth=0.5, **marker_style)\n",
        "\n",
        "# x ticks are shown as-is; y ticks are shown in hundreds.\n",
        "xticks = matplotlib.ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(x))\n",
        "yticks = matplotlib.ticker.FuncFormatter(lambda y, pos: '{0:g}'.format(y / 100))\n",
        "\n",
        "ax.xaxis.set_major_formatter(xticks)\n",
        "ax.yaxis.set_major_formatter(yticks)\n",
        "ax.set_xticks(np.arange(0, 21, 2))\n",
        "ax.set_yticks(np.arange(0, 1801, 600))\n",
        "ax.tick_params(axis='both', which='both', length=0)\n",
        "\n",
        "# Blank the first x tick label (the tick at 0).\n",
        "xticklabels = ax.get_xticklabels()\n",
        "xticklabels[0] = ''\n",
        "ax.set_xticklabels(xticklabels)\n",
        "\n",
        "for side in ('top', 'right', 'bottom', 'left'):\n",
        "    ax.spines[side].set_visible(False)\n",
        "\n",
        "ax.set_xlabel('Number of users')\n",
        "ax.set_ylabel('Regret\\n(in hundreds)')\n",
        "ax.grid(alpha=0.25, axis='y', color='#BBBBBB', linewidth=0.5)\n",
        "\n",
        "fig.show()\n",
        "fig.savefig(f'{rootdir}/ex2-bernoulli-v2.pdf', bbox_inches='tight')"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "collapsed_sections": [
        "KZIfDThxMJs9",
        "U00eLiiy_y3D",
        "a67qFnhK_38D",
        "p6xHjPCW_-UU",
        "DDTKdVf2x_gF",
        "Dnew9fY04O9f",
        "V76aP0N_89jF",
        "oWjBSXJx11xV",
        "XluRbAZIz8DR",
        "A-arPT2VARF9",
        "rfgGONsB2vL9"
      ],
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}