{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "4e263423",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np \n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5d8f918b",
   "metadata": {},
   "source": [
    "# General params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "03c44181",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Horizon (number of rounds) passed as `n` to every simulated run.\n",
    "n_iter = 10 ** 7\n",
    "# Number of independent repetitions per (environment, epsilon) pair.\n",
    "n_mc = 20\n",
    "\n",
    "# Arm means of the four 5-armed environments.\n",
    "list_mu1 = [0.75, 0.70, 0.70, 0.70, 0.70]\n",
    "list_mu2 = [0.75, 0.625, 0.5, 0.375, 0.25]\n",
    "list_mu3 = [0.75, 0.53125, 0.375, 0.28125, 0.25]\n",
    "list_mu4 = [0.75, 0.71875, 0.625, 0.46875, 0.25]\n",
    "\n",
    "# Privacy budgets (epsilon) used in the experiments.\n",
    "eps1 = 0.1\n",
    "eps2 = 0.25\n",
    "eps3 = 0.5\n",
    "eps4 = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "bc6cb5eb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Environments and privacy budgets, in the order used to number the output files.\n",
    "list_mu = [\n",
    "    list_mu1,\n",
    "    list_mu2,\n",
    "    list_mu3,\n",
    "    list_mu4,\n",
    "]\n",
    "epss = [\n",
    "    eps1,\n",
    "    eps2,\n",
    "    eps3,\n",
    "    eps4,\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "d5c9ec4b",
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_results(f, list_mu, n_iter, n_mc):\n",
    "    \"\"\"Repeat a simulation `n_mc` times and stack the first element of each result.\n",
    "\n",
    "    `f` is called as `f(mu=list_mu, n=n_iter)` (no privacy parameter); only\n",
    "    index 0 of what it returns is kept. The collected values are returned as\n",
    "    a numpy array with one entry (row) per repetition.\n",
    "    \"\"\"\n",
    "    curves = []\n",
    "    for _ in range(n_mc):\n",
    "        outcome = f(mu=list_mu, n=n_iter)\n",
    "        curves.append(outcome[0])\n",
    "    return np.array(curves)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "528880a7",
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_private_results(f, list_mu, epsilon, n_iter, n_mc):\n",
    "    \"\"\"Repeat a private simulation `n_mc` times and stack the first element of each result.\n",
    "\n",
    "    `f` is called as `f(mu=list_mu, epsilon=epsilon, n=n_iter)`; only index 0\n",
    "    of what it returns is kept. The collected values are returned as a numpy\n",
    "    array with one entry (row) per repetition.\n",
    "    \"\"\"\n",
    "    curves = []\n",
    "    for _ in range(n_mc):\n",
    "        outcome = f(mu=list_mu, epsilon=epsilon, n=n_iter)\n",
    "        curves.append(outcome[0])\n",
    "    return np.array(curves)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "679c967f",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_results(gen_res):\n",
    "    \"\"\"Plot the mean over axis 0 of `gen_res` with a +/- one standard deviation band.\"\"\"\n",
    "    center = gen_res.mean(axis=0)\n",
    "    spread = gen_res.std(axis=0)\n",
    "    x_axis = np.arange(gen_res.shape[-1])\n",
    "    plt.plot(center)\n",
    "    plt.fill_between(x_axis, center - spread, center + spread, facecolor='b', alpha=0.1)\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "4c788055",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_private_results(gen_res_eps1, gen_res_eps2, gen_res_eps3, gen_res_eps4):\n",
    "    \"\"\"Overlay four result sets, each as a mean curve with a +/- one std band.\n",
    "\n",
    "    The four arguments are drawn in blue, green, red and magenta respectively,\n",
    "    all on the same figure, which is then shown.\n",
    "    \"\"\"\n",
    "    colored_runs = zip(\n",
    "        (gen_res_eps1, gen_res_eps2, gen_res_eps3, gen_res_eps4),\n",
    "        ('b', 'g', 'r', 'm'),\n",
    "    )\n",
    "    for runs, color in colored_runs:\n",
    "        center = runs.mean(axis=0)\n",
    "        spread = runs.std(axis=0)\n",
    "        plt.plot(center, color)\n",
    "        plt.fill_between(np.arange(runs.shape[-1]), center - spread, center + spread, facecolor=color, alpha=0.1)\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d40f94fb",
   "metadata": {},
   "source": [
    "# DP-UCB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "6f1a421b",
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_regret_DP_UCB(mu, epsilon, n = 10000):\n",
    "    \"\"\"Simulate DP-UCB on a Bernoulli bandit and return its cumulative regret.\n",
    "\n",
    "    Every arm is treated as played once up front (its statistics start at its\n",
    "    mean). Each remaining round plays the arm maximising a UCB index computed\n",
    "    from noisy per-arm reward sums, which are maintained with a binary-tree\n",
    "    mechanism perturbed by Laplace noise.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    mu : sequence of float\n",
    "        Bernoulli means of the arms.\n",
    "    epsilon : float\n",
    "        Total privacy budget; it is divided by the number of arms and then by\n",
    "        log(n) to set the Laplace scale of each tree node.\n",
    "    n : int\n",
    "        Horizon, counting the initial pull of each arm.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    regret : list of float\n",
    "        Cumulative regret after each round (initial pulls included).\n",
    "    actions : list\n",
    "        Arms selected in the rounds that follow the initial pulls.\n",
    "    \"\"\"\n",
    "    mu = np.array(mu)\n",
    "    # Optimal arm's mean\n",
    "    u_star = max(mu)\n",
    "    # number of arms\n",
    "    n_arms = len(mu)\n",
    "    # Pull count of each arm, we start by exploring each action once\n",
    "    k_n = np.ones(n_arms)\n",
    "    # Noisy sums for each arm, initialised with the means (one pull per arm)\n",
    "    noisy_sums = mu.copy()\n",
    "    # regret accumulated during the initial pulls\n",
    "    regret = list((u_star - mu).cumsum())\n",
    "    r = (u_star - mu).sum()\n",
    "    actions = []\n",
    "\n",
    "    # private bonus added to the UCB index\n",
    "    k = n_arms\n",
    "    gamma = k*(np.log(n)**2)*(np.log((k * n * np.log(n)) / 0.1))/epsilon\n",
    "\n",
    "    # each tree mechanism is initiated with epsilon/k\n",
    "    epsilon = epsilon/n_arms\n",
    "    epsilon1 = epsilon / np.log(n)\n",
    "    # One tree level per bit of the largest step index. bit_length() is exact\n",
    "    # (no floating-point log) and yields 0 levels when n <= n_arms, in which\n",
    "    # case the loop below does not run.\n",
    "    n_levels = max(int(n) - n_arms, 0).bit_length()\n",
    "    level_ids = np.arange(n_levels)\n",
    "    alpha = np.zeros((n_levels, n_arms))\n",
    "    alpha_hat = np.zeros((n_levels, n_arms))\n",
    "\n",
    "    for t in range(n_arms+1, n+1):\n",
    "        # Select action according to the UCB criterion on the noisy statistics\n",
    "        a = np.argmax(noisy_sums/k_n + np.sqrt(2*np.log(t)/k_n) + gamma/k_n)\n",
    "        actions.append(a)\n",
    "        # Draw the reward once: the same sample feeds the statistics and the regret\n",
    "        reward = np.random.binomial(n=1, p=mu[a])\n",
    "        reward_stream_at_t = np.zeros(n_arms)\n",
    "        reward_stream_at_t[a] = reward\n",
    "        if t == n_arms+1:\n",
    "            # fold the initial pulls into the first element of the stream\n",
    "            reward_stream_at_t += mu\n",
    "        # number of pulls\n",
    "        k_n[a] += 1\n",
    "        # noisy sums using the tree mechanism; bits[j] is bit j of the step index\n",
    "        step = t - n_arms\n",
    "        bits = (step >> level_ids) & 1\n",
    "        # lowest set bit: the node that absorbs all lower levels\n",
    "        i = int(np.flatnonzero(bits)[0])\n",
    "        alpha[i] = alpha[:i].sum(axis=0) + reward_stream_at_t\n",
    "        alpha[:i] = 0\n",
    "        alpha_hat[:i] = 0\n",
    "        alpha_hat[i] = alpha[i] + np.random.laplace(loc=0.0, scale=1/epsilon1, size=n_arms)\n",
    "        # the released sum adds the noisy nodes at the set bits of the step index\n",
    "        noisy_sums = alpha_hat.T.dot(bits)\n",
    "        # update the cumulative regret with the realised reward\n",
    "        r += u_star - reward\n",
    "        regret.append(r)\n",
    "\n",
    "    return regret, actions"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ab9f6122",
   "metadata": {},
   "source": [
    "### Generating and saving results in .npy files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "1a1f3e83",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One file per (environment, privacy budget) pair, numbered from 1.\n",
    "for i, list_mu_i in enumerate(list_mu):\n",
    "    for j, eps_j in enumerate(epss):\n",
    "        gen_res_private_ucb_i_eps_j = generate_private_results(\n",
    "            compute_regret_DP_UCB, list_mu_i, eps_j, n_iter, n_mc\n",
    "        )\n",
    "        name_file_i_j = \"dp_ucb_\" + str(i + 1) + \"_eps\" + str(j + 1) + \".npy\"\n",
    "        with open(name_file_i_j, 'wb') as f:\n",
    "            np.save(f, gen_res_private_ucb_i_eps_j)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ffc807cc",
   "metadata": {},
   "source": [
    "# DP-SE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "28d3d4db",
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_regret_dp_successive(mu, epsilon, n):\n",
    "    \"\"\"Simulate DP Successive Elimination on a Bernoulli bandit.\n",
    "\n",
    "    The active arms are pulled round-robin in epochs. At the end of an epoch,\n",
    "    Laplace noise is added to the epoch's empirical means and every arm whose\n",
    "    noisy mean is more than 2*(h_e + c_e) below the best noisy mean is dropped.\n",
    "    When a single arm remains it is played until the horizon.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    mu : sequence of float\n",
    "        Bernoulli means of the arms.\n",
    "    epsilon : float\n",
    "        Privacy budget.\n",
    "    n : int\n",
    "        Horizon; also sets the confidence parameter beta = 1/n.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    regrets : list of float\n",
    "        Cumulative pseudo-regret after each round.\n",
    "    placeholder : list\n",
    "        One-element list: [0] when the horizon is hit while sampling, [0.1]\n",
    "        otherwise.\n",
    "    \"\"\"\n",
    "    t = 0\n",
    "    epoch = 0\n",
    "    n_arms = len(mu)\n",
    "    # set of active arms\n",
    "    S = list(np.arange(n_arms))\n",
    "    u_star = max(mu)\n",
    "    beta = 1/n\n",
    "    regret = 0\n",
    "    regrets = []\n",
    "    while len(S) > 1:\n",
    "        epoch += 1\n",
    "        r = 0\n",
    "        # statistics are reset at the start of every epoch\n",
    "        emp_means = np.zeros(n_arms)\n",
    "        sums = np.zeros(n_arms)\n",
    "        num_pulls = np.zeros(n_arms)\n",
    "        mu_noised = np.zeros(n_arms)\n",
    "        delta_e = 2**(-epoch)\n",
    "        log_h = np.log(8 * len(S) * (epoch**2) / beta)\n",
    "        log_c = np.log(4 * len(S) * (epoch**2) / beta)\n",
    "        # number of round-robin passes in this epoch\n",
    "        R_e_1 = 32 * log_h / ((delta_e)**2)\n",
    "        R_e_2 = 8 * log_c / (delta_e * epsilon)\n",
    "        R_e = max(R_e_1, R_e_2) + 1\n",
    "        while r < R_e:\n",
    "            r += 1\n",
    "            for a in S:\n",
    "                t += 1\n",
    "                reward_a_t = np.random.binomial(n=1, p=mu[a])\n",
    "                regret += u_star - mu[a]\n",
    "                regrets.append(regret)\n",
    "                sums[a] += reward_a_t\n",
    "                num_pulls[a] += 1\n",
    "                emp_means[a] = sums[a]/num_pulls[a]\n",
    "                if t >= n:\n",
    "                    return regrets, [0]\n",
    "        h_e = np.sqrt(log_h / (2*R_e))\n",
    "        c_e = log_c / (R_e*epsilon)\n",
    "        for a in S:\n",
    "            mu_noised[a] = emp_means[a] + np.random.laplace(loc=0.0, scale=1/(epsilon*r))\n",
    "        mu_noised_max = max(mu_noised[S])\n",
    "        threshold = 2*(h_e + c_e)\n",
    "        # Rebuild the active set instead of calling S.remove() while iterating\n",
    "        # over S, which skipped the arm following each eliminated one.\n",
    "        S = [j for j in S if not (mu_noised_max - mu_noised[j] > threshold)]\n",
    "    if t < n:\n",
    "        # regrets is empty when there was a single arm from the start\n",
    "        reg = regrets[-1] if regrets else 0\n",
    "        a = S[0]\n",
    "        gap = u_star - mu[a]\n",
    "        # the k-th remaining round adds k gaps on top of the regret so far\n",
    "        regrets += [reg + k*gap for k in range(1, n - t + 1)]\n",
    "    return regrets, [0.1]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "05359054",
   "metadata": {},
   "source": [
    "### Generating and saving results in .npy files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "8a6556f4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One file per (environment, privacy budget) pair, numbered from 1.\n",
    "for i, list_mu_i in enumerate(list_mu):\n",
    "    for j, eps_j in enumerate(epss):\n",
    "        gen_res_private_ucb_i_eps_j = generate_private_results(\n",
    "            compute_regret_dp_successive, list_mu_i, eps_j, n_iter, n_mc\n",
    "        )\n",
    "        name_file_i_j = \"dp_se_\" + str(i + 1) + \"_eps\" + str(j + 1) + \".npy\"\n",
    "        with open(name_file_i_j, 'wb') as f:\n",
    "            np.save(f, gen_res_private_ucb_i_eps_j)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cff0af15",
   "metadata": {},
   "source": [
    "# Adap-UCB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "dcdeae85",
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_regret_private_adap_ucb(mu, epsilon = 0.2, n = 10000, alpha=3.1):\n",
    "    \"\"\"Simulate the private adaptive-epoch UCB algorithm on a Bernoulli bandit.\n",
    "\n",
    "    Every arm starts with an epoch of size 1 whose private mean is its true\n",
    "    mean plus Laplace noise. Each time an arm is selected it is played for\n",
    "    twice its previous epoch size, and its private mean is recomputed from the\n",
    "    new samples only, plus fresh Laplace noise.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    mu : sequence of float\n",
    "        Bernoulli means of the arms.\n",
    "    epsilon : float\n",
    "        Privacy budget used for the Laplace scale and the index bonus.\n",
    "    n : int\n",
    "        Horizon.\n",
    "    alpha : float\n",
    "        Multiplier of log(t) in the index.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    regret : list of float\n",
    "        Cumulative pseudo-regret after each round.\n",
    "    actions : list\n",
    "        Arm played at each round, aligned with `regret`.\n",
    "    \"\"\"\n",
    "    mu = np.array(mu)\n",
    "    # Optimal arm's mean\n",
    "    u_star = max(mu)\n",
    "    # number of arms\n",
    "    n_arms = len(mu)\n",
    "    # epoch size for each arm\n",
    "    s = np.ones(n_arms, dtype=int)\n",
    "    # private empirical means\n",
    "    mu_tilda = mu.copy() + np.random.laplace(loc=0.0, scale=1/(epsilon*s))\n",
    "    t = n_arms + 1\n",
    "    # regret of the initial pulls\n",
    "    regret = list((u_star - mu).cumsum())\n",
    "    r = (u_star - mu).sum()\n",
    "    actions = list(np.arange(n_arms))\n",
    "\n",
    "    while t < n:\n",
    "        # Select the arm with the largest private UCB index\n",
    "        log_term = alpha*np.log(t)\n",
    "        a = np.argmax(mu_tilda + log_term/(epsilon*s) + np.sqrt(log_term/(2*s)))\n",
    "        new_s_a = s[a]*2\n",
    "        mu_tilda[a] = np.random.binomial(n=1, p=mu[a], size=new_s_a).mean() + np.random.laplace(loc=0.0, scale=1/(epsilon*new_s_a))\n",
    "        # truncate the last epoch at the horizon\n",
    "        n_pulls = int(min(new_s_a, n - t))\n",
    "        gap = u_star - mu[a]\n",
    "        # Every pull of the batch adds one gap: start at k=1 so that the first\n",
    "        # pull is not recorded with zero regret.\n",
    "        regret += [r + k*gap for k in range(1, n_pulls + 1)]\n",
    "        actions += [a] * n_pulls\n",
    "        r = regret[-1]\n",
    "        t = t + new_s_a\n",
    "        s[a] = new_s_a\n",
    "\n",
    "    return regret, actions"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bbd29859",
   "metadata": {},
   "source": [
    "### Generating and saving results in .npy files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "9f136e5a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One file per (environment, privacy budget) pair, numbered from 1.\n",
    "for i, list_mu_i in enumerate(list_mu):\n",
    "    for j, eps_j in enumerate(epss):\n",
    "        gen_res_private_ucb_i_eps_j = generate_private_results(\n",
    "            compute_regret_private_adap_ucb, list_mu_i, eps_j, n_iter, n_mc\n",
    "        )\n",
    "        name_file_i_j = \"dp_ucb_\" + str(i + 1) + \"_sliding_eps\" + str(j + 1) + \".npy\"\n",
    "        with open(name_file_i_j, 'wb') as f:\n",
    "            np.save(f, gen_res_private_ucb_i_eps_j)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "05f2a631",
   "metadata": {},
   "source": [
    "# Adap-KLUCB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "d2b6beed",
   "metadata": {},
   "outputs": [],
   "source": [
    "def kl(x, y, eps = 1e-15):\n",
    "    \"\"\"Bernoulli KL divergence kl(x, y), with both arguments clamped to [eps, 1 - eps].\"\"\"\n",
    "    p = min(max(x, eps), 1 - eps)\n",
    "    q = min(max(y, eps), 1 - eps)\n",
    "    success_term = p * np.log(p / q)\n",
    "    failure_term = (1 - p) * np.log((1 - p) / (1 - q))\n",
    "    return success_term + failure_term"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "83c9203b",
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_u(x, d, precision=1e-6, max_iterations=50):\n",
    "    \"\"\"Bisection on [max(x, 0), 1] for the point where kl(x, .) crosses d.\n",
    "\n",
    "    `x` is the mean reward and `d` the upper bound on the KL term of the\n",
    "    kl-UCB criterion. The search halves the bracket until it is no wider than\n",
    "    `precision` or `max_iterations` halvings were done, and returns the\n",
    "    midpoint of the final bracket.\n",
    "    \"\"\"\n",
    "    lo = max(x, 0)\n",
    "    hi = 1\n",
    "    for _ in range(max_iterations):\n",
    "        if not (hi - lo > precision):\n",
    "            break\n",
    "        mid = (lo + hi) / 2.\n",
    "        if kl(x, mid) > d:\n",
    "            # divergence already too large: the answer lies below mid\n",
    "            hi = mid\n",
    "        else:\n",
    "            lo = mid\n",
    "    return (lo + hi) / 2."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "8b1b7ad0",
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_regret_private_adap_klucb(mu, epsilon = 0.2, n = 10000, alpha=3.1):\n",
    "    \"\"\"Simulate the private adaptive-epoch kl-UCB algorithm on a Bernoulli bandit.\n",
    "\n",
    "    Every arm starts with an epoch of size 1 whose private mean is its true\n",
    "    mean plus Laplace noise. Each time an arm is selected it is played for\n",
    "    twice its previous epoch size, and its private mean is recomputed from the\n",
    "    new samples only, plus fresh Laplace noise. The index of an arm is the\n",
    "    kl-UCB bound (`find_u`) of its private mean shifted by d/epsilon and\n",
    "    clipped to [0, 1].\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    mu : sequence of float\n",
    "        Bernoulli means of the arms.\n",
    "    epsilon : float\n",
    "        Privacy budget used for the Laplace scale and the index shift.\n",
    "    n : int\n",
    "        Horizon.\n",
    "    alpha : float\n",
    "        Multiplier of log(t) in the index.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    regret : list of float\n",
    "        Cumulative pseudo-regret after each round.\n",
    "    actions : list\n",
    "        Arm played at each round, aligned with `regret`.\n",
    "    \"\"\"\n",
    "    mu = np.array(mu)\n",
    "    # Optimal arm's mean\n",
    "    u_star = max(mu)\n",
    "    # number of arms\n",
    "    n_arms = len(mu)\n",
    "    # epoch size for each arm\n",
    "    s = np.ones(n_arms, dtype=int)\n",
    "    # private empirical means\n",
    "    mu_tilda = mu.copy() + np.random.laplace(loc=0.0, scale=1/(epsilon*s))\n",
    "    t = n_arms + 1\n",
    "    # regret of the initial pulls\n",
    "    regret = list((u_star - mu).cumsum())\n",
    "    r = (u_star - mu).sum()\n",
    "    actions = list(np.arange(n_arms))\n",
    "\n",
    "    while t < n:\n",
    "        # Private kl-UCB index of every arm\n",
    "        indexes = []\n",
    "        for action in range(n_arms):\n",
    "            d = (alpha * np.log(t)) / s[action]\n",
    "            clipped = min(max(mu_tilda[action] + d/epsilon, 0), 1)\n",
    "            indexes.append(find_u(clipped, d))\n",
    "        indexes = np.array(indexes)\n",
    "        # ties between maximal indexes are broken uniformly at random\n",
    "        a = np.random.choice(np.flatnonzero(indexes == indexes.max()))\n",
    "        new_s_a = s[a]*2\n",
    "        mu_tilda[a] = np.random.binomial(n=1, p=mu[a], size=new_s_a).mean() + np.random.laplace(loc=0.0, scale=1/(epsilon*new_s_a))\n",
    "        # truncate the last epoch at the horizon\n",
    "        n_pulls = int(min(new_s_a, n - t))\n",
    "        gap = u_star - mu[a]\n",
    "        # Every pull of the batch adds one gap: start at k=1 so that the first\n",
    "        # pull is not recorded with zero regret.\n",
    "        regret += [r + k*gap for k in range(1, n_pulls + 1)]\n",
    "        actions += [a] * n_pulls\n",
    "        r = regret[-1]\n",
    "        t = t + new_s_a\n",
    "        s[a] = new_s_a\n",
    "\n",
    "    return regret, actions"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b2482936",
   "metadata": {},
   "source": [
    "### Generating and saving results in .npy files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "bafa9a8c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One file per (environment, privacy budget) pair, numbered from 1.\n",
    "for i, list_mu_i in enumerate(list_mu):\n",
    "    for j, eps_j in enumerate(epss):\n",
    "        gen_res_private_ucb_i_eps_j = generate_private_results(\n",
    "            compute_regret_private_adap_klucb, list_mu_i, eps_j, n_iter, n_mc\n",
    "        )\n",
    "        name_file_i_j = \"dp_kl_ucb_\" + str(i + 1) + \"_sliding_eps\" + str(j + 1) + \".npy\"\n",
    "        with open(name_file_i_j, 'wb') as f:\n",
    "            np.save(f, gen_res_private_ucb_i_eps_j)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c4fe08fa",
   "metadata": {},
   "source": [
    "### Generating results for the Adap-KLUCB upper bound used in the privacy regimes plot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "ebcda715",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Environment of the privacy-regimes experiment. It has its own name so that the\n",
    "# global `list_mu` (the list of environments used by the generation cells) is not\n",
    "# overwritten, which would break those cells if they were run again.\n",
    "mu_privacy_regimes = [0.8, 0.1, 0.1, 0.1, 0.1]\n",
    "\n",
    "\n",
    "def asympt_regret_AdaP_KL_UCB_list_mu1_n10_7(epsilon, mu=None, n=None, n_runs=5):\n",
    "    \"\"\"Mean and standard deviation of the final Adap-KLUCB regret over several runs.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    epsilon : float\n",
    "        Privacy budget passed to `compute_regret_private_adap_klucb`.\n",
    "    mu : sequence of float, optional\n",
    "        Arm means; defaults to `mu_privacy_regimes`.\n",
    "    n : int, optional\n",
    "        Horizon; defaults to the global `n_iter`.\n",
    "    n_runs : int\n",
    "        Number of independent runs (default 5).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    (mean, std) of the last value of the regret curve across the runs.\n",
    "    \"\"\"\n",
    "    if mu is None:\n",
    "        mu = mu_privacy_regimes\n",
    "    if n is None:\n",
    "        n = n_iter\n",
    "    # Keep only the last regret value of each run instead of stacking the full curves\n",
    "    final_regrets = np.array([\n",
    "        compute_regret_private_adap_klucb(mu=mu, epsilon=epsilon, n=n)[0][-1]\n",
    "        for _ in range(n_runs)\n",
    "    ])\n",
    "    return final_regrets.mean(), final_regrets.std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "00a8aa58",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sweep 50 evenly spaced privacy budgets in [0.05, 1]; one (mean, std) row per budget.\n",
    "epsilons = np.linspace(0.05, 1, 50)\n",
    "adap_kl_upper_bounds = np.array(\n",
    "    list(map(asympt_regret_AdaP_KL_UCB_list_mu1_n10_7, epsilons))\n",
    ")\n",
    "with open('adap_kl_upper_bound.npy', 'wb') as f:\n",
    "    np.save(f, adap_kl_upper_bounds)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
