{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "a62bf039-6fa1-4334-930b-6f39e9f9a824",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np \n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0738f621-478d-4714-8e5e-b0f7e3755acd",
   "metadata": {},
   "source": [
    "# General params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "3a0c50cd-74c0-43f8-bbca-f6c2a656775d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Horizon T: number of pulls simulated in every run\n",
    "n_iter = 10**6\n",
    "# Number of independent Monte Carlo repetitions of each experiment\n",
    "n_mc = 100\n",
    "# Seed NumPy's global generator so that Restart & Run All reproduces the saved regrets\n",
    "seed = 0\n",
    "np.random.seed(seed)\n",
    "# Bandit environments: each list holds the Bernoulli arm means of one instance\n",
    "list_mu1 = [0.75,0.70,0.70,0.70,0.70]\n",
    "list_mu2 = [0.75,0.625,0.5,0.375,0.25]\n",
    "list_mu3 = [0.75,0.53125,0.375,0.28125,0.25]\n",
    "list_mu4 = [0.75,0.71875,0.625,0.46875,0.25]\n",
    "list_mu = [list_mu1, list_mu2, list_mu3, list_mu4]\n",
    "# Privacy budgets epsilon passed to the private algorithms\n",
    "eps1, eps2, eps3, eps4, eps5 = 0.01, 0.1, 0.25, 0.5, 1\n",
    "epss = [eps1, eps2 ,eps3, eps4, eps5]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "64ea68d8-b72d-4f9a-ad7c-c5ce3aec345e",
   "metadata": {},
   "source": [
    "# IMED Baseline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "33b48362-f9c0-4b75-86b9-b87fcca9f9b5",
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_regret_imed(mu, n = 10000):\n",
    "    \"\"\"Simulate the non-private IMED baseline on a Bernoulli bandit.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    mu : sequence of float\n",
    "        Mean reward of each arm.\n",
    "    n : int\n",
    "        Horizon: total number of pulls, the initial round-robin included.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    regret : list of float\n",
    "        Cumulative pseudo-regret after each pull.\n",
    "    actions : list of int\n",
    "        Arm pulled at each step, starting with the initial round-robin.\n",
    "\n",
    "    Relies on the notebook-level helper kl (Bernoulli KL divergence).\n",
    "    \"\"\"\n",
    "    mu = np.array(mu)\n",
    "    # Optimal arm's mean\n",
    "    u_star = max(mu)\n",
    "    # number of arms\n",
    "    n_arms = len(mu)\n",
    "    # Pseudo-regret paid by a single pull of each arm\n",
    "    gaps = u_star - mu\n",
    "    # Step count for each arm, we start by exploring each action once\n",
    "    k_n = np.ones(n_arms)\n",
    "    # Reward sums for each arm, init by taking one realisation of each arm\n",
    "    sums = np.random.binomial(n=1, p=mu)\n",
    "    # Regret and actions of the initial round-robin (arms pulled in index order)\n",
    "    regret = list(gaps.cumsum())\n",
    "    r = gaps.sum()\n",
    "    actions = list(np.arange(n_arms))\n",
    "\n",
    "    # Steps n_arms + 1, ..., n inclusive, so that exactly n pulls are recorded\n",
    "    for t in range(n_arms + 1, n + 1):\n",
    "        # IMED index: N_a * kl(mu_hat_a, mu_hat_star) + log(N_a)\n",
    "        mu_hat = sums / k_n\n",
    "        mu_hat_star = np.max(mu_hat)\n",
    "        indexes = np.array([\n",
    "            k_n[arm] * kl(mu_hat[arm], mu_hat_star) + np.log(k_n[arm])\n",
    "            for arm in range(n_arms)\n",
    "        ])\n",
    "        # Pull an arm of minimal index, ties broken uniformly at random\n",
    "        a = np.random.choice(np.flatnonzero(indexes == indexes.min()))\n",
    "        actions.append(a)\n",
    "        reward = np.random.binomial(n=1, p=mu[a])\n",
    "        # Update the statistics of the pulled arm\n",
    "        k_n[a] += 1\n",
    "        sums[a] += reward\n",
    "        r += gaps[a]\n",
    "        regret.append(r)\n",
    "\n",
    "    return regret, actions"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5671cbbd-62f2-4625-b318-9de55a521dc5",
   "metadata": {},
   "source": [
    "# DP-SE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "50f4dcca-09da-4042-835a-0a2ded6d05d0",
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_regret_dp_se(mu, epsilon, n):\n",
    "    \"\"\"Simulate DP-SE (private successive elimination) on a Bernoulli bandit.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    mu : sequence of float\n",
    "        Mean reward of each arm.\n",
    "    epsilon : float\n",
    "        Privacy budget; scales the Laplace noise added to the epoch means.\n",
    "    n : int\n",
    "        Horizon (total number of pulls).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    regrets : list of float\n",
    "        Cumulative pseudo-regret after each of the n pulls.\n",
    "    actions : list of int\n",
    "        Arm pulled at each step.\n",
    "    \"\"\"\n",
    "    n_arms = len(mu)\n",
    "    u_star = max(mu)\n",
    "    # Confidence parameter\n",
    "    beta = 1/n\n",
    "    t = 0\n",
    "    epoch = 0\n",
    "    # set of active arms\n",
    "    S = list(range(n_arms))\n",
    "    regret = 0\n",
    "    regrets = []\n",
    "    actions = []\n",
    "    while len(S) > 1:\n",
    "        epoch += 1\n",
    "        # Statistics are reset at every epoch: only this epoch's rewards are used\n",
    "        sums = np.zeros(n_arms)\n",
    "        num_pulls = np.zeros(n_arms)\n",
    "        emp_means = np.zeros(n_arms)\n",
    "        mu_noised = np.zeros(n_arms)\n",
    "        delta_e = 2**(-epoch)\n",
    "        # Number of round-robin passes over the active arms in this epoch\n",
    "        R_e_1 = 32 * np.log(8 * len(S) * (epoch**2) / beta) /  ((delta_e)**2)\n",
    "        R_e_2 = 8 * np.log(4 * len(S) * (epoch**2) / beta) /  (delta_e * epsilon)\n",
    "        R_e = max(R_e_1 , R_e_2 ) + 1\n",
    "        r = 0\n",
    "        while r < R_e:\n",
    "            r += 1\n",
    "            for a in S:\n",
    "                t += 1\n",
    "                # sample reward for each arm in the active set\n",
    "                reward_a_t = np.random.binomial(n=1, p = mu[a])\n",
    "                regret += u_star - mu[a]\n",
    "                regrets.append(regret)\n",
    "                actions.append(a)\n",
    "                sums[a] += reward_a_t\n",
    "                num_pulls[a] += 1\n",
    "                emp_means[a] = sums[a]/num_pulls[a]\n",
    "                if t >= n:\n",
    "                    return regrets, actions\n",
    "        # Confidence widths: h_e for sampling error, c_e for the Laplace noise\n",
    "        h_e = np.sqrt( np.log(8 * len(S) * (epoch**2) / beta) / (2*R_e))\n",
    "        c_e = np.log(4 * len(S) * (epoch**2) / beta) / (R_e*epsilon)\n",
    "        for a in S:\n",
    "            mu_noised[a] = emp_means[a] + np.random.laplace(loc=0.0, scale=1/(epsilon*r))\n",
    "        mu_noised_max = max(mu_noised[S])\n",
    "        # Rebuild the active set instead of calling S.remove() while iterating over S,\n",
    "        # which silently skips the arm that follows every removed one\n",
    "        S = [j for j in S if not (mu_noised_max - mu_noised[j] > 2*(h_e + c_e))]\n",
    "    if t<n:\n",
    "        # A single arm is left: it is played for the n - t remaining steps.\n",
    "        # k starts at 1 so that the first of these pulls already pays its gap.\n",
    "        a = S[0]\n",
    "        gap = u_star - mu[a]\n",
    "        regrets += [regret + k*gap for k in range(1, n - t + 1)]\n",
    "        actions += ([a] * (n-t))\n",
    "    return regrets, actions"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b7df014b-553e-4518-b9a9-68efaec7f4d5",
   "metadata": {},
   "source": [
    "# AdaP-KLUCB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "b22ddfc2-33b9-4249-aaee-8c9cac78ac89",
   "metadata": {},
   "outputs": [],
   "source": [
    "def kl(x, y, eps = 1e-15):\n",
    "    \"\"\"Bernoulli KL divergence between the means x and y.\n",
    "\n",
    "    Both means are clamped to [eps, 1 - eps] first so that the logarithms stay finite.\n",
    "    \"\"\"\n",
    "    p = min(max(x, eps), 1 - eps)\n",
    "    q = min(max(y, eps), 1 - eps)\n",
    "    success_term = p * np.log(p / q)\n",
    "    failure_term = (1 - p) * np.log((1 - p) / (1 - q))\n",
    "    return success_term + failure_term"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "d9179588-f79e-474f-83dc-00c6beca2d34",
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_u(x, d, precision=1e-6, max_iterations=50):\n",
    "    \"\"\"Bisection search on [max(x, 0), 1] for the point where kl(x, .) crosses d.\n",
    "\n",
    "    x is the mean reward / noisy clipped mean and d the optimism bound on kl\n",
    "    used by the kl-UCB criterion. The search stops after max_iterations halvings\n",
    "    or once the bracket is no wider than precision; the bracket midpoint is returned.\n",
    "    \"\"\"\n",
    "    lo = max(x, 0)\n",
    "    hi = 1\n",
    "    for _ in range(max_iterations):\n",
    "        if not (hi - lo > precision):\n",
    "            break\n",
    "        mid = (lo + hi) / 2.\n",
    "        # kl(x, mid) above the bound: the crossing point lies to the left of mid\n",
    "        if kl(x, mid) > d:\n",
    "            hi = mid\n",
    "        else:\n",
    "            lo = mid\n",
    "    return (lo + hi) / 2."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "8594f902-1399-44b6-a68a-ef7d24a72bc3",
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_regret_adap_klucb(mu, epsilon, n, alpha=3.1):\n",
    "    \"\"\"Simulate AdaP-KLUCB on a Bernoulli bandit.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    mu : sequence of float\n",
    "        Mean reward of each arm.\n",
    "    epsilon : float\n",
    "        Privacy budget; scales the Laplace noise added to each batch mean.\n",
    "    n : int\n",
    "        Horizon (total number of pulls, initial round-robin included).\n",
    "    alpha : float\n",
    "        Constant multiplying log(t) in the exploration bound.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    regret : list of float\n",
    "        Cumulative pseudo-regret after each pull.\n",
    "    actions : list of int\n",
    "        Arm pulled at each step.\n",
    "\n",
    "    Relies on the notebook-level helper find_u.\n",
    "    \"\"\"\n",
    "    mu = np.array(mu)\n",
    "    # Optimal arm's mean\n",
    "    u_star = max(mu)\n",
    "    # number of arms\n",
    "    n_arms = len(mu)\n",
    "    # Pseudo-regret paid by a single pull of each arm\n",
    "    gaps = u_star - mu\n",
    "    # epoch size for each arm\n",
    "    s = np.ones(n_arms, dtype=int)\n",
    "    # private empirical means, init with sampling one reward from each arm\n",
    "    mu_tilda = np.random.binomial(n=1, p=mu) + np.random.laplace(loc=0.0, scale=1/(epsilon*s), size = n_arms)\n",
    "    # index of the next pull (pulls 1..n_arms are the initial round-robin)\n",
    "    t = n_arms + 1\n",
    "    regret = list(gaps.cumsum())\n",
    "    r = gaps.sum()\n",
    "    actions = list(np.arange(n_arms))\n",
    "\n",
    "    while t <= n:\n",
    "        # Select action according to the noisy kl-UCB criterion\n",
    "        indexes = []\n",
    "        for action in range(n_arms):\n",
    "            d = (alpha * np.log(t)) / s[action]\n",
    "            clipped = min(max(mu_tilda[action] + d/epsilon, 0), 1)\n",
    "            indexes.append(find_u(clipped , d))\n",
    "        indexes = np.array(indexes)\n",
    "        a = np.random.choice(np.flatnonzero(indexes == indexes.max()))\n",
    "        # The chosen arm is played for a batch twice as long as its previous one;\n",
    "        # its private mean is rebuilt from this batch only\n",
    "        new_s_a = s[a]*2\n",
    "        mu_tilda[a] = np.random.binomial(n=1, p=mu[a], size=new_s_a).mean() + np.random.laplace(loc=0.0, scale=1/(epsilon*new_s_a))\n",
    "        # Record pulls t, ..., min(t + new_s_a - 1, n): the batch is cut at the horizon.\n",
    "        # k starts at 1 so that every recorded pull, the first one included, pays its gap.\n",
    "        n_pulls = min(new_s_a, n - t + 1)\n",
    "        regret += [r + k*gaps[a] for k in range(1, n_pulls + 1)]\n",
    "        actions += ([a] * n_pulls)\n",
    "        r = regret[-1]\n",
    "        t = t + new_s_a\n",
    "        s[a] = new_s_a\n",
    "\n",
    "    return regret, actions"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a92215d6-1a39-452d-a745-24df0ac1e3f5",
   "metadata": {},
   "source": [
    "# Lazy-DP-TS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "8b9fa713-81f1-4585-9561-c07d9d81588d",
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_regret_lazy_dp_ts(mu, epsilon, n):\n",
    "    \"\"\"Simulate Lazy-DP-TS (Thompson sampling with arm-dependent doubling) on a Bernoulli bandit.\n",
    "\n",
    "    mu holds the arm means, epsilon is the privacy budget scaling the Laplace noise\n",
    "    and n is the horizon. Returns (regret, actions): the cumulative pseudo-regret\n",
    "    after each of the n pulls and the arm pulled at each step.\n",
    "    \"\"\"\n",
    "    mu = np.array(mu)\n",
    "    n_arms = len(mu)\n",
    "    u_star = max(mu)\n",
    "    # Pseudo-regret paid by a single pull of each arm\n",
    "    gaps = u_star - mu\n",
    "    # Initial round-robin: one noisy reward per arm\n",
    "    O_n = np.ones(n_arms)\n",
    "    mu_tildas = np.random.binomial(n=1, p=mu) + np.random.laplace(loc=0.0, scale=1/(epsilon*O_n))\n",
    "    # Doubling counter of each arm and rewards collected since its last update\n",
    "    r_n = np.zeros(n_arms)\n",
    "    psi_n = [[] for _ in range(n_arms)]\n",
    "    regret = list(gaps.cumsum())\n",
    "    r = gaps.sum()\n",
    "    actions = list(np.arange(n_arms))\n",
    "\n",
    "    def sample_index(arm, step):\n",
    "        # Beta sample centred on the clipped noisy mean plus its optimism bonus\n",
    "        bonus = (3*np.log(step))/(epsilon*O_n[arm])\n",
    "        optimistic_mean = min(max(mu_tildas[arm] + bonus,0),1)\n",
    "        return np.random.beta(optimistic_mean * O_n[arm] + 1, (1 - optimistic_mean)* O_n[arm] + 1)\n",
    "\n",
    "    for t in range(n_arms + 1, n + 1):\n",
    "        thetas = np.array([sample_index(arm, t) for arm in range(n_arms)])\n",
    "        # Pull an arm of maximal sample, ties broken uniformly at random\n",
    "        a = np.random.choice(np.flatnonzero(thetas == thetas.max()))\n",
    "        actions.append(a)\n",
    "        reward = np.random.binomial(n=1, p= mu[a])\n",
    "        psi_n[a].append(reward)\n",
    "        # Statistics are refreshed only when the active batch reaches the next power of two\n",
    "        batch_target = 2**(r_n[a] + 1)\n",
    "        if len(psi_n[a]) == batch_target:\n",
    "            O_n[a] = batch_target\n",
    "            # The new noisy mean uses the active batch only; older rewards are forgotten\n",
    "            mu_tildas[a] = (np.array(psi_n[a]).sum() + np.random.laplace(loc=0.0, scale=1/(epsilon)))/O_n[a]\n",
    "            r_n[a] += 1\n",
    "            psi_n[a] = []\n",
    "        r += gaps[a]\n",
    "        regret.append(r)\n",
    "\n",
    "    return regret, actions"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7182109c-54d8-4dd1-b021-69aaf0bc53de",
   "metadata": {},
   "source": [
    "# DP-KL-UCB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "f742a519-e08a-431a-8d7c-57d82b7e6207",
   "metadata": {},
   "outputs": [],
   "source": [
    "def d_eps(x, y, epsilon, prec = 1e-15):\n",
    "    \"\"\"Privacy-aware divergence between Bernoulli means x and y used by DP-KL-UCB and DP-IMED.\n",
    "\n",
    "    Returns kl(x, y) when epsilon exceeds the log-odds gap between y and x, and\n",
    "    kl(z, y) + epsilon * (z - x) with z = y / (y + (1 - y) * exp(epsilon)) otherwise.\n",
    "    The means are clamped to [prec, 1 - prec] when the log-odds gap is computed.\n",
    "    \"\"\"\n",
    "    x_c = min(max(x, prec), 1 - prec)\n",
    "    y_c = min(max(y, prec), 1 - prec)\n",
    "    log_odds_gap = np.log(y_c / x_c) + np.log((1 - x_c) / (1 - y_c))\n",
    "    if epsilon > log_odds_gap:\n",
    "        return kl(x, y, prec)\n",
    "    z = y/(y + (1 - y)*(np.exp(epsilon)))\n",
    "    return kl( z, y , prec) + epsilon * (z - x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "31608298-4475-45fb-bbc2-98aa2f78b45c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_d_u(x, d, epsilon, precision=1e-6, max_iterations=50):\n",
    "    \"\"\"Bisection search on [max(x, 0), 1] for the point where d_eps(x, ., epsilon) crosses d.\n",
    "\n",
    "    x is the clipped noisy mean reward and d the bound used by the DP-KL-UCB criterion.\n",
    "    The search stops after max_iterations halvings or once the bracket is no wider\n",
    "    than precision; the bracket midpoint is returned.\n",
    "    \"\"\"\n",
    "    lo = max(x, 0)\n",
    "    hi = 1\n",
    "    for _ in range(max_iterations):\n",
    "        if not (hi - lo > precision):\n",
    "            break\n",
    "        mid = (lo + hi) / 2.\n",
    "        # divergence above the bound: the crossing point lies to the left of mid\n",
    "        if d_eps(x, mid, epsilon) > d:\n",
    "            hi = mid\n",
    "        else:\n",
    "            lo = mid\n",
    "    return (lo + hi) / 2."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "cd522d9d-1dda-4c05-8494-77dbdf1d6fbe",
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_regret_dp_klucb(mu, epsilon, n, alpha = 2, n_zero = 1):\n",
    "    \"\"\"Simulate DP-KL-UCB on a Bernoulli bandit.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    mu : sequence of float (list, tuple or ndarray)\n",
    "        Mean reward of each arm.\n",
    "    epsilon : float\n",
    "        Privacy budget; Laplace noise of scale 1/epsilon is added to every batch sum.\n",
    "    n : int\n",
    "        Horizon (total number of pulls, initial pulls included).\n",
    "    alpha : float\n",
    "        Geometric growth factor of the batch sizes; must be greater than 1.\n",
    "    n_zero : int\n",
    "        Number of initial pulls of each arm.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    regret : list of float\n",
    "        Cumulative pseudo-regret after each pull.\n",
    "    actions : list of int\n",
    "        Arm pulled at each step.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If alpha <= 1 (batch sizes could be zero and the loop would never end).\n",
    "\n",
    "    Relies on the notebook-level helper find_d_u.\n",
    "    \"\"\"\n",
    "    if alpha <= 1:\n",
    "        raise ValueError('alpha must be greater than 1')\n",
    "    mu_arr = np.array(mu)\n",
    "    # Optimal arm's mean\n",
    "    u_star = max(mu_arr)\n",
    "    # number of arms\n",
    "    n_arms = len(mu_arr)\n",
    "    # Pseudo-regret paid by a single pull of each arm\n",
    "    gaps = u_star - mu_arr\n",
    "    # n_zero rewards per arm; np.tile repeats the means for lists and arrays alike\n",
    "    # (n_zero*mu only repeats when mu is a list and rescales an ndarray instead)\n",
    "    init_rewards = np.random.binomial(n=1, p=np.tile(mu_arr, n_zero)).reshape((n_zero,n_arms))\n",
    "    # noisy sums\n",
    "    sums = init_rewards.sum(axis = 0) + np.random.laplace(loc=0.0, scale=1/epsilon, size = n_arms)\n",
    "    # noisy means\n",
    "    mu_tildas = sums/n_zero\n",
    "    # arm counts, init with n_zero for each\n",
    "    k_n = n_zero * np.ones(n_arms, dtype=int)\n",
    "    # epochs\n",
    "    m = np.zeros(n_arms, dtype=int)\n",
    "    # index of the next pull\n",
    "    t = n_arms * n_zero + 1\n",
    "    # regret init: every arm is pulled n_zero times in a row\n",
    "    actions = list(np.repeat(np.arange(n_arms),n_zero))\n",
    "    regret = list(gaps[actions].cumsum())\n",
    "    r = regret[-1]\n",
    "    while t <= n:\n",
    "        # Select action according to dp-kl-ucb criteria\n",
    "        indexes = []\n",
    "        for action in range(n_arms):\n",
    "            d = (np.log(t)) / k_n[action]\n",
    "            clipped = min(max(mu_tildas[action], 0), 1)\n",
    "            indexes.append(find_d_u(clipped , d, epsilon))\n",
    "        indexes = np.array(indexes)\n",
    "        a = np.random.choice(np.flatnonzero(indexes == indexes.max()))\n",
    "        # Update the epoch\n",
    "        m_a = m[a] + 1\n",
    "        # Batch size: difference of two consecutive truncated geometric sums\n",
    "        b_m = int((n_zero*(alpha**(m_a + 1) - 1))/(alpha - 1)) - int((n_zero*(alpha**(m_a) - 1))/(alpha - 1))\n",
    "        # Update noisy sums with b_m realisations of the reward\n",
    "        sums[a] += np.random.binomial(n=1, p=mu_arr[a], size=b_m).sum() + np.random.laplace(loc=0.0, scale=1/epsilon)\n",
    "        # Update arm counts and noisy means\n",
    "        k_n[a] += b_m\n",
    "        mu_tildas[a] = sums[a]/k_n[a]\n",
    "        # Record pulls t, ..., min(t + b_m - 1, n): the batch is cut at the horizon.\n",
    "        # k starts at 1 so that every recorded pull, the first one included, pays its gap.\n",
    "        n_pulls = min(b_m, n - t + 1)\n",
    "        regret += [r + k*gaps[a] for k in range(1, n_pulls + 1)]\n",
    "        actions += ([a] * n_pulls)\n",
    "        r = regret[-1]\n",
    "        t = t + b_m\n",
    "        m[a] += 1\n",
    "\n",
    "    return regret, actions"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4b4aaf3c-0bf4-4cfd-9989-9ac445457b73",
   "metadata": {},
   "source": [
    "# DP-IMED"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "a465432a-82fa-4f73-8788-27f7ad3c40f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_regret_dp_imed(mu, epsilon, n, alpha = 2, n_zero = 1):\n",
    "    \"\"\"Simulate DP-IMED on a Bernoulli bandit.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    mu : sequence of float (list, tuple or ndarray)\n",
    "        Mean reward of each arm.\n",
    "    epsilon : float\n",
    "        Privacy budget; Laplace noise of scale 1/epsilon is added to every batch sum.\n",
    "    n : int\n",
    "        Horizon (total number of pulls, initial pulls included).\n",
    "    alpha : float\n",
    "        Geometric growth factor of the batch sizes; must be greater than 1.\n",
    "    n_zero : int\n",
    "        Number of initial pulls of each arm.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    regret : list of float\n",
    "        Cumulative pseudo-regret after each pull.\n",
    "    actions : list of int\n",
    "        Arm pulled at each step.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If alpha <= 1 (batch sizes could be zero and the loop would never end).\n",
    "\n",
    "    Relies on the notebook-level helper d_eps.\n",
    "    \"\"\"\n",
    "    if alpha <= 1:\n",
    "        raise ValueError('alpha must be greater than 1')\n",
    "    mu_arr = np.array(mu)\n",
    "    # Optimal arm's mean\n",
    "    u_star = max(mu_arr)\n",
    "    # number of arms\n",
    "    n_arms = len(mu_arr)\n",
    "    # Pseudo-regret paid by a single pull of each arm\n",
    "    gaps = u_star - mu_arr\n",
    "    # n_zero rewards per arm; np.tile repeats the means for lists and arrays alike\n",
    "    # (n_zero*mu only repeats when mu is a list and rescales an ndarray instead)\n",
    "    init_rewards = np.random.binomial(n=1, p=np.tile(mu_arr, n_zero)).reshape((n_zero,n_arms))\n",
    "    # noisy sums\n",
    "    sums = init_rewards.sum(axis = 0) + np.random.laplace(loc=0.0, scale=1/epsilon, size = n_arms)\n",
    "    # noisy means\n",
    "    mu_tildas = sums/n_zero\n",
    "    # arm counts, init with n_zero for each\n",
    "    k_n = n_zero * np.ones(n_arms, dtype=int)\n",
    "    # epochs\n",
    "    m = np.zeros(n_arms, dtype=int)\n",
    "    # index of the next pull\n",
    "    t = n_arms * n_zero + 1\n",
    "    # regret init: every arm is pulled n_zero times in a row\n",
    "    actions = list(np.repeat(np.arange(n_arms),n_zero))\n",
    "    regret = list(gaps[actions].cumsum())\n",
    "    r = regret[-1]\n",
    "    while t <= n:\n",
    "        # Select action according to dp-imed criteria: N_a * d_eps(mu_a, mu_max) + log(N_a)\n",
    "        indexes = []\n",
    "        clipped_max = min(max(max(mu_tildas), 0), 1)\n",
    "        for action in range(n_arms):\n",
    "            clipped_mu = min(max(mu_tildas[action], 0), 1)\n",
    "            indexes.append(k_n[action]*d_eps(clipped_mu, clipped_max, epsilon) + np.log(k_n[action]))\n",
    "        indexes = np.array(indexes)\n",
    "        a = np.random.choice(np.flatnonzero(indexes == indexes.min()))\n",
    "        # Update the epoch\n",
    "        m_a = m[a] + 1\n",
    "        # Batch size: difference of two consecutive truncated geometric sums\n",
    "        b_m = int((n_zero*(alpha**(m_a + 1) - 1))/(alpha - 1)) - int((n_zero*(alpha**(m_a) - 1))/(alpha - 1))\n",
    "        # Update noisy sums with b_m realisations of the reward\n",
    "        sums[a] += np.random.binomial(n=1, p=mu_arr[a], size=b_m).sum() + np.random.laplace(loc=0.0, scale=1/epsilon)\n",
    "        # Update arm counts and noisy means\n",
    "        k_n[a] += b_m\n",
    "        mu_tildas[a] = sums[a]/k_n[a]\n",
    "        # Record pulls t, ..., min(t + b_m - 1, n): the batch is cut at the horizon.\n",
    "        # k starts at 1 so that every recorded pull, the first one included, pays its gap.\n",
    "        n_pulls = min(b_m, n - t + 1)\n",
    "        regret += [r + k*gaps[a] for k in range(1, n_pulls + 1)]\n",
    "        actions += ([a] * n_pulls)\n",
    "        r = regret[-1]\n",
    "        t = t + b_m\n",
    "        m[a] += 1\n",
    "\n",
    "    return regret, actions"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "09e95ea4-91f0-44f4-b0bb-d76b8f13fbf5",
   "metadata": {},
   "source": [
    "# Useful functions to generate results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "27c64b08-cd66-452b-8c22-7ac07424b498",
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_results(f, list_mu, n_iter, n_mc):\n",
    "    \"\"\"Repeat a non-private experiment n_mc times and stack the regret curves.\n",
    "\n",
    "    f is called as f(mu=list_mu, n=n_iter) (no epsilon) and must return a tuple\n",
    "    (regret, actions); only the regret is kept. Returns an array with one row per run.\n",
    "    \"\"\"\n",
    "    regret_runs = []\n",
    "    for _ in range(n_mc):\n",
    "        run_output = f(mu=list_mu, n = n_iter)\n",
    "        regret_runs.append(run_output[0])\n",
    "    return np.array(regret_runs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "79efd081-62f9-43e1-b95c-b92d9f4c169b",
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_private_results(f, list_mu, epsilon, n_iter, n_mc):\n",
    "    \"\"\"Repeat a private experiment n_mc times and stack the regret curves.\n",
    "\n",
    "    f is called as f(mu=list_mu, epsilon=epsilon, n=n_iter) and must return a tuple\n",
    "    (regret, actions); only the regret is kept. Returns an array with one row per run.\n",
    "    \"\"\"\n",
    "    regret_runs = []\n",
    "    for _ in range(n_mc):\n",
    "        run_output = f(mu=list_mu, epsilon = epsilon, n = n_iter)\n",
    "        regret_runs.append(run_output[0])\n",
    "    return np.array(regret_runs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "03f84b82",
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_private_results_alpha(f, list_mu, epsilon, n_iter, n_mc, alpha):\n",
    "    \"\"\"Repeat a private experiment n_mc times for a given alpha and stack the regret curves.\n",
    "\n",
    "    f is called as f(mu=list_mu, epsilon=epsilon, n=n_iter, alpha=alpha) and must return\n",
    "    a tuple (regret, actions); only the regret is kept. Returns an array with one row per run.\n",
    "    \"\"\"\n",
    "    regret_runs = []\n",
    "    for _ in range(n_mc):\n",
    "        run_output = f(mu=list_mu, epsilon = epsilon, n = n_iter, alpha = alpha)\n",
    "        regret_runs.append(run_output[0])\n",
    "    return np.array(regret_runs)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "88a9e110-9491-40c4-be49-32cd3dca3b17",
   "metadata": {},
   "source": [
    "# Generating and saving .npy files"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "603456d0-225f-45c4-82c8-07a858cb6803",
   "metadata": {},
   "source": [
    "## IMED"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "9fac2e89-541e-414b-b46b-61ef7e875be9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "1\n",
      "2\n",
      "3\n"
     ]
    }
   ],
   "source": [
    "# Run IMED on every environment and store the (n_mc x horizon) regret array\n",
    "# of environment number i+1 in IMED_<i+1>.npy\n",
    "for i, list_mu_i in enumerate(list_mu):\n",
    "    name_file_i = \"IMED_\" + str(i+1) + \".npy\"\n",
    "    gen_res_imed_i = generate_results(compute_regret_imed, list_mu_i, n_iter, n_mc)\n",
    "    with open(name_file_i, 'wb') as f:\n",
    "        np.save(f, gen_res_imed_i)\n",
    "        # progress marker: index of the environment just saved\n",
    "        print(i)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "07d357c9-fb19-4279-912f-675b7b3147b9",
   "metadata": {},
   "source": [
    "## DP-SE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "8daf8839-c97e-4cbf-88d5-b846fe957132",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 0\n",
      "0 1\n",
      "0 2\n",
      "0 3\n",
      "1 0\n",
      "1 1\n",
      "1 2\n",
      "1 3\n",
      "2 0\n",
      "2 1\n",
      "2 2\n",
      "2 3\n",
      "3 0\n",
      "3 1\n",
      "3 2\n",
      "3 3\n"
     ]
    }
   ],
   "source": [
    "# Run DP-SE on every (environment, epsilon) pair and store each regret array\n",
    "# in dp_se_<i+1>_eps<j+1>.npy\n",
    "for i, list_mu_i in enumerate(list_mu):\n",
    "    for j, eps_j in enumerate(epss):\n",
    "        name_file_i_j = \"dp_se_\" + str(i+1) + \"_eps\" + str(j+1) + \".npy\"\n",
    "        gen_res_dp_se_i_eps_j = generate_private_results(compute_regret_dp_se, list_mu_i, eps_j, n_iter, n_mc)\n",
    "        with open(name_file_i_j, 'wb') as f:\n",
    "            np.save(f, gen_res_dp_se_i_eps_j)\n",
    "            # progress marker: indices of the pair just saved\n",
    "            print(i,j)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1ed59314-58ca-41b8-a42e-627bf281d8b8",
   "metadata": {},
   "source": [
    "## AdaP-KLUCB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "6e14ea36-01b5-424d-9c28-92dc6d1a6c87",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 0\n",
      "0 1\n",
      "0 2\n",
      "0 3\n",
      "1 0\n",
      "1 1\n",
      "1 2\n",
      "1 3\n",
      "2 0\n",
      "2 1\n",
      "2 2\n",
      "2 3\n",
      "3 0\n",
      "3 1\n",
      "3 2\n",
      "3 3\n"
     ]
    }
   ],
   "source": [
    "# Run AdaP-KLUCB on every (environment, epsilon) pair and store each regret array\n",
    "# in adap_klucb_<i+1>_eps<j+1>.npy\n",
    "for i, list_mu_i in enumerate(list_mu):\n",
    "    for j, eps_j in enumerate(epss):\n",
    "        name_file_i_j = \"adap_klucb_\" + str(i+1) + \"_eps\" + str(j+1) + \".npy\"\n",
    "        gen_res_adap_klucb_i_eps_j = generate_private_results(compute_regret_adap_klucb, list_mu_i, eps_j, n_iter, n_mc)\n",
    "        with open(name_file_i_j, 'wb') as f:\n",
    "            np.save(f, gen_res_adap_klucb_i_eps_j)\n",
    "            # progress marker: indices of the pair just saved\n",
    "            print(i,j)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4e3abb63-5351-46af-a538-88ddf163341d",
   "metadata": {},
   "source": [
    "## Lazy-DP-TS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "26b7b30e-a1d4-48c1-b0ac-138daf50a0d6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 0\n",
      "0 1\n",
      "0 2\n",
      "0 3\n",
      "1 0\n",
      "1 1\n",
      "1 2\n",
      "1 3\n",
      "2 0\n",
      "2 1\n",
      "2 2\n",
      "2 3\n",
      "3 0\n",
      "3 1\n",
      "3 2\n",
      "3 3\n"
     ]
    }
   ],
   "source": [
    "# Run Lazy-DP-TS on every (environment, epsilon) pair and store each regret array\n",
    "# in lazy_dp_ts_<i+1>_eps<j+1>.npy\n",
    "for i, list_mu_i in enumerate(list_mu):\n",
    "    for j, eps_j in enumerate(epss):\n",
    "        name_file_i_j = \"lazy_dp_ts_\" + str(i+1) + \"_eps\" + str(j+1) + \".npy\"\n",
    "        gen_res_lazy_dp_ts_i_eps_j = generate_private_results(compute_regret_lazy_dp_ts, list_mu_i, eps_j, n_iter, n_mc)\n",
    "        with open(name_file_i_j, 'wb') as f:\n",
    "            np.save(f, gen_res_lazy_dp_ts_i_eps_j)\n",
    "            # progress marker: indices of the pair just saved\n",
    "            print(i,j)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0ab2c422-eb89-4b3d-9bad-e5047dc44082",
   "metadata": {},
   "source": [
    "## DP-KL-UCB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "4c9d0615-ddbc-421f-be3a-e277208344ec",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 0\n",
      "0 1\n",
      "0 2\n",
      "0 3\n",
      "1 0\n",
      "1 1\n",
      "1 2\n",
      "1 3\n",
      "2 0\n",
      "2 1\n",
      "2 2\n",
      "2 3\n",
      "3 0\n",
      "3 1\n",
      "3 2\n",
      "3 3\n"
     ]
    }
   ],
   "source": [
    "# Run DP-KL-UCB on every (environment, epsilon) pair and store each regret array\n",
    "# in dp_kl_ucb<i+1>_eps<j+1>.npy\n",
    "# NOTE(review): unlike the other prefixes, this one has no underscore before the\n",
    "# environment number; kept as is in case other code loads these exact file names\n",
    "for i, list_mu_i in enumerate(list_mu):\n",
    "    for j, eps_j in enumerate(epss):\n",
    "        name_file_i_j = \"dp_kl_ucb\" + str(i+1) + \"_eps\" + str(j+1) + \".npy\"\n",
    "        gen_res_dp_klucb_i_eps_j = generate_private_results(compute_regret_dp_klucb, list_mu_i, eps_j, n_iter, n_mc)\n",
    "        with open(name_file_i_j, 'wb') as f:\n",
    "            np.save(f, gen_res_dp_klucb_i_eps_j)\n",
    "            # progress marker: indices of the pair just saved\n",
    "            print(i,j)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cd53f0ba-6388-496a-a307-63fe615e9968",
   "metadata": {},
   "source": [
    "## DP-IMED"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "593fa57a-b427-4d24-a1d9-f92800e010e6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 0\n",
      "0 1\n",
      "0 2\n",
      "0 3\n",
      "1 0\n",
      "1 1\n",
      "1 2\n",
      "1 3\n",
      "2 0\n",
      "2 1\n",
      "2 2\n",
      "2 3\n",
      "3 0\n",
      "3 1\n",
      "3 2\n",
      "3 3\n"
     ]
    }
   ],
   "source": [
    "# Run DP-IMED on every (environment, epsilon) pair and store each regret array\n",
    "# in dp_imed_<i+1>_eps<j+1>.npy\n",
    "for i, list_mu_i in enumerate(list_mu):\n",
    "    for j, eps_j in enumerate(epss):\n",
    "        name_file_i_j = \"dp_imed_\" + str(i+1) + \"_eps\" + str(j+1) + \".npy\"\n",
    "        gen_res_dp_imed_i_eps_j = generate_private_results(compute_regret_dp_imed, list_mu_i, eps_j, n_iter, n_mc)\n",
    "        with open(name_file_i_j, 'wb') as f:\n",
    "            np.save(f, gen_res_dp_imed_i_eps_j)\n",
    "            # progress marker: indices of the pair just saved\n",
    "            print(i,j)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "95181cdd",
   "metadata": {},
   "source": [
    "## Effect of $\\alpha$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "84624af2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.01\n",
      "1.5\n",
      "2\n",
      "3\n"
     ]
    }
   ],
   "source": [
    "# Effect of the batch growth factor alpha on DP-IMED, for one fixed setting:\n",
    "# second environment (list_mu[1]) and third privacy budget (epss[2])\n",
    "alphas = [1.01, 1.5, 2, 3]\n",
    "fixed_eps = epss[2]\n",
    "fixed_list_mu = list_mu[1]\n",
    "for alpha in alphas:\n",
    "    # str(1+1) and str(2+1) are the 1-based numbers of the fixed environment and epsilon\n",
    "    name_file_i_j = \"dp_imed_\" + str(1+1) + \"_eps_\" + str(2+1) + \"_alpha_\" + str(alpha) + \".npy\"\n",
    "    gen_res_dp_imed_i_eps_j = generate_private_results_alpha(compute_regret_dp_imed, fixed_list_mu, fixed_eps, n_iter, n_mc, alpha)\n",
    "    with open(name_file_i_j, 'wb') as f:\n",
    "        np.save(f, gen_res_dp_imed_i_eps_j)\n",
    "        # progress marker: alpha just saved\n",
    "        print(alpha)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
