{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import numpy as np\n",
    "import random\n",
    "from scipy.stats import truncnorm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_time_series(T, k, alpha, sigma, L, seed):\n",
    "    \"\"\"\n",
    "    Generate $k$ time series of size $T$, each of them following an independent AR-1 process. \n",
    "    \n",
    "    INPUT\n",
    "    -----\n",
    "    T (int) : length of time horizon\n",
    "    k (int) : number of arms\n",
    "    alpha (list): AR parameters \n",
    "    sigma (list): stochastic rates of change\n",
    "    L (int): boundary\n",
    "    seed (int) : random seed\n",
    "    \n",
    "    OUTPUT\n",
    "    -----\n",
    "    time_series : observed rewards of time series\n",
    "    time_series_expected : expected rewards of time series at time $t$\n",
    "    best_arm : arm with the highest expected reward at time $t$\n",
    "    \n",
    "    \"\"\"\n",
    "\n",
    "    np.random.seed(seed)\n",
    "    \n",
    "    time_series = np.zeros((k, T))\n",
    "    time_series_expected = np.zeros((k, T))\n",
    "\n",
    "    # Initial_values\n",
    "    mu = [np.random.uniform(-L, L, k)]\n",
    "    X = [mu[-1] + np.random.normal(0, sigma, k)]\n",
    "\n",
    "    for t in range(1, T+1):\n",
    "        mu.append([max(min(alpha[i] * X[-1][i], L),-L) for i in range(k)])\n",
    "        \n",
    "        # Update the actual rewards at t\n",
    "        X_t = mu[-1] + np.random.normal(0, sigma, k)\n",
    "        X.append(X_t)\n",
    "\n",
    "    time_series = np.vstack(X).T\n",
    "    time_series_expected = np.vstack(mu).T\n",
    "    \n",
    "    best_arm = np.argmax(time_series_expected, axis=0)\n",
    "    \n",
    "    return time_series, time_series_expected, best_arm "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_regret_percentage(time_series_expected, arms_selected, best_arm):\n",
    "    \"\"\"\n",
    "    Compute the normalized regret given the arms selected\n",
    "    \"\"\"\n",
    "    k, T = time_series_expected.shape\n",
    "    assert (len(arms_selected) == T)\n",
    "    assert (len(best_arm) == T)\n",
    "    benchmark = np.array([time_series_expected[best_arm[t], t] for t in range(T)])\n",
    "    true_rewards = np.array([time_series_expected[arms_selected[t], t] for t in range(T)])\n",
    "    return (np.sum(benchmark) - np.sum(true_rewards))/np.sum(benchmark)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def AR2(time_series, alpha, sigma, c_0, criteria=None, L = 5.0):\n",
    "    \"\"\"\n",
    "    AR2 : Alteration-and-Restarting algorithm for dynamic AR bandits\n",
    "    \"\"\"\n",
    "    alpha = np.array(alpha)\n",
    "    \n",
    "    arms_selected = []\n",
    "    rewards = []\n",
    "    UCB_lst = []\n",
    "    \n",
    "    V = set()\n",
    "    c_1 = 8 * c_0\n",
    "\n",
    "    k, T = time_series.shape\n",
    "\n",
    "    ### Initialization ###\n",
    "    for i in range(k):\n",
    "        arms_selected.append(i)\n",
    "        rewards.append(time_series[i, i])\n",
    "        \n",
    "    # gamma: our current belief for the expected reward of each arm at t = k\n",
    "    gamma = np.array([alpha[i]**(k-i)*time_series[i, i] for i in range(k)])\n",
    "\n",
    "    # tau: the last round at which that arm i was played\n",
    "    # tau_trig: the last round at which arm i was last triggered\n",
    "    tau = np.arange(k)\n",
    "    tau_trig = np.zeros(k)\n",
    "        \n",
    "    for t in range(k, T):\n",
    "        i_star = np.argmax(gamma) \n",
    "        gamma_star = gamma[i_star]\n",
    "        \n",
    "        confidence_bound_star = 0\n",
    "        if alpha[i_star] == 1:\n",
    "            confidence_bound_star = np.sqrt(t - tau[i_star] - 1)\n",
    "        else:\n",
    "            confidence_bound_star = np.sqrt((alpha[i_star]**2 - alpha[i_star]**(2*(t-tau[i_star])))/(1-alpha[i_star]**2))\n",
    "    \n",
    "        ### Step 1: Determine the triggering set ### \n",
    "        for j in set(range(k)).difference(V).difference({i_star}):\n",
    "            if alpha[j] == 1:\n",
    "                confidence_bound = np.sqrt(t - tau[j] - 1)\n",
    "            else:\n",
    "                confidence_bound = np.sqrt((alpha[j]**2 - alpha[j]**(2*(t-tau[j])))/(1-alpha[j]**2))\n",
    "            \n",
    "            # Check the triggering condition  \n",
    "            if gamma_star - gamma[j] <= c_1 * sigma[j] * confidence_bound:\n",
    "                # Add j to the activation set\n",
    "                V.add(j)\n",
    "                tau_trig[j] = t\n",
    "                \n",
    "        # compute the upper confidence bound for each arm\n",
    "        UCB = [gamma[j] + c_1 * sigma[j] * np.sqrt((alpha[j]**2 - alpha[j]**(2*(t-tau[j])))/(1-alpha[j]**2)) \\\n",
    "                        if alpha[j] < 1 else gamma[j] + c_1 * sigma[j] * np.sqrt(t - tau[j] - 1) for j in range(k)]\n",
    "        UCB_lst.append(UCB)\n",
    "        UCB = np.array(UCB)\n",
    "\n",
    "        ### Step 2: Exploit or Explore ###\n",
    "        \n",
    "        # Odd rounds: Play a triggered arm\n",
    "        if t % 2 == 1 and len(V) > 0:\n",
    "            V_list = list(V)\n",
    "            if criteria == \"UCB\":\n",
    "                # pick an activated arm with the highest UCB\n",
    "                if np.max(UCB[V_list]) > UCB[i_star]:\n",
    "                    i_t = np.array(V_list)[np.argmax(UCB[V_list])] \n",
    "                    V.remove(i_t)\n",
    "                else:\n",
    "                    i_t = i_star\n",
    "            else:\n",
    "                V_list = list(V)\n",
    "                # pick a triggered arm with the earliest activation time\n",
    "                i_t = np.array(V_list)[np.argmin(tau_trig[V_list])] \n",
    "                V.remove(i_t)\n",
    "        # Even rounds: Play the superior arm\n",
    "        else:\n",
    "            i_t = i_star\n",
    "            \n",
    "        arms_selected.append(i_t)\n",
    "        rewards.append(time_series[i_t, t])\n",
    "        tau[i_t] = t\n",
    "        \n",
    "        # Update the new belief states\n",
    "        gamma = alpha * gamma\n",
    "        gamma[i_t] = max(min(alpha[i_t] * time_series[i_t, t], L), -L)\n",
    "            \n",
    "    return np.array(arms_selected), np.array(rewards), np.array(UCB_lst).T\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def ETC(time_series, m=50):\n",
    "    \"\"\"\n",
    "    \"Explore-then-commit\" algorithm :  \n",
    "    First select each arm for m rounds, then stick with the arm with the highest expected reward\n",
    "    \"\"\"\n",
    "    print(\"running ETC\")\n",
    "    \n",
    "    arms_selected = []\n",
    "    rewards = []\n",
    "    K, T = time_series.shape\n",
    "    \n",
    "    sum_rewards = np.zeros(K)\n",
    "    \n",
    "    for i in range(K):\n",
    "        for j in range(m):\n",
    "            current_t = i * m + j\n",
    "            true_X = time_series[i, current_t]\n",
    "            sum_rewards[i] += true_X\n",
    "            arms_selected.append(i)\n",
    "            rewards.append(true_X)\n",
    "        \n",
    "    i_star = np.argmax(sum_rewards)\n",
    "    \n",
    "    t = K * m\n",
    "    \n",
    "    while t < T:\n",
    "        arms_selected.append(i_star)\n",
    "        rewards.append(time_series[i_star, t])\n",
    "        \n",
    "        t += 1\n",
    "        \n",
    "    return np.array(arms_selected), np.array(rewards)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def RExp3(time_series, V):\n",
    "    \"\"\"\n",
    "    RExp3 from \"Besbes et al. 2014\"\n",
    "    \"\"\" \n",
    "    K, T = time_series.shape \n",
    "    \n",
    "    batch_size = int(np.ceil((K*np.log(K))**(1/3)*(T/V)**(2/3)))\n",
    "    gamma = min(1, np.sqrt(K*np.log(K)/(np.exp(1)*batch_size)))\n",
    "    \n",
    "    print (\"Batch size: {}\".format(batch_size))\n",
    "    \n",
    "    arms_selected = []\n",
    "    rewards = []\n",
    "    \n",
    "    j = 1\n",
    "    while j <= np.ceil(T/batch_size):\n",
    "        t_start = (j-1)*batch_size\n",
    "        w = np.ones(K)\n",
    "        for t in range(t_start, min(T, t_start + batch_size)):\n",
    "            w_sum = np.sum(w)\n",
    "            p = (1-gamma)*w/w_sum + (gamma/K)\n",
    "            arm_t = np.random.choice(np.arange(K), p=p)\n",
    "            reward_t = time_series[arm_t, t]\n",
    "            w[arm_t] = w[arm_t]*np.exp(gamma*(reward_t/p[arm_t])/K)\n",
    "            \n",
    "            arms_selected.append(arm_t)\n",
    "            rewards.append(reward_t)\n",
    "            \n",
    "        j += 1\n",
    "        \n",
    "    return np.array(arms_selected), np.array(rewards) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "\n",
    "def epsilon_greedy(time_series, alpha, epsilon=0.1):\n",
    "    \"\"\"\n",
    "    Epsilon-greedy algorithm : act greedily with probability (1-epsilon), explore a random arm w.p. epsilon\n",
    "    \"\"\"\n",
    "    print (\"running eps-greedy\")\n",
    "    \n",
    "    arms_selected = []\n",
    "    rewards = []\n",
    "    K, T = time_series.shape\n",
    "    observed_idx = np.zeros(K)\n",
    "    \n",
    "    # Initialization\n",
    "    for i in range(K):\n",
    "        true_X = time_series[i, i]\n",
    "        observed_idx[i] = i\n",
    "        arms_selected.append(i)\n",
    "        rewards.append(true_X)\n",
    "        \n",
    "    t = K\n",
    "    while t < T:\n",
    "        # w.p. (1-epsilon), act greedily\n",
    "        if random.random() > epsilon:\n",
    "            d = t - observed_idx\n",
    "            expected_t = np.array([alpha[i]**d[i]*time_series[i, int(t-d[i])] for i in range(K)])\n",
    "\n",
    "            i_t = np.argmax(expected_t)\n",
    "        # w.p. epsilon, randomly explore any of the arms \n",
    "        else:\n",
    "            i_t = random.randrange(K)\n",
    "        \n",
    "        observed_idx[i_t] = t\n",
    "        arms_selected.append(i_t)\n",
    "        rewards.append(time_series[i_t, t])\n",
    "        \n",
    "        t += 1\n",
    "        \n",
    "    return np.array(arms_selected), np.array(rewards)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def UCB_simple(time_series):\n",
    "    \"\"\"\n",
    "    UCB1 (Auer et al. 2002)\n",
    "    \"\"\"\n",
    "    arms_selected = []\n",
    "    rewards = []\n",
    "    UCB = []\n",
    "\n",
    "    ### Exploration -- Estimate the alpha's ###\n",
    "    # K: number of arms\n",
    "    # T: number of time steps\n",
    "    K, T = time_series.shape\n",
    "    mean = np.zeros(K)\n",
    "    num_played = np.ones(K)\n",
    "\n",
    "    # Initialization: play each arm once \n",
    "    for i in range(K):\n",
    "        mean[i] = time_series[i, i]\n",
    "        arms_selected.append(i)\n",
    "        rewards.append(time_series[i,i])\n",
    "\n",
    "    for t in range(K, T):\n",
    "        UCB_t = mean + np.sqrt(2*np.log(t+1)/num_played)\n",
    "        i_t = np.argmax(UCB_t)\n",
    "        reward_t = time_series[i_t, t]\n",
    "\n",
    "        mean[i_t] = (mean[i_t]*num_played[i_t] + reward_t)/(num_played[i_t] + 1)\n",
    "        num_played[i_t] += 1\n",
    "\n",
    "        arms_selected.append(i_t)\n",
    "        rewards.append(time_series[i_t, t])\n",
    "        UCB.append(list(UCB_t))\n",
    "        \n",
    "    return np.array(arms_selected), np.array(rewards), np.array(UCB).T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def UCB_missing_data(time_series, alpha, sigma, delta=0.1):\n",
    "    \"\"\"\n",
    "    modified UCB algorithm \n",
    "    \"\"\"\n",
    "    arms_selected = []\n",
    "    rewards = []\n",
    "    UCB = []\n",
    "\n",
    "    ### Exploration -- Estimate the alpha's ###\n",
    "    # K: number of arms\n",
    "    # T: number of time steps\n",
    "    K, T = time_series.shape\n",
    "    observed_idx = np.zeros(K)\n",
    "\n",
    "    ### Select each arm once as a starter ###\n",
    "    for i in range(K):\n",
    "        true_X = time_series[i, i]\n",
    "        observed_idx[i] = i\n",
    "        arms_selected.append(i)\n",
    "        rewards.append(true_X)\n",
    "        \n",
    "    ### Exploitation -- use the UCB of X_t's to select the arm ###\n",
    "    for t in range(K, T):\n",
    "        d = t - observed_idx\n",
    "\n",
    "        var = [(alpha[i]**2-alpha[i]**(2*(d[i])))/(1-alpha[i]**2) if alpha[i] < 1 else d[i]-1 for i in range(K)]\n",
    "        bound = np.sqrt(2) * np.log(1/delta) * sigma * np.sqrt(var)\n",
    "        UCB_t = np.array([(alpha[i])**d[i]*time_series[i, int(t-d[i])] + bound[i] for i in range(K)])\n",
    "\n",
    "        i_t = np.argmax(UCB_t)\n",
    "        \n",
    "        observed_idx[i_t] = t\n",
    "\n",
    "        arms_selected.append(i_t)\n",
    "        rewards.append(time_series[i_t, t])\n",
    "        UCB.append(list(UCB_t))\n",
    "        \n",
    "    return np.array(arms_selected), np.array(rewards), np.array(UCB).T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def sliding_window_UCB(time_series, B, ksi=0.8):\n",
    "    \"\"\"\n",
    "    sliding window UCB\n",
    "    \"\"\"\n",
    "    K, T = time_series.shape\n",
    "    \n",
    "    arms_selected = np.zeros((K, T))\n",
    "    \n",
    "    rewards = []\n",
    "    \n",
    "    len_period = int(0.5 * np.sqrt(T))\n",
    "    num_period = int(np.ceil(T/len_period))\n",
    "    \n",
    "    # length of the sliding window\n",
    "    tau = int(2 * B * np.sqrt(T * np.log(T)/num_period))\n",
    "    print (\"length of the sliding window = {}\".format(tau))\n",
    "    \n",
    "    for i in range(K):\n",
    "        arms_selected[i, i] = 1\n",
    "        rewards.append(time_series[i,i])\n",
    "        \n",
    "    for t in range(K, T):\n",
    "        len_window = min(t, tau)\n",
    "        history = arms_selected[:, t-len_window:t] # from time $t-tau$ to $t-1$\n",
    "        N_t = np.sum(history, axis = 1)\n",
    "        sum_X = np.sum(time_series[:, t-len_window:t] * history, axis = 1)\n",
    "        mean_X = sum_X/N_t\n",
    "        \n",
    "        c_t = B * np.sqrt(ksi * np.log(len_window)) / np.sqrt(N_t)\n",
    "        \n",
    "        i_t = np.argmax(mean_X + c_t)\n",
    "        arms_selected[i_t, t] = 1\n",
    "        rewards.append(time_series[i_t, t])\n",
    "    \n",
    "    return np.nonzero(arms_selected.T)[1], np.array(rewards)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def SW_TS(time_series, B):\n",
    "    \"\"\"\n",
    "    sliding window Thompson sampling\n",
    "    \"\"\"\n",
    "    K, T = time_series.shape\n",
    "    \n",
    "    ### For simplicity, we assume that the time series are bounded ###\n",
    "    time_series = np.minimum(np.maximum(time_series, -L), L)\n",
    "    \n",
    "    arms_selected = np.zeros((K, T))\n",
    "    \n",
    "    rewards = []\n",
    "    \n",
    "    len_period = int(0.5 * np.sqrt(T))\n",
    "    num_period = int(np.ceil(T/len_period))\n",
    "    \n",
    "    # length of the sliding window\n",
    "    tau = int(2 * B * np.sqrt(T * np.log(T)/num_period))\n",
    "    print (\"length of the sliding window = {}\".format(tau))\n",
    "    \n",
    "    for i in range(K):\n",
    "        arms_selected[i, i] = 1\n",
    "        rewards.append(time_series[i,i])\n",
    "        \n",
    "    for t in range(K, T):\n",
    "        len_window = min(t, tau)\n",
    "        history = arms_selected[:, t-len_window:t] # from time $t-tau$ to $t-1$\n",
    "        \n",
    "        # Number of times that arm i has been selected \n",
    "        N_t = np.sum(history, axis = 1)\n",
    "        \n",
    "        # Rewards collected by arm i within the window\n",
    "        sum_X = np.sum(time_series[:, t-len_window:t] * history, axis = 1)/(2*L) + 1/2*N_t\n",
    "        \n",
    "        theta = [np.random.beta(sum_X[i]+1, N_t[i]-sum_X[i]+1) for i in range(i)]\n",
    "           \n",
    "        i_t = np.argmax(theta)\n",
    "        arms_selected[i_t, t] = 1\n",
    "        rewards.append(time_series[i_t, t])\n",
    "        \n",
    "    return np.nonzero(arms_selected.T)[1], np.array(rewards)\n",
    "        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "### Compute regret for different-sized instances ###\n",
    "T_max = 10000\n",
    "num_simulations = 100\n",
    "\n",
    "k = 10\n",
    "L = 1\n",
    "\n",
    "regret_AR2_1low = np.zeros(num_simulations)\n",
    "regret_AR2_1high = np.zeros(num_simulations)\n",
    "\n",
    "regret_ETC_low = np.zeros(num_simulations)\n",
    "regret_ETC_high = np.zeros(num_simulations)\n",
    "\n",
    "regret_RExp3_low = np.zeros(num_simulations)\n",
    "regret_RExp3_high = np.zeros(num_simulations)\n",
    " \n",
    "regret_eps_low = np.zeros(num_simulations)\n",
    "regret_eps_high = np.zeros(num_simulations)\n",
    "\n",
    "regret_UCB_low = np.zeros(num_simulations)\n",
    "regret_UCB_high = np.zeros(num_simulations)\n",
    "\n",
    "regret_UCB_mod_low = np.zeros(num_simulations)\n",
    "regret_UCB_mod_high = np.zeros(num_simulations)\n",
    "\n",
    "regret_SW_UCB_low = np.zeros(num_simulations)\n",
    "regret_SW_UCB_high = np.zeros(num_simulations)\n",
    "\n",
    "regret_SW_TS_low = np.zeros(num_simulations)\n",
    "regret_SW_TS_high = np.zeros(num_simulations)\n",
    "\n",
    "\n",
    "for n in range(num_simulations):\n",
    "    print (\"### simulation {} ###\".format(n))\n",
    "    \n",
    "    np.random.seed(n + 100)\n",
    "    \n",
    "    # First generate alphas with expected values less than the threshold \n",
    "    alpha_1 = np.minimum(np.maximum(np.random.dirichlet(np.ones(k) * 5) + 0.4 - 1/k, 0), 1)\n",
    "    print (\"alpha-exp1:\", alpha_1)\n",
    "    sigma_1 = np.random.random(size=k) * 0.5\n",
    "    print (\"sigma-exp1:\", sigma_1) \n",
    "    \n",
    "    time_series_1, time_series_expected_1, best_arm_1 = \\\n",
    "        generate_time_series(T_max, k, alpha_1, sigma_1, L=L, seed=n)\n",
    "    \n",
    "    # Then generate alphas with expected values higher than the threshold \n",
    "    alpha_2 = np.minimum(np.maximum(np.random.dirichlet(np.ones(k) * 5) + 0.9 - 1/k, 0), 1)\n",
    "    print (\"alpha-exp2:\", alpha_2)\n",
    "    sigma_2 = np.random.random(size=k) * 0.5\n",
    "    print (\"sigma-exp2:\", sigma_2) \n",
    "    \n",
    "    time_series_2, time_series_expected_2, best_arm_2 = \\\n",
    "        generate_time_series(T_max, k, alpha_2, sigma_2, L=L, seed=n)\n",
    "        \n",
    "    arms_selected_AR2_1low, _, _ = AR2(time_series_1, alpha_1, sigma_1, 0.001, criteria=\"UCB\", L = L)\n",
    "    arms_selected_ETC_low, _ = ETC(time_series_1, m=100)\n",
    "    arms_selected_RExp3_low, _ = RExp3(time_series_1, V=0.05*T_max)\n",
    "    arms_selected_eps_low, _ = epsilon_greedy(time_series_1, alpha_1, epsilon=0.1)\n",
    "    arms_selected_UCB_low, _, _ = UCB_simple(time_series_1)\n",
    "    arms_selected_UCB_mod_low, _, _ = UCB_missing_data(time_series_1, alpha_1, sigma_1, delta=0.5)\n",
    "    arms_selected_SW_UCB_low, _ = sliding_window_UCB(time_series_1, 2*L, ksi=0.8)\n",
    "    arms_selected_SW_TS_low, _ = SW_TS(time_series_1, 2*L)\n",
    "    \n",
    "    regret_AR2_1low[n] = compute_regret_percentage(time_series_expected_1[:, :T_max], \\\n",
    "                                                   arms_selected_AR2_1low[:T_max], \\\n",
    "                                                   best_arm_1[:T_max])\n",
    "    \n",
    "    regret_ETC_low[n] = compute_regret_percentage(time_series_expected_1[:, :T_max], \\\n",
    "                                                  arms_selected_ETC_low[:T_max], \\\n",
    "                                                  best_arm_1[:T_max])\n",
    "    \n",
    "    regret_RExp3_low[n] = compute_regret_percentage(time_series_expected_1[:, :T_max], \\\n",
    "                                                  arms_selected_RExp3_low[:T_max], \\\n",
    "                                                  best_arm_1[:T_max])\n",
    "    \n",
    "    regret_eps_low[n] = compute_regret_percentage(time_series_expected_1[:, :T_max], \\\n",
    "                                                  arms_selected_eps_low[:T_max], \\\n",
    "                                                  best_arm_1[:T_max])\n",
    "    \n",
    "    regret_UCB_low[n] = compute_regret_percentage(time_series_expected_1[:, :T_max], \\\n",
    "                                                  arms_selected_UCB_low[:T_max], \\\n",
    "                                                  best_arm_1[:T_max])\n",
    "    \n",
    "    regret_UCB_mod_low[n] = compute_regret_percentage(time_series_expected_1[:, :T_max], \\\n",
    "                                                      arms_selected_UCB_mod_low[:T_max], \\\n",
    "                                                      best_arm_1[:T_max])\n",
    "    \n",
    "    regret_SW_UCB_low[n] = compute_regret_percentage(time_series_expected_1[:, :T_max], \\\n",
    "                                                      arms_selected_SW_UCB_low[:T_max], \\\n",
    "                                                      best_arm_1[:T_max])\n",
    "    \n",
    "    regret_SW_TS_low[n] = compute_regret_percentage(time_series_expected_1[:, :T_max], \\\n",
    "                                                      arms_selected_SW_TS_low[:T_max], \\\n",
    "                                                      best_arm_1[:T_max])\n",
    "\n",
    "    ####################################\n",
    "        \n",
    "    arms_selected_AR2_1high, _, _ = AR2(time_series_2, alpha_2, sigma_2, 0.1, criteria=\"UCB\", L = L)\n",
    "    arms_selected_ETC_high, _ = ETC(time_series_2, m=100)\n",
    "    arms_selected_RExp3_high, _ = RExp3(time_series_2, V=0.05*T_max)\n",
    "    arms_selected_eps_high, _ = epsilon_greedy(time_series_2, alpha_2, epsilon=0.1)\n",
    "    arms_selected_UCB_high, _, _ = UCB_simple(time_series_2)\n",
    "    arms_selected_UCB_mod_high, _, _ = UCB_missing_data(time_series_2, alpha_2, sigma_2, delta=0.5)\n",
    "    arms_selected_SW_UCB_high, _ = sliding_window_UCB(time_series_2, 2*L, ksi=0.8)\n",
    "    arms_selected_SW_TS_high, _ = SW_TS(time_series_2, 2*L)\n",
    "    \n",
    "    regret_AR2_1high[n] = compute_regret_percentage(time_series_expected_2[:, :T_max], \\\n",
    "                                                   arms_selected_AR2_1high[:T_max], \\\n",
    "                                                   best_arm_2[:T_max])\n",
    "    \n",
    "    regret_ETC_high[n] = compute_regret_percentage(time_series_expected_2[:, :T_max], \\\n",
    "                                                  arms_selected_ETC_high[:T_max], \\\n",
    "                                                  best_arm_2[:T_max])\n",
    "    \n",
    "    regret_RExp3_high[n] = compute_regret_percentage(time_series_expected_2[:, :T_max], \\\n",
    "                                                  arms_selected_RExp3_high[:T_max], \\\n",
    "                                                  best_arm_2[:T_max])\n",
    "    \n",
    "    regret_UCB_high[n] = compute_regret_percentage(time_series_expected_2[:, :T_max], \\\n",
    "                                                  arms_selected_UCB_high[:T_max], \\\n",
    "                                                  best_arm_2[:T_max])\n",
    "    \n",
    "    regret_eps_high[n] = compute_regret_percentage(time_series_expected_2[:, :T_max], \\\n",
    "                                                  arms_selected_eps_high[:T_max], \\\n",
    "                                                  best_arm_2[:T_max])\n",
    "    \n",
    "    regret_UCB_mod_high[n] = compute_regret_percentage(time_series_expected_2[:, :T_max], \\\n",
    "                                                      arms_selected_UCB_mod_high[:T_max], \\\n",
    "                                                      best_arm_2[:T_max])\n",
    "    \n",
    "    regret_SW_UCB_high[n] = compute_regret_percentage(time_series_expected_2[:, :T_max], \\\n",
    "                                                      arms_selected_SW_UCB_high[:T_max], \\\n",
    "                                                      best_arm_2[:T_max])\n",
    "    \n",
    "    regret_SW_TS_high[n] = compute_regret_percentage(time_series_expected_2[:, :T_max], \\\n",
    "                                                      arms_selected_SW_TS_high[:T_max], \\\n",
    "                                                      best_arm_2[:T_max])\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print (\"k = {}\".format(k))\n",
    "print (\"Regret (low alpha)\")\n",
    "regret_low = \\\n",
    "[regret_AR2_1low, regret_ETC_low, regret_UCB_low, regret_eps_low, \\\n",
    " regret_RExp3_low, regret_UCB_mod_low, regret_SW_UCB_low, regret_SW_TS_low]\n",
    "\n",
    "for regret in regret_low:\n",
    "    print (\"{:0.2f} ({:0.2f})\".format(np.mean(regret), np.std(regret)))    \n",
    "    \n",
    "print (\"Regret (high alpha)\")\n",
    "regret_high = \\\n",
    "[regret_AR2_1high, regret_ETC_high, regret_UCB_high, regret_eps_high, \\\n",
    " regret_RExp3_high, regret_UCB_mod_high, regret_SW_UCB_high, regret_SW_TS_high]\n",
    "\n",
    "for regret in regret_high:\n",
    "    print (\"{:0.2f} ({:0.2f})\".format(np.mean(regret), np.std(regret)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = regret_high\n",
    "\n",
    "fig, ax = plt.subplots()\n",
    "bplot = ax.boxplot(data,\n",
    "                  vert=True,  \n",
    "                  patch_artist=True,  \n",
    "                  )\n",
    "ax.set_xticklabels([\"AR2 \\n\", \\\n",
    "                    \"ETC \\n\", \\\n",
    "                    \"UCB \\n\", \\\n",
    "                    r\"$\\epsilon$\"+\"-greedy \\n\", \\\n",
    "                    \"RExp3 \\n\", \\\n",
    "                    \"mod-UCB \\n\", \\\n",
    "                    \"SW-UCB \\n\", \\\n",
    "                    \"SW-TS \\n\"\n",
    "                   ])\n",
    "ax.axvspan(0.5, 1.5, color='gray', alpha=0.2)\n",
    "sns.set_style(\"whitegrid\")\n",
    "\n",
    "colors = ['pink'] + ['lightblue'] * (len(data)-1)\n",
    "for patch, color in zip(bplot['boxes'], colors):\n",
    "    patch.set_facecolor(color)\n",
    "\n",
    "plt.ylabel('normalized regret', fontsize=16)\n",
    "\n",
    "fig.set_size_inches(12,6)\n",
    "plt.rcParams.update({'font.size': 15})\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
