{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "cellView": "form",
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 364,
     "status": "ok",
     "timestamp": 1652373079763,
     "user": {
      "displayName": "Imad Aouali",
      "userId": "10957666911602104909"
     },
     "user_tz": 240
    },
    "id": "dUrLMhAcZYDY",
    "outputId": "e7637e0a-2d44-4760-e4d6-bcbb43502924",
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "python 3.6.8\n",
      "matplotlib 3.2.2\n",
      "30 joblib cores\n"
     ]
    }
   ],
   "source": [
    "# Imports and defaults\n",
    "import itertools\n",
    "import joblib\n",
    "from joblib import Parallel, delayed\n",
    "import matplotlib as mpl\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "from scipy.linalg import block_diag\n",
    "import time\n",
    "import os\n",
    "\n",
    "# Classic matplotlib look with compact, paper-style overrides\n",
    "mpl.style.use(\"classic\")\n",
    "mpl.rcParams.update({\n",
    "  # figure geometry and background\n",
    "  \"figure.figsize\": [5, 3],\n",
    "  \"figure.facecolor\": \"w\",\n",
    "  # thin strokes and short ticks\n",
    "  \"axes.linewidth\": 0.75,\n",
    "  \"grid.linewidth\": 0.75,\n",
    "  \"lines.linewidth\": 0.75,\n",
    "  \"patch.linewidth\": 0.75,\n",
    "  \"xtick.major.size\": 3,\n",
    "  \"ytick.major.size\": 3,\n",
    "  # font type used in PDF/PS output and text sizes\n",
    "  \"pdf.fonttype\": 42,\n",
    "  \"ps.fonttype\": 42,\n",
    "  \"font.size\": 9,\n",
    "  \"axes.titlesize\": \"medium\",\n",
    "  \"legend.fontsize\": \"medium\",\n",
    "})\n",
    "\n",
    "import platform\n",
    "# Report the interpreter, the plotting library and the available parallelism\n",
    "print(\"python %s\" % platform.python_version())\n",
    "print(\"matplotlib %s\" % mpl.__version__)\n",
    "print(\"%d joblib cores\" % joblib.cpu_count())\n",
    "\n",
    "def linestyle2dashes(style):\n",
    "  if style == \"--\":\n",
    "    return (3, 3)\n",
    "  elif style == \":\":\n",
    "    return (0.5, 2.5)\n",
    "  else:\n",
    "    return (None, None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "cellView": "form",
    "id": "f7uZWkUjRmUs",
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Bandit environments\n",
    "class CoBandit(object):\n",
    "  \"\"\"Contextual bandit with K arms.\"\"\"\n",
    "\n",
    "  def __init__(self, K, contexts, Theta, sigma=1.0):\n",
    "    self.K = K  # number of arms\n",
    "    self.contexts = np.copy(contexts)  # [number of contexts] x d feature matrix\n",
    "    self.num_contexts = self.contexts.shape[0]  # number of contexts\n",
    "    self.d = self.contexts.shape[1]  # number of features\n",
    "    self.Theta = np.copy(Theta)  # [number of arms] x d arm parameters\n",
    "    self.sigma = sigma  # reward noise\n",
    "\n",
    "    self.randomize()\n",
    "\n",
    "  def randomize(self):\n",
    "    # randomly choose one context per arm (does not have to be the same)\n",
    "    ndx = np.random.randint(self.num_contexts, size=self.K)\n",
    "    self.X = self.contexts[ndx, :]\n",
    "\n",
    "    # mean and stochastic rewards\n",
    "    self.mut = (self.X * self.Theta).sum(axis=-1)\n",
    "    self.rt = self.mut + self.sigma * np.random.randn(self.K)\n",
    "    self.best_arm = np.argmax(self.mut)\n",
    "\n",
    "  def reward(self, arm):\n",
    "    # instantaneous reward of the arm\n",
    "    return self.rt[arm]\n",
    "\n",
    "  def regret(self, arm):\n",
    "    # instantaneous regret of the arm\n",
    "    return self.rt[self.best_arm] - self.rt[arm]\n",
    "\n",
    "  def pregret(self, arm):\n",
    "    # expected regret of the arm\n",
    "    return self.mut[self.best_arm] - self.mut[arm]\n",
    "\n",
    "  def print(self):\n",
    "    return \"Contextual bandit: %d dimensions, %d arms\" % (self.d, self.K)\n",
    "\n",
    "\n",
    "def evaluate_one(Alg, params, env, n, period_size=1):\n",
    "  \"\"\"One run of a bandit algorithm.\"\"\"\n",
    "  alg = Alg(env, n, params)\n",
    "\n",
    "  regret = np.zeros(n // period_size)\n",
    "  for t in range(n):\n",
    "    # generate state\n",
    "    env.randomize()\n",
    "\n",
    "    # take action and update agent\n",
    "    arm = alg.get_arm(t)\n",
    "    alg.update(t, arm, env.reward(arm))\n",
    "\n",
    "    # track performance\n",
    "    regret_at_t = env.regret(arm)\n",
    "    regret[t // period_size] += regret_at_t\n",
    "\n",
    "  return regret, alg\n",
    "\n",
    "\n",
    "def evaluate(Alg, params, env, n=1000, period_size=1, printout=True):\n",
    "  \"\"\"Multiple runs of a bandit algorithm.\n",
    "\n",
    "  Args:\n",
    "    Alg: algorithm class; Alg.print() must return its display name.\n",
    "    params: passed through to every Alg constructor.\n",
    "    env: sequence of environments, one independent run per element.\n",
    "    n: number of rounds per run.\n",
    "    period_size: number of consecutive rounds summed into one regret entry.\n",
    "    printout: whether to print timing and a regret summary.\n",
    "\n",
    "  Returns:\n",
    "    (regret, alg) where regret has one row per period and one column per\n",
    "    run, and alg is the list of trained algorithm instances.\n",
    "  \"\"\"\n",
    "  if printout:\n",
    "    print(\"Evaluating %s\" % Alg.print(), end=\"\")\n",
    "  start = time.time()\n",
    "\n",
    "  num_exps = len(env)\n",
    "  output = Parallel(n_jobs=-1)(\n",
    "    delayed(evaluate_one)(Alg, params, env[ex], n, period_size)\n",
    "    for ex in range(num_exps))\n",
    "\n",
    "  # Take the number of periods from the runs themselves instead of\n",
    "  # recomputing it here, so the two functions cannot disagree on the shape.\n",
    "  if num_exps > 0:\n",
    "    regret = np.column_stack([run_regret for run_regret, _ in output])\n",
    "  else:\n",
    "    regret = np.zeros((n // period_size, 0))\n",
    "  alg = [run_alg for _, run_alg in output]\n",
    "\n",
    "  if printout:\n",
    "    print(\" %.1f seconds\" % (time.time() - start))\n",
    "\n",
    "  # summary statistics are undefined (max/min of nothing) without any run\n",
    "  if printout and num_exps > 0:\n",
    "    total_regret = regret.sum(axis=0)\n",
    "    print(\"Regret: %.2f +/- %.2f (median: %.2f, max: %.2f, min: %.2f)\" %\n",
    "      (total_regret.mean(), total_regret.std() / np.sqrt(num_exps),\n",
    "      np.median(total_regret), total_regret.max(), total_regret.min()))\n",
    "\n",
    "  return regret, alg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "class dTS:\n",
    "  def __init__(self, env, n, params):\n",
    "    self.env = env  # bandit environment that the agent interacts with\n",
    "    self.K = self.env.K  # number of arms\n",
    "    self.d = self.env.d  # number of features\n",
    "    self.n = n  # horizon\n",
    "    self.W = self.env.W\n",
    "    self.L = self.env.L\n",
    "    self.Sigma = np.copy(self.env.Sigma)\n",
    "    self.sigma = self.env.sigma  # reward noise\n",
    "    \n",
    "    # override default values\n",
    "    for attr, val in params.items():\n",
    "      if isinstance(val, np.ndarray):\n",
    "        setattr(self, attr, np.copy(val))\n",
    "      elif isinstance(val, list):\n",
    "        setattr(self, attr, val.copy())\n",
    "      else:\n",
    "        setattr(self, attr, val)\n",
    "     \n",
    "    self.Lambda = 1 / self.Sigma\n",
    "        \n",
    "    # sufficient statistics\n",
    "    self.G_hat = np.zeros((self.K, self.d, self.d))\n",
    "    self.B_hat = np.zeros((self.K, self.d))\n",
    "    \n",
    "  def update(self, t, arm, r):\n",
    "    # update sufficient statistics\n",
    "    x = self.env.X[arm, :]\n",
    "    self.G_hat[arm, :, :] += np.outer(x, x) / np.square(self.sigma)\n",
    "    self.B_hat[arm, :] += x * r / np.square(self.sigma)\n",
    "\n",
    "  def get_arm(self, t):\n",
    "        \n",
    "    #Computing Gaussian parameters\n",
    "    Sigma_hat = []\n",
    "    for i in range(self.K):\n",
    "        Sigma_hat_helper = np.linalg.inv(self.Lambda[0] * np.eye(self.d) + self.G_hat[i, :, :])\n",
    "        Sigma_hat.append(Sigma_hat_helper)\n",
    "    \n",
    "    G_bar = np.zeros((self.L, self.d, self.d))\n",
    "    B_bar = np.zeros((self.L, self.d))\n",
    "    Sigma_bar = []\n",
    "    \n",
    "    for l in range(self.L):\n",
    "        W = self.W[l]\n",
    "        W_T = self.W[l].T\n",
    "        if l==0:\n",
    "            G_helper = np.zeros((self.d, self.d))\n",
    "            B_helper = np.zeros(self.d)\n",
    "            for i in range(self.K):\n",
    "                G_helper += self.Lambda[l] * np.eye(self.d) - (self.Lambda[l]**2) * Sigma_hat[i]\n",
    "                B_helper += Sigma_hat[i].dot(self.B_hat[i, :])\n",
    "            G_bar[l, :, :] = W_T.dot(G_helper.dot(W))\n",
    "            B_bar[l, :] = self.Lambda[l] * W_T.dot(B_helper)\n",
    "            Sigma_bar.append(np.linalg.inv(self.Lambda[l+1] * np.eye(self.d) + G_bar[l, :, :])) \n",
    "        else:\n",
    "            G_bar[l, :, :] = self.Lambda[l] * np.eye(self.d) - (self.Lambda[l]**2) * Sigma_bar[l-1]\n",
    "            B_bar[l, :] = self.Lambda[l] * W_T.dot(Sigma_bar[l-1].dot(B_bar[l-1, :]))\n",
    "            Sigma_bar.append(np.linalg.inv(self.Lambda[l+1] * np.eye(self.d) + G_bar[l, :, :]))\n",
    "    \n",
    "    #Hierarchical sampling\n",
    "    #when l=self.L\n",
    "    mu_bar = Sigma_bar[self.L-1].dot(B_bar[self.L-1, :])\n",
    "    psi =  np.random.multivariate_normal(mu_bar, Sigma_bar[self.L-1])\n",
    "    \n",
    "    for l in range(self.L-2, -1, -1):\n",
    "        mu_bar = self.Lambda[l+1] * self.W[l+1].dot(psi) + B_bar[l, :]\n",
    "        mu_bar = Sigma_bar[l].dot(mu_bar)\n",
    "        psi = np.random.multivariate_normal(mu_bar, Sigma_bar[l])\n",
    "        \n",
    "    self.mu = np.zeros(self.K)\n",
    "    for i in range(self.K):\n",
    "      mu_ti = self.Lambda[0] * self.W[0].dot(psi) + self.B_hat[i, :]\n",
    "      mu_ti = Sigma_hat[i].dot(mu_ti)\n",
    "      # posterior sampling\n",
    "      theta_tilde = np.random.multivariate_normal(mu_ti, Sigma_hat[i])\n",
    "      self.mu[i] = self.env.X[i, :].dot(theta_tilde)\n",
    "    \n",
    "    arm = np.argmax(self.mu)\n",
    "    return arm\n",
    "\n",
    "  @staticmethod\n",
    "  def print():\n",
    "    return \"dTS\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "class HierTS:\n",
    "  def __init__(self, env, n, params):\n",
    "    self.env = env  # bandit environment that the agent interacts with\n",
    "    self.K = self.env.K  # number of arms\n",
    "    self.d = self.env.d  # number of features\n",
    "    self.n = n  # horizon\n",
    "    self.B_L = self.env.B_L\n",
    "    self.mar_Sigma_1 = np.copy(self.env.mar_Sigma_1)\n",
    "    self.Sigma_L = np.copy(self.env.Sigma[L]) * self.B_L.dot(self.B_L.T) + 10**(-3) * np.eye(self.d)\n",
    "    self.sigma = self.env.sigma  # reward noise\n",
    "    \n",
    "    # override default values\n",
    "    for attr, val in params.items():\n",
    "      if isinstance(val, np.ndarray):\n",
    "        setattr(self, attr, np.copy(val))\n",
    "      elif isinstance(val, list):\n",
    "        setattr(self, attr, val.copy())\n",
    "      else:\n",
    "        setattr(self, attr, val)\n",
    "     \n",
    "    self.mar_Lambda_1 = np.linalg.inv(self.mar_Sigma_1)\n",
    "    self.Lambda_L = np.linalg.inv(self.Sigma_L)\n",
    "        \n",
    "    # sufficient statistics\n",
    "    self.G_hat = np.zeros((self.K, self.d, self.d))\n",
    "    self.B_hat = np.zeros((self.K, self.d))\n",
    "    \n",
    "  def update(self, t, arm, r):\n",
    "    # update sufficient statistics\n",
    "    x = self.env.X[arm, :]\n",
    "    self.G_hat[arm, :, :] += np.outer(x, x) / np.square(self.sigma)\n",
    "    self.B_hat[arm, :] += x * r / np.square(self.sigma)\n",
    "\n",
    "  def get_arm(self, t):\n",
    "        \n",
    "    #Computing Gaussian parameters\n",
    "    Sigma_hat = []\n",
    "    for i in range(self.K):\n",
    "        Sigma_hat_helper = np.linalg.inv(self.mar_Lambda_1 + self.G_hat[i, :, :])\n",
    "        Sigma_hat.append(Sigma_hat_helper)\n",
    "    \n",
    "    G_bar = np.zeros((self.d, self.d))\n",
    "    B_bar = np.zeros((self.d))\n",
    "    \n",
    "    \n",
    "    G_helper = np.zeros((self.d, self.d))\n",
    "    B_helper = np.zeros(self.d)\n",
    "    for i in range(self.K):\n",
    "        G_helper += self.mar_Lambda_1 - self.mar_Lambda_1.dot(Sigma_hat[i].dot(self.mar_Lambda_1))\n",
    "        B_helper += Sigma_hat[i].dot(self.B_hat[i, :])\n",
    "        \n",
    "    G_bar = G_helper\n",
    "    B_bar = self.mar_Lambda_1.dot(B_helper)\n",
    "    Sigma_bar = np.linalg.inv(self.Lambda_L + G_bar) \n",
    "    \n",
    "    #Hierarchical sampling\n",
    "    mu_bar = Sigma_bar.dot(B_bar)\n",
    "    psi =  np.random.multivariate_normal(mu_bar, Sigma_bar)\n",
    "            \n",
    "    self.mu = np.zeros(self.K)\n",
    "    for i in range(self.K):\n",
    "      mu_ti = self.mar_Lambda_1.dot(psi) + self.B_hat[i, :]\n",
    "      mu_ti = Sigma_hat[i].dot(mu_ti)\n",
    "      # posterior sampling\n",
    "      theta_tilde = np.random.multivariate_normal(mu_ti, Sigma_hat[i])\n",
    "      self.mu[i] = self.env.X[i, :].dot(theta_tilde)\n",
    "    \n",
    "    arm = np.argmax(self.mu)\n",
    "    return arm\n",
    "\n",
    "  @staticmethod\n",
    "  def print():\n",
    "    return \"HierTS\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "class LinTS:\n",
    "  def __init__(self, env, n, params):\n",
    "    self.env = env  # bandit environment that the agent interacts with\n",
    "    self.K = self.env.K  # number of arms\n",
    "    self.d = self.env.d  # number of features\n",
    "    self.n = n  # horizon\n",
    "    self.mar_Sigma = np.copy(self.env.mar_Sigma)\n",
    "    self.sigma = self.env.sigma  # reward noise\n",
    "    \n",
    "    # override default values\n",
    "    for attr, val in params.items():\n",
    "      if isinstance(val, np.ndarray):\n",
    "        setattr(self, attr, np.copy(val))\n",
    "      elif isinstance(val, list):\n",
    "        setattr(self, attr, val.copy())\n",
    "      else:\n",
    "        setattr(self, attr, val)\n",
    "     \n",
    "    self.marLambda = np.linalg.inv(self.mar_Sigma)\n",
    "        \n",
    "    # sufficient statistics\n",
    "    self.G_hat = np.zeros((self.K, self.d, self.d))\n",
    "    self.B_hat = np.zeros((self.K, self.d))\n",
    "    \n",
    "  def update(self, t, arm, r):\n",
    "    # update sufficient statistics\n",
    "    x = self.env.X[arm, :]\n",
    "    self.G_hat[arm, :, :] += np.outer(x, x) / np.square(self.sigma)\n",
    "    self.B_hat[arm, :] += x * r / np.square(self.sigma)\n",
    "\n",
    "  def get_arm(self, t):\n",
    "    self.mu = np.zeros(self.K)\n",
    "    for i in range(self.K):\n",
    "      # linear model posterior\n",
    "      Sigma_hat = np.linalg.inv(self.marLambda + self.G_hat[i, :, :])\n",
    "      mu_ti = Sigma_hat.dot(self.B_hat[i, :]) #Because prior mean is 0\n",
    "      # posterior sampling\n",
    "      theta_tilde = np.random.multivariate_normal(mu_ti, Sigma_hat)\n",
    "      self.mu[i] = self.env.X[i, :].dot(theta_tilde)\n",
    "      \n",
    "    arm = np.argmax(self.mu)\n",
    "    return arm\n",
    "\n",
    "  @staticmethod\n",
    "  def print():\n",
    "    return \"LinTS\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "class LinUCB:\n",
    "  def __init__(self, env, n, params):\n",
    "    self.env = env  # bandit environment that the agent interacts with\n",
    "    self.K = self.env.K  # number of arms\n",
    "    self.d = self.env.d  # number of features\n",
    "    self.n = n  # horizon\n",
    "    self.Lambda = 1 / np.sum(self.env.Sigma)\n",
    "    self.S = self.env.S\n",
    "    self.sigma = self.env.sigma  # reward noise\n",
    "    \n",
    "    # override default values\n",
    "    for attr, val in params.items():\n",
    "      if isinstance(val, np.ndarray):\n",
    "        setattr(self, attr, np.copy(val))\n",
    "      elif isinstance(val, list):\n",
    "        setattr(self, attr, val.copy())\n",
    "      else:\n",
    "        setattr(self, attr, val)\n",
    "             \n",
    "    # sufficient statistics\n",
    "    self.G_hat = np.zeros((self.K, self.d, self.d))\n",
    "    self.B_hat = np.zeros((self.K, self.d))\n",
    "    \n",
    "    self.cew = self.confidence_ellipsoid_width(n)\n",
    "    \n",
    "  def update(self, t, arm, r):\n",
    "    # update sufficient statistics\n",
    "    x = self.env.X[arm, :]\n",
    "    self.G_hat[arm, :, :] += np.outer(x, x)\n",
    "    self.B_hat[arm, :] += x * r\n",
    "\n",
    "  def confidence_ellipsoid_width(self, t):\n",
    "    # Theorem 2 in Abassi-Yadkori (2011)\n",
    "    # Improved Algorithms for Linear Stochastic Bandits\n",
    "    delta = 1 / self.n\n",
    "    L = np.amax(np.linalg.norm(self.env.contexts, axis=1))\n",
    "    Lambda = self.Lambda\n",
    "    R = self.sigma\n",
    "    S = self.S\n",
    "    width = np.sqrt(Lambda) * S + R * np.sqrt(self.d * np.log((1 + t * np.square(L) / Lambda) / delta))\n",
    "    return width\n",
    "\n",
    "  def get_arm(self, t):\n",
    "    self.mu = np.zeros(self.K)\n",
    "    for i in range(self.K):\n",
    "      # linear model\n",
    "      Gt = self.Lambda * np.eye(self.d) + self.G_hat[i, :, :]\n",
    "      Sigma_hat = np.linalg.inv(Gt)\n",
    "      theta_hat = np.linalg.solve(Gt, self.B_hat[i, :])\n",
    "\n",
    "      # UCBs\n",
    "      self.mu[i] = self.env.X[i, :].dot(theta_hat) + self.cew * \\\n",
    "        np.sqrt(self.env.X[i, :].dot(Sigma_hat).dot(self.env.X[i, :]))\n",
    "\n",
    "    arm = np.argmax(self.mu)\n",
    "    return arm\n",
    "\n",
    "  @staticmethod\n",
    "  def print():\n",
    "    return \"LinUCB\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Evaluating dTS"
     ]
    }
   ],
   "source": [
    "sigma = 1  # reward noise\n",
    "n = 5000  # number of rounds per run\n",
    "num_runs = 50  # independent environments per experiment\n",
    "\n",
    "# (class name, constructor overrides, plot color, line style, label and results file name)\n",
    "algs = [\n",
    " (\"dTS\", {}, \"red\", \"-\", \"LindTS\"),\n",
    " (\"HierTS\", {}, \"green\", \"-\", \"HierTS\"),\n",
    " (\"LinTS\", {}, \"blue\", \"-\", \"LinTS\"),\n",
    " (\"LinUCB\", {}, \"cyan\", \"-\", \"LinUCB\"),\n",
    "]\n",
    "\n",
    "# K: number of arms, d_l: number of non-zero rows of W[l], d: feature dimension\n",
    "exps = [\n",
    "    {\"K\": 1000, \"d_l\": [20, 10, 5, 2], \"d\": 20},\n",
    "    #{\"K\": 10000, \"d_l\": [20, 10, 5, 2], \"d\": 20},\n",
    "]\n",
    "\n",
    "for exp in exps:\n",
    "  # set parameters of the experiment (K, d_l and d become notebook globals)\n",
    "  for attr, val in exp.items():\n",
    "    globals()[attr] = val\n",
    "\n",
    "  L = len(d_l) # depth of the diffusion\n",
    "\n",
    "  # bandit environments\n",
    "  envs = []\n",
    "  for run in range(num_runs):\n",
    "\n",
    "    # possible contexts, uniform in [-1, 1)^d\n",
    "    contexts = 2 * np.random.rand(100, d) - 1\n",
    "\n",
    "    # mixing matrices: only the first d_l[l] rows of W[l] are non-zero\n",
    "    W = []\n",
    "    for l in range(L):\n",
    "      W_helper = 2 * np.random.rand(d, d) - 1\n",
    "      W_helper[d_l[l]:] = 0\n",
    "      W.append(W_helper)\n",
    "\n",
    "    # set the covariances: one variance per layer, constant here\n",
    "    # (layer-dependent alternative: Sigma = 2.0 ** np.arange(1, L + 2))\n",
    "    Sigma = np.ones(L + 1)\n",
    "\n",
    "    # marginal covariance of an arm parameter with all latents integrated out;\n",
    "    # mar_Sigma_1 is the same sum without the term of the top latent\n",
    "    mar_Sigma = Sigma[0] * np.eye(d)\n",
    "    B_l = np.eye(d)\n",
    "    mar_Sigma_1 = np.copy(mar_Sigma) # if L=1, then mar_Sigma_1 = Sigma[0] * np.eye(d)\n",
    "    for l in range(L):\n",
    "      B_l = B_l.dot(W[l])\n",
    "      B_l_transpose = B_l.T\n",
    "      mar_Sigma += Sigma[l+1] * B_l.dot(B_l_transpose)\n",
    "      if l == L-2:\n",
    "        # if L>1, this is mar_Sigma without the last covariance term\n",
    "        mar_Sigma_1 = np.copy(mar_Sigma)\n",
    "\n",
    "    # generate latent parameters and action parameters, from the top layer down\n",
    "    psi = np.random.multivariate_normal(np.zeros(d), Sigma[L] * np.eye(d))\n",
    "    for l in range(L-1, 0, -1):\n",
    "      psi = np.random.multivariate_normal(W[l].dot(psi), Sigma[l] * np.eye(d))\n",
    "\n",
    "    Theta = np.zeros((K, d))\n",
    "    for i in range(K):\n",
    "      Theta[i, :] = np.random.multivariate_normal(W[0].dot(psi), Sigma[0] * np.eye(d))\n",
    "\n",
    "    # initialize bandit environment\n",
    "    env = CoBandit(K, contexts, Theta, sigma=sigma)\n",
    "\n",
    "    # pass parameters for algorithm initialization\n",
    "    env.L = L\n",
    "    env.W = W\n",
    "    env.Sigma = Sigma\n",
    "    env.mar_Sigma = mar_Sigma\n",
    "    env.mar_Sigma_1 = mar_Sigma_1\n",
    "    env.B_L = B_l\n",
    "    env.S = np.linalg.norm(Theta, axis=1).max()\n",
    "    envs.append(env)\n",
    "\n",
    "  # results directory of this experiment; created once, before the long\n",
    "  # simulations, and without failing when it already exists\n",
    "  fname = \"results_lb/K={}_d_l={}_d={}\".format(K, d_l, d)\n",
    "  os.makedirs(fname, exist_ok=True)\n",
    "\n",
    "  # simulation\n",
    "  for alg in algs:\n",
    "    # all runs for a single algorithm\n",
    "    alg_class = globals()[alg[0]]\n",
    "    regret, logs = evaluate(alg_class, alg[1], envs, n)\n",
    "\n",
    "    # save results\n",
    "    np.save(fname + \"/{}.npy\".format(alg[4]), regret)\n",
    "\n",
    "    # plot mean cumulative regret, with standard-error bars at ten checkpoints\n",
    "    cum_regret = regret.cumsum(axis=0)\n",
    "    step = np.arange(1, n + 1)\n",
    "    sube = (step.size // 10) * np.arange(1, 11) - 1\n",
    "    plt.plot(step, cum_regret.mean(axis=1),\n",
    "      alg[2], dashes=linestyle2dashes(alg[3]), label=alg[4])\n",
    "    plt.errorbar(step[sube], cum_regret[sube, :].mean(axis=1),\n",
    "      cum_regret[sube, :].std(axis=1) / np.sqrt(cum_regret.shape[1]),\n",
    "      fmt=\"none\", ecolor=alg[2])\n",
    "\n",
    "  plt.legend(loc=\"upper left\", frameon=False)\n",
    "  plt.title(\"Linear difusion, K = %d, L=%d, d_l =\" % (K, L) + str(d_l) +  \", d = %d\" % (d))\n",
    "  plt.xlabel(\"Round n\")\n",
    "  plt.ylabel(\"Regret\")\n",
    "  #plt.ylim(0)\n",
    "\n",
    "  plt.tight_layout()\n",
    "  plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "GenHierTS.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python Kernel (MOAB #56460)",
   "language": "python",
   "name": "python-kernel-56460"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
