{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!/usr/bin/env python\n",
    "# coding: utf-8\n",
    "\n",
    "import os\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "from numpy.linalg import inv\n",
    "from scipy.stats import norm\n",
    "from sklearn import linear_model\n",
    "# from models import LassoBandit, DRLassoBandit, SALassoBandit"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def sample_spherical(N, k):\n",
    "    vec = np.random.randn(k, N)\n",
    "    vec /= np.linalg.norm(vec, axis=0)\n",
    "    return vec\n",
    "\n",
    "def sample_elliptical(N, d, k, mu):\n",
    "    \"\"\"Draw N points of the form mu + A (R * S), returned as a (d, N) array.\n",
    "\n",
    "    S holds N unit directions in R^k (from sample_spherical), A is a d x k\n",
    "    matrix of uniform(0, 1) entries drawn afresh on every call, and R is one\n",
    "    standard-normal scale per point.\n",
    "    mu is added by broadcasting -- presumably a scalar or a (d, 1) column;\n",
    "    verify against callers.\n",
    "    \"\"\"\n",
    "    # Keep this draw order (directions, mixing matrix, radii) so results for\n",
    "    # a given seed do not change.\n",
    "    unit_directions = sample_spherical(N, k)\n",
    "    mixing_matrix = np.random.rand(d, k)\n",
    "    radial_scales = np.random.normal(size=N)\n",
    "    scaled_directions = radial_scales * unit_directions\n",
    "    return mu + np.dot(mixing_matrix, scaled_directions)\n",
    "\n",
    "def SMInv(Ainv, u, v):\n",
    "    return Ainv - np.dot(Ainv, np.dot(np.outer(u,v), Ainv)) / ( 1 + np.dot(v, np.dot(Ainv, u)))\n",
    "\n",
    "\n",
    "def random_ball(num_points, dimension, radius=1):\n",
    "    from numpy import random, linalg\n",
    "    # First generate random directions by normalizing the length of a\n",
    "    # vector of random-normal values (these distribute evenly on ball).\n",
    "    random_directions = random.normal(size=(dimension,num_points))\n",
    "    random_directions /= linalg.norm(random_directions, axis=0)\n",
    "    # Second generate a random radius with probability proportional to\n",
    "    # the surface area of a ball with a given radius.\n",
    "    random_radii = random.random(num_points) ** (1/dimension)\n",
    "    # Return the list of random (direction & length) points.\n",
    "    return radius * (random_directions * random_radii).T, "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "class linUCB:\n",
    "    def __init__(self,sigma,d,lam):\n",
    "        self.d=d\n",
    "        self.x=[]\n",
    "        self.r=[]\n",
    "        self.sigma=sigma\n",
    "        self.lam = lam\n",
    "        self.theta=np.zeros(d)\n",
    "        self.Vinv=1/lam*np.identity(d)\n",
    "        \n",
    "    def choose_a(self,t,x):  # x is N*d matrix\n",
    "        self.alpha = self.sigma*np.sqrt(2*self.d*np.log(t**2+t**3/self.lam)) + np.sqrt(self.lam)\n",
    "        means = np.dot(x,self.theta)\n",
    "        xv = np.sqrt((np.matmul(x, self.Vinv) * x).sum(axis = 1))\n",
    "        u = means + self.alpha * xv\n",
    "        self.action=np.argmax(u)\n",
    "        xt = x[self.action]\n",
    "        self.x.append(xt)\n",
    "        self.Vinv = SMInv(self.Vinv, xt, xt)\n",
    "        return(self.action)\n",
    "\n",
    "    def update_beta(self,rwd,t):\n",
    "        self.r.append(rwd)\n",
    "        if t>3:\n",
    "            model=linear_model.Ridge(alpha=self.lam)\n",
    "            model.fit(self.x,self.r)\n",
    "            self.theta=model.coef_\n",
    "            \n",
    "\n",
    "class linTS:\n",
    "    def __init__(self,sigma,d,lam):\n",
    "        self.d=d\n",
    "        self.x=[]\n",
    "        self.r=[]\n",
    "        self.sigma=sigma\n",
    "        self.lam = lam\n",
    "        self.theta=np.zeros(d)\n",
    "        self.Vinv=1/lam*np.identity(d)\n",
    "        \n",
    "    def choose_a(self,t,x):  # x is N*d matrix\n",
    "        self.alpha = self.sigma*np.sqrt(self.d*np.log(self.lam+t)+d*np.log(self.lam) + 4*np.log(t)) + np.sqrt(self.lam)\n",
    "        theta_tilde = np.random.multivariate_normal(self.theta, np.square(self.alpha)*self.Vinv)\n",
    "        means = np.dot(x,theta_tilde)\n",
    "        self.action=np.argmax(means)\n",
    "        xt = x[self.action]\n",
    "        self.x.append(xt)\n",
    "        self.Vinv = SMInv(self.Vinv, xt, xt)\n",
    "        return(self.action)\n",
    "\n",
    "    def update_beta(self,rwd,t):\n",
    "        self.r.append(rwd)\n",
    "        if t>3:\n",
    "            model=linear_model.Ridge(alpha=self.lam)\n",
    "            model.fit(self.x,self.r)\n",
    "            self.theta=model.coef_\n",
    "\n",
    "\n",
    "class linGreedy:\n",
    "    def __init__(self,sigma,d,lam):\n",
    "        self.d=d\n",
    "        self.x=[]\n",
    "        self.r=[]\n",
    "        self.theta=np.zeros(d)\n",
    "        self.lam = lam\n",
    "        \n",
    "    def choose_a(self,t,x):  # x is N*d matrix\n",
    "        means = np.dot(x,self.theta)\n",
    "        self.action=np.argmax(means)\n",
    "        xt = x[self.action]\n",
    "        self.x.append(xt)\n",
    "        return(self.action)\n",
    "\n",
    "    def update_beta(self,rwd,t):\n",
    "        self.r.append(rwd)\n",
    "        if t>3:\n",
    "            model=linear_model.Ridge(alpha=self.lam)\n",
    "            model.fit(self.x,self.r)\n",
    "            self.theta=model.coef_\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "#simulation settings\n",
    "\n",
    "# K: number of arms; each round's context matrix x has K rows.\n",
    "K=20\n",
    "# d: feature dimension (length of beta and of each arm's context vector).\n",
    "d=100\n",
    "# R: standard deviation of the reward noise; also passed to the agents as sigma.\n",
    "R=0.2\n",
    "# T: number of rounds per simulation run.\n",
    "T=1000\n",
    "# dist: context distribution code used by the simulation cell\n",
    "# (0 and 4: correlated Gaussian, 1: random_ball, 2: Student t, 3: Laplace, 5: Gumbel).\n",
    "dist =5\n",
    "\n",
    "# V is the K x K covariance used for the Gaussian contexts:\n",
    "# sigma_sq on the diagonal and rho_sq everywhere else.\n",
    "sigma_sq=1.\n",
    "rho_sq=0.7\n",
    "V=(sigma_sq-rho_sq)*np.eye(K) + rho_sq*np.ones((K,K))\n",
    "\n",
    "# fixed: 0 redraws the contexts every round; any other value keeps the initial\n",
    "# draw, and 1 additionally adds '_fixed' to the result file name.\n",
    "fixed = 0\n",
    "\n",
    "\n",
    "# Seed NumPy's global RNG so the runs are repeatable.\n",
    "np.random.seed(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n",
      "100\n",
      "200\n",
      "300\n",
      "400\n",
      "500\n",
      "600\n",
      "700\n",
      "800\n",
      "900\n",
      "1000\n",
      "2\n",
      "100\n",
      "200\n",
      "300\n",
      "400\n",
      "500\n",
      "600\n",
      "700\n",
      "800\n",
      "900\n",
      "1000\n",
      "3\n",
      "100\n",
      "200\n",
      "300\n",
      "400\n",
      "500\n",
      "600\n",
      "700\n",
      "800\n",
      "900\n",
      "1000\n",
      "4\n",
      "100\n",
      "200\n",
      "300\n",
      "400\n",
      "500\n",
      "600\n",
      "700\n",
      "800\n",
      "900\n",
      "1000\n",
      "5\n",
      "100\n",
      "200\n",
      "300\n",
      "400\n",
      "500\n",
      "600\n",
      "700\n",
      "800\n",
      "900\n",
      "1000\n",
      "6\n",
      "100\n",
      "200\n",
      "300\n",
      "400\n",
      "500\n",
      "600\n",
      "700\n",
      "800\n",
      "900\n",
      "1000\n",
      "7\n",
      "100\n",
      "200\n",
      "300\n",
      "400\n",
      "500\n",
      "600\n",
      "700\n",
      "800\n",
      "900\n",
      "1000\n",
      "8\n",
      "100\n",
      "200\n",
      "300\n",
      "400\n",
      "500\n",
      "600\n",
      "700\n",
      "800\n",
      "900\n",
      "1000\n",
      "9\n",
      "100\n",
      "200\n",
      "300\n",
      "400\n",
      "500\n",
      "600\n",
      "700\n",
      "800\n",
      "900\n",
      "1000\n",
      "10\n",
      "100\n",
      "200\n",
      "300\n"
     ]
    }
   ],
   "source": [
    "# Number of independent simulation runs and the id given to the first one.\n",
    "simul_n=10\n",
    "sim_start = 1\n",
    "\n",
    "\n",
    "def draw_contexts(dist):\n",
    "    \"\"\"Draw one K x d context matrix (one row per arm) for distribution code dist.\n",
    "\n",
    "    Shared by the initial draw and the per-round redraw so the two cannot\n",
    "    drift apart. In particular dist == 0 is now always scaled by 1/sqrt(d);\n",
    "    the per-round redraw used to skip that scaling, which made codes 0 and 4\n",
    "    identical whenever contexts were redrawn.\n",
    "\n",
    "    Raises ValueError for an unknown code instead of leaving x undefined.\n",
    "    \"\"\"\n",
    "    if dist == 0:\n",
    "        return np.random.multivariate_normal(np.zeros(K), V, d).T / np.sqrt(d)\n",
    "    if dist == 1:\n",
    "        return np.asarray(random_ball(K, d, radius=np.sqrt(d))).reshape((K, d))\n",
    "    if dist == 2:\n",
    "        return np.random.standard_t(10, size=K*d).reshape((K, d))\n",
    "    if dist == 3:\n",
    "        return np.random.laplace(0, 1, K*d).reshape((K, d))\n",
    "    if dist == 4:\n",
    "        return np.random.multivariate_normal(np.zeros(K), V, d).T\n",
    "    if dist == 5:\n",
    "        return np.random.gumbel(0, 1, K*d).reshape((K, d))\n",
    "    raise ValueError(\"unknown dist code: {}\".format(dist))\n",
    "\n",
    "\n",
    "# np.savetxt does not create directories; make sure the target exists before\n",
    "# spending time on a simulation whose result could not be written.\n",
    "os.makedirs(\"results\", exist_ok=True)\n",
    "\n",
    "# Initial contexts; these stay in use for the whole run when fixed != 0.\n",
    "x = draw_contexts(dist)\n",
    "\n",
    "# Exactly simul_n runs, ids sim_start .. sim_start + simul_n - 1 (the loop used\n",
    "# to run one extra simulation that the loading cell never reads).\n",
    "for simul in range(sim_start, sim_start + simul_n):\n",
    "\n",
    "    # True parameter vector for this run.\n",
    "    beta=np.random.uniform(0.1,1.,d)/np.sqrt(d)\n",
    "\n",
    "    print(simul)\n",
    "\n",
    "    if fixed == 1:\n",
    "        savename = \"results/linBandit_N={}_d={}_rho={}_dist={}_id={}_fixed.csv\".format(K, d, rho_sq, dist, simul)\n",
    "    else:\n",
    "        savename = \"results/linBandit_N={}_d={}_rho={}_dist={}_id={}.csv\".format(K, d, rho_sq, dist, simul)\n",
    "\n",
    "    # Row order of the saved regret matrix follows this list.\n",
    "    models = [\n",
    "        linUCB(sigma=R,d=d,lam = 0.5),\n",
    "        linTS(sigma=R,d=d,lam = 0.5),\n",
    "        linGreedy(sigma=R,d=d,lam = 0.5),\n",
    "    ]\n",
    "    num_models = len(models)\n",
    "\n",
    "    rewards = [ [] for _ in range(num_models) ]\n",
    "    optRWD = []\n",
    "\n",
    "    for t in range(1,T+1):\n",
    "        if (t % 100)==0:\n",
    "            print(t)\n",
    "\n",
    "        if fixed == 0:\n",
    "            x = draw_contexts(dist)\n",
    "\n",
    "        # One noise draw per round, shared by all agents.\n",
    "        err=R*np.random.randn()\n",
    "\n",
    "        for m in range(num_models):\n",
    "            # The agents are passed t+1 as their round counter.\n",
    "            a = models[m].choose_a(t+1,x)\n",
    "            mean_rwd = np.dot(x[a],beta)\n",
    "            rwd= mean_rwd + err\n",
    "            # Regret is computed from noiseless mean rewards.\n",
    "            rewards[m].append(mean_rwd)\n",
    "            models[m].update_beta(rwd,t+1)\n",
    "\n",
    "        optRWD.append(np.amax(np.dot(x,beta)))\n",
    "\n",
    "    # One row of cumulative regret per agent, one column per round.\n",
    "    cumulative_opt = np.cumsum(optRWD)\n",
    "    regret = np.vstack([cumulative_opt - np.cumsum(rewards[m]) for m in range(num_models)])\n",
    "    np.savetxt(savename, regret, delimiter=\",\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the regret matrices written by the simulation cell, one file per run.\n",
    "# d=5\n",
    "# rho_sq=0.0\n",
    "\n",
    "# The simulation cell adds '_fixed' to the file name when fixed == 1; this cell\n",
    "# used to test fixed == 3 and therefore never loaded those files.\n",
    "file_suffix = '_fixed' if fixed == 1 else ''\n",
    "\n",
    "reg_list = [\n",
    "    np.genfromtxt(\n",
    "        'results/linBandit_N={}_d={}_rho={}_dist={}_id={}{}.csv'.format(K, d, rho_sq, dist, i, file_suffix), delimiter=',')\n",
    "    for i in range(1,simul_n+1)\n",
    "]\n",
    "\n",
    "# Stack the per-run matrices along a new leading axis.\n",
    "total_reg = np.stack(reg_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Apply the style before creating the figure so the figure itself is built\n",
    "# with the default rcParams and re-running the cell gives the same result.\n",
    "plt.style.use('default')\n",
    "fig = plt.figure(figsize=(5.2, 4))\n",
    "\n",
    "steps=np.arange(1,T+1)\n",
    "# Markers and error bars at roughly ten points; never let the stride be 0.\n",
    "freq = max(1, T // 10)\n",
    "\n",
    "# total_reg is stacked with one entry per run along axis 0: average over runs.\n",
    "avg_reg = total_reg.mean(axis=0)\n",
    "sd_reg = total_reg.std(axis=0)\n",
    "\n",
    "# (label, marker, extra keyword arguments), in the row order used when the\n",
    "# regret matrices were saved.\n",
    "series = [\n",
    "    ('LinUCB', '^', {'markersize': 4}),\n",
    "    ('LinTS', 'x', {}),\n",
    "    ('Ridge_Greedy', '.', {}),\n",
    "]\n",
    "for row, (label, marker, extra) in enumerate(series):\n",
    "    plt.errorbar(steps, avg_reg[row,:], sd_reg[row,:], errorevery=freq, marker=marker, markevery=freq, label=label, linewidth=2, elinewidth=1, capsize=3, **extra)\n",
    "\n",
    "plt.grid(color='0.85')\n",
    "plt.xlabel('Round ($t$)', size = 14)\n",
    "plt.ylabel('Cumulative Regret', size = 14)\n",
    "\n",
    "# Title: a named distribution wins; otherwise (dist == 0) describe the\n",
    "# Gaussian design through rho_sq.\n",
    "dist_names = {1: 'Uniform', 2: 'Student t', 3: 'Laplace', 4: 'Gaussian', 5: 'Gumbel'}\n",
    "if dist in dist_names:\n",
    "    title = '$d$={}, $K$={}, {}'.format(d, K, dist_names[dist])\n",
    "elif rho_sq == 0.0:\n",
    "    title = r'$d$={}, $K$={}, Gaussian'.format(d, K)\n",
    "else:\n",
    "    title = r'$d$={}, $K$={}, $\\rho^2$={}'.format(d, K, rho_sq)\n",
    "plt.title(title, size = 14)\n",
    "\n",
    "plt.legend(loc='upper left', prop={'size': 12})\n",
    "plt.tick_params(labelsize=12)\n",
    "plt.savefig('linearBandit_d={}_K={}_dist={}.pdf'.format(d,K,dist))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#plt.style.use('seaborn-whitegrid')\n",
    "#fig = plt.figure(figsize=(6, 4))\n",
    "#steps=np.arange(1,T+1)\n",
    "#plt.plot(steps,np.median(cumulated_regret_Lasso,axis=0), label='Lasso Bandit')\n",
    "#plt.plot(steps,np.median(cumulated_regret_DR,axis=0), label='DR Lasso Bandit')\n",
    "#plt.plot(steps,np.median(cumulated_regret_PFLasso,axis=0),label='Ours')\n",
    "\n",
    "\n",
    "#plt.xlabel('Time (t)', size = 14)\n",
    "#plt.ylabel('Cumulative Regret', size = 14)\n",
    "#plt.title('Corr={}, d={}, N={}'.format(rho_sq,d,N), size = 14)\n",
    "#plt.legend(loc='upper left', prop={'size': 12})\n",
    "# plt.legend(loc='upper center', bbox_to_anchor=(0.5,-0.2),fancybox=True,ncol=5)\n",
    "#plt.tick_params(labelsize=12)\n",
    "#plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
