{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Method 2: Rule Based - From Lasso Bandit"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Ridge_CB:\n",
    "    def __init__(self, env, lam, h, q, seed):\n",
    "        self.K = env.K\n",
    "        self.T = env.T\n",
    "        self.delta = env.delta\n",
    "        self.d = env.d\n",
    "        self.reg_sq = env.reg_sq\n",
    "        self.C = env.C\n",
    "        self.gamma = self.C * np.sqrt(self.K*self.T/(np.log(2/self.delta)+self.reg_sq))\n",
    "        self.mu = self.K\n",
    "        self.p = np.zeros((self.T, self.K))\n",
    "        self.selected_arms = [0 for i in range(self.T)]\n",
    "        self.lam = lam\n",
    "        self.reward = np.zeros(self.T)\n",
    "        self.seed = seed\n",
    "        self.sigma = env.sigma\n",
    "        self.h = h\n",
    "        self.q = q\n",
    "        \n",
    "        # true parameters\n",
    "        self.theta = env.theta\n",
    "        self.theta_hat_forced = np.zeros((self.K, self.d))\n",
    "        self.theta_hat_all = np.zeros((self.K, self.d))\n",
    "        self.theta_hat_forced_traj = np.zeros((self.K, self.T, self.d))\n",
    "        self.theta_hat_all_traj = np.zeros((self.K, self.T, self.d))\n",
    "        self.count_forced = np.zeros(self.K) # number of pulls for each arm\n",
    "        self.count_all = np.zeros(self.K) \n",
    "        self.index_s1_opt = [[] for i in range(self.T)]\n",
    "        \n",
    "        # data  matrix\n",
    "        self.xt = env.xt\n",
    "        # reward matrix\n",
    "        self.yt = env.yt\n",
    "        self.yt_mean = env.yt_mean\n",
    "        self.true_arm = env.true_arm\n",
    "        \n",
    "        # Ridge contextual bandit parameters\n",
    "        self.forced_time_arm_convert = [0 for t in range(self.T)]\n",
    "        self._get_forced_sample_time()\n",
    "        \n",
    "        # initialize the estimator\n",
    "        self._init_forced_sample()\n",
    "        self._init_all_sample()\n",
    "        \n",
    "    def _init_forced_sample(self):\n",
    "        self.forced_x = np.zeros((self.K, self.T, self.d))\n",
    "        self.forced_y = np.zeros((self.K, self.T))\n",
    "       \n",
    "    def _init_all_sample(self):\n",
    "        self.all_x = np.zeros((self.K, self.T, self.d))\n",
    "        self.all_y = np.zeros((self.K, self.T))\n",
    "        \n",
    "    def _get_forced_sample_time(self):\n",
    "        # step 1: the forced time is based on the power of 2\n",
    "        self.epoch = int(np.floor(np.log2(self.T)))\n",
    "        self.Tau = [[] for i in range(self.K)]\n",
    "        \n",
    "        for n in range(self.epoch):\n",
    "            for i in range(self.K):\n",
    "                for j in range(self.q*(i+1-1)+1, self.q*(i+1)+1):\n",
    "                    self.Tau[i].append((2**n-1)*self.K*self.q+j-1)\n",
    "        \n",
    "        # step 2: combine the forced sample time self.Tau as a list\n",
    "        # convert list of list to list\n",
    "        self.forced_sample_time = sorted([item for sublist in self.Tau for item in sublist])\n",
    "        \n",
    "        # step 3: create a list to determine which arm to pull at each forced sample time\n",
    "        for t in range(self.T):\n",
    "            for i in range(self.K):\n",
    "                if t in self.Tau[i]:\n",
    "                    self.forced_time_arm_convert[t] = i\n",
    "                    break\n",
    "        \n",
    "        \n",
    "    def Select(self, xt, t):\n",
    "        # check whehter the current time is the forced sample time or not\n",
    "        if t in self.forced_sample_time:\n",
    "            # step 0: determine which arm to pull\n",
    "            arm_selected = self.forced_time_arm_convert[t]\n",
    "            self.count_forced[arm_selected] += 1\n",
    "            self.count_all[arm_selected] += 1\n",
    "            \n",
    "            # step 1: pull that arm\n",
    "            self.selected_arms[t] = arm_selected\n",
    "            self.reward[t] = self.yt[t, self.selected_arms[t]]\n",
    "            \n",
    "            self.update_forced_sample(xt, self.reward[t], self.selected_arms[t])\n",
    "            self.update_all_sample(xt, self.reward[t], self.selected_arms[t])\n",
    "            \n",
    "        else:\n",
    "            # step 1: do screening, use the forced samples estimator to predict the reward\n",
    "            y_forced_hat = [self.predict_forced_sample(xt, i) for i in range(self.K)]\n",
    "            \n",
    "            # sub-step A: do screening, just select the arm has the distance to the true theta is smaller than self.h/2\n",
    "            y_forced_highest = np.max(y_forced_hat)\n",
    "            for i in range(self.K):\n",
    "                if y_forced_hat[i] >= (y_forced_highest - self.h/2.0):\n",
    "                    self.index_s1_opt[t].append(i)\n",
    "            #print(len(self.index_s1_opt[t]))\n",
    "            # sub-step B: use the all sample estimator to select the optimal arm\n",
    "            y_s2_pred = [self.predict_all_sample(xt, i) for i in self.index_s1_opt[t]]\n",
    "            \n",
    "            # step 3: select the arm greedily\n",
    "            self.selected_arms[t] = np.argmax(y_s2_pred) \n",
    "            \n",
    "            # update the number of pulls for each arm in all sample estimator\n",
    "            self.count_all[self.selected_arms[t]] += 1\n",
    "            self.reward[t] = self.yt[t, self.selected_arms[t]]\n",
    "            \n",
    "            # step 4: update the model\n",
    "            self.update_all_sample(xt, self.reward[t], self.selected_arms[t])\n",
    "        \n",
    "    def predict_forced_sample(self, xt, id):\n",
    "        return self.theta_hat_forced[id].dot(xt)\n",
    "    \n",
    "    def update_forced_sample(self, xt, yt, id):\n",
    "        # store the current xt, yt to this id.\n",
    "        # step 1: update the forced sample data\n",
    "        self.forced_x[id, int(self.count_forced[id]), :] = xt\n",
    "        self.forced_y[id, int(self.count_forced[id])] = yt\n",
    "        \n",
    "        x_selected = self.forced_x[id, :int(self.count_forced[id]), :]\n",
    "        y_selected = self.forced_y[id, :int(self.count_forced[id])]\n",
    "        self.theta_hat_forced[id] = \\\n",
    "            np.linalg.inv(x_selected.T.dot(x_selected) + self.lam*np.identity(self.d)).dot(x_selected.T).dot(y_selected)\n",
    "\n",
    "        # step 2: store the theta_hat_forced to theta_hat_forced_traj\n",
    "        self.theta_hat_forced_traj[id, int(self.count_forced[id]), :] = self.theta_hat_forced[id]\n",
    "    \n",
    "    def predict_all_sample(self, xt, id):\n",
    "        return self.theta_hat_all[id].dot(xt)\n",
    "\n",
    "    def update_all_sample(self, xt, yt, id):\n",
    "        # step 1: update the all sample data\n",
    "        self.all_x[id, int(self.count_all[id]), :] = xt\n",
    "        self.all_y[id, int(self.count_all[id])] = yt\n",
    "\n",
    "        # step 2: update the all sample estimator\n",
    "        x_selected = self.all_x[id, :int(self.count_all[id]), :]\n",
    "        y_selected = self.all_y[id, :int(self.count_all[id])]\n",
    "        self.theta_hat_all[id] = \\\n",
    "            np.linalg.inv(x_selected.T.dot(x_selected) + self.lam*np.identity(self.d)).dot(x_selected.T).dot(y_selected)\n",
    "\n",
    "        self.theta_hat_all_traj[id, int(self.count_all[id]), :] = self.theta_hat_all[id]\n",
    "    \n",
    "    def regret(self):\n",
    "        # step 1: calculate the regret\n",
    "        regret = np.zeros(self.T)\n",
    "        for t in range(self.T):\n",
    "            regret[t] = np.max(self.yt_mean[t, :]) - self.yt_mean[t, self.selected_arms[t]]\n",
    "        accum_regret = np.cumsum(regret)\n",
    "        return accum_regret       "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'np' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m/Users/liyt/Documents/2023Spring/BIC/cBIC/code/lasso.ipynb Cell 4\u001b[0m in \u001b[0;36m1\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/liyt/Documents/2023Spring/BIC/cBIC/code/lasso.ipynb#W4sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m rep \u001b[39m=\u001b[39m \u001b[39m50\u001b[39m\n\u001b[1;32m     <a href='vscode-notebook-cell:/Users/liyt/Documents/2023Spring/BIC/cBIC/code/lasso.ipynb#W4sZmlsZQ%3D%3D?line=9'>10</a>\u001b[0m C \u001b[39m=\u001b[39m \u001b[39m20\u001b[39m\n\u001b[0;32m---> <a href='vscode-notebook-cell:/Users/liyt/Documents/2023Spring/BIC/cBIC/code/lasso.ipynb#W4sZmlsZQ%3D%3D?line=10'>11</a>\u001b[0m rep_regret \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mzeros((rep, T))\u001b[39m# all replicates\u001b[39;00m\n\u001b[1;32m     <a href='vscode-notebook-cell:/Users/liyt/Documents/2023Spring/BIC/cBIC/code/lasso.ipynb#W4sZmlsZQ%3D%3D?line=12'>13</a>\u001b[0m \u001b[39m# step 3: initialize the algorithm\u001b[39;00m\n\u001b[1;32m     <a href='vscode-notebook-cell:/Users/liyt/Documents/2023Spring/BIC/cBIC/code/lasso.ipynb#W4sZmlsZQ%3D%3D?line=13'>14</a>\u001b[0m \u001b[39m# parameters for Ridge_CB\u001b[39;00m\n\u001b[1;32m     <a href='vscode-notebook-cell:/Users/liyt/Documents/2023Spring/BIC/cBIC/code/lasso.ipynb#W4sZmlsZQ%3D%3D?line=14'>15</a>\u001b[0m h \u001b[39m=\u001b[39m \u001b[39m4\u001b[39m\n",
      "\u001b[0;31mNameError\u001b[0m: name 'np' is not defined"
     ]
    }
   ],
   "source": [
    "# step 0: set the parameters\n",
    "K = 5\n",
    "T = 20000\n",
    "delta = 0.05\n",
    "d = 2\n",
    "lam = 0.1\n",
    "seed = 90095\n",
    "sigma = 0.05\n",
    "rep = 50\n",
    "C = 20\n",
    "rep_regret = np.zeros((rep, T))# all replicates\n",
    "\n",
    "# step 3: initialize the algorithm\n",
    "# parameters for Ridge_CB\n",
    "h = 4\n",
    "q = 4\n",
    "\n",
    "# Step 1: Generate the data\n",
    "env = ENV(K, T, delta, d, seed, sigma, C)\n",
    "env.dgp()\n",
    "\n",
    "# step 2: run the algorithm multiple times\n",
    "for i in tqdm(range(rep)):\n",
    "    # step 2: initialize the algorithm\n",
    "    rcb = Ridge_CB(env, lam, h, q, seed+i)\n",
    "    # Run the algorithm\n",
    "    for t in range(T):\n",
    "        rcb.Select(rcb.xt[t], t)\n",
    "    \n",
    "    # step 3: get the regret and plot\n",
    "    regret = rcb.regret()\n",
    "    rep_regret[i, :] = regret\n",
    "    \n",
    "# with x axis with sqrt scale with mean rep_regret over rep\n",
    "plt.plot(range(1, T+1), np.mean(rep_regret, axis=0))\n",
    "plt.title(\"Regret with lambda=%.3f\" % (lam))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(rcb.forced_sample_time)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# plot the sqcb.theta_hat_traj[0,0:int(sqcb.count[0]),:] in a 2D plot of its trajectory\n",
    "for i in range(K):\n",
    "    # creat a new figure\n",
    "    plt.figure()\n",
    "    plt.plot(rcb.theta_hat_all_traj[i,0:int(sqcb.count[i]),0], rcb.theta_hat_all_traj[i,0:int(sqcb.count[i]),1], 'b-')\n",
    "    # add the true theta with red color\n",
    "    plt.plot(rcb.theta[i,0], rcb.theta[i,1], 'ro')\n",
    "    # add the starting point with black color\n",
    "    plt.plot(rcb.theta_hat_all_traj[i,0,0], rcb.theta_hat_all_traj[i,0,1], 'ko')\n",
    "    # add the title\n",
    "    plt.title(\"Trajectory of theta_hat for arm %d\" % (i+1))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rcb.count_all"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure()\n",
    "plt.hist(rcb.true_arm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Check Sampling Baysian Incentive Compatiable in guassian case\n",
    "k = 100\n",
    "lambda_0 = 0.1\n",
    "sigma = 0.1\n",
    "x = np.random.uniform(0, 1, k).reshape(int(k/2),2)\n",
    "beta = np.array([0.2, 0.6])\n",
    "Sigma_0 = lambda_0 * np.eye(2)\n",
    "y = x.dot(beta) + np.random.normal(0, sigma, int(k/2))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
