{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "7t0cpSc6864_"
      },
      "source": [
        "**Augmented RBMLE-UCB Approach for Adaptive\n",
        "Control of Linear Quadratic Systems **\n",
        "Simulation Code\n",
        "\n",
        "(Some part of code is borrowed from the code provided by Sarah Dean, Horia Mania, Nikolai Matni, Benjamin Recht, Stephen Tu, \n",
        "\"Regret Bounds for Robust Adaptive Control of the Linear Quadratic Regulator\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "VRazbOHU4Caf",
        "outputId": "4124c69b-2232-47b9-cb37-a22c9ab4199d"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Requirement already satisfied: control in /usr/local/lib/python3.7/dist-packages (0.9.2)\n",
            "Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from control) (3.2.2)\n",
            "Requirement already satisfied: scipy in /usr/local/lib/python3.7/dist-packages (from control) (1.7.3)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from control) (1.21.6)\n",
            "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->control) (2.8.2)\n",
            "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->control) (1.4.4)\n",
            "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->control) (3.0.9)\n",
            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->control) (0.11.0)\n",
            "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from kiwisolver>=1.0.1->matplotlib->control) (4.1.1)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.1->matplotlib->control) (1.15.0)\n"
          ]
        }
      ],
      "source": [
        "pip install control"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "PWIi3Su-j2Vj"
      },
      "outputs": [],
      "source": [
        "import numpy as np\n",
        "import control\n",
        "import scipy\n",
        "import matplotlib.pyplot as plt\n",
        "import seaborn as sns\n",
        "from examples import *\n",
        "import warnings\n",
        "warnings.filterwarnings(\"ignore\", category=RuntimeWarning)\n",
        "from numpy import inf\n",
        "sns.set_style('ticks')\n",
        "rgblist = sns.color_palette('tab10' )"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "meYQOcJEtBhQ"
      },
      "outputs": [],
      "source": [
        "def get_mean(regrets):\n",
        "    mean = np.mean(regrets, axis=0)\n",
        "    std = np.std(regrets, axis=0)\n",
        "    low = mean-std\n",
        "    high= mean+std\n",
        "    return mean, low, high\n",
        "\n",
        "def generate_log_plot(data_a,data_b,data_c,data_d,file_name,y_axis,labels):\n",
        "    \n",
        "    mean_a,low_a,high_a=get_mean(data_a)\n",
        "    mean_b,low_b,high_b=get_mean(data_b)\n",
        "    mean_c,low_c,high_c=get_mean(data_c)\n",
        "    mean_d,low_d,high_d=get_mean(data_d)\n",
        "    fig1=plt.figure()\n",
        "\n",
        "    plt.plot(range(len(mean_a)), np.log10(mean_a), color=rgblist[0], label=labels[0],marker='x',markevery=(100,100))\n",
        "    plt.fill_between(np.array(np.arange(len(mean_a))), np.log10(mean_a), \n",
        "                        np.log10(high_a), color=rgblist[0], alpha=00.1)\n",
        "    \n",
        "    plt.plot(range(len(mean_b)), np.log10(mean_b), color=rgblist[1], label=labels[1],marker='o',markevery=(75,100))\n",
        "    plt.fill_between(np.array(np.arange(len(mean_b))), np.log10(mean_b), \n",
        "                        np.log10(high_b), color=rgblist[1], alpha=00.1)\n",
        "    plt.plot(range(len(mean_c)), np.log10(mean_c), color=rgblist[2], label=labels[2],marker='*',markevery=(50,100))\n",
        "    plt.fill_between(np.array(np.arange(len(mean_c))), np.log10(mean_c), \n",
        "                        np.log10(high_c), color=rgblist[2], alpha=00.1)\n",
        "    plt.plot(range(len(mean_a)), np.log10(mean_d), color=rgblist[3], label=labels[3],marker='d',markevery=(25,100))\n",
        "    plt.fill_between(np.array(np.arange(len(mean_d))), np.log10(mean_d), \n",
        "                        np.log10(high_d), color=rgblist[3], alpha=00.1)    \n",
        "    plt.legend(loc=2,fontsize=14)\n",
        "    plt.grid()\n",
        "    plt.ylabel(y_axis,fontsize=16)\n",
        "    plt.xlabel('Time Horizon',fontsize=16)  \n",
        "    fig1.savefig(file_name, bbox_inches='tight')  \n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "2iMNLuNz-PKr"
      },
      "outputs": [],
      "source": [
        "def chained_integrator_dynamics():\n",
        "    A = np.array([\n",
        "        [1,.1],\n",
        "        [0, 1], ])\n",
        "    B=np.eye(2)\n",
        "    n,m=B.shape\n",
        "    Q=np.eye(n)\n",
        "    R=np.eye(m)\n",
        "    return A,B,Q,R\n",
        "\n",
        "def transient_dynamics():\n",
        "    A = np.array([\n",
        "        [1.01, 0 , 0],\n",
        "        [1.1, 1.01, 0],\n",
        "        [0 , 1.1, 1.01],\n",
        "    ])  \n",
        "    B = np.eye(3)\n",
        "    n,m=B.shape\n",
        "    Q=np.eye(n)\n",
        "    R=np.eye(m)\n",
        "    return A,B,Q,R\n",
        "\n",
        "def unstable_laplacian_dynamics():\n",
        "    A = np.array([\n",
        "        [1.01, 0.01 , 0],\n",
        "        [0.01, 1.01, 0.01],\n",
        "        [0 , 0.01, 1.01],\n",
        "    ])  \n",
        "    B = np.eye(3)\n",
        "    n,m=B.shape\n",
        "    Q=np.eye(n)\n",
        "    R=np.eye(m)\n",
        "    return A,B,Q,R\n",
        "\n",
        "def non_sparse_matrix():\n",
        "\n",
        "    A = np.array([\n",
        "        [.3, .4, .2, .2],\n",
        "        [0.2, .3, .2, .2],\n",
        "        [0.2, .2, .4, .4],\n",
        "        [0.4, 0.2 ,0.2 ,0.4] \n",
        "    ])\n",
        "    B=np.eye(4)\n",
        "    n,m=B.shape\n",
        "    Q=np.eye(n)\n",
        "    R=np.eye(m)  \n",
        "    return A,B,Q,R \n",
        "    \n",
        "\n",
        "def boeing():\n",
        "    A = np.array([[0.99, 0.03, -0.02, -0.32], [0.01, 0.47, 4.7, 0],[0.02, -0.06, 0.4, 0],[0.01, -0.04, 0.72, 0.99]])\n",
        "    B = np.array([[0.01, 0.99], [-3.44, 1.66],[-0.83, 0.44],[-0.47, 0.25]])\n",
        "    n,m=B.shape\n",
        "    Q=np.eye(n)\n",
        "    R=np.eye(m)\n",
        "    return A,B,Q,R \n",
        "\n",
        "def uav():\n",
        "    A = np.array([[1, 0.5, 0, 0], [0, 1, 0, 0],[0, 0, 1, 0.5],[0, 0, 0, 1]])\n",
        "    B = np.array([[0.125, 0], [0.5,0],[0, 0.125],[0,0.5]])\n",
        "    n,m=B.shape\n",
        "    Q= np.array([[1, 0, 0, 0], [0, .1, 0, 0],[0, 0, 2, 0],[0, 0, 0, .2]])\n",
        "    R= np.eye(m)\n",
        "    return A,B,Q,R \n",
        "\n",
        "def snc():\n",
        "    A = np.array([[-2, 0,1.1],[1.5, 0.9,1.3],[0,0,0.5]])\n",
        "    B = np.array([[1, 0], [0,1],[0, 0]])\n",
        "    n,m=B.shape\n",
        "    Q=np.eye(n)\n",
        "    R=np.eye(m)\n",
        "    return A,B,Q,R \n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Ccku8Nvc3xUQ"
      },
      "outputs": [],
      "source": [
        "    \n",
        "#Adaptive Controller    \n",
        "class adaptive_controller():\n",
        "     def __init__(self,A,B,Q,R,horizon,T_init,num_repeats,reg,algorithm):\n",
        "      self.A_star=A\n",
        "      self.B_star=B\n",
        "      self.theta_star=np.hstack((A,B))\n",
        "      self.Q=Q\n",
        "      self.R=R\n",
        "      self.rl_algo=algorithm\n",
        "      self.horizon=horizon\n",
        "      self.T_init=T_init\n",
        "      self.num_repeats=num_repeats\n",
        "      self.n,self.m=B.shape\n",
        "      self.reg=reg\n",
        "      self.K_star,_,self.J_star=self.get_gain_matrix(theta_star)\n",
        "      self.K_init,_,_=control.dlqr(self.A_star,self.B_star,10*np.eye(self.n),np.eye(self.m))\n",
        "      #PGD Parameters\n",
        "      self.num_restarts=5\n",
        "      self.max_iters=500\n",
        "      self.step_size=.05\n",
        "      self.rel_tol=1e-5\n",
        "      #Confidence Interval Parameters\n",
        "      self.delta=1e-3\n",
        "      self.L=1\n",
        "      self.S=20*np.sqrt(np.trace(self.theta_star.dot(np.eye(self.m+self.n)).dot(self.theta_star.T)))\n",
        "      #RBMLE bias\n",
        "      self.bias=0.01\n",
        "      #STABL parameters\n",
        "      self.T_w=35\n",
        "      self.sigma_w=2\n",
        "\n",
        "     def reset(self,cur_seed):\n",
        "      np.random.seed(cur_seed)\n",
        "      self.states=np.zeros((self.n,self.horizon))\n",
        "      self.inputs=np.zeros((self.m,self.horizon))\n",
        "      self.costs=np.zeros((self.horizon,))\n",
        "      self.cov=self.reg*np.eye(self.n+self.m)\n",
        "      self.x=self.states[:,0]\n",
        "      self.u=self.inputs[:,0]\n",
        "      self.t=0\n",
        "      self.episode_no=10\n",
        "      self.theta_emp=np.zeros_like(theta_star)\n",
        "      self.K=np.zeros((self.K_star.shape[0],self.K_star.shape[1],self.horizon))\n",
        "      self.theta=np.zeros((self.theta_star.shape[0],self.theta_star.shape[1],self.horizon))\n",
        "      self.J=np.zeros((self.horizon))\n",
        "      self.event=np.ones((self.horizon))\n",
        "\n",
        "     def project_weighted_ball(self,M,theta_center,cov,eps):\n",
        "\n",
        "      assert len(M.shape) == 2\n",
        "      assert M.shape == theta_center.shape\n",
        "      assert len(cov.shape) == 2\n",
        "      assert cov.shape[0] == cov.shape[1]\n",
        "      assert M.shape[1] == cov.shape[0]\n",
        "      assert self.eps > 0\n",
        "\n",
        "      TOL = 1e-3\n",
        "\n",
        "      if not np.allclose(theta_center, np.zeros_like(theta_center)):\n",
        "          ret = self.project_weighted_ball(M - theta_center, np.zeros_like(theta_center),cov,eps)\n",
        "          ret += theta_center\n",
        "          assert np.trace((ret - theta_center).dot(cov).dot((ret - theta_center).T)) <= (1+TOL)*eps\n",
        "          return ret\n",
        "\n",
        "    # now we can treat theta_hat = 0\n",
        "\n",
        "    # first check easy case:\n",
        "      if np.trace(M.dot(cov).dot(M.T)) <= eps:\n",
        "          return M\n",
        "\n",
        "    # otherwise, solution takes form\n",
        "    # theta_star = M (I + lam * cov)^{-1} for some lam > 0\n",
        "\n",
        "      w, V = np.linalg.eigh(cov)\n",
        "\n",
        "    # find lam such that\n",
        "    # Tr( M * (I + lam * cov)^{-1} cov * (I + lam * cov)^{-1} M.T ) = eps\n",
        "\n",
        "      MV = M.dot(V)\n",
        "      VTMT_MV = MV.T.dot(MV)\n",
        "      term2 = np.diag(VTMT_MV)\n",
        "\n",
        "      def func(lam):\n",
        "          assert lam >= 0\n",
        "          term1 = (w / ((1 + lam * w) ** 2))\n",
        "          val=self.eps - np.sum(term1 * term2) \n",
        "          if np.abs(val)<1e-5:\n",
        "             val=0 \n",
        "          return val\n",
        "      #print(func(0))\n",
        "      #assert func(0) <= 0\n",
        "      lam_ub = 1\n",
        "      lam_lb = 0\n",
        "      while func(lam_lb) <= 0 and func(2*lam_ub) < 0 :\n",
        "          lam_lb = lam_ub\n",
        "          lam_ub *= 2\n",
        "\n",
        "      lam_star, results = scipy.optimize.brentq(func, lam_lb, lam_ub, full_output=True)\n",
        "\n",
        "      theta_star = MV.dot(np.diag(1/(1 + lam_star * w))).dot(V.T)\n",
        "\n",
        "\n",
        "      return theta_star\n",
        "\n",
        "\n",
        "     def pd_inv_sqrt(self,P):\n",
        "      assert len(P.shape) == 2\n",
        "      assert P.shape[0] == P.shape[1]\n",
        "      w, v = np.linalg.eigh(P)\n",
        "      TOL = 1e-5\n",
        "      if (w < TOL).any():\n",
        "        for i in range(int(w.shape[0])):\n",
        "            w[i]=max(TOL,w[i]);\n",
        "      return v.dot(np.diag(1/np.sqrt(w))).dot(v.T)\n",
        "\n",
        "     def get_gain_matrix(self,theta):\n",
        "      A=theta[:,:self.n]\n",
        "      B=theta[:,self.n:]\n",
        "      K,P,_= control.dlqr(A,B,self.Q,self.R)\n",
        "      J=np.trace(P)\n",
        "      return K,P,J \n",
        "\n",
        "     def get_input(self,K):\n",
        "      u= -1*np.dot(K,self.x)\n",
        "      return u    \n",
        "\n",
        "     def get_next_state(self,w): \n",
        "      x_next=np.dot(self.A_star,self.x)+np.dot(self.B_star,self.u)+w\n",
        "      return x_next\n",
        "\n",
        "     def calculate_cost(self,x,u):\n",
        "      c=np.dot(np.dot(x.T,self.Q),x)+np.dot(np.dot(u.T,self.R),u)#is there is better /faster way\n",
        "      return c\n",
        "\n",
        "     def get_lse(self,):\n",
        "      X1=np.dot(self.Z.T,self.Z)+reg*np.eye(self.n+self.m)\n",
        "      X2=np.dot(self.Z.T,self.X)\n",
        "      theta_hat,_,_,_=np.linalg.lstsq(X1,X2,rcond=None)\n",
        "      return theta_hat.T\n",
        "\n",
        "     def get_estimate(self):\n",
        "      \n",
        "      self.theta_emp=self.get_lse()\n",
        "      if self.rl_algo=='LSE':\n",
        "        theta=self.get_lse()\n",
        "      elif self.rl_algo=='OFU':\n",
        "        theta=self.get_ofu()\n",
        "      elif self.rl_algo=='ARBMLE':\n",
        "        theta=self.get_aug_rbmle()\n",
        "      elif self.rl_algo=='RBMLE':\n",
        "        theta=self.get_rbmle()  \n",
        "      elif self.rl_algo=='STABL':\n",
        "        theta=self.get_stbl()\n",
        "      elif self.rl_algo=='TS':\n",
        "        theta=self.get_ts()\n",
        "      elif self.rl_algo=='IP':\n",
        "        theta=self.get_lse() \n",
        "      elif self.rl_algo=='RCE':\n",
        "        theta=self.get_lse()+(1/10)*(np.log(self.episode_no)/self.episode_no)**0.25*np.random.normal(0,1,size=self.theta_star.shape)           \n",
        "      elif self.rl_algo=='OPTIMAL':\n",
        "        theta=self.theta_star\n",
        "      return theta\n",
        "\n",
        "     def confidence_interval(self):\n",
        "  \n",
        "      term1=np.sqrt(np.linalg.det(self.cov))\n",
        "      term2=np.sqrt(np.linalg.det(self.reg*np.eye(self.n+self.m)))\n",
        "      term3=term1/(term2*self.delta)\n",
        "      term4=self.n*np.sqrt(2*np.log(term3))\n",
        "      term5=np.sqrt(self.reg)*self.S\n",
        "      eps=(term4+term5)**2\n",
        "\n",
        "      return eps\n",
        "    \n",
        "     def update_history(self):\n",
        "      self.states[:,self.t]=self.x\n",
        "      self.inputs[:,self.t-1]=self.u\n",
        "      self.costs[self.t-1]=self.c-self.J_star\n",
        "      z=np.matrix(np.concatenate((self.states[:,self.t-1],self.u)))\n",
        "      self.cov+=np.multiply(z.T,z)\n",
        "      self.eps=self.confidence_interval()\n",
        "      self.X=(self.states[:,1:self.t]).T\n",
        "      self.Z=np.concatenate((self.states[:,:self.t-1],self.inputs[:,:self.t-1])).T\n",
        "    \n",
        "      return  \n",
        "\n",
        "     def run_experiment(self,noise,input_noise,cur_seed):\n",
        "      self.reset(cur_seed)\n",
        "      while self.t<self.T_init:\n",
        "        self.u=self.get_input(self.K_init)+input_noise[:,self.t]\n",
        "        self.c=self.calculate_cost(self.x,self.u)\n",
        "        self.x=self.get_next_state(noise[:,self.t])\n",
        "        self.t+=1\n",
        "        self.update_history()\n",
        "\n",
        "      while self.t<self.horizon-1:\n",
        "        theta_t=self.get_estimate()\n",
        "        Kt,_,J=self.get_gain_matrix(theta_t)\n",
        "        last_cov=np.linalg.det(self.cov)\n",
        "        episode_length=0\n",
        "        self.episode_no+=1\n",
        "        while ((np.linalg.det(self.cov)<=2*last_cov) or (episode_length<10)) and self.t<self.horizon-1:\n",
        "            self.u=self.get_input(Kt)\n",
        "            if self.rl_algo=='IP':\n",
        "              self.u+=np.random.normal(0,1/self.episode_no,size=self.u.shape)\n",
        "            if self.rl_algo=='STABL':\n",
        "              if (self.t<=self.T_init+self.T_w):\n",
        "                self.u+=np.random.normal(0,self.sigma_w)  \n",
        "            self.c=self.calculate_cost(self.x,self.u)\n",
        "            self.x=self.get_next_state(noise[:,self.t])\n",
        "            self.t+=1        \n",
        "            episode_length+=1\n",
        "            self.update_history()\n",
        "\n",
        "      return  \n",
        "  \n",
        "     def squared_error(self,theta):\n",
        "      temp1=self.states[:,1:self.t]-np.dot(theta,(self.Z).T)\n",
        "      error=0\n",
        "      for i in range(temp1.shape[1]):\n",
        "        error+=np.dot(temp1[:,i].T,temp1[:,i])\n",
        "      error+=self.reg*np.linalg.norm(theta)  \n",
        "      return error\n",
        "\n",
        "     def lse_gradient(self,theta):\n",
        "      grad=2*(np.dot(theta,(np.dot(self.Z.T,self.Z)+self.reg*np.eye(self.n+self.m)).T)-np.dot(self.X.T,self.Z))\n",
        "      return grad  \n",
        "\n",
        "     def get_ofu(self):\n",
        "      self.rl_algo='OFU'\n",
        "      self.function=self.cost\n",
        "      self.gradient=self.cost_gradient\n",
        "      theta_emp=self.get_lse()\n",
        "      theta=self.pgd(self.cov,self.eps)\n",
        "      return theta\n",
        "    \n",
        "     def get_stbl(self):\n",
        "      self.rl_algo='STABL'\n",
        "      self.function=self.cost\n",
        "      self.gradient=self.cost_gradient\n",
        "      theta_emp=self.get_lse()\n",
        "      theta=self.pgd(self.cov,self.eps)\n",
        "      return theta\n",
        "\n",
        "     def get_rbmle(self):\n",
        "      self.rl_algo='RBMLE'\n",
        "      cov=np.eye(self.n+self.m)\n",
        "      eps=self.S*self.S\n",
        "      self.theta_emp=np.zeros_like(self.theta_star)\n",
        "      self.function=self.rbmle\n",
        "      self.gradient=self.rbmle_gradient\n",
        "      theta=self.pgd(cov,eps)\n",
        "      return theta\n",
        "\n",
        "   \n",
        "     def get_ts(self):\n",
        "\n",
        "      eta = np.random.normal(size=self.theta_emp.shape)\n",
        "      inv=self.pd_inv_sqrt(self.cov)\n",
        "      eta *= np.power(np.random.uniform(), 1/(self.theta_emp.shape[0] * self.theta_emp.shape[1])) / np.linalg.norm(eta, ord=\"fro\")\n",
        "      theta= self.theta_emp + np.sqrt(self.eps) * eta.dot(inv)  \n",
        "      return theta  \n",
        "\n",
        "     def get_aug_rbmle(self):\n",
        "      self.function=self.rbmle\n",
        "      self.gradient=self.rbmle_gradient\n",
        "      theta=self.pgd(self.cov,self.eps)\n",
        "      return theta   \n",
        "\n",
        "     def rbmle(self,theta):\n",
        "      rbmle_cost=self.bias*(self.horizon**0.5)*self.cost(theta)+self.squared_error(theta)\n",
        "      return rbmle_cost\n",
        "\n",
        "     def rbmle_gradient(self,theta):\n",
        "      rbmle_grad=self.bias*(self.horizon**0.5)*self.cost_gradient(theta)+self.lse_gradient(theta)\n",
        "      return rbmle_grad\n",
        "\n",
        "     def cost(self,theta):\n",
        "      _,_,cost=self.get_gain_matrix(theta)\n",
        "      return cost\n",
        "\n",
        "     def cost_gradient(self,theta):\n",
        "      A=theta[:,:self.n]\n",
        "      B=theta[:,self.n:]\n",
        "      K,P,J= self.get_gain_matrix(theta)\n",
        "      A_c = A + B.dot(K)\n",
        "      grad = np.zeros((self.n, self.n + self.m))\n",
        "\n",
        "      for idx in range(self.n):\n",
        "        for jdx in range(self.n + self.m):\n",
        "            U = np.zeros((self.n, self.n + self.m))\n",
        "            U[idx, jdx] = 1\n",
        "            target = A_c.T.dot(P.dot(U)).dot(np.vstack((np.eye(self.n), K)))\n",
        "            target += target.T\n",
        "            DU = scipy.linalg.solve_discrete_lyapunov(A.T, Q, None)\n",
        "\n",
        "            grad[idx, jdx] = np.trace(DU)\n",
        "\n",
        "      return  grad \n",
        "\n",
        "     def pgd(self,cov,eps):\n",
        "      theta_initial=self.get_lse()\n",
        "      theta_opt=theta_initial\n",
        "      f_opt=self.function(theta_opt)\n",
        "      for j in range(self.num_restarts):\n",
        "        theta_cur=theta_initial+np.sign(j)*np.random.normal(0,1,size=theta_initial.shape)\n",
        "        theta_cur=self.project_weighted_ball(theta_cur,self.theta_emp,self.cov,self.eps)\n",
        "        f_cur=self.function(theta_cur)\n",
        "        i=0\n",
        "        while i <=self.max_iters:\n",
        "          i+=1\n",
        "          grad=self.gradient(theta_cur)\n",
        "          theta_next=theta_cur-self.step_size*grad/np.linalg.norm(grad)\n",
        "          theta_next=self.project_weighted_ball(theta_next,self.theta_emp,self.cov,self.eps)\n",
        "          f_next=self.function(theta_next)\n",
        "          rel_dec=((f_cur-f_next)/f_cur)\n",
        "          theta_cur=theta_next\n",
        "          f_cur=f_next\n",
        "          if f_cur<=f_opt:\n",
        "            f_opt=f_cur\n",
        "            theta_opt=theta_cur\n",
        "          if (rel_dec<self.rel_tol):\n",
        "            break\n",
        "\n",
        "      return theta_opt\n",
        "\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "etcHtyhOENCL"
      },
      "source": [
        "Use the next cell to run experiment for different examples."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "nP-BiD46wEQo",
        "outputId": "0e0709b9-bc5b-4986-dd95-e682cafd56a8"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[[0.3 0.4 0.2 0.2]\n",
            " [0.2 0.3 0.2 0.2]\n",
            " [0.2 0.2 0.4 0.4]\n",
            " [0.4 0.2 0.2 0.4]] [[1. 0. 0. 0.]\n",
            " [0. 1. 0. 0.]\n",
            " [0. 0. 1. 0.]\n",
            " [0. 0. 0. 1.]] [[1. 0. 0. 0.]\n",
            " [0. 1. 0. 0.]\n",
            " [0. 0. 1. 0.]\n",
            " [0. 0. 0. 1.]] [[1. 0. 0. 0.]\n",
            " [0. 1. 0. 0.]\n",
            " [0. 0. 1. 0.]\n",
            " [0. 0. 0. 1.]]\n"
          ]
        }
      ],
      "source": [
        "#Experimental Setup\t  \n",
        "#Uncomment the line for the specific example to get results\n",
        "A_star,B_star,Q,R=boeing()# Boeing \n",
        "#A_star,B_star,Q,R=uav()# Unmanned Aerial Vehicle\n",
        "#A_star,B_star,Q,R=unstable_laplacian_dynamics() # Unstable laplacian \n",
        "#A_star,B_star,Q,R=transient_dynamics() # large transient dynamics\n",
        "#A_star,B_star,Q,R=snc() # stabalizable but not controllable\n",
        "#A_star,B_star,Q,R=non_sparse_matrix() # non sparse matrix\n",
        "#A_star,B_star,Q,R=chained_integrator_dynamics() # chained integrator dynamics\n",
        "n,m=B_star.shape\n",
        "theta_star=np.hstack((A_star,B_star))\n",
        "print(A_star,B_star,Q,R)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "WwKOhDTaib4Q",
        "outputId": "95ec05e5-f3c9-4bbb-d9c8-acabe3c280ad"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "0\n",
            "1\n",
            "2\n",
            "3\n",
            "4\n",
            "5\n",
            "6\n",
            "7\n",
            "8\n",
            "9\n",
            "10\n",
            "11\n",
            "12\n",
            "13\n",
            "14\n",
            "15\n",
            "16\n",
            "17\n",
            "18\n",
            "19\n",
            "20\n",
            "21\n",
            "22\n",
            "23\n",
            "24\n",
            "25\n",
            "26\n",
            "27\n",
            "28\n",
            "29\n",
            "30\n",
            "31\n",
            "32\n",
            "33\n",
            "34\n",
            "35\n",
            "36\n",
            "37\n",
            "38\n",
            "39\n",
            "40\n",
            "41\n",
            "42\n",
            "43\n",
            "44\n",
            "45\n",
            "46\n",
            "47\n",
            "48\n",
            "49\n"
          ]
        }
      ],
      "source": [
        "\n",
        "horizon=550 #length of each experiement #positive integer\n",
        "num_repeats=50#number of repeatations #positive integer\n",
        "reg=1e-4\n",
        "T_init=50\n",
        "algorithms=['RBMLE','ARBMLE','OFU','TS','IP','RCE','STABL']\n",
        "env1=adaptive_controller(A_star,B_star,Q,R,horizon,T_init,num_repeats,reg,algorithms[0])\n",
        "env2=adaptive_controller(A_star,B_star,Q,R,horizon,T_init,num_repeats,reg,algorithms[1])\n",
        "env3=adaptive_controller(A_star,B_star,Q,R,horizon,T_init,num_repeats,reg,algorithms[2])\n",
        "env4=adaptive_controller(A_star,B_star,Q,R,horizon,T_init,num_repeats,reg,algorithms[3])\n",
        "env5=adaptive_controller(A_star,B_star,Q,R,horizon,T_init,num_repeats,reg,algorithms[4])\n",
        "env6=adaptive_controller(A_star,B_star,Q,R,horizon,T_init,num_repeats,reg,algorithms[5])\n",
        "env7=adaptive_controller(A_star,B_star,Q,R,horizon,T_init,num_repeats,reg,algorithms[6])\n",
        "\n",
        "cost1=np.zeros((num_repeats,horizon))\n",
        "cost2=np.zeros((num_repeats,horizon))\n",
        "cost3=np.zeros((num_repeats,horizon))\n",
        "cost4=np.zeros((num_repeats,horizon))\n",
        "cost5=np.zeros((num_repeats,horizon))\n",
        "cost6=np.zeros((num_repeats,horizon))\n",
        "cost7=np.zeros((num_repeats,horizon))\n",
        "\n",
        "for i in range(num_repeats):\n",
        "  print(i)\n",
        "  np.random.seed(i)\n",
        "\n",
        "  input_noise =np.random.normal(1, size=(m,T_init)) #same noise across algorithms #random noise of dimension Txnum_repeats\n",
        "\n",
        "  noise =np.random.normal(1, size=(n,horizon)) #same noise across algorithms #random noise of dimension Txnum_repeats\n",
        "  env1.run_experiment(noise,input_noise,i)\n",
        "  env2.run_experiment(noise,input_noise,i)\n",
        "  env3.run_experiment(noise,input_noise,i)\n",
        "  env4.run_experiment(noise,input_noise,i)\n",
        "  env5.run_experiment(noise,input_noise,i)\n",
        "  env6.run_experiment(noise,input_noise,i)\n",
        "  env7.run_experiment(noise,input_noise,i)\n",
        "\n",
        "\n",
        "  cost1[i,:]=env1.costs\n",
        "  cost2[i,:]=env2.costs \n",
        "  cost3[i,:]=env3.costs \n",
        "  cost4[i,:]=env4.costs \n",
        "  cost5[i,:]=env5.costs \n",
        "  cost6[i,:]=env6.costs \n",
        "  cost7[i,:]=env7.costs \n",
        "\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "_JjjSJMq6JLy"
      },
      "outputs": [],
      "source": [
        "reg1=np.cumsum(cost1[:,T_init:-1],axis=1)\n",
        "reg2=np.cumsum(cost2[:,T_init:-1],axis=1)\n",
        "reg3=np.cumsum(cost3[:,T_init:-1],axis=1)\n",
        "reg4=np.cumsum(cost4[:,T_init:-1],axis=1)\n",
        "reg5=np.cumsum(cost5[:,T_init:-1],axis=1)\n",
        "reg6=np.cumsum(cost6[:,T_init:-1],axis=1)\n",
        "reg7=np.cumsum(cost7[:,T_init:-1],axis=1)\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Vnw4rOE-Fuyw"
      },
      "outputs": [],
      "source": [
        "np.save(algorithms[0],reg1)\n",
        "np.save(algorithms[1],reg2)\n",
        "np.save(algorithms[2],reg3)\n",
        "np.save(algorithms[3],reg4)\n",
        "np.save(algorithms[4],reg5)\n",
        "np.save(algorithms[5],reg6)\n",
        "np.save(algorithms[6],reg7)\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "1_CZMBQoauTl"
      },
      "outputs": [],
      "source": [
        "reg_RBMLE=np.load('RBMLE.npy')\n",
        "reg_ARBMLE=np.load('ARBMLE.npy')\n",
        "reg_OFU=np.load('OFU.npy')\n",
        "reg_TS=np.load('TS.npy')\n",
        "reg_IP=np.load('IP.npy')\n",
        "reg_RCE=np.load('RCE.npy')\n",
        "reg_STABL=np.load('STABL.npy')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "EM-Vradn5GN8",
        "outputId": "5fc2e862-dc35-45b3-ee38-ff8dbc620b6f"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "RBMLE 5930.556886612869\n",
            "ARBMLE 5930.556886612869\n",
            "OFU 5486013418934.105\n",
            "TS 28319440298218.11\n",
            "IP 5955.788005702936\n",
            "RCE 6395.937458931984\n",
            "STABL 19013845900.496964\n"
          ]
        }
      ],
      "source": [
        "r_RBMLE=np.mean(reg_RBMLE,axis=0)\n",
        "r_ARBMLE=np.mean(reg_ARBMLE,axis=0)\n",
        "r_OFU=np.mean(reg_OFU,axis=0)\n",
        "r_TS=np.mean(reg_TS,axis=0)\n",
        "r_IP=np.mean(reg_IP,axis=0)\n",
        "r_RCE=np.mean(reg_RCE,axis=0)\n",
        "r_STABL=np.mean(reg_STABL,axis=0)\n",
        "print(algorithms[0],r_RBMLE[-1])\n",
        "print(algorithms[1],r_ARBMLE[-1])\n",
        "print(algorithms[2],r_OFU[-1])\n",
        "print(algorithms[3],r_TS[-1])\n",
        "print(algorithms[4],r_IP[-1])\n",
        "print(algorithms[5],r_RCE[-1])\n",
        "print(algorithms[6],r_STABL[-1])\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "generate_log_plot (reg_ARBMLE,reg_OFU,reg_TS,reg_STABL,'1.pdf','$Log_{10} (Regret)$',['ARBMLE','OFULQ','TS','STABL'])\n",
        "generate_log_plot (reg_RBMLE,reg_ARBMLE,reg_IP,reg_RCE,'2.pdf','$Log_{10} (Regret)$',['RBMLE','ARBMLE','IP','RCE'])\n"
      ],
      "metadata": {
        "id": "UpU6HIGSSsgw"
      },
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "colab": {
      "collapsed_sections": [],
      "machine_shape": "hm",
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}