{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np;\n",
    "import scipy\n",
    "import random\n",
    "import numpy.random as ra;\n",
    "import numpy.linalg as la;\n",
    "import matplotlib.pyplot as plt\n",
    "import sklearn\n",
    "from sklearn import preprocessing\n",
    "#import Matrix_Norm as mn\n",
    "from scipy.stats import bernoulli\n",
    "#import glrt as glrt\n",
    "import kld as kl\n",
    "#import Action_Set_generation as action_set\n",
    "import theta_1_set_gen as big_theta\n",
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reproducibility: seed BOTH random number generators used by the simulation cells.\n",
    "# `random` drives the uniform action draws (random.randrange), while NumPy's global\n",
    "# generator drives the Gaussian observations (np.random.normal) and the\n",
    "# epsilon-greedy coin flips (np.random.random).\n",
    "SEED=20\n",
    "random.seed(SEED)\n",
    "np.random.seed(SEED)\n",
    "\n",
    "T=5000 #Length of the time horizon\n",
    "epsilon=0.2 #exploration probability used by the epsilon-greedy detector\n",
    "itr=5000 #number of Monte Carlo runs per (d, tau) pair\n",
    "\n",
    "# NOTE: despite its name, `var` is used as the STANDARD DEVIATION of the Gaussian noise:\n",
    "# the simulation cells pass it as the `scale` argument of np.random.normal and hand\n",
    "# var**2 to kl.kld.\n",
    "var=.5\n",
    "path_len=1 #length of path connected nodes\n",
    "\n",
    "d_list=list(range(10,30,5)) #list of lengths of line graph. Graph length = 10,15,20,25\n",
    "tau_list=list(range(10,50,10)) #list of change points. Change point = 10,20,30,40\n",
    "\n",
    "\n",
    "#----------True post-change parameter for every graph length (unknown to the algorithms,\n",
    "#----------but shared by all of them so that they are compared on the same instances)\n",
    "\n",
    "theta_1_list=[]\n",
    "\n",
    "# For each d the candidates are the columns of the d x d identity matrix;\n",
    "# one of them is drawn uniformly at random and stored as a length-d vector.\n",
    "\n",
    "for d in d_list:\n",
    "    theta_1_set=np.identity(d)\n",
    "    theta_1_idx=random.randrange(0,theta_1_set.shape[1],1)\n",
    "    theta_1=theta_1_set[:,theta_1_idx]\n",
    "    theta_1_list.append(theta_1)\n",
    "\n",
    "# Result containers. The URS, Oracle and EG(full) cells append one list of stopping times\n",
    "# per graph length d to l_stop_rand_d, l_stop_oracle_d and l_stop_alg_d_full respectively.\n",
    "# l_stop_alg_d is not filled by any cell of this notebook.\n",
    "l_stop_alg_d=[]\n",
    "l_stop_rand_d=[]\n",
    "l_stop_oracle_d=[]\n",
    "l_stop_alg_d_full=[]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "URS\n",
      "d= 10\n",
      "tau= 10\n",
      "tau= 20\n",
      "tau= 30\n",
      "tau= 40\n",
      "d= 15\n",
      "tau= 10\n",
      "tau= 20\n",
      "tau= 30\n",
      "tau= 40\n",
      "d= 20\n",
      "tau= 10\n",
      "tau= 20\n",
      "tau= 30\n",
      "tau= 40\n",
      "d= 25\n",
      "tau= 10\n",
      "tau= 20\n",
      "tau= 30\n",
      "tau= 40\n",
      "Runtime of URS is 1471.9654092788696\n"
     ]
    }
   ],
   "source": [
    "#_________________________________________Bandit Loop for URS______________________________________________________________\n",
    "# URS: at every time step an action is drawn uniformly at random and the statistic V\n",
    "# (one entry per candidate post-change parameter) is updated with the new observation.\n",
    "# A change is declared once max(V) reaches beta.\n",
    "\n",
    "beta=30 #the choice of beta for which the false alarm of URS change detector is less than 1%\n",
    "start=time.time()\n",
    "print(\"URS\")\n",
    "\n",
    "for (i,d) in enumerate(d_list):\n",
    "    print(\"d=\",d)\n",
    "\n",
    "    #True post-change parameter for this d, as a flat length-d vector\n",
    "    theta_1=theta_1_list[i].reshape(-1)\n",
    "\n",
    "    #Post-change parameter set: the columns of the d x d identity matrix\n",
    "    theta_1_set=np.identity(d)\n",
    "\n",
    "    #Action set: the same object as the post-change parameter set\n",
    "    Action_set=theta_1_set\n",
    "\n",
    "    #Total number of actions\n",
    "    K=Action_set.shape[1]\n",
    "\n",
    "    #Stopping times for this d. The list is NOT reset between change points, so it ends up\n",
    "    #holding one entry per Monte Carlo run for every tau in tau_list, in tau_list order.\n",
    "    l_stop=[]\n",
    "\n",
    "    for tau in tau_list:\n",
    "        print(\"tau=\",tau)\n",
    "\n",
    "        #----------------------------- Start of Monte Carlo run----------------------------\n",
    "\n",
    "        for _ in range(itr):\n",
    "            V=0\n",
    "\n",
    "            #_______________________________Start of Timeline______________________________\n",
    "\n",
    "            for t in range(1,T+1):\n",
    "\n",
    "                #Check whether the change has been declared. The check runs at the top of\n",
    "                #the step, so the recorded time is the step after the update that crossed beta.\n",
    "                #If no change has been detected, we manually stop at the time horizon.\n",
    "                if np.max(V)>=beta or t==T:\n",
    "                    l_stop.append(t)\n",
    "                    break\n",
    "\n",
    "                #Pick an action uniformly at random and play it\n",
    "                play_idx=random.randrange(0,K,1)\n",
    "                A=Action_set[:,play_idx]\n",
    "\n",
    "                #Observation: zero-mean noise at the first step and before the change point,\n",
    "                #mean theta_1[play_idx] from tau onwards\n",
    "                if t==1 or t<tau:\n",
    "                    X=np.random.normal(0,var,1)\n",
    "                else:\n",
    "                    X=np.random.normal(theta_1[play_idx],var,1)\n",
    "\n",
    "                #Increment g(X_t|A_t) of the Q^{(1)} statistic -- V is Q^{(1)}\n",
    "                g=(2*(X*A))-theta_1_set[:,play_idx]**(2)\n",
    "\n",
    "                if t==1:\n",
    "                    #First sample initialises V directly.\n",
    "                    #NOTE(review): unlike later steps this value is not clipped at 0 - confirm intended.\n",
    "                    V=g\n",
    "                else:\n",
    "                    #Recursive update of Q^{(1)}\n",
    "                    V=np.maximum(0,V+g)\n",
    "\n",
    "    l_stop_rand_d.append(l_stop)\n",
    "\n",
    "np.save(\"l_stop_rand_d.npy\",l_stop_rand_d)\n",
    "end=time.time()\n",
    "print(f\"Runtime of URS is {end - start}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Oracle\n",
      "d= 10\n",
      "tau= 10\n",
      "tau= 20\n",
      "tau= 30\n",
      "tau= 40\n",
      "d= 15\n",
      "tau= 10\n",
      "tau= 20\n",
      "tau= 30\n",
      "tau= 40\n",
      "d= 20\n",
      "tau= 10\n",
      "tau= 20\n",
      "tau= 30\n",
      "tau= 40\n",
      "d= 25\n",
      "tau= 10\n",
      "tau= 20\n",
      "tau= 30\n",
      "tau= 40\n",
      "Runtime of the Oracle is 292.4441833496094\n"
     ]
    }
   ],
   "source": [
    "#_______________________________________Bandit Loop for  Oracle_______________________________________________________________\n",
    "# Oracle: uses the true post-change parameter theta_1 to pick, once and for all, the action\n",
    "# with the largest KL divergence, and plays it at every step after the first.\n",
    "\n",
    "beta=50 #the choice of beta for which the false alarm of Oracle change detector is less than 1%\n",
    "start=time.time()\n",
    "\n",
    "print(\"Oracle\")\n",
    "\n",
    "\n",
    "for (i,d) in enumerate(d_list):\n",
    "    print(\"d=\",d)\n",
    "\n",
    "    #True post-change parameter for this d, as a flat length-d vector\n",
    "    theta_1=theta_1_list[i].reshape(-1)\n",
    "\n",
    "    #Post-change parameter set: the columns of the d x d identity matrix\n",
    "    theta_1_set=np.identity(d)\n",
    "\n",
    "    #Action set: the same object as the post-change parameter set\n",
    "    Action_set=theta_1_set\n",
    "\n",
    "    #Total number of actions\n",
    "    K=Action_set.shape[1]\n",
    "\n",
    "    #WLOG, for synthetic experiments, we set pre-change parameter to zero.\n",
    "    theta_not=np.zeros((d,))\n",
    "\n",
    "    #KL divergence of every action between the post-change and pre-change means.\n",
    "    #Note: we use theta_1 here because the oracle knows the true post-change parameter.\n",
    "    #The inputs only depend on d, so the most informative action is computed once per d\n",
    "    #instead of at every time step (assumes kl.kld is a pure function - TODO confirm).\n",
    "    mu1=np.dot(Action_set.T,theta_not)\n",
    "    mu2=np.dot(Action_set.T,theta_1)\n",
    "    a_list=kl.kld(mu2,mu1,var**2)\n",
    "\n",
    "    #Action for which the KL divergence is maximum, and the quantities derived from it\n",
    "    oracle_idx=np.argmax(a_list)\n",
    "    A_oracle=Action_set[:,oracle_idx]\n",
    "    oracle_sq=theta_1_set[:,oracle_idx]**(2)\n",
    "\n",
    "    #Stopping times for this d. The list is NOT reset between change points, so it ends up\n",
    "    #holding one entry per Monte Carlo run for every tau in tau_list, in tau_list order.\n",
    "    l_stop=[]\n",
    "\n",
    "    for tau in tau_list:\n",
    "        print(\"tau=\",tau)\n",
    "\n",
    "        #----------------------------- Start of Monte Carlo run----------------------------\n",
    "\n",
    "        for _ in range(itr):\n",
    "            V=0\n",
    "            detected=False #becomes True once max(V) has reached beta\n",
    "\n",
    "            #________________________________Start of Timeline_____________________________\n",
    "\n",
    "            for t in range(1,T+1):\n",
    "\n",
    "                #Check whether the change has been declared (at the previous step).\n",
    "                #If no change has been detected, we manually stop at the time horizon.\n",
    "                if detected or t==T:\n",
    "                    l_stop.append(t)\n",
    "                    break\n",
    "\n",
    "                if t==1:\n",
    "                    #Random action and zero-mean observation at time step 1\n",
    "                    A1_idx=random.randrange(0,K,1)\n",
    "                    X=np.random.normal(0,var,1)\n",
    "\n",
    "                    #First sample initialises V (= Q^{(1)}) directly; the threshold is\n",
    "                    #not tested at this step.\n",
    "                    #NOTE(review): unlike later steps this value is not clipped at 0 - confirm intended.\n",
    "                    V=(2*(X*Action_set[:,A1_idx]))-theta_1_set[:,A1_idx]**(2)\n",
    "                else:\n",
    "                    #Observation of the oracle action: zero-mean before the change point,\n",
    "                    #mean theta_1[oracle_idx] from tau onwards\n",
    "                    if t<tau:\n",
    "                        X=np.random.normal(0,var,1)\n",
    "                    else:\n",
    "                        X=np.random.normal(theta_1[oracle_idx],var,1)\n",
    "\n",
    "                    #Recursive update of Q^{(1)} with the increment g(X_t|A_t)\n",
    "                    V=np.maximum(0,V+((2*(X*A_oracle))-oracle_sq))\n",
    "\n",
    "                    #Stopping criterion: the run is closed at the top of the next step\n",
    "                    if np.max(V)>=beta:\n",
    "                        detected=True\n",
    "\n",
    "    l_stop_oracle_d.append(l_stop)\n",
    "\n",
    "np.save(\"l_stop_oracle_d.npy\",l_stop_oracle_d)\n",
    "end=time.time()\n",
    "print(f\"Runtime of the Oracle is {end - start}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EG_full\n",
      "d= 10\n",
      "tau= 10\n",
      "tau= 20\n",
      "tau= 30\n",
      "tau= 40\n",
      "d= 15\n",
      "tau= 10\n",
      "tau= 20\n",
      "tau= 30\n",
      "tau= 40\n",
      "d= 20\n",
      "tau= 10\n",
      "tau= 20\n",
      "tau= 30\n",
      "tau= 40\n",
      "d= 25\n",
      "tau= 10\n",
      "tau= 20\n",
      "tau= 30\n",
      "tau= 40\n",
      "Runtime of EG(full) is 680.0428578853607\n"
     ]
    }
   ],
   "source": [
    "# #______________________________________Bandit Loop for EG_full_________________________________________________________________\n",
    "# Epsilon-greedy: with probability epsilon play a uniformly random action, otherwise play\n",
    "# the action with the largest KL divergence under the current estimate theta_hat of the\n",
    "# post-change parameter. theta_hat is the candidate whose entry of V is largest.\n",
    "\n",
    "beta=40 #the choice of beta for which the false alarm of Epsilon-Greedy change detector is less than 1%\n",
    "start=time.time()\n",
    "print(\"EG_full\")\n",
    "\n",
    "\n",
    "for (i,d) in enumerate(d_list):\n",
    "    print(\"d=\",d)\n",
    "\n",
    "    #True post-change parameter for this d, as a flat length-d vector\n",
    "    theta_1=theta_1_list[i].reshape(-1)\n",
    "\n",
    "    #Post-change parameter set: the columns of the d x d identity matrix\n",
    "    theta_1_set=np.identity(d)\n",
    "\n",
    "    #Action set: the same object as the post-change parameter set\n",
    "    Action_set=theta_1_set\n",
    "\n",
    "    #Total number of actions\n",
    "    K=Action_set.shape[1]\n",
    "\n",
    "    #WLOG, for synthetic experiments, we set pre-change parameter to zero.\n",
    "    theta_not=np.zeros((d,))\n",
    "\n",
    "    #Stopping times for this d. The list is NOT reset between change points, so it ends up\n",
    "    #holding one entry per Monte Carlo run for every tau in tau_list, in tau_list order.\n",
    "    l_stop=[]\n",
    "\n",
    "    for tau in tau_list:\n",
    "        print(\"tau=\",tau)\n",
    "\n",
    "        #----------------------------- Start of Monte Carlo run----------------------------\n",
    "\n",
    "        for _ in range(itr):\n",
    "            V=0\n",
    "            detected=False #becomes True once max(V) has reached beta; then we stop and report change\n",
    "\n",
    "            #________________________________Start of Timeline_____________________________\n",
    "\n",
    "            for t in range(1,T+1):\n",
    "\n",
    "                #Check whether the change has been declared (at the previous step).\n",
    "                #If no change has been detected, we manually stop at the time horizon.\n",
    "                if detected or t==T:\n",
    "                    l_stop.append(t)\n",
    "                    break\n",
    "\n",
    "                #Action selection. The coin is only flipped from step 2 onwards\n",
    "                #(short-circuit), so step 1 always plays a random action.\n",
    "                if t==1 or np.random.random()<=epsilon:\n",
    "                    #First step / exploration phase: play an action uniformly at random\n",
    "                    play_idx=random.randrange(0,K,1)\n",
    "                else:\n",
    "                    #Exploitation phase: play the action that is most informative under\n",
    "                    #theta_hat, that is, the action for which the KL divergence is maximum\n",
    "                    a_list=kl.kld(theta_hat,theta_not,var**2)\n",
    "                    play_idx=np.argmax(a_list)\n",
    "\n",
    "                A=Action_set[:,play_idx]\n",
    "\n",
    "                #Observation: zero-mean noise at the first step and before the change point,\n",
    "                #mean theta_1[play_idx] from tau onwards\n",
    "                if t==1 or t<tau:\n",
    "                    X=np.random.normal(0,var,1)\n",
    "                else:\n",
    "                    X=np.random.normal(theta_1[play_idx],var,1)\n",
    "\n",
    "                #Increment g(X_t|A_t) of the Q^{(1)} statistic -- V is Q^{(1)}\n",
    "                g=(2*(X*A))-theta_1_set[:,play_idx]**(2)\n",
    "\n",
    "                if t==1:\n",
    "                    #First sample initialises V directly; the threshold is not tested here.\n",
    "                    #NOTE(review): unlike later steps this value is not clipped at 0 - confirm intended.\n",
    "                    V=g\n",
    "                else:\n",
    "                    #Recursive update of Q^{(1)}, followed by the stopping criterion\n",
    "                    V=np.maximum(0,V+g)\n",
    "                    detected=bool(np.max(V)>=beta)\n",
    "\n",
    "                #While no change is declared, refresh theta_hat, the estimate of the\n",
    "                #post-change parameter at time step t\n",
    "                if not detected:\n",
    "                    theta_hat_idx=np.argmax(V)\n",
    "                    theta_hat=theta_1_set[:,theta_hat_idx]\n",
    "\n",
    "    l_stop_alg_d_full.append(l_stop)\n",
    "\n",
    "#NOTE(review): the file name has no _full suffix although l_stop_alg_d_full is saved -\n",
    "#confirm that downstream readers expect this name.\n",
    "np.save(\"l_stop_alg_d.npy\",l_stop_alg_d_full)\n",
    "end=time.time()\n",
    "print(f\"Runtime of EG(full) is {end - start}\")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
