{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np;\n",
    "import scipy\n",
    "import random\n",
    "import numpy.random as ra;\n",
    "import numpy.linalg as la;\n",
    "import matplotlib.pyplot as plt\n",
    "import sklearn\n",
    "from sklearn import preprocessing\n",
    "#import Matrix_Norm as mn\n",
    "from scipy.stats import bernoulli\n",
    "#import glrt as glrt\n",
    "import kld as kl\n",
    "#import Action_Set_generation as action_set\n",
    "import theta_1_set_gen as big_theta\n",
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seed both random number generators used by the simulation: the stdlib `random`\n",
    "# module (random.randrange) and NumPy's global generator (np.random.normal,\n",
    "# np.random.random). Seeding `random` alone leaves every NumPy draw unseeded, so\n",
    "# the Monte Carlo results would differ from run to run.\n",
    "random.seed(20)\n",
    "np.random.seed(20)\n",
    "\n",
    "T=5000 #Length of the time horizon\n",
    "epsilon=0.2 #probability of playing a uniformly random action at each step (exploration)\n",
    "itr=5000 #number of Monte Carlo runs per (graph length, change point) pair\n",
    "\n",
    "#Noise level. NOTE: this value is passed as the `scale` (standard deviation) argument of\n",
    "#np.random.normal and is squared before being handed to kl.kld, so it acts as a standard\n",
    "#deviation rather than a variance.\n",
    "var=.5\n",
    "path_len=5 #path length handed to big_theta.theta_1_set when building the post-change parameter set\n",
    "\n",
    "d_list=list(range(10,30,5)) #list of lengths of the line graph. Graph length = 10,15,20,25\n",
    "tau_list=list(range(10,50,10)) #list of change points. Change point = 10,20,30,40\n",
    "\n",
    "#Post-change parameters that generate the observations after the change point (unknown to the\n",
    "#algorithms, but common to all algorithms); entry i is used with d_list[i].\n",
    "#SECURITY: allow_pickle=True lets np.load unpickle arbitrary Python objects, which can execute\n",
    "#code. Only load a theta_1_list.npy that comes from a trusted source.\n",
    "theta_1_list=np.load(\"theta_1_list.npy\",allow_pickle=True).tolist()\n",
    "\n",
    "#list to store the stopping times of the algorithm (one entry per graph length)\n",
    "l_stop_alg_d_full=[]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EG_full\n",
      "d= 10\n",
      "tau= 10\n",
      "tau= 20\n",
      "tau= 30\n",
      "tau= 40\n",
      "d= 15\n",
      "tau= 10\n",
      "tau= 20\n",
      "tau= 30\n",
      "tau= 40\n",
      "d= 20\n",
      "tau= 10\n",
      "tau= 20\n",
      "tau= 30\n",
      "tau= 40\n",
      "d= 25\n",
      "tau= 10\n",
      "tau= 20\n",
      "tau= 30\n",
      "tau= 40\n",
      "Time Taken by EG(full) 90.36076617240906\n"
     ]
    }
   ],
   "source": [
    "# ______________________________ Bandit loop for EG_full ______________________________\n",
    "\n",
    "# Wall-clock start, used for the elapsed-time report printed at the end of this cell.\n",
    "start=time.time()\n",
    "\n",
    "# Detection threshold: the alarm is raised once the largest statistic reaches beta.\n",
    "# Chosen so that the false alarm of the Epsilon-Greedy change detector is less than 1%.\n",
    "beta=50\n",
    "\n",
    "# Start from an empty result list every time this cell runs. The list is appended to\n",
    "# once per graph length below, so re-running the cell without re-running the\n",
    "# configuration cell would otherwise stack a second set of rows onto the first and\n",
    "# save both to disk.\n",
    "l_stop_alg_d_full=[]\n",
    "\n",
    "print(\"EG_full\")\n",
    "    \n",
    "# Outer loop: one full experiment per line-graph length d.\n",
    "for (i,d) in enumerate(d_list):\n",
    "    print(\"d=\",d)\n",
    "\n",
    "    # Post-change parameter used to generate observations from the change point onwards,\n",
    "    # reshaped into a column vector.\n",
    "    theta_1=theta_1_list[i]\n",
    "    theta_1=theta_1.reshape(theta_1.shape[0],1)\n",
    "\n",
    "    # Post-change parameter set, generated by the big_theta module.\n",
    "    theta_1_set=big_theta.theta_1_set(d,path_len)\n",
    "\n",
    "    # Rescale the set with norm='l2' along axis=0, i.e. every column ends up with unit L2 norm.\n",
    "    # Columns are what the loop below indexes as individual candidates/actions.\n",
    "    theta_1_set=sklearn.preprocessing.normalize(theta_1_set,norm='l2',axis=0) \n",
    "\n",
    "    # The action set is the very same array as the candidate parameter set (no copy is made).\n",
    "    Action_set=theta_1_set\n",
    "\n",
    "    # Total number of actions (one per column).\n",
    "    K=Action_set.shape[1] \n",
    "\n",
    "    # WLOG, for synthetic experiments, the pre-change parameter is set to zero.\n",
    "    theta_not=np.zeros((d,1))\n",
    "\n",
    "    # Stopping times for this graph length. The list is created once per d, so it collects the\n",
    "    # runs of every change point back to back: itr entries for tau_list[0], then itr entries for\n",
    "    # tau_list[1], and so on.\n",
    "    l_stop=[]\n",
    "\n",
    "    for (idx,tau) in enumerate(tau_list):\n",
    "        print(\"tau=\",tau)\n",
    "\n",
    "        # ----------------------------- Start of Monte Carlo runs -----------------------------\n",
    "        for j in range(itr):\n",
    "            # Per-run state; b1, theta_hat and V are overwritten at time step 1.\n",
    "            b1=0\n",
    "            theta_hat=0\n",
    "            V=0\n",
    "            flag=0 #set to 1 once the stopping criterion is met; the run then ends on the next step\n",
    "\n",
    "            # ________________________________ Start of time horizon ________________________________\n",
    "            for t in range(1,T+1):\n",
    "                # End the run if the algorithm raised the alarm on the previous step, or if the\n",
    "                # time horizon T has been reached without an alarm (manual stop).\n",
    "                # The check happens before step t is played, so an alarm raised while processing\n",
    "                # step t-1 is recorded as stopping time t, and no action is played at step T.\n",
    "                # NOTE(review): confirm the downstream analysis expects this one-step offset.\n",
    "                if flag==1 or t==T:\n",
    "                    change=t\n",
    "                    l_stop.append(change)\n",
    "                    break\n",
    "\n",
    "                # At time step 1, play a random action to get an initial observation.\n",
    "                if (t<=1):\n",
    "\n",
    "                    # Random action at time step 1\n",
    "                    A1_idx=random.randrange(0,K,1)\n",
    "                    A=Action_set[:,A1_idx]\n",
    "                    A=A.reshape(A.shape[0],)\n",
    "\n",
    "                    # Observation at time step 1: zero-mean noise, drawn without consulting tau.\n",
    "                    X=np.random.normal(0,var,1)\n",
    "\n",
    "                    # First-sample value of the Q^{(1)} statistic (V is Q^{(1)}), one entry per\n",
    "                    # candidate theta: 2*X*(theta.A) - (theta.A)**2.\n",
    "                    b1=X*A\n",
    "                    temp4=np.matmul((theta_1_set).T,A)               \n",
    "                    temp3=(2*np.dot((theta_1_set).T,b1))-temp4**(2)                  \n",
    "\n",
    "                    # Unlike the later updates, the first value is not clipped at zero and is not\n",
    "                    # compared against beta.\n",
    "                    V=temp3\n",
    "\n",
    "                    # Initial estimate of the post-change parameter: the candidate with the largest statistic.\n",
    "                    theta_hat_idx=np.argmax(V)\n",
    "                    theta_hat=theta_1_set[:,theta_hat_idx]\n",
    "\n",
    "                else:\n",
    "\n",
    "                    # Exploration: with probability epsilon play an action uniformly at random.\n",
    "                    if (np.random.random()<=epsilon):\n",
    "                        play_idx=random.randrange(0,K,1)\n",
    "\n",
    "                    # Exploitation: play the most informative action under the current estimate.\n",
    "                    else:\n",
    "                        # Mean of every action under the pre-change parameter (mu1) and under the\n",
    "                        # current estimate theta_hat (mu2), passed to kl.kld together with var**2.\n",
    "                        # NOTE(review): theta_not is (d,1) while theta_hat is a 1-D column slice, so\n",
    "                        # mu1 and mu2 presumably come out as (K,1) and (K,). If kl.kld combines them\n",
    "                        # elementwise, broadcasting would give a (K,K) result and np.argmax would\n",
    "                        # index its flattened form -- verify against kl.kld.\n",
    "                        mu1=np.dot(Action_set.T,theta_not)\n",
    "                        mu2=np.dot(Action_set.T,theta_hat)\n",
    "                        a_list=kl.kld(mu2,mu1,var**2)\n",
    "\n",
    "                        # Play the action for which the returned divergence value is largest.\n",
    "                        play_idx=np.argmax(a_list)\n",
    "\n",
    "                    # Play the chosen action.\n",
    "                    A=Action_set[:,play_idx]\n",
    "                    A=A.reshape(A.shape[0],)\n",
    "\n",
    "                    # Get an observation: zero-mean before the change point, mean A.theta_1 from\n",
    "                    # step tau onwards; var is used as the scale argument in both cases.\n",
    "                    if t<tau:\n",
    "                        X=np.random.normal(0,var,1)\n",
    "                    else:\n",
    "                        X=np.random.normal((np.dot(A,theta_1)),var,1)\n",
    "\n",
    "                    # One-sample increment g(X_t|A_t) for every candidate theta:\n",
    "                    # 2*X*(theta.A) - (theta.A)**2.\n",
    "                    b1=X*A\n",
    "                    temp4=np.dot((theta_1_set).T,A)\n",
    "                    temp3=(2*np.dot((theta_1_set).T,b1))-temp4**(2) \n",
    "\n",
    "                    # Recursive update of Q^{(1)}: add the increment and clip at zero from below.\n",
    "                    V=np.maximum(0,V+temp3)\n",
    "\n",
    "                    # Stopping criterion: raise the alarm once the largest statistic reaches beta.\n",
    "                    # Otherwise continue and refresh theta_hat, the estimate of the post-change\n",
    "                    # parameter at time step t.\n",
    "                    if np.max(V)>=beta:\n",
    "                        flag=1\n",
    "                    else:\n",
    "                        theta_hat_idx=np.argmax(V)\n",
    "                        theta_hat=theta_1_set[:,theta_hat_idx]\n",
    "\n",
    "    # Store the stopping times for this graph length (all change points and runs).\n",
    "    l_stop_alg_d_full.append(l_stop)\n",
    "\n",
    "# Save the stopping times as a .npy file; it is used in a separate .ipynb for visualisation.\n",
    "np.save(\"l_stop_alg_d.npy\",l_stop_alg_d_full)\n",
    "end=time.time()\n",
    "print(\"Time Taken by EG(full)\",end-start)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
