import numpy as np;
import scipy
import random
import numpy.random as ra;
import numpy.linalg as la;
import matplotlib.pyplot as plt
import sklearn
from sklearn import preprocessing
#import Matrix_Norm as mn
from scipy.stats import bernoulli
#import glrt as glrt
import kld as kl
#import Action_Set_generation as action_set
import theta_1_set_gen as big_theta
import time

# Seed both random streams used by this script (the stdlib `random` module
# and NumPy's global generator) so that runs are reproducible.
random.seed(20)
np.random.seed(21)

T = 5000       # length of the time horizon
epsilon = 0.2  # probability of playing a uniformly random action in the epsilon-greedy detector
itr = 5000     # number of Monte Carlo runs

# Noise level of the Gaussian observations.
# NOTE(review): described as a variance, but it is passed to np.random.normal
# as the `scale` argument (a standard deviation) and squared before being
# handed to kl.kld -- confirm which interpretation is intended.
var = .5
path_len = 1   # length of path-connected nodes (not used in the visible part of the script)

d_list = list(range(10, 30, 5))     # lengths of the line graph: 10, 15, 20, 25
tau_list = list(range(10, 50, 10))  # change points: 10, 20, 30, 40


# ---------- True post-change parameter for every graph length ----------
# The parameter is unknown to the algorithms but shared by all of them.
# For each d it is one column of the d x d identity, picked uniformly at
# random with the stdlib generator.

theta_1_list = []

for d in d_list:
    theta_1_set = np.identity(d)
    theta_1_idx = random.randrange(d)
    theta_1 = theta_1_set[:, theta_1_idx]
    theta_1_list.append(theta_1)

# Result accumulators: one entry (a list of stopping times) is appended per
# graph length by the corresponding bandit loop.
l_stop_alg_d = []
l_stop_rand_d = []
l_stop_oracle_d = []
l_stop_alg_d_full = []



#_________________________________________Bandit Loop for URS______________________________________________________________

beta = 30  # threshold for which the false alarm of the URS change detector is less than 1%
start = time.time()
print("URS")

# URS baseline: at every time step an action is drawn uniformly at random,
# one noisy observation is collected and the vector statistic Q^{(1)} (V) is
# updated.  The run stops once max(V) reaches beta, or at the horizon T.
for (i, d) in enumerate(d_list):
    print("d=", d)

    # True post-change parameter for this graph length, as a flat (d,) vector.
    theta_1 = theta_1_list[i]
    theta_1 = theta_1.reshape(theta_1.shape[0],)

    # Candidate post-change parameters: the columns of the d x d identity.
    theta_1_set = np.identity(d)

    # Isolated action set: the actions coincide with the candidates.
    Action_set = theta_1_set

    # Total number of actions.
    K = Action_set.shape[1]

    # WLOG the pre-change parameter is zero.  Kept for reference only: the
    # pre-change observations below are drawn with mean 0 directly.
    theta_not = np.zeros((d,))

    # Stopping times for this graph length.  The runs of every change point
    # in tau_list are appended back to back (itr entries per change point).
    l_stop = []

    for tau in tau_list:
        print("tau=", tau)

        # ----------------------------- Monte Carlo runs -----------------------------
        for _ in range(itr):
            V = 0

            # Steps 1 .. T-1 are played; a run that never crosses the
            # threshold is stopped manually at T (the `else` clause).
            for t in range(1, T):

                # Play an action chosen uniformly at random.
                play_idx = random.randrange(0, K, 1)
                A = Action_set[:, play_idx]

                # The first sample is always drawn with mean 0; afterwards
                # the mean switches to theta_1[play_idx] from time tau on.
                if t > 1 and t >= tau:
                    loc = theta_1[play_idx]
                else:
                    loc = 0
                X = np.random.normal(loc, var, 1)

                # g(X_t|A_t): one increment per candidate parameter.
                g = 2 * (X * A) - theta_1_set[:, play_idx] ** 2

                # The first sample initialises Q^{(1)} without clipping;
                # later samples use the recursion max(0, Q + g).
                if t == 1:
                    V = g
                else:
                    V = np.maximum(0, V + g)

                # A crossing after step t is reported as stopping time
                # t + 1, i.e. one step after the statistic reaches beta.
                if np.max(V) >= beta:
                    l_stop.append(t + 1)
                    break
            else:
                l_stop.append(T)

    l_stop_rand_d.append(l_stop)

np.save("l_stop_rand_d.npy", l_stop_rand_d)
end = time.time()
print(f"Runtime of URS is {end - start}")


#_______________________________________Bandit Loop for  Oracle_______________________________________________________________

beta = 50  # threshold for which the false alarm of the Oracle change detector is less than 1%
start = time.time()

print("Oracle")

# Oracle baseline: it knows the true post-change parameter theta_1 and, after
# a random first action, always plays the action whose KL divergence between
# the post-change and pre-change observation models is largest.  The run
# stops once max(V) reaches beta, or at the horizon T.
for (i, d) in enumerate(d_list):
    print("d=", d)

    # True post-change parameter for this graph length, as a flat (d,) vector.
    theta_1 = theta_1_list[i]
    theta_1 = theta_1.reshape(theta_1.shape[0],)

    # Candidate post-change parameters: the columns of the d x d identity.
    theta_1_set = np.identity(d)

    # Isolated action set: the actions coincide with the candidates.
    Action_set = theta_1_set

    # Total number of actions.
    K = Action_set.shape[1]

    # WLOG, for synthetic experiments, the pre-change parameter is zero.
    theta_not = np.zeros((d,))

    # The most informative action depends only on (Action_set, theta_not,
    # theta_1, var), none of which change inside the loops below, so it is
    # computed once per graph length instead of at every time step.
    # NOTE(review): this assumes kl.kld is a pure function of its arguments
    # (no randomness, no mutation of its inputs) -- confirm against kld.py.
    mu1 = np.dot(Action_set.T, theta_not)
    mu2 = np.dot(Action_set.T, theta_1)  # the oracle is allowed to use the true theta_1
    a_list = kl.kld(mu2, mu1, var**2)
    oracle_idx = np.argmax(a_list)

    # Stopping times for this graph length.  The runs of every change point
    # in tau_list are appended back to back (itr entries per change point).
    l_stop = []

    for tau in tau_list:
        print("tau=", tau)

        # ----------------------------- Monte Carlo runs -----------------------------
        for _ in range(itr):
            V = 0

            # Steps 1 .. T-1 are played; a run that never crosses the
            # threshold is stopped manually at T (the `else` clause).
            for t in range(1, T):

                # A random action at time step 1, the oracle action afterwards.
                if t == 1:
                    play_idx = random.randrange(0, K, 1)
                else:
                    play_idx = oracle_idx
                A = Action_set[:, play_idx]

                # The first sample is always drawn with mean 0; afterwards
                # the mean switches to theta_1[play_idx] from time tau on.
                if t > 1 and t >= tau:
                    loc = theta_1[play_idx]
                else:
                    loc = 0
                X = np.random.normal(loc, var, 1)

                # g(X_t|A_t): one increment per candidate parameter.
                g = 2 * (X * A) - theta_1_set[:, play_idx] ** 2

                # The first sample initialises Q^{(1)} without clipping and
                # is never tested against the threshold.
                if t == 1:
                    V = g
                    continue

                # Recursive update of Q^{(1)}.
                V = np.maximum(0, V + g)

                # A crossing after step t is reported as stopping time
                # t + 1, i.e. one step after the statistic reaches beta.
                if np.max(V) >= beta:
                    l_stop.append(t + 1)
                    break
            else:
                l_stop.append(T)

    l_stop_oracle_d.append(l_stop)

np.save("l_stop_oracle_d.npy", l_stop_oracle_d)
end = time.time()
print(f"Runtime of the Oracle is {end - start}")


# #______________________________________Bandit Loop for EG_full_________________________________________________________________

beta = 40  # threshold for which the false alarm of the Epsilon-Greedy change detector is less than 1%
start = time.time()
print("EG_full")

# Epsilon-greedy detector: with probability epsilon it plays a uniformly
# random action; otherwise it plays the action with the largest KL divergence
# under the current estimate theta_hat of the post-change parameter.  The
# estimate is the candidate whose entry of Q^{(1)} (V) is largest.  The run
# stops once max(V) reaches beta, or at the horizon T.
for (i, d) in enumerate(d_list):
    print("d=", d)

    # True post-change parameter for this graph length, as a flat (d,) vector.
    theta_1 = theta_1_list[i]
    theta_1 = theta_1.reshape(theta_1.shape[0],)

    # Candidate post-change parameters: the columns of the d x d identity.
    theta_1_set = np.identity(d)

    # Action set: identical to the candidate set.
    Action_set = theta_1_set

    # Total number of actions.
    K = Action_set.shape[1]

    # WLOG, for synthetic experiments, the pre-change parameter is zero.
    theta_not = np.zeros((d,))

    # Stopping times for this graph length.  The runs of every change point
    # in tau_list are appended back to back (itr entries per change point).
    l_stop = []

    for tau in tau_list:
        print("tau=", tau)

        # ----------------------------- Monte Carlo runs -----------------------------
        for _ in range(itr):
            V = 0

            # Steps 1 .. T-1 are played; a run that never crosses the
            # threshold is stopped manually at T (the `else` clause).
            for t in range(1, T):

                # Time step 1 always explores and draws no epsilon coin;
                # the short-circuit `or` keeps the RNG stream untouched.
                if t == 1 or np.random.random() <= epsilon:
                    # Exploration: play an action uniformly at random.
                    play_idx = random.randrange(0, K, 1)
                else:
                    # Exploitation: play the most informative action, i.e.
                    # the one with maximal KL divergence.  theta_hat and
                    # theta_not are passed as the per-action means directly.
                    a_list = kl.kld(theta_hat, theta_not, var**2)
                    play_idx = np.argmax(a_list)

                A = Action_set[:, play_idx]

                # The first sample is always drawn with mean 0; afterwards
                # the mean switches to theta_1[play_idx] from time tau on.
                if t > 1 and t >= tau:
                    loc = theta_1[play_idx]
                else:
                    loc = 0
                X = np.random.normal(loc, var, 1)

                # g(X_t|A_t): one increment per candidate parameter.
                g = 2 * (X * A) - theta_1_set[:, play_idx] ** 2

                if t == 1:
                    # The first sample initialises Q^{(1)} without clipping
                    # and is never tested against the threshold.
                    V = g
                else:
                    # Recursive update of Q^{(1)}.
                    V = np.maximum(0, V + g)

                    # A crossing after step t is reported as stopping time
                    # t + 1, i.e. one step after the statistic reaches beta.
                    if np.max(V) >= beta:
                        l_stop.append(t + 1)
                        break

                # No detection yet: refresh the estimate of the post-change
                # parameter with the candidate that currently maximises V.
                theta_hat_idx = np.argmax(V)
                theta_hat = theta_1_set[:, theta_hat_idx]
            else:
                l_stop.append(T)

    l_stop_alg_d_full.append(l_stop)

np.save("l_stop_alg_d.npy", l_stop_alg_d_full)
end = time.time()
print(f"Runtime of EG(full) is {end - start}")

