#!/usr/bin/env python
# coding: utf-8

# In[32]:


import matplotlib.pyplot as plt
import numpy as np
from scipy.integrate import odeint
#from tqdm.notebook import tqdm
from tqdm import tqdm
from scipy.linalg import expm
import scipy as sc


# In[117]:


def get_cost_matrix(A, Q, gamma, d):
    """Return the matrix P of the discount-shifted continuous Lyapunov equation.

    With ``A_tilde = A + 0.5 * log(gamma) * I_d`` the returned ``P`` is the
    solution of ``A_tilde.T @ P + P @ A_tilde.conj() = -Q`` (for a real ``A``
    this is ``A_tilde.T @ P + P @ A_tilde = -Q``).

    Parameters
    ----------
    A : (d, d) array
        System (drift) matrix.
    Q : (d, d) array
        Right-hand-side cost matrix.
    gamma : float
        Discount factor; only its logarithm enters the equation.
    d : int
        Dimension of ``A`` and ``Q``.

    Returns
    -------
    (d, d) ndarray
        The solution ``P``.
    """
    # Discounting enters as a shift of the drift by log(gamma) / 2 on the diagonal.
    half_log_discount = 1/2*np.log(gamma)
    shifted_drift = A + half_log_discount * np.eye(d)
    # solve_continuous_lyapunov(a, q) solves a @ X + X @ a^H = q.
    return sc.linalg.solve_continuous_lyapunov(shifted_drift.T, -Q)


# In[ ]:


class MonteCarlo():
    """Monte Carlo estimate of the accumulated quadratic cost of a noisy linear system.

    Trajectories are simulated once on a fine time grid of ``N_0`` points
    (step ``h_0 = T / N_0``) with the recursion
    ``x[k + 1] = expm(A * h_0) @ x[k] + w[k]``, where the ``w[k]`` are
    independent Gaussian vectors with standard deviation ``sigma * sqrt(h_0)``
    per component.  ``compute_Vhat`` then sub-samples those trajectories at a
    coarser step ``h`` and averages a discounted Riemann sum of ``x^T Q x``.

    Parameters
    ----------
    A : (d, d) array
        Drift matrix of the system.
    Q : (d, d) array
        Cost matrix used in the stage cost ``x^T Q x``.
    T : number
        Time horizon.
    X : (d,) array or scalar
        Initial state shared by every trajectory.
    imax : int
        Accepted for interface compatibility; not used by this class.
    sigma : float
        Noise scale.
    W : array
        Stored on the instance but not used by the simulation.
    B : number
        Data budget: ``compute_Vhat(h)`` uses ``max(1, int(B / int(T / h)))``
        trajectories.
    d : int
        State dimension.
    gamma : float, optional
        Discount factor used by ``compute_Vhat`` (default 1, i.e. no discounting).
    h_max : float, optional
        Coarsest step size ``compute_Vhat`` will be asked for.  It determines
        how many trajectories are simulated, because the coarsest step needs
        the most of them.
    N_0 : int, optional
        Number of points of the fine simulation grid.
    """

    def __init__(self, A, Q, T, X, imax, sigma, W, B, d, gamma=1, *, h_max=1.0, N_0=2**16):
        self.A = A
        self.Q = Q
        self.T = T
        self.X_0 = X
        self.sigma = sigma
        self.W = W
        self.B = B
        self.d = d
        self.gamma = gamma
        self.N_0 = N_0
        self.h_0 = self.T / self.N_0

        # Size the trajectory pool for the coarsest step.  This used to read an
        # undefined local name ``h`` and only worked when a module-level ``h``
        # happened to exist at construction time.
        N = int(T / h_max)
        self.M_0 = max(1, int(B / N))

        print('generating random data', B)
        self.w_0 = np.random.normal(
            scale=sigma * np.sqrt(self.h_0),
            size=(self.N_0 - 1, self.M_0, self.d),
        )
        self.x = np.zeros((self.N_0, self.M_0, self.d))
        # Every trajectory starts from the supplied initial state.
        self.x[0, :, :] = X

        print('getting matrix exponential')
        temp = expm(self.A * self.h_0)
        print('copying')
        # One copy of the one-step transition matrix per trajectory.
        self.AA = np.zeros((self.M_0, self.d, self.d))
        self.AA[0:self.M_0] = temp

    def generate_CT_data(self):
        """Fill ``self.x`` by stepping all trajectories along the fine grid."""
        for k in range(self.N_0 - 1):
            # Per-trajectory matrix-vector product, plus that step's noise.
            self.x[k + 1, :, :] = (
                np.einsum('ijk,ik->ij', self.AA, self.x[k, :, :]) + self.w_0[k, :, :]
            )

    def run(self):
        """Simulate the fine-grid trajectories."""
        print('running experiment')
        self.generate_CT_data()

    def compute_Vhat(self, h):
        """Estimate the accumulated cost from trajectories sub-sampled at step ``h``.

        With ``N = int(T / h)`` and ``M = max(1, int(B / N))`` the per-trajectory
        cost is ``sum_{k=0}^{N-2} gamma**(h * k) * h * x_k^T Q x_k``, where
        ``x_k`` is every ``max(1, int(h / h_0))``-th fine-grid state.  The
        per-trajectory costs are stored in ``self.J``.

        Parameters
        ----------
        h : float
            Step size of the Riemann sum.

        Returns
        -------
        float
            Mean of the per-trajectory costs over the first ``M`` trajectories.

        Raises
        ------
        IndexError
            If the simulated data holds fewer sub-sampled points or fewer
            trajectories than this ``h`` requires.
        """
        h_ratio = max(1, int(h / self.h_0))
        N = int(self.T / h)
        M = max(1, int(self.B / N))
        print('computing v_hat', h, M)

        n_terms = max(N - 1, 0)
        xh = self.x[::h_ratio, :M, :]
        if xh.shape[0] < n_terms or xh.shape[1] < M:
            raise IndexError(
                'simulated data too small for h={}: need {} points and {} '
                'trajectories, have {} and {}'.format(
                    h, n_terms, M, xh.shape[0], xh.shape[1]
                )
            )

        states = xh[:n_terms]
        # stage_cost[k, m] = x_k^T Q x_k for trajectory m.
        stage_cost = np.einsum('kmi,ij,kmj->km', states, np.asarray(self.Q), states)
        discount = self.gamma ** (h * np.arange(n_terms))
        self.J = h * (discount @ stage_cost)

        return np.mean(self.J)
        


# In[161]:


# The problem instance
d = 3  # dimension of problem
C = [-0.25, -0.5, -1.0, -2.0, -4.0]  # the negative constants that scale A
Q = np.identity(d)  # our cost
X = np.zeros(d)  # our initial state
sigma = 1.0  # scale parameter of the Wiener process
W = np.identity(d)  # covariance matrix of our Wiener process
B = [2**14]  # data budgets; each entry is passed to MonteCarlo as B
gamma = 0.95  # discount factor
# Horizon 1 / (1 - gamma).  In floating point 1 / (1 - 0.95) evaluates to
# 19.999999999999982, so a bare int() truncates it to 19; round to the
# nearest integer instead.
T = int(round(1 / (1 - gamma)))


# In[ ]:


imax = 9     # number of step sizes tried: h = 2**0, 2**-1, ..., 2**-(imax - 1)
trials = 40  # independent repetitions per configuration

# results[i, j, k, v] is the squared error of the estimate for drift scale C[i],
# step size 2**-j, budget B[k] and trial v.
results = np.zeros((len(C), imax, len(B), trials))
step_sizes = [2**(-(j)) for j in range(imax)]

for i, c in enumerate(tqdm(C)):
    A = c * np.identity(d)  # our system
    P = get_cost_matrix(A, Q, gamma, d)
    # Reference value the Monte Carlo estimates are compared against.
    V = - 1 / np.log(gamma) * sigma**2 * np.trace(P)
    for k, b in enumerate(B):
        for v in tqdm(range(trials)):
            # Bind the coarsest step to the module-level name h before the
            # agent is built: its trajectory pool is sized at construction
            # time for the coarsest step.
            h = step_sizes[0]
            # One fresh set of fine-grid trajectories per trial, re-used for
            # every step size.  The agent is built with gamma=1, so V_hat is
            # an undiscounted sum over the horizon T.
            agent = MonteCarlo(A, Q, T, X, imax, sigma, W, b, d, gamma=1)
            agent.run()
            for j, h in enumerate(step_sizes):
                V_hat = agent.compute_Vhat(h)
                results[i, j, k, v] = (V_hat - V)**2
            # Release the trajectories before the next trial allocates new ones.
            del agent
np.save('lqr_results_B_2_14_inf.npy', results, allow_pickle=True)

