import numpy as np
import util
import plotly.graph_objs as go
from scipy.stats import multivariate_t, multivariate_normal
import tensorflow as tf
import tensorflow_probability as tfp
from scipy.optimize import minimize
import scipy
import math
import cProfile
import numpy.matlib
from scipy.special import logsumexp
import copy
import numba
import time

import logging

import pickle
import datetime
import random

# Raise the 'tensorflow' logger threshold to ERROR so that lower-severity
# (INFO / WARNING) records from that logger are not emitted.
logging.getLogger('tensorflow').setLevel(logging.ERROR)

# def plot_mixture_t_contours(means, scales, df, weights):
#     """
#     Plots the contour plot of a mixture of bivariate t-distributions using Plotly.
    
#     Parameters:
#     - means (ndarray): array of shape (K, 2) containing the means of each t-distribution in the mixture.
#     - scales (ndarray): array of shape (K, 2, 2) containing the covariance matrices of each t-distribution in the mixture.
#     - df (ndarray): array of shape (K,) containing the degrees of freedom of each t-distribution in the mixture.
#     - weights (ndarray): array of shape (K,) containing the weights of each t-distribution in the mixture.
#     """
#     x = np.linspace(-2, 2, 101)
#     y = np.linspace(-2, 2, 101)
#     X, Y = np.meshgrid(x, y)
#     pos = np.empty(X.shape + (2,))
#     pos[:, :, 0] = X
#     pos[:, :, 1] = Y
#     Z = np.zeros(X.shape)
#     for i in range(len(weights)):
#         rv = multivariate_t(loc=means[i], shape=scales[i], df=df[i])
#         Z += weights[i] * rv.pdf(pos)
#     # Z = np.log(Z)
#     fig = go.Figure(data=go.Contour(x=X.flatten(), y=Y.flatten(), z=Z.T.flatten()))
#     fig.show()

# def plot_gmm_contours(ws, mus, sigmas):
#     """
#     Plots the contour plot of a Gaussian Mixture Model using Plotly.
    
#     Parameters:
#     - ws (ndarray): array of shape (K,) containing the weights of each component in the mixture.
#     - mus (list): list of length K containing the means of each component in the mixture.
#     - sigmas (float): scalar value that will be multiplied by an identity matrix to obtain the covariance matrix
#                       of each component in the mixture.
#     """
#     K = len(ws)
#     x = np.linspace(-2, 2, 101)
#     y = np.linspace(-2, 2, 101)
#     X, Y = np.meshgrid(x, y)
#     pos = np.empty(X.shape + (2,))
#     pos[:, :, 0] = X
#     pos[:, :, 1] = Y
#     Z = np.zeros(X.shape)
#     for k in range(K):
#         rv = multivariate_normal(mean=mus[k].ravel(), cov=sigmas[k]*np.eye(2))
#         Z += ws[k] * rv.pdf(pos)
#     fig = go.Figure(data=go.Contour(x=X.flatten(), y=Y.flatten(), z=Z.T.flatten()))
#     fig.show()
    
def plot_mixture_t_contours(means, scales, df, weights):
    """
    Plots the contour plot of a mixture of bivariate t-distributions using Plotly.

    The density is evaluated on a fixed 101 x 101 grid over [-2, 2] x [-2, 2].

    Parameters:
    - means (ndarray): array of shape (K, 2) containing the means of each t-distribution in the mixture.
    - scales (ndarray): array of shape (K, 2, 2) containing the shape matrices of each t-distribution in the mixture.
    - df (ndarray): array of shape (K,) containing the degrees of freedom of each t-distribution in the mixture.
    - weights (ndarray): array of shape (K,) containing the weights of each t-distribution in the mixture.

    Returns:
    - (z_min, z_max): smallest and largest density value on the grid, so that a
      second plot (e.g. plot_gmm_contours) can reuse the same contour range.
    """
    x = np.linspace(-2, 2, 101)
    y = np.linspace(-2, 2, 101)
    X, Y = np.meshgrid(x, y)
    # pos[r, c] = (x[c], y[r]); shape (101, 101, 2)
    pos = np.dstack((X, Y))
    Z = np.zeros(X.shape)
    for i in range(len(weights)):
        rv = multivariate_t(loc=means[i], shape=scales[i], df=df[i])
        Z += weights[i] * rv.pdf(pos)
    # Z[r, c] is the density at (x[c], y[r]), which is exactly the row = y,
    # column = x layout Plotly uses for a 2D z. The previous code paired the
    # flattened X/Y with Z.T, which mirrored the picture across the diagonal.
    fig = go.Figure(data=go.Contour(x=x, y=y, z=Z))
    fig.show()

    # Extract max and min values of z axis for contour plot
    z_min, z_max = np.min(Z), np.max(Z)
    return z_min, z_max

def plot_gmm_contours(ws, mus, sigmas, z_min=None, z_max=None):
    """
    Plots the contour plot of a bivariate isotropic Gaussian Mixture Model using Plotly.

    The density is evaluated on a fixed 101 x 101 grid over [-2, 2] x [-2, 2].

    Parameters:
    - ws (ndarray): array of shape (K,) containing the weights of each component in the mixture.
    - mus (list): list of length K containing the means of each component in the mixture
                  (each is flattened with ravel() before use).
    - sigmas (sequence): length-K sequence; sigmas[k] is multiplied by the 2 x 2 identity
                         to obtain the covariance matrix of component k (i.e. a variance).
    - z_min (float): first contour level; defaults to the minimum density on the grid.
    - z_max (float): last contour level; defaults to the maximum density on the grid.
    """
    K = len(ws)
    x = np.linspace(-2, 2, 101)
    y = np.linspace(-2, 2, 101)
    X, Y = np.meshgrid(x, y)
    # pos[r, c] = (x[c], y[r]); shape (101, 101, 2)
    pos = np.dstack((X, Y))
    Z = np.zeros(X.shape)
    for k in range(K):
        rv = multivariate_normal(mean=mus[k].ravel(), cov=sigmas[k]*np.eye(2))
        Z += ws[k] * rv.pdf(pos)
    if z_min is None:
        z_min = np.min(Z)
    if z_max is None:
        z_max = np.max(Z)
    # 2D z with 1D axes: row r <-> y[r], column c <-> x[c] (no transpose needed).
    fig = go.Figure(data=go.Contour(x=x, y=y, z=Z, contours=dict(
            start=z_min,
            end=z_max,
            size=.02,
        )))
    fig.show()

def sample_mixture_t(n_samples, weights, means, scales, df):
    """
    Generates samples from a mixture of t-distributions with arbitrary dimensions.

    Parameters:
    - n_samples (int): number of samples to generate.
    - weights (ndarray): array of shape (K,) containing the weights of each t-distribution in the mixture
                         (passed to np.random.choice as probabilities).
    - means (ndarray): array of shape (K, D) containing the means of each t-distribution in the mixture,
                      where D is the dimension of the t-distribution.
    - scales (ndarray): array of shape (K, D, D) containing the shape matrices of each t-distribution in the mixture.
    - df (ndarray): array of shape (K,) containing the degrees of freedom of each t-distribution in the mixture.

    Returns:
    - samples (ndarray): array of shape (n_samples, D) containing the generated samples.
    """
    # Get the dimension of the t-distribution
    D = np.shape(means)[1]
    # Choose a component for each sample
    component_indices = np.random.choice(len(weights), size=n_samples, p=weights)
    # Generate samples for each component
    samples = np.zeros((n_samples, D))
    for i in range(len(weights)):
        indices = np.where(component_indices == i)[0]
        if len(indices) > 0:
            rv = multivariate_t(loc=means[i], shape=scales[i], df=df[i])
            # rvs() squeezes singleton axes: for D == 1 it returns shape (n_i,)
            # (or a scalar for a single draw), which cannot be assigned into
            # the (n_i, 1) slice. Restore the (n_i, D) layout explicitly.
            draws = np.reshape(rv.rvs(size=len(indices)), (len(indices), D))
            samples[indices] = draws
    return samples

def sample_GMM(n_samples, ws, mus, sigmas):
    """
    Draws samples from a mixture of isotropic Gaussians of arbitrary dimension.

    Parameters:
    - n_samples (int): number of samples to draw.
    - ws (ndarray): shape (K,), mixture weights (used as selection probabilities).
    - mus (list): length K; each entry is an array whose first axis has length D
                  (it is flattened before use), giving the component mean.
    - sigmas (sequence): length K; sigmas[k] scales the D x D identity to form the
                  covariance of component k, i.e. it acts as a variance.

    Returns:
    - samples (ndarray): shape (n_samples, D).
    """
    dim = mus[0].shape[0]
    n_components = len(ws)

    # One component label per requested sample.
    labels = np.random.choice(n_components, size=n_samples, p=ws)

    draws = np.zeros((n_samples, dim))
    for comp in range(n_components):
        rows = np.where(labels == comp)[0]
        if len(rows) == 0:
            continue
        component = multivariate_normal(mean=mus[comp].flatten(),
                                        cov=sigmas[comp] * np.eye(dim))
        block = component.rvs(size=len(rows))
        # rvs() squeezes singleton axes, so force the (n_rows, D) layout back.
        draws[rows] = block.reshape((len(rows), dim))
    return draws

def JensensEntropy(mus,sigmas):
    """
    Jensen-type estimate of E_q[log q] for an equally weighted isotropic GMM q.

    For every component m it evaluates

        log( (1/K) * sum_k N(mu_m ; mu_k, (sigma_m + sigma_k) I) )

    and averages the result over m with weight 1/K. sigmas are treated as
    variances (they multiply the identity covariance elsewhere in this file).

    Parameters:
    - mus: sequence of K arrays of shape (d, 1) (np.shape(mus) must be (K, d, 1)-like).
    - sigmas: sequence of K positive variances.

    Returns:
    - 1-element ndarray holding the estimate (shape kept from the original
      implementation, which accumulated into a (K, 1) buffer).
    """
    K,d,_ = np.shape(mus)
    q = 0
    for m in range(K):
        qm = np.zeros((K,1))
        for k in range(K):
            diff = mus[m]-mus[k]
            # Convolving N(., mu_m, s_m I) with N(., mu_k, s_k I) gives a
            # Gaussian with variance s_m + s_k; that sum must appear in the
            # normalising constant as well as in the exponent (the constant
            # previously used sigmas[k] alone).
            var_sum = sigmas[m]+sigmas[k]
            qm[k] = (2*np.pi*var_sum)**(-(d/2))*np.exp(-(1/2)*np.matmul(diff.T,diff)/var_sum)
        q += (1/K)*np.log((1/K)*sum(qm))
    return q

# Short alias for the TensorFlow Probability distributions namespace
# (used by component_log_prob below).
tfd = tfp.distributions

def log_eval_mixture_t(x, means, scales, df, weights):
    """
    Log-density of a mixture of multivariate t-distributions at x (SciPy only, no TensorFlow).

    Parameters:
    - x (ndarray): evaluation point; its transpose is handed to scipy's pdf, so a
                   (D, 1) column vector is evaluated as a single point.
    - means (ndarray): array of shape (K, D), component locations.
    - scales (ndarray): array of shape (K, D, D), component shape matrices.
    - df (ndarray): array of shape (K,), degrees of freedom per component.
    - weights (ndarray): array of shape (K,), mixture weights.

    Returns:
    - value (float): natural log of the weighted sum of component densities at x.
    """
    n_components = means.shape[0]
    query = x.T
    density = sum(
        weights[j] * multivariate_t(means[j], scales[j], df[j]).pdf(query)
        for j in range(n_components)
    )
    return np.log(density)

def trace_log_eval_mixture_t_second_deriv(x, means, scales, df, weights):
    """
    Sum of the pure second derivatives (Hessian trace) of the log mixture-of-t
    density with respect to x, obtained with nested TensorFlow gradient tapes.

    Parameters:
    - x: evaluation point; converted to a float64 tensor. It is iterated along
         its first axis below, so presumably a (D, 1) column vector - TODO confirm
         against callers.
    - means, scales, df, weights: mixture parameters, converted to float64
         tensors and forwarded to component_log_prob for each component index.

    Returns:
    - NumPy value of the accumulated sum (its shape follows the shape of one
      first-axis slice of x).
    """
    x_tf = tf.convert_to_tensor(x, dtype=tf.float64)
    means_tf = tf.convert_to_tensor(means, dtype=tf.float64)
    scales_tf = tf.convert_to_tensor(scales, dtype=tf.float64)
    df_tf = tf.convert_to_tensor(df, dtype=tf.float64)
    weights_tf = tf.convert_to_tensor(weights, dtype=tf.float64)

    # Compute the derivatives of the log of the mixture with respect to x using tensorflow
    # tape2 (outer) records the first-derivative computation so it can be
    # differentiated again; tape1 (inner) records the log-density itself.
    with tf.GradientTape(persistent=True) as tape2:
        tape2.watch(x_tf)
        with tf.GradientTape(persistent=True) as tape1:
            tape1.watch(x_tf)
            # One weighted log-density per component, combined with logsumexp
            # to give the log of the mixture density.
            log_probs = tf.stack([component_log_prob(i,x_tf,means_tf, scales_tf, df_tf, weights_tf) for i in range(len(weights))], axis=0)
            log_value_tf = tf.reduce_logsumexp(log_probs)
        # First derivative; taken inside tape2's context so tape2 records it.
        dlog_value_tf = tape1.gradient(log_value_tf, x_tf)
        # Split the gradient into its first-axis entries while tape2 is still
        # recording, so each entry can be differentiated separately afterwards.
        df_dx = []
        for i in range(len(dlog_value_tf)):
            df_dx.append(dlog_value_tf[i])
    # ddlog_value_hold = []
    ddlog_value = 0
    for j in range(len(dlog_value_tf)):
        # Gradient of the j-th first derivative w.r.t. x, keeping only entry j:
        # the j-th diagonal element of the Hessian. tape2 must be persistent
        # because gradient() is called on it once per coordinate.
        holding = tape2.gradient(df_dx[j],x_tf)[j]
        # ddlog_value_hold.append(holding)
        ddlog_value += holding
    
    return ddlog_value.numpy()

# Define a function that computes the log probability of a single component of the mixture
def component_log_prob(i,x_tf,means_tf, scales_tf, df_tf, weights_tf):
    """
    Weighted log-density of mixture component i at x_tf (TensorFlow Probability).

    Parameters:
    - i (int): component index.
    - x_tf: evaluation point tensor; flattened to 1D before evaluation.
    - means_tf: tensor of component locations, indexed by i.
    - scales_tf: tensor of component shape matrices (Sigma, the same quantity
                 given to scipy's multivariate_t as `shape`), indexed by i.
                 Each must be positive definite for the Cholesky factorisation.
    - df_tf: tensor of degrees of freedom, indexed by i.
    - weights_tf: tensor of mixture weights, indexed by i.

    Returns:
    - tensor: log t-density of component i at x plus log(weights_tf[i]).
    """
    loc = means_tf[i]
    # TFP parameterises the distribution by a scale operator L with
    # Sigma = L @ L^T, whereas scales_tf[i] holds Sigma itself. Passing Sigma
    # directly would evaluate a different density than log_eval_mixture_t,
    # so hand TFP the lower Cholesky factor instead.
    chol = tf.linalg.cholesky(scales_tf[i])
    scale = tf.linalg.LinearOperatorLowerTriangular(chol)
    rv = tfd.MultivariateStudentTLinearOperator(loc=loc, scale=scale, df=df_tf[i])
    return rv.log_prob(tf.reshape(x_tf, shape=(-1,))) + tf.math.log(weights_tf[i])

def hesstrace(theta,means, scales, df, weights):
    """
    Calculate the Hessian trace of log_eval_mixture_t using central finite differences.

    Arguments:
    theta -- the point at which to evaluate the Hessian trace (indexed along its
             first axis; not modified)
    means, scales, df, weights -- mixture parameters forwarded to log_eval_mixture_t

    Returns:
    h -- the Hessian trace, i.e. the sum over coordinates d of
         (f(theta + ep*e_d) + f(theta - ep*e_d) - 2*f(theta)) / ep**2
    """
    # Work on a private float copy: the previous version perturbed the
    # caller's array in place and never restored a coordinate, so every later
    # coordinate (and the base value f) was evaluated at a drifted point.
    point = np.array(theta, dtype=float)
    n = len(point)

    ep = 2*np.sqrt(1e-12)*(1+np.linalg.norm(point))/n
    h = 0

    # The unperturbed value is the same for every coordinate; evaluate it once.
    f = log_eval_mixture_t(point,means, scales, df, weights)
    for d in range(n):
        original = np.copy(point[d])
        point[d] = original + ep
        a = log_eval_mixture_t(point,means, scales, df, weights)
        point[d] = original - ep
        b = log_eval_mixture_t(point,means, scales, df, weights)
        point[d] = original  # restore before moving to the next coordinate
        h = h + (a + b - 2*f)/(ep**2)

    return h


def TaylorCrossEntropy1(mus,sigmas,means, scales, df, weights):
    """
    Equal-weight average of the target log-density evaluated at each GMM mean.

    mus must unpack as a 3D shape (K, d, 1-like) under np.shape. sigmas is
    accepted for signature parity with TaylorCrossEntropy2 but is not used.
    """
    n_components, _dim, _cols = np.shape(mus)
    share = 1/n_components
    return sum(
        share*(log_eval_mixture_t(mus[idx], means, scales, df, weights))
        for idx in range(n_components)
    )

def TaylorCrossEntropy2(mus,sigmas,means, scales, df, weights,ddg):
    """
    Equal-weight average over components of

        log p(mu_m) + (sigmas[m] / 2) * ddg[m]

    where log p is log_eval_mixture_t and ddg[m] is supplied by the caller
    (NVPalg passes the Hessian trace of log p at mu_m).

    mus must unpack as a 3D shape under np.shape; its first axis is K.
    """
    n_components, _dim, _cols = np.shape(mus)
    share = 1/n_components
    return sum(
        share*(log_eval_mixture_t(mus[idx], means, scales, df, weights)
               + (sigmas[idx]/2)*ddg[idx])
        for idx in range(n_components)
    )

def ELBO1(ws,mus,sigmas,means, scales, df, weights,Polymethod,N):
    """
    First-order ELBO: TaylorCrossEntropy1 minus an estimate of the q-term.

    Polymethod selects how the q-term (Hqq) is obtained:
      'Taylor'    -> util.log_Taylor_series
      'Legendre'  -> util.log_Legendre_series
      'Chebyshev' -> util.log_Chebyshev_series
      anything else -> JensensEntropy(mus, sigmas)
    For the three series methods, N, a bound built from ws/sigmas, and the
    output of util.isotropic_gmm_power_expected_value_test are handed to the
    series routine (their exact meaning is defined in util).
    """
    series_names = {
        'Taylor': 'log_Taylor_series',
        'Legendre': 'log_Legendre_series',
        'Chebyshev': 'log_Chebyshev_series',
    }
    chosen = series_names.get(Polymethod)

    if chosen is None:
        q_term = JensensEntropy(mus,sigmas)
    else:
        # Sum over components of w_m * (2*pi*sigma_m)^(-d/2).
        peak_bound = sum(
            ws[m]*(2*np.pi*sigmas[m])**(-np.shape(mus[0])[0]/2)
            for m in range(len(ws))
        )
        moments = util.isotropic_gmm_power_expected_value_test(N, ws, mus, sigmas, ws, mus, sigmas)
        q_term = getattr(util, chosen)(N, peak_bound, moments)

    cross_term = TaylorCrossEntropy1(mus,sigmas,means, scales, df, weights)
    return cross_term-q_term

def ELBO2(ws,mus,sigmas,means, scales, df, weights,ddg,Polymethod,N):
    """
    Second-order ELBO: TaylorCrossEntropy2 (which uses ddg) minus an estimate
    of the q-term.

    Polymethod selects how the q-term (Hqq) is obtained:
      'Taylor'    -> util.log_Taylor_series
      'Legendre'  -> util.log_Legendre_series
      'Chebyshev' -> util.log_Chebyshev_series
      anything else -> JensensEntropy(mus, sigmas)
    """
    series_names = {
        'Taylor': 'log_Taylor_series',
        'Legendre': 'log_Legendre_series',
        'Chebyshev': 'log_Chebyshev_series',
    }
    chosen = series_names.get(Polymethod)

    if chosen is None:
        q_term = JensensEntropy(mus,sigmas)
    else:
        # Sum over components of w_m * (2*pi*sigma_m)^(-d/2).
        peak_bound = sum(
            ws[m]*(2*np.pi*sigmas[m])**(-np.shape(mus[0])[0]/2)
            for m in range(len(ws))
        )
        moments = util.isotropic_gmm_power_expected_value_test(N, ws, mus, sigmas, ws, mus, sigmas)
        q_term = getattr(util, chosen)(N, peak_bound, moments)

    cross_term = TaylorCrossEntropy2(mus,sigmas,means, scales, df, weights,ddg)
    return cross_term-q_term

def ELBO1_objective_fn(mus_i,ws,i,mus, sigmas,means, scales, df, weights,Polymethod,N):
    """
    Negative ELBO1 as a function of the i-th mean only (for scipy.optimize.minimize).

    mus_i is reshaped to a column vector and substituted for entry i in a
    copy of mus (made with mus.copy()), so the caller's container is not rebound.
    """
    candidate = mus.copy()
    candidate[i] = mus_i.reshape(-1,1)
    elbo = ELBO1(ws,candidate,sigmas,means, scales, df, weights,Polymethod,N)
    return -1*elbo

def ELBO2_objective_fn(sigmas,ws,mus,means, scales, df, weights,ddg,Polymethod,N):
    """Negative ELBO2 as a function of sigmas (for scipy.optimize.minimize)."""
    elbo = ELBO2(ws,mus, sigmas, means, scales, df, weights,ddg,Polymethod,N)
    return -1*elbo

def NVPalg(ws,mus,sigmas, weights,means, scales, df, Polymethod,N):
    """
    Alternating optimisation of GMM means and variances against a mixture of t.

    Each outer iteration:
      1. minimises ELBO1_objective_fn over every mean in turn (L-BFGS-B),
         writing the result back into mus[i] (mus is updated in place);
      2. evaluates the Hessian trace of the target log-density at every mean;
      3. minimises ELBO2_objective_fn over all sigmas (L-BFGS-B, bounded to
         [1e-11, 1e4]).
    Iteration stops when the change in the step-3 objective value drops to
    1e-4 or below, or after 50 iterations.

    Parameters:
    - ws, mus, sigmas: GMM weights, list of column-vector means, variances.
    - weights, means, scales, df: parameters of the target t mixture.
    - Polymethod, N: forwarded to ELBO1 / ELBO2.

    Returns:
    - (mus, sigmas): fitted means and the optimiser's sigma vector.
    """
    tol = .0001
    max_iter = 50
    L2prev = 0
    delta = 1
    j=0
    while delta>tol and j<max_iter:
        for i in range(len(mus)):
            res = minimize(ELBO1_objective_fn, mus[i].ravel(),args=(ws,i, mus, sigmas,means, scales, df, weights,Polymethod,N), method='L-BFGS-B')
            mus[i] = res.x.reshape(-1,1)
        ddg = []
        for m in range(len(mus)):
            ddg.append(trace_log_eval_mixture_t_second_deriv(mus[m], means, scales, df, weights))
        result = minimize(ELBO2_objective_fn, sigmas,args=(ws,mus,means, scales, df, weights,ddg,Polymethod,N), method='L-BFGS-B', bounds=[(0.00000000001, 10000)]*len(sigmas))#None
        sigmas = result.x
        if j == 0:
            # No previous objective value to compare with on the first pass.
            delta = 1
            j+=1
            print("Itteration: %d    ELBO2: %f"%(j,result.fun))
        else:
            delta = np.abs(result.fun-L2prev)
            j+=1
            print("Itteration: %d    ELBO2: %f    Delta: %f"%(j,result.fun,delta))
        L2prev = result.fun

    # The loop can leave j at most equal to max_iter, so the old `j > 50`
    # test could never fire. Non-convergence means the iteration budget was
    # used up while the last change was still above the tolerance.
    if j>=max_iter and delta>tol:
        print("Didn't converge")
    return mus, sigmas

def NVPalgUpdated(ws,mus,sigmas, weights,means, scales, df, Polymethod,N1,N2,M2):
    """
    Joint L-BFGS-B optimisation of all GMM means and variances against ELBOPoly.

    The parameter vector is [all means flattened, component by component] followed
    by the K sigmas. Means are unbounded; sigmas are bounded to [1e-11, 1e4].

    Parameters:
    - ws: GMM weights (its length K is used to split the parameter vector).
    - mus: sequence of K mean arrays (each flattened).
    - sigmas: K initial variances, as a list or an ndarray.
    - weights, means, scales, df: parameters of the target t mixture.
    - Polymethod, N1, N2, M2: forwarded to ELBOPoly via ELBOPoly_objective_fn.

    Returns:
    - (mus, sigmas): list of K (d, 1) arrays and an ndarray of K sigmas.
    """
    mushold = []
    for i in range(len(mus)):
        mushold += mus[i].flatten().tolist()
    sigma_init = np.ravel(sigmas)
    # Explicit concatenation: `list + ndarray` would broadcast-add instead of
    # appending, which broke whenever sigmas arrived as an array (for example
    # the sigmas returned by NVPalg).
    inParameters = np.concatenate([np.asarray(mushold, dtype=float), np.asarray(sigma_init, dtype=float)])
    Bounds  = [(-np.inf, np.inf)]*len(mushold)+[(0.00000000001, 10000)]*len(sigma_init)
    result = minimize(ELBOPoly_objective_fn,inParameters,args=(ws,means, scales, df, weights,Polymethod,N1,N2,M2), method='L-BFGS-B', bounds=Bounds)
    outParameters = result.x

    # Split the optimum back into K column-vector means and K sigmas.
    n_components = len(ws)
    first_list = outParameters[:-n_components]
    d = int(len(first_list)/n_components)
    mus = [np.array(first_list[i:i+d]).reshape(d,1) for i in range(0, len(first_list), d)]
    sigmas = outParameters[-len(mus):]
    return mus, sigmas

def ELBOPoly_objective_fn(params,ws,means, scales, df, weights,Polymethod,N1,N2,M2):
    """
    Negative ELBOPoly for a flat parameter vector (for scipy.optimize.minimize).

    params holds the K means flattened back to back, followed by the K sigmas,
    where K = len(ws). The means are rebuilt as (d, 1) column vectors.
    """
    n_components = len(ws)
    flat_means = params[:-n_components]
    d = int(len(flat_means)/n_components)
    mus = []
    for start in range(0, len(flat_means), d):
        mus.append(np.array(flat_means[start:start+d]).reshape(d,1))
    sigmas = params[-n_components:]
    elbo = ELBOPoly(ws,mus,sigmas,means, scales, df, weights,Polymethod,N1,N2,M2)
    return -1*elbo

def ELBOPoly_mean_objective_fn(mus_i,i,ws,mus,sigmas,means, scales, df, weights,Polymethod,N1,N2,M2):
    """
    Negative ELBOPoly as a function of the i-th mean only.

    mus_i is reshaped to a column vector and substituted for entry i in a
    copy of mus (made with mus.copy()).
    """
    candidate = mus.copy()
    candidate[i] = mus_i.reshape(-1,1)
    elbo = ELBOPoly(ws,candidate,sigmas,means, scales, df, weights,Polymethod,N1,N2,M2)
    return -1*elbo

def ELBOPoly_varaince_objective_fn(sigmas,ws,mus,means, scales, df, weights,Polymethod,N1,N2,M2):
    """Negative ELBOPoly as a function of sigmas, with the means held fixed."""
    elbo = ELBOPoly(ws,mus,sigmas,means, scales, df, weights,Polymethod,N1,N2,M2)
    return -1*elbo


def ELBOPoly(ws,mus,sigmas,means, scales, df, weights,Polymethod,N1,N2,M2):
    """
    ELBO whose cross term comes from Gauss-Hermite quadrature.

    q-term (Hqq), selected by Polymethod:
      'Taylor'   -> util.log_Taylor_series
      'Legendre' -> util.log_Legendre_series
      'Limit'    -> util.log_Taylor_limit
      anything else -> JensensEntropy(mus, sigmas)
    Cross term (Hqp): element [1] of
      util.multivariate_gauss_hermite_quad_gmm(1, M2, ws, mus, covs, f)
    with f = util.evaluate_log_mixture_t for the target mixture and covs the
    full isotropic covariances sigmas[i] * I.

    N2 is accepted but not used by the current implementation (an earlier,
    disabled variant estimated the cross term with a polynomial series of
    order N2).

    Returns Hqp - Hqq.
    """
    # Sum over components of w_m * (2*pi*sigma_m)^(-d/2); computed for every
    # method, although only the series methods consume it.
    peak_bound = 0
    for m in range(len(ws)):
        peak_bound += ws[m]*(2*np.pi*sigmas[m])**(-np.shape(mus[0])[0]/2)

    series_names = {
        'Taylor': 'log_Taylor_series',
        'Legendre': 'log_Legendre_series',
        'Limit': 'log_Taylor_limit',
    }
    chosen = series_names.get(Polymethod)
    if chosen is not None:
        moments = util.isotropic_gmm_power_expected_value_test(N1, ws, mus, sigmas, ws, mus, sigmas)
        q_term = getattr(util, chosen)(N1, peak_bound, moments)
    else:
        q_term = JensensEntropy(mus,sigmas)

    # Expand each scalar variance into a full d x d covariance matrix.
    covs = [sigmas[i]*np.eye(np.shape(mus[0])[0]) for i in range(len(sigmas))]

    def log_target(x):
        return util.evaluate_log_mixture_t(x, means, scales, df, weights)

    cross_term = util.multivariate_gauss_hermite_quad_gmm(1,M2, ws, mus, covs, log_target)[1]

    return cross_term-q_term

def generatefits(tDistParams,GMMParams,N):
    """
    Fit the GMM to the t mixture with NVPalg under four q-term methods and time each fit.

    Parameters:
    - tDistParams: indexable as [weights, means, scales, df] for the target mixture.
    - GMMParams: indexable as [ws, mus, sigmas]; deep-copied for every fit so
      all methods start from the same initial values and the input is untouched.
    - N: forwarded to NVPalg.

    Returns:
    - JGMMParams, Jtime, TGMMParams, Ttime, LGMMParams, Ltime, CGMMParams, Ctime
      for the 'Jensen', 'Taylor', 'Legendre' and 'Chebyshev' methods; each
      *GMMParams is [ws, mus, sigmas] and each *time is wall-clock seconds.
    """
    weights = tDistParams[0]
    means = tDistParams[1]
    scales = tDistParams[2]
    df = tDistParams[3]

    def timed_fit(method):
        # Print the label first and start the clock afterwards, identically
        # for every method, so the timings are directly comparable.
        print(method)
        start = time.time()
        ws = copy.deepcopy(GMMParams[0])
        mus = copy.deepcopy(GMMParams[1])
        sigmas = copy.deepcopy(GMMParams[2])
        fit_mus, fit_sigmas = NVPalg(ws,mus,sigmas, weights,means, scales, df, method,N)
        elapsed = time.time()-start
        print(elapsed)
        return [ws, fit_mus, fit_sigmas], elapsed

    JGMMParams, Jtime = timed_fit('Jensen')
    TGMMParams, Ttime = timed_fit('Taylor')
    LGMMParams, Ltime = timed_fit('Legendre')
    CGMMParams, Ctime = timed_fit('Chebyshev')

    return JGMMParams, Jtime, TGMMParams, Ttime, LGMMParams, Ltime, CGMMParams, Ctime

def generatefitsupdated(tDistParams,GMMParams,N1,N2,M2):
    """
    Fit the GMM to the t mixture with four procedures and time each fit.

    Procedures, in execution order:
      "NPV"          -> NVPalg with the 'Jensen' q-term (uses N1)
      "Taylor"       -> NVPalgUpdated with 'Taylor'
      "Taylor Limit" -> NVPalgUpdated with 'Limit'
      "Legendre"     -> NVPalgUpdated with 'Legendre'

    Parameters:
    - tDistParams: indexable as [weights, means, scales, df] for the target mixture.
    - GMMParams: indexable as [ws, mus, sigmas]; deep-copied for every fit.
    - N1, N2, M2: forwarded to the fitting routines.

    Returns:
    - JGMMParams, Jtime, TGMMParams, Ttime, LGMMParams, Ltime, CGMMParams, Ctime
      where J = "NPV", T = "Taylor", L = "Legendre" and C = "Taylor Limit"
      (note the C slot holds the Taylor-limit fit here).
    """
    weights = tDistParams[0]
    means = tDistParams[1]
    scales = tDistParams[2]
    df = tDistParams[3]

    def timed_fit(label, fit):
        # Print the label first and start the clock afterwards, identically
        # for every procedure, so the timings are directly comparable.
        print(label)
        start = time.time()
        ws = copy.deepcopy(GMMParams[0])
        mus = copy.deepcopy(GMMParams[1])
        sigmas = copy.deepcopy(GMMParams[2])
        fit_mus, fit_sigmas = fit(ws, mus, sigmas)
        elapsed = time.time()-start
        print(elapsed)
        return [ws, fit_mus, fit_sigmas], elapsed

    def jensen_fit(ws, mus, sigmas):
        return NVPalg(ws,mus,sigmas, weights,means, scales, df, 'Jensen',N1)

    def poly_fit(method):
        def run(ws, mus, sigmas):
            return NVPalgUpdated(ws,mus,sigmas, weights,means, scales, df, method,N1,N2,M2)
        return run

    JGMMParams, Jtime = timed_fit("NPV", jensen_fit)
    TGMMParams, Ttime = timed_fit("Taylor", poly_fit('Taylor'))
    CGMMParams, Ctime = timed_fit("Taylor Limit", poly_fit('Limit'))
    LGMMParams, Ltime = timed_fit("Legendre", poly_fit('Legendre'))

    return JGMMParams, Jtime, TGMMParams, Ttime, LGMMParams, Ltime, CGMMParams, Ctime

from sklearn.datasets import make_spd_matrix

def generate_mvt_params(n_components, n_dim, mean_range=(-2.0, 2.0), scale_range=(0.1, 1.0), df_range=(1, 30)):
    """
    Draw random parameters for a mixture of multivariate t-distributions.

    Parameters:
    - n_components (int): number of mixture components K.
    - n_dim (int): dimension D.
    - mean_range (tuple): (low, high) for the uniform draw of every mean entry.
    - scale_range (tuple): currently unused; shape matrices come from
      sklearn's make_spd_matrix instead.
    - df_range (tuple): (low, high) passed to np.random.randint, so degrees of
      freedom are integers with `high` excluded.

    Returns:
    - weights (K,), means (K, D), scales (K, D, D), df (K,)
    """
    # Uniform draws normalised to sum to one.
    raw_weights = np.random.uniform(size=n_components)
    weights = raw_weights / np.sum(raw_weights)

    low, high = mean_range
    means = np.random.uniform(low, high, size=(n_components, n_dim))

    # One random symmetric positive-definite matrix per component.
    scales = np.zeros((n_components, n_dim, n_dim))
    for slot in scales:
        slot[...] = make_spd_matrix(n_dim)

    df_low, df_high = df_range
    df = np.random.randint(df_low, df_high, size=n_components)

    return weights, means, scales, df

def generate_mixture_params(n,D):
    """
    Draw random starting parameters for an n-component, D-dimensional isotropic GMM.

    Returns:
    - ws: ndarray of shape (n,), every entry equal to 1/n.
    - mus: list of n column vectors of shape (D, 1), entries uniform on [-2, 2).
    - sigmas: list of n values uniform on [.25, 3).
    """
    # Means: one row per component, then split into column vectors.
    centers = np.random.uniform(low=-2, high=2, size=(n, D))
    mus = [row.reshape(-1, 1) for row in centers]

    # Per-component sigma drawn between .25 and 3.
    sigmas = [s for s in np.random.uniform(low=.25, high=3, size=n)]

    # Equal weights.
    ws = np.full(n, 1/n)
    return ws, mus, sigmas

def kl_divergence_mixture(mixture1, mixture2, samples):#,nsamples=10000
    """
    Monte Carlo estimate of KL(p || q) from samples that were drawn from p.

    Parameters
    ----------
    mixture1 : sequence
        The t mixture p, as (weights, means, scales, df):
        weights (K,), means (K, D), scales (K, D, D), df (K,).
    mixture2 : sequence
        The isotropic Gaussian mixture q, as (weights, means, sigmas):
        weights (M,), means a list of M arrays (flattened before use),
        sigmas M variances (each multiplies the identity covariance).
    samples : ndarray
        Points at which both log-densities are evaluated; expected to be
        draws from mixture1, one per row.

    Returns
    -------
    float
        mean(log p(samples)) - mean(log q(samples)).
    """
    t_weights, t_means, t_scales, t_df = mixture1
    g_weights, g_means, g_vars = mixture2

    # samples = sample_mixture_t(nsamples, weights1, means1, scales1, df1)

    # log p: per-component log(weight) + log-density, combined with logsumexp.
    log_p_terms = [
        np.log(t_weights[k])
        + multivariate_t(df=t_df[k], loc=t_means[k], shape=t_scales[k]).logpdf(samples)
        for k in range(len(t_weights))
    ]
    mean_log_p = np.mean(logsumexp(log_p_terms,axis=0))

    # log q: same construction for the Gaussian mixture.
    log_q_terms = [
        np.log(g_weights[k])
        + multivariate_normal(mean=g_means[k].flatten(),
                              cov=g_vars[k]*np.eye(len(g_means[k]))).logpdf(samples)
        for k in range(len(g_weights))
    ]
    mean_log_q = np.mean(logsumexp(log_q_terms,axis=0))

    return mean_log_p - mean_log_q

def kl_divergence_mixture_qp(mixture1, mixture2, n_samples):#,nsamples=10000
    """
    Monte Carlo estimate of KL(q || p), sampling from the Gaussian mixture q.

    Note the argument roles: here the FIRST argument is the Gaussian mixture
    and the SECOND is the t mixture.

    Parameters
    ----------
    mixture1 : sequence
        The isotropic Gaussian mixture q, as (weights, means, sigmas):
        weights (M,), means a list of M arrays (flattened before use),
        sigmas M variances (each multiplies the identity covariance).
    mixture2 : sequence
        The t mixture p, as (weights, means, scales, df):
        weights (K,), means (K, D), scales (K, D, D), df (K,).
    n_samples : int
        Number of samples drawn from q with sample_GMM.

    Returns
    -------
    float
        mean(log q(samples)) - mean(log p(samples)).
    """
    t_weights, t_means, t_scales, t_df = mixture2
    g_weights, g_means, g_vars = mixture1

    samples = sample_GMM(n_samples, g_weights, g_means, g_vars)

    # log p (t mixture) at the q-samples.
    log_p_terms = [
        np.log(t_weights[k])
        + multivariate_t(df=t_df[k], loc=t_means[k], shape=t_scales[k]).logpdf(samples)
        for k in range(len(t_weights))
    ]
    mean_log_p = np.mean(logsumexp(log_p_terms,axis=0))

    # log q (Gaussian mixture) at the same samples.
    log_q_terms = [
        np.log(g_weights[k])
        + multivariate_normal(mean=g_means[k].flatten(),
                              cov=g_vars[k]*np.eye(len(g_means[k]))).logpdf(samples)
        for k in range(len(g_weights))
    ]
    mean_log_q = np.mean(logsumexp(log_q_terms,axis=0))

    return mean_log_q- mean_log_p

def NMC_estimator_dist(tDistParams, GMMParams, n_samples, m_trials,method):
    """
    Repeat both Monte Carlo KL estimates m_trials times and plot their histograms.

    Each trial draws n_samples points from the t mixture for KL(p || q) and
    lets kl_divergence_mixture_qp draw n_samples points from the GMM for
    KL(q || p). `method` is used only as the plot title.
    """
    t_weights, t_means, t_scales, t_df = (tDistParams[0], tDistParams[1],
                                          tDistParams[2], tDistParams[3])
    forward_kl = np.zeros((m_trials,1))
    reverse_kl = np.zeros((m_trials,1))
    for trial in range(m_trials):
        t_draws = sample_mixture_t(n_samples, t_weights, t_means, t_scales, t_df)
        forward_kl[trial] = kl_divergence_mixture(tDistParams, GMMParams, t_draws)
        reverse_kl[trial] = kl_divergence_mixture_qp(GMMParams, tDistParams, n_samples)
    plot_histograms(forward_kl, reverse_kl,title=method)


import plotly.subplots as sp    
def plot_histograms(x, y, title, nbins=30):
    """
    Show two side-by-side Plotly histograms: x as 'KL_pq' (left) and y as 'KL_qp' (right).

    Parameters:
    - x, y: arrays of values (flattened before plotting).
    - title: figure title.
    - nbins: value passed to each histogram as nbinsx.
    """
    # One row, two columns, one titled panel per estimate.
    fig = sp.make_subplots(rows=1, cols=2, subplot_titles=('Histogram of KL_pq', 'Histogram of KL_qp'))

    panels = ((x, 'KL_pq'), (y, 'KL_qp'))
    for column, (values, label) in enumerate(panels, start=1):
        histogram = go.Histogram(x=values.flatten(), nbinsx=nbins, name=label)
        fig.append_trace(histogram, row=1, col=column)

    layout_options = dict(
        title=title,
        xaxis=dict(title='KL'),
        yaxis=dict(title='Count'),
        bargap=0.1,
        width=800,
        height=400,
    )
    fig.update_layout(**layout_options)

    fig.show()
    
def NVPDimensionExperiment(Ds,K,N,M):
    """
    Compare the four fitting procedures across problem dimensions.

    For every dimension in Ds and each of M repetitions, a random 2-component
    t mixture and a random K-component GMM initialisation are generated, all
    four fits from generatefitsupdated(..., N, 10, 2) are run, and KL(q || p)
    is estimated with 100000 samples for each fit.

    Row index of the result arrays:
      0 = "NPV" (Jensen), 1 = "Taylor", 2 = the C slot of generatefitsupdated,
      3 = "Legendre".

    Side effects: pickles {'KL', 'runTime'} to a timestamped
    dimension_data_*.pkl file and calls plot_average_matrix for both arrays.

    Returns:
    - KL, runTime: arrays of shape (4, len(Ds), M).
    """
    KL = np.zeros((4,len(Ds),M))
    runTime = np.zeros((4,len(Ds),M))
    for i, D in enumerate(Ds):
        print(D)
        for m in range(M):
            n_components_t = 2
            tDistParams = list(generate_mvt_params(n_components_t, D))
            GMMParams = list(generate_mixture_params(K,D))
            (JGMMParams, Jtime, TGMMParams, Ttime,
             LGMMParams, Ltime, CGMMParams, Ctime) = generatefitsupdated(tDistParams,GMMParams,N,10,2)
            # (row, fitted parameters, elapsed seconds), evaluated in this order.
            outcomes = (
                (0, JGMMParams, Jtime),
                (1, TGMMParams, Ttime),
                (3, LGMMParams, Ltime),
                (2, CGMMParams, Ctime),
            )
            for row, fitted, elapsed in outcomes:
                KL[row,i,m] = kl_divergence_mixture_qp(fitted, tDistParams, 100000)
                runTime[row,i,m] = elapsed
    # Pickle KL and runTime in the same file
    current_time = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    data = {'KL': KL, 'runTime': runTime}
    with open(f"dimension_data_{current_time}.pkl", "wb") as f:
        pickle.dump(data, f)
    plot_average_matrix(KL, Ds, 'Dimension', 'KL', plot_title=None, file_name='KLDimensionExperiment.pdf')
    plot_average_matrix(runTime, Ds, 'Dimension', 'Run Time', plot_title=None, file_name='TimeDimensionExperiment.pdf')
    return KL, runTime

def NVPComponentExperiment(D,Ks,N,M):
    """
    Sweep the number of GMM components and record KL and run time for four fits.

    For every rerun one set of mixture-of-t parameters (two components,
    dimension ``D``) is generated and reused for every entry of ``Ks``.  For
    each ``K`` fresh GMM parameters are generated, ``generatefitsupdated`` is
    run once, and each of the four returned fits is scored with
    ``kl_divergence_mixture_qp``.  The results are pickled to
    ``component_data_<timestamp>.pkl`` and plotted with ``plot_average_matrix``.

    Parameters:
        - D: dimension passed to both parameter generators
        - Ks: sized iterable of GMM component counts to sweep (x-axis of the plots)
        - N: third argument forwarded to ``generatefitsupdated``
             (the run script labels it the polynomial order)
        - M: number of reruns

    Returns:
        (KL, runTime): two arrays of shape (4, len(Ks), M).  The first axis
        follows the label order used by ``plot_average_matrix``.
    """
    result_shape = (4, len(Ks), M)
    KL = np.zeros(result_shape)
    runTime = np.zeros(result_shape)

    for rerun in range(M):
        print(rerun)
        # One target per rerun, shared by every component count below.
        t_weights, t_means, t_scales, t_df = generate_mvt_params(2, D)
        tDistParams = [t_weights, t_means, t_scales, t_df]

        for col, K in enumerate(Ks):
            g_weights, g_means, g_sigmas = generate_mixture_params(K, D)
            # The trailing 10 and 2 are fixed settings of generatefitsupdated;
            # their meaning is not visible from here -- TODO confirm.
            (fit_j, secs_j, fit_t, secs_t,
             fit_l, secs_l, fit_c, secs_c) = generatefitsupdated(
                tDistParams, [g_weights, g_means, g_sigmas], N, 10, 2)

            # (row, fit, seconds) in the original evaluation order J, T, L, C.
            # Note the L fit is stored in row 3 and the C fit in row 2.
            scoring_plan = (
                (0, fit_j, secs_j),
                (1, fit_t, secs_t),
                (3, fit_l, secs_l),
                (2, fit_c, secs_c),
            )
            for row, fit, secs in scoring_plan:
                # 100000 is presumably the Monte Carlo sample count -- TODO confirm.
                KL[row, col, rerun] = kl_divergence_mixture_qp(fit, tDistParams, 100000)
                runTime[row, col, rerun] = secs

    # Persist both result arrays together in one timestamped pickle.
    stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    with open(f"component_data_{stamp}.pkl", "wb") as out:
        pickle.dump({'KL': KL, 'runTime': runTime}, out)

    plot_average_matrix(KL, Ks, '# of GMM Components', 'KL',
                        plot_title=None, file_name='KLComponentExperiment.pdf')
    plot_average_matrix(runTime, Ks, '# of GMM Components', 'Run Time',
                        plot_title=None, file_name='TimeComponentExperiment.pdf')
    return KL, runTime
    
def NVPOrderExperiment(D,K,Ns,M):
    """
    Sweep the polynomial order and record KL and run time for four fits.

    For every rerun one set of mixture-of-t parameters (two components,
    dimension ``D``) is generated and reused for every entry of ``Ns``.  For
    each ``N`` fresh GMM parameters (``K`` components) are generated,
    ``generatefitsupdated`` is run once, and each of the four returned fits is
    scored with ``kl_divergence_mixture_qp``.  The results are pickled to
    ``order_data_<timestamp>.pkl`` and plotted with ``plot_average_matrix``.

    Parameters:
        - D: dimension passed to both parameter generators
        - K: number of GMM components passed to ``generate_mixture_params``
        - Ns: sized iterable of values swept as the third argument of
              ``generatefitsupdated`` (plotted as 'Polynomial Order')
        - M: number of reruns

    Returns:
        (KL, runTime): two arrays of shape (4, len(Ns), M).  The first axis
        follows the label order used by ``plot_average_matrix``.
    """
    result_shape = (4, len(Ns), M)
    KL = np.zeros(result_shape)
    runTime = np.zeros(result_shape)

    for rerun in range(M):
        print(rerun)
        # One target per rerun, shared by every order below.
        t_weights, t_means, t_scales, t_df = generate_mvt_params(2, D)
        tDistParams = [t_weights, t_means, t_scales, t_df]

        for col, N in enumerate(Ns):
            g_weights, g_means, g_sigmas = generate_mixture_params(K, D)
            # The trailing 10 and 2 are fixed settings of generatefitsupdated;
            # their meaning is not visible from here -- TODO confirm.
            (fit_j, secs_j, fit_t, secs_t,
             fit_l, secs_l, fit_c, secs_c) = generatefitsupdated(
                tDistParams, [g_weights, g_means, g_sigmas], N, 10, 2)

            # (row, fit, seconds) in the original evaluation order J, T, L, C.
            # Note the L fit is stored in row 3 and the C fit in row 2.
            scoring_plan = (
                (0, fit_j, secs_j),
                (1, fit_t, secs_t),
                (3, fit_l, secs_l),
                (2, fit_c, secs_c),
            )
            for row, fit, secs in scoring_plan:
                # 100000 is presumably the Monte Carlo sample count -- TODO confirm.
                KL[row, col, rerun] = kl_divergence_mixture_qp(fit, tDistParams, 100000)
                runTime[row, col, rerun] = secs

    # Persist both result arrays together in one timestamped pickle.
    stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    with open(f"order_data_{stamp}.pkl", "wb") as out:
        pickle.dump({'KL': KL, 'runTime': runTime}, out)

    plot_average_matrix(KL, Ns, 'Polynomial Order', 'KL',
                        plot_title=None, file_name='KLOrderExperiment.pdf')
    plot_average_matrix(runTime, Ns, 'Polynomial Order', 'Run Time',
                        plot_title=None, file_name='TimeOrderExperiment.pdf')
    return KL, runTime


def plot_average_matrix(matrix, Ks, x_axis_title, y_axis_title, plot_title=None, file_name=None):
    """
    Plot the mean over reruns for each method as a line, with a shaded band of
    plus/minus one standard deviation around it.

    Parameters:
        - matrix: a 3D array with shape (num_methods, num_Ks, num_reruns) containing the data to be plotted.
                  Rows 0..3 are drawn with the four fixed method labels below; a matrix with fewer
                  than four rows raises IndexError.
        - Ks: the x-axis values, one per column of ``matrix``. May be a list, tuple, range or
              1D numpy array.
        - x_axis_title: a string specifying the title of the x-axis
        - y_axis_title: a string specifying the title of the y-axis
        - plot_title: a string specifying the title of the plot (default: None)
        - file_name: a string specifying the file name to save the plot to (default: None)

    Side effects:
        Always writes 'toss.pdf' to the current working directory, writes ``file_name``
        when it is given, and finally opens the figure with ``fig.show()``.

    Returns:
        None
    """

    # Work on a plain Python list of x values. With a numpy array the expression
    # `Ks + Ks[::-1]` adds element-wise instead of concatenating, which gives the
    # shaded band the wrong x-coordinates and half the required number of points.
    x_values = np.asarray(Ks).tolist()
    # Outline of the band: left-to-right along the upper bound, then back
    # right-to-left along the lower bound.
    band_x = x_values + x_values[::-1]

    # Compute the average over all reruns for each method and x value
    averages = np.mean(matrix, axis=2)

    # Compute the standard deviation over all reruns for each method and x value
    std_devs = np.std(matrix, axis=2)

    # Create a shaded band and a line trace for each method
    traces = []
    colors = ['rgba(255,0,0, 1)', 'rgba(0,255,255, 1)', 'rgba(0,0,255, 1)', 'rgba(0,255,0, 1)']
    colorsfill = ['rgba(255,0,0, 0.4)', 'rgba(0,255,255, 0.4)', 'rgba(0,0,255, 0.4)', 'rgba(0,255,0, 0.4)']
    methods = ['NPV', 'Our Taylor', 'Taylor Limit', 'Our Legendre']#'Chebychev'
    for i, method in enumerate(methods):
        upper_bound = averages[i] + std_devs[i]
        lower_bound = averages[i] - std_devs[i]
        trace = go.Scatter(x=x_values, y=averages[i], name=method, line=dict(color=colors[i]), mode='lines')
        fill = go.Scatter(
            x=band_x,
            y=np.concatenate([upper_bound, lower_bound[::-1]]),
            fill='toself',
            fillcolor=colorsfill[i],
            line=dict(color='rgba(255,255,255,0)'),  # fully transparent outline
            showlegend=False,
        )
        # Band first so the mean line is drawn on top of it.
        traces.append(fill)
        traces.append(trace)

    # NOTE(review): throwaway export before the real figure is built; presumably a
    # warm-up that works around an artifact in the first static export -- confirm
    # before removing. Kept unconditional to preserve existing behavior.
    fig1 = go.Figure(data=traces, layout=go.Layout(xaxis=dict(title=x_axis_title), yaxis=dict(title=y_axis_title)))
    fig1.write_image('toss.pdf')

    # Create the plot
    fig = go.Figure(data=traces, layout=go.Layout(xaxis=dict(title=x_axis_title), yaxis=dict(title=y_axis_title)))
    fig.update_layout(font=dict(size=25),legend=dict(yanchor="bottom", y=0.01, xanchor="right", x=0.95))
    fig.update_layout(plot_bgcolor='white')
    fig.update_xaxes(
        range = [min(x_values),max(x_values)],
        mirror=True,
        ticks='outside',
        showline=True,
        linecolor='black',
        gridcolor='lightgrey'
    )
    fig.update_yaxes(
        mirror=True,
        ticks='outside',
        showline=True,
        linecolor='black',
        gridcolor='lightgrey'
    )
    # Set the plot title if it is specified
    if plot_title is not None:
        fig.update_layout(title=plot_title)

    # Save the plot if the file name is specified
    if file_name is not None:
        fig.write_image(file_name)

    # Show the plot
    fig.show()

# Seed Python's and NumPy's global random generators before running the experiments.
random.seed(10)
np.random.seed(10)


# --- Experiment 1: sweep the polynomial order ---
# Running this pickles the results and writes/shows the KL and run-time plots.
D = 10#5 # Dimensions; a higher value gives a bigger separation of error by method
K = 2#3 # GMM components; fewer components are needed to show improvement with order, try 2 or 3.
Ns = [1,2,3,5,7]#,10 # Polynomial orders
M = 10#15 # Reruns
KLO, runTimeO = NVPOrderExperiment(D,K,Ns,M)

# Re-plot from the returned arrays without re-running the experiment:
# plot_average_matrix(KLO, Ns, 'Order', 'KL', plot_title=None, file_name='KLOrderExperiment.pdf')#plot_title='KL vs Dimension'
# plot_average_matrix(runTimeO, Ns, 'Order', 'Run Time', plot_title=None, file_name='TimeOrderExperiment.pdf')#plot_title='Run Time vs Dimension'

# --- Experiment 2: sweep the number of GMM components ---
D = 10 # Dimensions
Ks = [1,2,5,7,10] # GMM components
N = 4 # Polynomial order
M = 10#15 # Reruns
KLC, runTimeC = NVPComponentExperiment(D,Ks,N,M)

# Re-plot from the returned arrays without re-running the experiment:
# plot_average_matrix(KLC, Ks, 'Component', 'KL', plot_title=None, file_name='KLComponentExperiment.pdf')#plot_title='KL vs Dimension'
# plot_average_matrix(runTimeC, Ks, 'Component', 'Run Time', plot_title=None, file_name='TimeComponentExperiment.pdf')#plot_title='Run Time vs Dimension'

# --- Experiment 3: sweep the dimension ---
Ds = [1,3,5,7,9,11,13]# # Dimensions; 20 is unstable
K = 4 # GMM components
N = 4 # Polynomial order
M = 10#15 # Reruns
KLD, runTimeD = NVPDimensionExperiment(Ds,K,N,M)

# plot_average_matrix(KLD, Ds, 'Dimension', 'KL', plot_title=None, file_name='KLDimensionExperiment.pdf')#plot_title='KL vs Dimension'
# plot_average_matrix(runTimeD, Ds, 'Dimension', 'Run Time', plot_title=None, file_name='TimeDimensionExperiment.pdf')#plot_title='Run Time vs Dimension'

# plot_average_matrix(KLD, Ds, 'Dimension', 'KL', plot_title=None, file_name='KLDimensionExperimentLegend.pdf')#plot_title='KL vs Dimension'
# plot_average_matrix(runTimeD, Ds, 'Dimension', 'Run Time', plot_title=None, file_name='TimeDimensionExperimentEdge.pdf')#plot_title='Run Time vs Dimension'

# with open("data_2023-04-18-20-11-09.pkl", "rb") as f:
#     data = pickle.load(f)
#     KL1 = data['KL']
#     runTime1 = data['runTime']



####### GOOD EXAMPLE TO USE ###################
# weights = np.array([0.75, 0.25])
# means = np.array([[-1, -1], [1, 1]])
# scales = np.array([[[2, 0], [0, 2]], [[.25, 0], [0, .25]]])#
# df = np.array([15, 15])
# tDistParams = [weights, means, scales, df]

# n_dim = 2
# n_components_t = 2
# weights, means, scales, df = generate_mvt_params(n_components_t, n_dim,df_range=(1, 2))
# tDistParams = [weights, means, scales, df]

# n_components_gmm = 3
# ws, mus, sigmas = generate_mixture_params(n_components_gmm, n_dim)
# GMMParams = [ws,mus,sigmas]
# sigmas_full = []
# for i in range(len(sigmas)):
#     sigmas_full.append(sigmas[i]*np.eye(n_dim))
    
# N = 6
# N2 = 10
# M2 = 2
# # JGMMParams, Jtime, TGMMParams, Ttime, LGMMParams, Ltime, CGMMParams, Ctime = generatefits(tDistParams,GMMParams,N)
# JGMMParams, Jtime, TGMMParams, Ttime, LGMMParams, Ltime, CGMMParams, Ctime = generatefitsupdated(tDistParams,GMMParams,N,N2,M2)
# if n_dim == 2:
#     zmin, zmax = plot_mixture_t_contours(means, scales, df, weights)
#     plot_gmm_contours(JGMMParams[0], JGMMParams[1], JGMMParams[2],z_min=zmin, z_max=zmax)
#     plot_gmm_contours(TGMMParams[0], TGMMParams[1], TGMMParams[2],z_min=zmin, z_max=zmax)
#     plot_gmm_contours(LGMMParams[0], LGMMParams[1], LGMMParams[2],z_min=zmin, z_max=zmax)
#     plot_gmm_contours(CGMMParams[0], CGMMParams[1], CGMMParams[2],z_min=zmin, z_max=zmax)

# Jerr = kl_divergence_mixture_qp(JGMMParams, tDistParams, 100000)
# Terr = kl_divergence_mixture_qp(TGMMParams, tDistParams, 100000)
# Lerr = kl_divergence_mixture_qp(LGMMParams, tDistParams, 100000)
# Cerr = kl_divergence_mixture_qp(CGMMParams, tDistParams, 100000)
# print("Jensen KL:     %f    Jensen time: %f"%(Jerr,Jtime))
# print("Taylor KL:     %f    Taylor time: %f"%(Terr,Ttime))
# print("Legendre KL:   %f    Legendre time: %f"%(Lerr,Ltime))
# print("Chebyschev KL: %f    Chebyschev time: %f"%(Cerr,Ctime))