import numpy as np
from joblib import Parallel, delayed
from pgpr_util import get_rmse, get_mnlp, load_data, load_data_gs, load_data_gs_vector, pgpr_timer
from pgpr_cov import pgpr_cov
from pgpr_chol import pgpr_chol
from pgpr_type import Mdoub, Vdoub, SUCC
from scipy.optimize import minimize


class pgpr_ppitc_ls:
    """
    This class provides the regression function using PITC Approximation, implemented in a parallel manner.
    """

    def __init__(self, hypf):
        """
        Build the regressor from a hyperparameter file.

        The file is handed to ``pgpr_cov``; the resulting covariance object is
        kept on ``self.cov`` and its ``mu`` attribute is cached as ``self.h_mu``.
        Prediction buffers start empty and the timing/metric fields start at 0.

        :param hypf: Path to the hyperparameter file.
        """
        covariance = pgpr_cov(hypf=hypf)
        self.cov = covariance
        # Cached copy of the mean read from the covariance object.
        self.h_mu = covariance.mu
        # Predictive mean / variance buffers (filled by pitc_regr_low_core).
        self.pmu, self.pvar = Vdoub(), Vdoub()
        # Last measured duration and evaluation metrics.
        self.elapsed = self.rmse = self.mnlp = 0.0

    def chol_cov(self, K_dd):
        """
        Factorise a covariance matrix, retrying once with extra diagonal jitter.

        If ``pgpr_chol`` raises ``np.linalg.LinAlgError``, ``1e-3`` is added to
        every diagonal entry of ``K_dd`` *in place*, the diagonal is printed for
        debugging, and the factorisation is attempted a second time. A failure
        of the second attempt propagates to the caller.

        :param K_dd: Covariance matrix (Mdoub); may be modified in place.
        :return: Cholesky decomposition (pgpr_chol).
        """
        try:
            factor = pgpr_chol(K_dd)
        except np.linalg.LinAlgError:
            # Strengthen the diagonal before the single retry.
            for d in range(K_dd.nrows()):
                K_dd[d][d] += 1e-3
            print("Matrix not positive definite. Added regularization and retrying.")
            # Debug output: dump the (regularised) diagonal.
            print("Diagonal elements of the covariance matrix:")
            for d in range(K_dd.nrows()):
                print(K_dd[d][d])
            factor = pgpr_chol(K_dd)
        return factor

    def chol_cov_obs(self, obs, dnum):
        """
        Build the covariance of a set of observations and factorise it.

        The matrix is produced by ``self.cov.se_ard_n`` (presumably the
        noise-augmented SE-ARD kernel -- confirm in pgpr_cov) and passed
        directly to ``pgpr_chol``; unlike ``chol_cov`` there is no retry.

        :param obs: Observation matrix (Mdoub).
        :param dnum: Number of observations.
        :return: Cholesky decomposition (pgpr_chol).
        """
        cov_matrix = Mdoub()
        self.cov.se_ard_n(obs, dnum, cov_matrix)
        factor = pgpr_chol(cov_matrix)
        return factor

    def chol_pcov(self, obs, dnum, act, anum, chol_kuu=None):
        """
        Compute the covariance of ``obs`` conditioned on the support set and
        return its Cholesky decomposition.

        ``post_cov`` fills the conditional covariance; ``1e-6`` is then added to
        its diagonal before factorisation through ``chol_cov``.

        :param obs: Observation matrix (Mdoub).
        :param dnum: Number of observations.
        :param act: Support set matrix (Mdoub).
        :param anum: Number of support set points.
        :param chol_kuu: Cholesky decomposition of K_uu (optional). When omitted
            it is built here from ``act``.
        :return: Cholesky decomposition (pgpr_chol).
        """
        if chol_kuu is None:
            # The factor must come from the support-set *covariance*, not from
            # the raw support points. Build K_uu the same way
            # ppitc_local_summary does (se_ard_matrix plus 1e-6 jitter).
            kuu = Mdoub()
            self.cov.se_ard_matrix(act, kuu)
            for i in range(anum):
                kuu[i][i] += 1e-6
            chol_kuu = self.chol_cov(kuu)

        kdd = Mdoub()
        self.post_cov(act, anum, chol_kuu, obs, dnum, kdd)
        # Small jitter keeps the conditional covariance factorisable.
        for i in range(dnum):
            kdd[i][i] += 1e-6
        return self.chol_cov(kdd)

    def post_var(self, obs, ss, chol, xt, ts, t_var):
        """
        Compute the posterior variance of each test point given ``obs``.

        For every test row the variance starts at ``cov.nos + cov.sig`` and the
        squared entries of ``chol.elsolve(k)`` are subtracted, where ``k`` is the
        cross-covariance between that test point and ``obs``. The result is
        floored at ``1e-8``.

        :param obs: Conditioning inputs (Mdoub).
        :param ss: Number of rows in ``obs``.
        :param chol: Cholesky decomposition of the covariance of ``obs``.
        :param xt: Test set matrix (Mdoub).
        :param ts: Number of test points.
        :param t_var: Output variance vector (Vdoub); resized to ``ts`` if needed.
        :return: Success status.
        """
        if t_var.size() != ts:
            t_var.resize(ts)

        K_td = Mdoub()
        self.cov.se_ard_cross(xt, obs, K_td)

        # Loop-invariant prior variance of a single (noisy) test point.
        prior_var = self.cov.nos + self.cov.sig

        for i in range(ts):
            # Copy row i of the cross-covariance into a vector for the solver.
            K_ti = Vdoub(ss)
            for j in range(ss):
                K_ti[j] = K_td[i][j]
            v = chol.elsolve(K_ti)

            var_i = prior_var
            for j in range(ss):
                var_i -= v[j] * v[j]
            # Guard against tiny negative values caused by round-off.
            t_var[i] = max(var_i, 1e-8)

        return SUCC

    '''
    The better (faster) way to compute is implemented using linalg library, use this only for small datasets

    # def compute_covariance_row(self, i, ss, chol, K_td):
    #     """
    #     Compute a single row of the covariance matrix.
    #     :param i: Row index.
    #     :param ss: Number of observations.
    #     :param chol: Cholesky decomposition of K_dd.
    #     :param K_td: Cross-covariance matrix (Mdoub).
    #     :return: A tuple containing the row index and the computed row.
    #     """
    #     K_ti = [K_td[i][j] for j in range(ss)]  # Convert to a serializable list
    #     v = chol.elsolve(K_ti)
    #     t_cov_row = [0.0] * (i + 1)  # Only compute the lower triangular part
    #     t_cov_row[i] = -sum(v[j] * v[j] for j in range(ss))
    #     beta = chol.solve(K_ti)
    #     for t in range(i + 1, ss):
    #         t_cov_row.append(-sum(K_td[t][j] * beta[j] for j in range(ss)))
    #     return i, Vdoub(t_cov_row)  # Convert to Vdoub before returning
    
    # def post_cov(self, obs, ss, chol, xt, ts, t_cov):
    #     """
    #     Compute posterior covariance using FGP with parallelization.
    #     :param obs: Observation matrix (Mdoub).
    #     :param ss: Number of observations.
    #     :param chol: Cholesky decomposition of K_dd.
    #     :param xt: Test set matrix (Mdoub).
    #     :param ts: Number of test points.
    #     :param t_cov: Output covariance matrix (Mdoub).
    #     :return: Success status.
    #     """
    #     t_cov.resize(ts, ts)
    #     K_td = Mdoub()
    #     self.cov.se_ard_cross(xt, obs, K_td)
    #     self.cov.se_ard_n_matrix(xt, t_cov)

    #     # Perform parallel computation
    #     results = Parallel(n_jobs=-1)(
    #         delayed(self.compute_covariance_row)(i, ss, chol, K_td) for i in range(ts)
    #     )

    #     # Aggregate results into t_cov
    #     for i, t_cov_row in results:
    #         for j in range(len(t_cov_row)):  # Use len() for Python lists
    #             t_cov[i][j] = t_cov_row[j]
    #             t_cov[j][i] = t_cov_row[j]  # Symmetric property of covariance matrix

    #     return SUCC
    '''

    def post_cov(self, obs, ss, chol, xt, ts, t_cov):
        """
        Compute the posterior covariance of the test points given ``obs``.

        ``t_cov`` is first filled by ``cov.se_ard_n_matrix(xt, ...)`` and then
        reduced in place: the diagonal loses the squared entries of
        ``chol.elsolve(k_i)`` and each off-diagonal pair ``(t, i)`` loses the dot
        product of row ``t`` of the cross-covariance with ``chol.solve(k_i)``.
        Only the lower triangle is computed; it is mirrored to the upper one.

        ``self.elapsed`` is overwritten after each stage, so on return it holds
        the duration of the final (reduction) stage only.

        :param obs: Conditioning inputs (Mdoub).
        :param ss: Number of rows in ``obs``.
        :param chol: Cholesky decomposition of the covariance of ``obs``.
        :param xt: Test set matrix (Mdoub).
        :param ts: Number of test points.
        :param t_cov: Output covariance matrix (Mdoub), resized to ts x ts.
        :return: Success status.
        """
        t_cov.resize(ts, ts)
        K_td = Mdoub()

        timer = pgpr_timer()

        # Stage 1: cross-covariance between test points and obs.
        timer.start()
        self.cov.se_ard_cross(xt, obs, K_td)
        self.elapsed = timer.end()

        # Stage 2: prior covariance among the test points.
        timer.start()
        self.cov.se_ard_n_matrix(xt, t_cov)
        self.elapsed = timer.end()

        # Stage 3: subtract the part explained by obs.
        timer.start()
        for i in range(ts):
            K_ti = Vdoub(ss)
            for j in range(ss):
                K_ti[j] = K_td[i][j]

            v = chol.elsolve(K_ti)
            for j in range(ss):
                t_cov[i][i] -= v[j] * v[j]

            beta = chol.solve(K_ti)
            for t in range(i + 1, ts):
                for j in range(ss):
                    t_cov[t][i] -= K_td[t][j] * beta[j]
                # Mirror the lower-triangular entry to keep t_cov symmetric.
                t_cov[i][t] = t_cov[t][i]
        self.elapsed = timer.end()
        return SUCC

    def pitc_prep(self, D, ds, chol_sdd, U, us, chol_kuu, fu, suu):
        """
        Prepare the local summary (mean vector and covariance) of one PITC block.

        With ``K_ud`` the cross-covariance between ``U`` and ``D`` and ``r`` the
        targets of ``D`` minus ``self.h_mu``:

        * ``fu[i]``  = row i of ``K_ud`` dotted with ``chol_sdd.solve(r)``
        * ``suu``    = symmetric matrix whose entries are rows of ``K_ud``
          dotted with ``chol_sdd.solve`` / ``elsolve`` of other rows.

        :param D: Dataset matrix (Mdoub); column ``cov.dim`` holds the target.
        :param ds: Number of data points.
        :param chol_sdd: Cholesky decomposition of Sigma_DD.
        :param U: Active set matrix (Mdoub).
        :param us: Number of active set points.
        :param chol_kuu: Cholesky decomposition of K_uu (accepted for interface
            compatibility; not used by this method).
        :param fu: Output mean vector (Vdoub), must already hold ``us`` entries.
        :param suu: Output covariance matrix (Mdoub), must already be us x us.
        :return: Success status.
        """
        K_ud = Mdoub()
        self.cov.se_ard_cross(U, D, K_ud)

        # Centre the targets with the cached mean.
        residual = Vdoub(ds)
        for i in range(ds):
            residual[i] = D[i][self.cov.dim] - self.h_mu
        alpha = chol_sdd.solve(residual)

        for i in range(us):
            # Local summary mean entry.
            acc = 0
            for j in range(ds):
                acc += K_ud[i][j] * alpha[j]
            fu[i] = acc

            # Row i of the cross-covariance as a vector for the solver.
            K_ui = Vdoub(ds)
            for j in range(ds):
                K_ui[j] = K_ud[i][j]

            v = chol_sdd.elsolve(K_ui)
            suu[i][i] = sum(v[j] * v[j] for j in range(ds))

            # Off-diagonal entries: fill the lower triangle and mirror it.
            beta = chol_sdd.solve(K_ui)
            for t in range(i + 1, us):
                suu[t][i] = sum(K_ud[t][j] * beta[j] for j in range(ds))
                suu[i][t] = suu[t][i]
        return SUCC
    
    def ppitc_local_summary(self, D, ds, aset, as_, ls_zu, ls_kuu, lmu, lcov):
        """
        Compute one block's local summary and write it to disk.

        Steps: build and factorise the support covariance (with 1e-6 jitter),
        factorise the block covariance via ``chol_pcov``, fill ``ls_zu`` /
        ``ls_kuu`` through ``pitc_prep`` and dump both with six decimals, one
        value per line (the matrix in row-major order). ``self.elapsed`` is set
        to the duration of the ``chol_pcov`` call.

        :param D: Dataset matrix (Mdoub).
        :param ds: Number of data points.
        :param aset: Active set matrix (Mdoub).
        :param as_: Number of active set points.
        :param ls_zu: Local summary mean vector (Vdoub), filled here.
        :param ls_kuu: Local summary covariance matrix (Mdoub), filled here.
        :param lmu: Path to save the local mean.
        :param lcov: Path to save the local covariance.
        """
        # Support-set covariance K_uu with a small diagonal jitter.
        kuu = Mdoub()
        self.cov.se_ard_matrix(aset, kuu)
        for d in range(as_):
            kuu[d][d] += 1e-6
        chol_kuu = self.chol_cov(kuu)

        # Factor of the block covariance conditioned on the support set (timed).
        stopwatch = pgpr_timer()
        stopwatch.start()
        chol_sdd = self.chol_pcov(D, ds, aset, as_, chol_kuu)
        self.elapsed = stopwatch.end()

        # Fill the local summary vector and matrix.
        self.pitc_prep(D, ds, chol_sdd, aset, as_, chol_kuu, ls_zu, ls_kuu)

        # Persist the local mean, one entry per line.
        with open(lmu, "w") as mean_file:
            mean_file.writelines(f"{ls_zu[r]:.6f}\n" for r in range(as_))

        # Persist the local covariance, row-major, one entry per line.
        with open(lcov, "w") as cov_file:
            cov_file.writelines(
                f"{ls_kuu[r][c]:.6f}\n" for r in range(as_) for c in range(as_)
            )

    def regress_local(self, train, support, local_mean, local_cov):
        """
        Load one training block and the support set, then compute and save the
        block's PITC local summary.

        ``self.elapsed`` is set to the duration of the local-summary call.

        :param train: Path to the training data file.
        :param support: Path to the support set file.
        :param local_mean: Path to save the local mean.
        :param local_cov: Path to save the local covariance.
        :return: Success status.
        """
        # Each row holds cov.dim inputs followed by one target.
        width = self.cov.dim + 1

        traindata = Mdoub(1, width)
        load_data(train, traindata)
        supportset = Mdoub(1, width)
        load_data(support, supportset)

        # NOTE: the data is used exactly as loaded; no feature/target
        # normalisation is applied here.

        n_train = traindata.nrows()
        n_support = supportset.nrows()

        # Output buffers for the local summary.
        ls_zu = Vdoub(n_support)
        ls_kuu = Mdoub(n_support, n_support)

        stopwatch = pgpr_timer()
        stopwatch.start()
        self.ppitc_local_summary(
            traindata, n_train, supportset, n_support,
            ls_zu, ls_kuu, local_mean, local_cov,
        )
        self.elapsed = stopwatch.end()
        return SUCC

    def pitc_regr_low_core(self, support, t_set, gs_k, gs_mu):
        """
        Predict on a test set from the PITC global summary.

        With ``K_uu`` the support covariance, ``S`` / ``f`` the global summary
        matrix / vector loaded from ``gs_k`` / ``gs_mu`` and ``G = K_uu + S``:

        * mean:      ``h_mu + K_tu G^-1 f``
        * variance:  ``(nos + sig) - K_tu K_uu^-1 K_ut + K_tu G^-1 K_ut``

        Results are stored in ``self.pmu`` / ``self.pvar``; ``self.rmse`` and
        ``self.mnlp`` are computed against the last column of the test file.
        ``self.elapsed`` ends up holding the duration of the metric evaluation.

        The variance previously added ``K_tu G^-1 f`` (the mean correction) and
        subtracted the term built from ``G`` instead of ``K_uu``; both are fixed
        here, so variances differ from earlier outputs.

        :param support: Path to the support set file.
        :param t_set: Path to the test set file.
        :param gs_k: Path to the global summary covariance file.
        :param gs_mu: Path to the global summary mean file.
        :raises ValueError: If solving with the global-summary factor returns None.
        """
        ddim = self.cov.dim + 1
        tset = Mdoub(1, ddim)
        supportset = Mdoub(1, ddim)
        load_data(support, supportset)
        load_data(t_set, tset)

        # NOTE: inputs are used as loaded; no normalisation is applied here.

        ss = supportset.nrows()
        ts = tset.nrows()

        # Factor of the prior support covariance K_uu (same 1e-6 jitter that
        # ppitc_local_summary uses); needed for the variance reduction term.
        kuu = Mdoub()
        self.cov.se_ard_matrix(supportset, kuu)
        for i in range(ss):
            kuu[i][i] += 1e-6
        chol_prior = self.chol_cov(kuu)

        # Load the global summary (matrix S and vector f).
        suu = Mdoub(ss, ss)
        fu = Vdoub(ss)
        load_data_gs(gs_k, suu)
        load_data_gs_vector(gs_mu, fu)

        # G = K_uu + S, built in its own matrix so the K_uu factor above is
        # unaffected regardless of how pgpr_chol treats its argument.
        gs_kuu = Mdoub()
        self.cov.se_ard_matrix(supportset, gs_kuu)
        for i in range(ss):
            for j in range(ss):
                gs_kuu[i][j] += suu[i][j]
        # Regularisation to keep G positive definite.
        for i in range(ss):
            gs_kuu[i][i] += 1e-5

        chol_kuu = pgpr_chol(gs_kuu)
        alpha = chol_kuu.solve(fu)
        if alpha is None:
            raise ValueError("Cholesky decomposition failed.")

        self.pmu.resize(ts)
        self.pvar.resize(ts)

        # Variance part 1: (nos + sig) - K_tu K_uu^-1 K_ut, floored by post_var.
        timer = pgpr_timer()
        timer.start()
        self.post_var(supportset, ss, chol_prior, tset, ts, self.pvar)
        self.elapsed = timer.end()

        # Cross-covariance between test points and the support set.
        K_td = Mdoub()
        self.cov.se_ard_cross(tset, supportset, K_td)

        # Predictive mean.
        timer.start()
        for k in range(ts):
            mean_k = self.h_mu
            for j in range(ss):
                mean_k += K_td[k][j] * alpha[j]
            self.pmu[k] = mean_k
        self.elapsed = timer.end()

        # Variance part 2: add K_tu G^-1 K_ut, obtained as the squared norm of
        # chol_kuu.elsolve(k_t) exactly as post_var does for its own factor.
        timer.start()
        for k in range(ts):
            K_tk = Vdoub(ss)
            for j in range(ss):
                K_tk[j] = K_td[k][j]
            v = chol_kuu.elsolve(K_tk)
            gain = 0.0
            for j in range(ss):
                gain += v[j] * v[j]
            self.pvar[k] += gain
        self.elapsed = timer.end()

        # Ground-truth targets are the last column of the test set.
        trueval = Vdoub(ts)
        for i in range(ts):
            trueval[i] = tset[i][ddim - 1]

        # Evaluation metrics.
        timer.start()
        self.rmse = get_rmse(trueval, self.pmu)
        self.mnlp = get_mnlp(trueval, self.pmu, self.pvar)
        self.elapsed = timer.end()

    def optimize_hyperparameters(self, train_file, support_file):
        """
        Optimise signal variance, noise variance and mean with L-BFGS-B.

        The objective is the negative of ``compute_log_marginal_likelihood``;
        length-scales are left untouched. Three fixed starting points are tried
        and the best run that reports ``success`` wins.

        On success ``self.cov.sig`` / ``nos`` / ``mu`` are updated and the cached
        ``self.h_mu`` is refreshed so regression uses the new mean. If no run
        succeeds, the hyperparameters are restored to their values on entry
        (the objective mutates ``self.cov`` while it is being evaluated).

        :param train_file: Path to the training data file.
        :param support_file: Path to the support set file.
        :return: Success status (always ``SUCC``; failure is only printed).
        """
        train_data = Mdoub()
        support_data = Mdoub()
        load_data(train_file, train_data)
        load_data(support_file, support_data)

        # Snapshot so a failed optimisation does not leave trial values behind.
        saved_sig, saved_nos, saved_mu = self.cov.sig, self.cov.nos, self.cov.mu

        def objective(hyperparams):
            # The likelihood reads the hyperparameters from self.cov, so the
            # trial values have to be written there before evaluating it.
            self.cov.sig = max(float(hyperparams[0]), 1e-6)  # Signal variance
            self.cov.nos = max(float(hyperparams[1]), 1e-6)  # Noise variance
            self.cov.mu = float(hyperparams[2])              # Mean
            return -self.compute_log_marginal_likelihood(train_data, support_data)

        bounds = [
            (1e-6, 100.0),   # Signal variance (must be > 0)
            (1e-6, 10.0),    # Noise variance (must be > 0)
            (None, None),    # Mean (unbounded)
        ]

        starting_points = [
            [0.1, 0.01, 0.0],   # Low signal variance, low noise
            [1.0, 0.1, 0.0],    # Medium signal variance, medium noise
            [10.0, 1.0, 0.0],   # High signal variance, high noise
        ]

        best_result = None
        best_lml = -np.inf

        for start_point in starting_points:
            result = minimize(objective, start_point, method='L-BFGS-B', bounds=bounds,
                              options={'maxiter': 100, 'gtol': 1e-6})
            if result.success and -result.fun > best_lml:
                best_lml = -result.fun
                best_result = result

        if best_result is not None:
            self.cov.sig = max(float(best_result.x[0]), 1e-6)
            self.cov.nos = max(float(best_result.x[1]), 1e-6)
            self.cov.mu = float(best_result.x[2])
            # Keep the cached mean used by pitc_prep / pitc_regr_low_core in sync.
            self.h_mu = self.cov.mu
        else:
            self.cov.sig, self.cov.nos, self.cov.mu = saved_sig, saved_nos, saved_mu
            print("Hyperparameter optimization failed.")

        return SUCC

    def optimize_hyperparameters_init(self, train_file, support_file):
        """
        Optimize GP hyperparameters: signal variance, noise variance, mean, and
        length-scales, using L-BFGS-B from several starting points.

        The objective is the negative of ``compute_log_marginal_likelihood``.
        The best run that reports ``success`` wins. On success ``self.cov`` is
        updated and the cached ``self.h_mu`` is refreshed; if no run succeeds,
        every hyperparameter is restored to its value on entry (the objective
        mutates ``self.cov`` while it is being evaluated).

        :param train_file: Path to the training data file.
        :param support_file: Path to the support set file.
        :return: Success status (always ``SUCC``; failure is only printed).
        """
        train_data = Mdoub()
        support_data = Mdoub()
        load_data(train_file, train_data)
        load_data(support_file, support_data)

        n_lsc = len(self.cov.lsc)

        # Snapshot so a failed optimisation does not leave trial values behind.
        saved_sig, saved_nos, saved_mu = self.cov.sig, self.cov.nos, self.cov.mu
        saved_lsc = [self.cov.lsc[i] for i in range(n_lsc)]

        def objective(hyperparams):
            # The likelihood reads the hyperparameters from self.cov, so the
            # trial values have to be written there before evaluating it.
            self.cov.sig = max(float(hyperparams[0]), 1e-6)  # Signal variance
            self.cov.nos = max(float(hyperparams[1]), 1e-6)  # Noise variance
            self.cov.mu = float(hyperparams[2])              # Mean
            for i in range(n_lsc):
                self.cov.lsc[i] = max(float(hyperparams[3 + i]), 1e-4)  # Length-scales
            return -self.compute_log_marginal_likelihood(train_data, support_data)

        bounds = [
            (1e-6, 100.0),   # Signal variance
            (1e-6, 10.0),    # Noise variance
            (None, None),    # Mean
        ] + [(0.05, 1.0)] * n_lsc  # Length-scales for Ackley on [-5,5]^d

        # NOTE(review): the length-scale starts 2.0 and 10.0 lie outside the
        # (0.05, 1.0) bounds above -- confirm how the installed SciPy treats an
        # out-of-bounds x0 (recent versions are expected to clip it).
        starting_points = [
            [1.0, 0.01, 0.0] + [0.5] * n_lsc,
            [5.0, 0.1, 0.0] + [1.0] * n_lsc,
            [10.0, 1.0, 0.0] + [2.0] * n_lsc,
            [50.0, 5.0, 0.0] + [10.0] * n_lsc,  # More extreme starting point
        ]

        best_result = None
        best_lml = -np.inf

        for start_point in starting_points:
            result = minimize(objective, start_point, method='L-BFGS-B', bounds=bounds,
                              options={'maxiter': 100, 'gtol': 1e-6})
            if result.success and -result.fun > best_lml:
                best_lml = -result.fun
                best_result = result

        if best_result is not None:
            self.cov.sig = max(float(best_result.x[0]), 1e-6)
            self.cov.nos = max(float(best_result.x[1]), 1e-6)
            self.cov.mu = float(best_result.x[2])
            for i in range(n_lsc):
                self.cov.lsc[i] = max(float(best_result.x[3 + i]), 1e-4)
            # Keep the cached mean used by pitc_prep / pitc_regr_low_core in sync.
            self.h_mu = self.cov.mu
            print(f"Updated GP hyperparameters: Sig={self.cov.sig:.4f}, Nos={self.cov.nos:.4f}, Mu={self.cov.mu:.4f}, LSC={[f'{l:.4f}' for l in self.cov.lsc]}")
        else:
            self.cov.sig, self.cov.nos, self.cov.mu = saved_sig, saved_nos, saved_mu
            for i in range(n_lsc):
                self.cov.lsc[i] = saved_lsc[i]
            print("Hyperparameter optimization failed.")

        return SUCC

    def compute_log_marginal_likelihood(self, train_data, support_data):
        """
        Compute the (prior-regularised) log marginal likelihood of the GP on the
        training data, using the hyperparameters currently stored on ``self.cov``.

        The targets (last column of ``train_data``) are centred by
        ``self.cov.mu`` so that the mean hyperparameter actually influences the
        value; this matches ``pitc_prep``, which also models targets minus the
        mean. Quadratic penalties on ``log(sig)`` (centred at 0) and
        ``log(nos)`` (centred at -3) are added as a log-prior.

        :param train_data: Training matrix (Mdoub), inputs followed by target.
        :param support_data: Support set matrix; accepted for interface
            compatibility and not used in the computation.
        :return: Log marginal likelihood plus log-prior, or ``-np.inf`` if the
            factorisation or solve fails.
        """
        # Training covariance with noise variance and a small jitter on the diagonal.
        K = Mdoub()
        self.cov.se_ard_matrix(train_data, K)
        for i in range(K.nrows()):
            K[i][i] += self.cov.nos + 1e-6

        # Mean-centred targets.
        n = train_data.nrows()
        target_col = train_data.ncols() - 1
        target_values = Vdoub(n)
        for i in range(n):
            target_values[i] = train_data[i][target_col] - self.cov.mu

        try:
            # The factorisation is inside the guarded region so that a matrix
            # that is still not positive definite after chol_cov's retry gives
            # -inf instead of aborting the surrounding optimiser.
            chol = self.chol_cov(K)
            alpha = chol.solve(target_values)
            log_likelihood = -0.5 * np.dot(target_values.data(), alpha.data())
            # NOTE(review): the Gaussian log-likelihood needs 0.5 * log|K|.
            # This is only right if pgpr_chol.logdet() already returns that
            # (e.g. the sum of log-diagonal entries of the factor) -- confirm.
            log_likelihood -= chol.logdet()
            log_likelihood -= 0.5 * n * np.log(2 * np.pi)
        except Exception as e:
            print(f"Error computing log marginal likelihood: {e}")
            return -np.inf

        # Log-normal style priors keep the variances in a plausible range.
        log_prior = 0.0
        log_prior -= 0.5 * (np.log(self.cov.sig) - 0.0)**2 / 1.0  # Prior on signal variance
        log_prior -= 0.5 * (np.log(self.cov.nos) - (-3.0))**2 / 1.0  # Prior on noise variance

        return log_likelihood + log_prior
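    # NOTE: everything below, up to output_rst, is a disabled legacy
    # implementation of hyperparameter optimization kept for reference only;
    # its enclosing `def` line is not present. Original docstring:
    #     """
    #     Optimize GP hyperparameters by maximizing the log marginal likelihood.
    #     """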
    #     # Load training and support data
    #     train_data = Mdoub()
    #     support_data = Mdoub()
    #     load_data(train_file, train_data)
    #     load_data(support_file, support_data)

    #     # Define the objective function (negative log marginal likelihood)
    #     def objective(hyperparams):
    #         self.cov.update_hyperparameters(hyperparams)
    #         # print(f"Hyperparameters: {self.cov.get_hyperparameters()}")
    #         log_marginal_likelihood = self.compute_log_marginal_likelihood(train_data, support_data)
    #         return -log_marginal_likelihood  # Minimize the negative log marginal likelihood

    #     # Initial hyperparameters (e.g., signal variance, noise variance, length scales)
    #     initial_hyperparams = self.cov.get_hyperparameters()

    #     bounds = [(1e-6, None),  # Signal variance (must be > 0)
    #       (1e-6, None),  # Noise variance (must be > 0)
    #       (None, None)] + [(1e-6, None)] * len(self.cov.lsc)  # Length scales (must be > 0)
    #     # Optimize hyperparameters
    #     result = minimize(objective, initial_hyperparams, method='L-BFGS-B', bounds = bounds, options={'maxiter': 50})

    #     if result.success:
    #         self.cov.update_hyperparameters(result.x)
    #         print(f'GP hyperparametes updated')
    #         # print(f"Optimized hyperparameters: {result.x}")
    #     else:
    #         print("Hyperparameter optimization failed.")

    def output_rst(self, output):
        """
        Output the regression results to a file.
        :param output: Path to the output file.
        """
        ts = self.pmu.size()
        with open(output, "w") as fp:
            for i in range(ts):
                fp.write(f"{self.pmu[i]:.4f} {self.pvar[i]:.4f}\n")
        # print(f"Elapsed: {self.elapsed:.4f} | RMSE: {self.rmse:.4f} | MNLP: {self.mnlp:.4f}")