import time

start_time = time.time()

import os
import sys
import random
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso
from sklearn.model_selection import KFold
from math import sqrt
import scipy.stats as stats
import scipy.linalg as sp_linalg

print('Importing the packages:', time.time() - start_time)


def loss(y1, y2):
    '''
    squared error loss between y1 and y2
    (applied element-wise when the inputs are arrays)
    '''
    residual = y1 - y2
    return residual**2


def create_folders(n, res_folder):
    '''
    making sure the output folders exist: one per algorithm in ALGOS and one
    per comparison "<first algo>_<other algo>", each with a sub-folder n_<n>
    '''

    size_dir = f'n_{n}'

    # comparison folders pair the first algorithm with every *different* later one
    comparisons = ['_'.join([ALGOS[0], other]) for other in ALGOS[1:] if ALGOS[0] != other]

    for name in [*ALGOS, *comparisons]:
        os.makedirs(os.path.join(res_folder, name, size_dir), exist_ok=True)


def set_seed(n, rep):
    '''
    seeding both the stdlib and the NumPy global generators with a value
    derived from the sample size n and the replication index rep
    '''
    seed_value = n + 50001 * rep
    for seeder in (random.seed, np.random.seed):
        seeder(seed_value)


def OLS_solve(X, Y):
    '''
    ordinary least squares coefficients, obtained by solving the normal
    equations (X'X) beta = X'Y through a Cholesky factorisation of X'X
    '''

    chol_lower = np.linalg.cholesky(X.T @ X)
    rhs = X.T @ Y

    # forward substitution with L, then back substitution with L'
    forward = sp_linalg.solve_triangular(chol_lower, rhs, lower=True, check_finite=False)
    return sp_linalg.solve_triangular(chol_lower.T, forward, lower=False, check_finite=False)


def LS_solve(X, Y, method, beta_OLS=None, alpha=None, delta=None):
    '''
    computing the coefficient vector for a given method

    X, Y     : design matrix (2-D) and response vector
    method   : one of 'Ridge', 'Lasso', 'ST' (soft-thresholding of the OLS
               solution) or 'Oracle' (returns the module-level BETA)
    beta_OLS : OLS coefficients; only read (and required) when method == 'ST'
    alpha    : regularisation level
    delta    : optional offset added to alpha before fitting

    'Ridge' solves (X'X + alpha * I) beta = X'Y by Cholesky factorisation;
    'Lasso' and 'ST' use alpha / sample_size as penalty / threshold.

    raises ValueError for an unknown method, or for 'ST' without beta_OLS
    '''

    sample_size = X.shape[0]
    nb_features = X.shape[1]

    # identity test: `!= None` would be evaluated element-wise on an array
    if delta is not None:
        alpha = alpha + delta

    if method == 'Ridge':
        L = np.linalg.cholesky(X.T @ X + alpha * np.identity(nb_features))
        gamma_hat = sp_linalg.solve_triangular(L, X.T @ Y, lower=True, check_finite=False)
        beta_hat = sp_linalg.solve_triangular(L.T, gamma_hat, check_finite=False)
    elif method == 'Lasso':
        model = Lasso(alpha=alpha/sample_size, fit_intercept=False)
        model.fit(X, Y)
        beta_hat = model.coef_
    elif method == 'ST':
        if beta_OLS is None:
            raise ValueError("method 'ST' requires beta_OLS")
        # soft-thresholding: shrink every OLS coefficient towards zero
        beta_hat = np.sign(beta_OLS) * np.maximum(np.abs(beta_OLS) - alpha/sample_size, 0)
    elif method == 'Oracle':
        beta_hat = BETA
    else:
        # previously fell through and failed with an UnboundLocalError on return
        raise ValueError(f'unknown method: {method!r}')

    return beta_hat


def find_lambdas_opti_and_1se(n_splits, Xs, Ys, n, override='sqrt', verbose=False, nb_rounds=4):
    '''
    computing cross-validated lambdas

    n_splits  : number of folds of the inner cross-validation
    Xs, Ys    : lists of design matrices / response vectors; one pair of
                lambdas is selected for each (Xs[j], Ys[j])
    n         : sample size, only used to compute m = int(n * (1 - 1/(n_splits + 1)))
    override  : if 'sqrt', no cross-validation is run and sqrt(m) is returned
                for every dataset (for both the optimal and the 1se lambda)
    verbose   : print the intermediate losses and grids
    nb_rounds : number of grid searches; round 0 uses LAMBDAS_GRID, each later
                round uses an 11-point log-spaced grid around the previous best

    returns (lambdas_opti, lambdas_1se), two lists of length len(Xs)

    the estimator is chosen through the module-level METHOD; the returned
    values are those found in the last round, since the running minimum is
    reset at the start of every round
    '''

    k = n_splits + 1
    m = int(n * (1 - 1/k))

    N = len(Xs)
    # every dataset starts from its own copy of the global grid
    lambdas_grids_temp = {j: LAMBDAS_GRID[::] for j in range(N)}
    lambdas_opti = [0]*N
    lambdas_1se = [0]*N
    lambdas_opti_idx = [0]*N

    if override == 'sqrt':
        # shortcut: deterministic lambda, no cross-validation
        for j in range(N):
            lambdas_opti[j] = np.sqrt(m)
            lambdas_1se[j] = np.sqrt(m)
        return lambdas_opti, lambdas_1se
    else:
        pass

    kfold_split = KFold(n_splits=n_splits, shuffle=False)
    # across-fold standard deviation of the loss at the current best lambda
    one_ses_opti = [0]*N

    for u in range(nb_rounds):
        # the running minimum is reset every round
        min_losses = [np.inf]*N
        for j in range(N):
            if u > 0:
                # refine the grid between the neighbours of the previous best index
                # (one-sided when the best was at an end of the grid)
                lambdas_grid_temp = lambdas_grids_temp[j]
                lambda_opti_idx = lambdas_opti_idx[j]
                if lambda_opti_idx == 0:
                    start = lambdas_grid_temp[lambda_opti_idx]
                    end = lambdas_grid_temp[lambda_opti_idx + 1]
                elif lambda_opti_idx == len(lambdas_grid_temp) - 1:
                    start = lambdas_grid_temp[lambda_opti_idx - 1]
                    end = lambdas_grid_temp[lambda_opti_idx]
                else:
                    start = lambdas_grid_temp[lambda_opti_idx - 1]
                    end = lambdas_grid_temp[lambda_opti_idx + 1]
                # 11 points evenly spaced on the log scale between start and end
                lambdas_grids_temp[j] = [np.exp(np.log(start) + (q/10) * (np.log(end)-np.log(start))) for q in range(11)]
                lambdas_opti_idx[j] = 5 # to ensure we always have a min, otherwise it might come from last iteration and the idx might be out of range

            if verbose:
                print(min_losses[j])
            for i, alpha in enumerate(lambdas_grids_temp[j]):
                current_loss = 0
                one_se = 0
                avg_fold_losses = np.zeros(n_splits)
                # the fold indices are generated from Xs[0] and applied to dataset j
                # NOTE(review): assumes every Xs[j] has as many rows as Xs[0] -- confirm
                for fold, (train_index, test_index) in enumerate(kfold_split.split(Xs[0])):
                    X, Y = Xs[j], Ys[j]
                    X_train, X_test = X[train_index], X[test_index]
                    Y_train, Y_test = Y[train_index], Y[test_index]

                    beta_OLS = OLS_solve(X_train, Y_train)
                    beta_hat = LS_solve(X_train, Y_train, METHOD, beta_OLS, alpha=alpha)

                    pred_values = X_test @ beta_hat
                    losses = (Y_test - pred_values)**2
                    avg_fold_loss = np.mean(losses)
                    # CV loss = average over folds of the mean squared test error
                    current_loss += avg_fold_loss/n_splits
                    one_se += np.var(losses, ddof=1) / n_splits
                    avg_fold_losses[fold] = avg_fold_loss
                # one_se (from within-fold variances) is computed but not used below;
                # the selection rule relies on one_se_true
                one_se = np.sqrt(one_se)
                one_se_true = np.std(avg_fold_losses, ddof=1)
                if verbose:
                    print(alpha, current_loss)
                if current_loss <= min_losses[j]:
                    # new best lambda: the 1se lambda is reset to it
                    min_losses[j] = current_loss
                    lambdas_opti[j] = alpha
                    one_ses_opti[j] = one_se_true
                    lambdas_1se[j] = alpha
                    lambdas_opti_idx[j] = i
                elif current_loss <= min_losses[j] + one_ses_opti[j]:
                    # later grid point whose loss stays within one_ses_opti of the current minimum
                    lambdas_1se[j] = alpha
            
            if verbose:
                print(lambdas_grids_temp[j], lambdas_opti_idx[j], lambdas_opti[j])
    
    return lambdas_opti, lambdas_1se


def run_one_CV_exper(n, k):
    '''
    running one k-fold cross-validation replication on a fresh sample of size n

    returns, keyed by algorithm (and by "<first algo>_<other algo>" for the
    differences between the first algorithm and each other one):
      - the individual test losses of the k-fold CV
      - the k conditional fold errors, estimated on a large independent sample
      - the k standard deviations of the losses on that independent sample
      - the closed-form conditional risk averaged over the folds
    together with the array of lambdas selected for each fold
    '''

    # sample used for the cross-validation itself
    X = np.random.normal(0, 1, (n, FEATURES_DIM))
    y = X @ BETA + NOISE_SD * np.random.normal(0, 1, n)

    # large independent sample used to estimate the conditional errors
    cond_data_size = 1000000
    X_cond_err_Rn = np.random.normal(0, 1, (cond_data_size, FEATURES_DIM))
    y_cond_err_Rn = X_cond_err_Rn @ BETA + NOISE_SD * np.random.normal(0, 1, cond_data_size)

    assert X.shape[1] == X_cond_err_Rn.shape[1]

    indiv_err_kfoldCV_dict = {}
    cond_fold_err_kfoldCV_dict = {}
    cond_fold_std_kfoldCV_dict = {}
    R_cond_dict = {}

    # -1 marks "not selected yet"; lambdas are selected once per fold and
    # then shared by all the algorithms
    lambdas_opti = np.full(k, -1.0)
    lambdas_1se = np.full(k, -1.0)

    for algo in ALGOS:

        algo_t0 = time.time()

        per_fold_losses = []
        cond_means = []
        cond_stds = []
        cond_risk = 0

        cv_t0 = time.time()

        folds = KFold(n_splits=k, shuffle=False).split(X)

        for fold, (train_index, test_index) in enumerate(folds):
            X_train, y_train = X[train_index], y[train_index]
            X_test, y_test = X[test_index], y[test_index]

            if not lambdas_opti[fold] >= 0:
                found_opti, found_1se = find_lambdas_opti_and_1se(k-1, [X_train], [y_train], n, override=OVERRIDE, verbose=(fold==0))
                lambda_opti, lambda_1se = found_opti[0], found_1se[0]
                lambdas_opti[fold] = lambda_opti
                lambdas_1se[fold] = lambda_1se
            else:
                lambda_opti = lambdas_opti[fold]
                lambda_1se = lambdas_1se[fold]

            beta_OLS = OLS_solve(X_train, y_train)

            if 'opti' in algo:
                # "<method>_opti": fit at the selected lambda
                method = algo[:-5]
                beta_hat = LS_solve(X_train, y_train, method, beta_OLS, alpha=lambda_opti)
                print(method)
                print(f'lambda optimal = {lambda_opti}')
                print(f'lambda 1se = {lambda_1se}')
                print(f'coefs = {beta_hat}')
            else:
                # "<method>x<delta>": fit at the selected lambda shifted by delta
                name_parts = algo.split('x')
                method = name_parts[0]
                delta = float(name_parts[1])
                beta_hat = LS_solve(X_train, y_train, method, beta_OLS, alpha=lambda_opti, delta=delta)

            coef_gap = BETA - beta_hat
            cond_risk += (NOISE_SD**2 + np.dot(coef_gap, coef_gap)) / k

            per_fold_losses.append(loss(y_test, X_test @ beta_hat))

            cond_losses = loss(y_cond_err_Rn, X_cond_err_Rn @ beta_hat)
            cond_means.append(np.mean(cond_losses))
            cond_stds.append(np.std(cond_losses, ddof=1))

        print(algo, 'k-fold CV:', time.time() - cv_t0)

        indiv_err_kfoldCV_dict[algo] = np.concatenate(per_fold_losses)
        cond_fold_err_kfoldCV_dict[algo] = np.array(cond_means)
        cond_fold_std_kfoldCV_dict[algo] = np.array(cond_stds)
        R_cond_dict[algo] = cond_risk

        print(algo, 'total time for all procedures:', time.time() - algo_t0)

    # differences between the first algorithm and each of the other ones
    for other in ALGOS[1:]:
        reference = ALGOS[0]
        if reference == other:
            continue
        comp = '_'.join([reference, other])
        indiv_err_kfoldCV_dict[comp] = indiv_err_kfoldCV_dict[reference] - indiv_err_kfoldCV_dict[other]
        cond_fold_err_kfoldCV_dict[comp] = cond_fold_err_kfoldCV_dict[reference] - cond_fold_err_kfoldCV_dict[other]
        # not to be used for comparisons (just for simplicity when saving results)
        cond_fold_std_kfoldCV_dict[comp] = cond_fold_std_kfoldCV_dict[reference] - cond_fold_std_kfoldCV_dict[other]
        R_cond_dict[comp] = R_cond_dict[reference] - R_cond_dict[other]

    return indiv_err_kfoldCV_dict, cond_fold_err_kfoldCV_dict, cond_fold_std_kfoldCV_dict, R_cond_dict, lambdas_opti


# Compute R_hat, R_cond and sigma_est

def aggr_ours(indiv_err_kfoldCV, cond_fold_err_kfoldCV, n, k, version='out'):
    '''
    aggregating the k-fold CV losses into the CV error, the average
    conditional fold error and a standard deviation estimate

    version 'in' uses the within-fold variability (set to 0 when k == n,
    where it is not defined); any other version uses the overall variability
    of the individual losses around the CV error
    '''
    fold_size = int(n / k)

    # per-fold average loss (works when k divides n)
    fold_means = np.array([
        np.mean(indiv_err_kfoldCV[i * fold_size:(i + 1) * fold_size])
        for i in range(k)
    ])

    kfoldCV_err = np.mean(fold_means)
    kfold_test_err = np.mean(cond_fold_err_kfoldCV)
    mean_sq_indiv = np.mean(indiv_err_kfoldCV ** 2)

    if version != 'in':
        sigma_est = np.sqrt(mean_sq_indiv - kfoldCV_err ** 2)
    elif k == n:
        # LOOCV, sigma_in is not defined in this case
        sigma_est = 0
    else:
        sigma_est = np.sqrt((n / (n - k)) * (mean_sq_indiv - np.mean(fold_means ** 2)))

    return kfoldCV_err, kfold_test_err, sigma_est


def aggr_hold_out(indiv_err_kfoldCV, cond_fold_err_kfoldCV, n, k):
    '''
    hold-out counterpart of aggr_ours: only the first int(n / k) individual
    losses (i.e. the first fold) and the first conditional fold error are used
    '''
    first_fold = indiv_err_kfoldCV[:int(n / k)]

    ho_err = np.mean(first_fold)
    ho_second_moment = np.mean(first_fold ** 2)
    sigma_est = np.sqrt(ho_second_moment - ho_err ** 2)

    return ho_err, cond_fold_err_kfoldCV[0], sigma_est


def CI_2sided(center, scale, df=None, distrib="normal", alpha=0.05):
    '''
    two-sided confidence interval center -/+ q * scale at level 1 - alpha,
    where q is the (1 - alpha/2) quantile of the standard normal when
    distrib == "normal" and of the Student t with df degrees of freedom otherwise
    '''
    upper_prob = 1-alpha/2
    q = stats.norm.ppf(upper_prob, 0, 1) if distrib == "normal" else stats.t.ppf(upper_prob, df)
    half_width = q * scale
    return (center - half_width, center + half_width)


# MSS, LS, EPE and sigma_n^2 MC estimation

def sigma2_MSS_and_LS_estimation(n, k, M):
    '''
    Monte Carlo estimation (M independent draws) of the sigma^2, MSS, LS and
    EPE quantities for every algorithm in ALGOS and for every comparison
    between the first algorithm and each other one

    n, k : sample size and number of folds; training sets have size
           m = int(n * (1 - 1/k))
    M    : number of Monte Carlo draws

    each draw generates two independent training sets (X, Y) and (X2, Y2),
    a copy (X_prime, Y_prime) of the first one with its first observation
    replaced by a fresh one, and two independent test points

    returns eight lists of length 2 * len(ALGOS) - 1: the Monte Carlo mean and
    second moment of the sigma^2, MSS, LS and EPE terms (in that order)
    index 0 is ALGOS[0]; for the j-th other algorithm (j = 0, 1, ...),
    index 2*(j+1)-1 is the algorithm itself and index 2*(j+1) is the
    comparison "ALGOS[0] minus that algorithm"
    '''

    m = int(n * (1 - 1/k))

    array_len = 2 * len(ALGOS) - 1

    sigma2_values = {i: [] for i in range(array_len)}
    mss_values = {i: [] for i in range(array_len)}
    ls_values = {i: [] for i in range(array_len)}
    EPE_values = {i: [] for i in range(array_len)}

    sigma2_means = [0] * array_len
    sigma2_2ndmoments = [0] * array_len
    mss_means = [0] * array_len
    mss_2ndmoments = [0] * array_len
    ls_means = [0] * array_len
    ls_2ndmoments = [0] * array_len
    EPE_means = [0] * array_len
    EPE_2ndmoments = [0] * array_len

    for i in range(M):

        # NOTE: the order of the random draws below is kept fixed so that results
        # for a given seed stay reproducible

        # two independent test points
        X_0 = np.random.normal(0, 1, FEATURES_DIM)
        Y_0 = np.dot(X_0, BETA) + NOISE_SD * np.random.normal(0, 1)
        X_02 = np.random.normal(0, 1, FEATURES_DIM)
        Y_02 = np.dot(X_02, BETA) + NOISE_SD * np.random.normal(0, 1)
        # two independent training sets of size m
        X = np.random.normal(0, 1, (m, FEATURES_DIM))
        Y = X @ BETA + NOISE_SD * np.random.normal(0, 1, m)
        X2 = np.random.normal(0, 1, (m, FEATURES_DIM))
        Y2 = X2 @ BETA + NOISE_SD * np.random.normal(0, 1, m)
        # perturbed training set: first observation of (X, Y) replaced by a fresh one
        X_0_prime = np.random.normal(0, 1, FEATURES_DIM)
        Y_0_prime = np.dot(X_0_prime, BETA) + NOISE_SD * np.random.normal(0, 1)
        X_prime = X.copy()
        X_prime[0, :] = X_0_prime
        Y_prime = Y.copy()
        Y_prime[0] = Y_0_prime
        Xs = [X, X_prime, X2]
        Ys = [Y, Y_prime, Y2]
        lambdas_opti_f, lambdas_1se_f = find_lambdas_opti_and_1se(k-1, Xs, Ys, n, override=OVERRIDE)
        lambda_opti, lambda_opti_prime, lambda_opti2 = lambdas_opti_f
        # fixed: the 1se lambdas were previously unpacked from lambdas_opti_f
        lambda_1se, lambda_1se_prime, lambda_1se2 = lambdas_1se_f

        if i == 0:
            print(lambda_opti, lambda_opti_prime)
            print(lambda_1se, lambda_1se_prime)

        beta_OLS = OLS_solve(X, Y)
        beta_OLS2 = OLS_solve(X2, Y2)
        beta_opti = LS_solve(X, Y, METHOD, beta_OLS, alpha=lambda_opti)
        beta_OLS_prime = OLS_solve(X_prime, Y_prime)
        beta_opti_prime = LS_solve(X_prime, Y_prime, METHOD, beta_OLS_prime, alpha=lambda_opti_prime)

        if i == 0:
            print(beta_OLS)

        beta_opti2 = LS_solve(X2, Y2, METHOD, beta_OLS2, alpha=lambda_opti2)

        # losses of the reference algorithm (ALGOS[0]) on the test points
        l1_opti1 = (Y_0 - X_0 @ beta_opti)**2
        l1_opti2 = (Y_0 - X_0 @ beta_opti2)**2
        l2_opti2 = (Y_02 - X_02 @ beta_opti2)**2

        sigma2_values[0].append(l1_opti1 * (l1_opti2 - l2_opti2))

        loss_beta_opti = (Y_0 - np.dot(X_0, beta_opti))**2
        loss_beta_opti_prime = (Y_0 - np.dot(X_0, beta_opti_prime))**2
        dot_beta_opti = np.dot(BETA-beta_opti, BETA-beta_opti)
        dot_beta_opti_prime = np.dot(BETA-beta_opti_prime, BETA-beta_opti_prime)

        # squared change of the loss (MSS) / of the loss minus ||BETA - beta||^2 (LS)
        # when one training observation is replaced
        mss_values[0].append((loss_beta_opti - loss_beta_opti_prime)**2)
        ls_values[0].append(((loss_beta_opti - dot_beta_opti) - (loss_beta_opti_prime - dot_beta_opti_prime))**2)
        EPE_values[0].append(loss_beta_opti)

        for j, algo in enumerate(ALGOS[1:]):
            if 'opti' not in algo:
                # "<method>x<delta>": selected lambda shifted by delta
                method = algo.split('x')[0]
                delta = float(algo.split('x')[1])
                beta_hat = LS_solve(X, Y, method, beta_OLS, alpha=lambda_opti, delta=delta)
                beta_hat_prime = LS_solve(X_prime, Y_prime, method, beta_OLS_prime, alpha=lambda_opti_prime, delta=delta)
                beta_hat2 = LS_solve(X2, Y2, method, beta_OLS2, alpha=lambda_opti2, delta=delta)
            else:
                # "<method>_opti": selected lambda as is
                method = algo[:-5]
                beta_hat = LS_solve(X, Y, method, beta_OLS, alpha=lambda_opti)
                beta_hat_prime = LS_solve(X_prime, Y_prime, method, beta_OLS_prime, alpha=lambda_opti_prime)
                # fixed: beta_hat2 was never assigned in this branch (NameError, or a
                # stale value from a previous algorithm, in the lines below)
                beta_hat2 = LS_solve(X2, Y2, method, beta_OLS2, alpha=lambda_opti2)

            l1_hat1 = (Y_0 - X_0 @ beta_hat)**2
            l1_hat2 = (Y_0 - X_0 @ beta_hat2)**2
            l2_hat2 = (Y_02 - X_02 @ beta_hat2)**2
            l1_comp1 = l1_opti1 - l1_hat1
            l1_comp2 = l1_opti2 - l1_hat2
            l2_comp2 = l2_opti2 - l2_hat2

            # odd index: the algorithm itself; even index: comparison with ALGOS[0]
            sigma2_values[2*(j+1)-1].append(l1_hat1 * (l1_hat2 - l2_hat2))
            sigma2_values[2*(j+1)].append(l1_comp1 * (l1_comp2 - l2_comp2))

            loss_beta_hat = (Y_0 - np.dot(X_0, beta_hat))**2
            loss_beta_hat_prime = (Y_0 - np.dot(X_0, beta_hat_prime))**2
            dot_beta_hat = np.dot(BETA-beta_hat, BETA-beta_hat)
            dot_beta_hat_prime = np.dot(BETA-beta_hat_prime, BETA-beta_hat_prime)

            mss_values[2*(j+1)-1].append((loss_beta_hat - loss_beta_hat_prime)**2)
            ls_values[2*(j+1)-1].append(((loss_beta_hat - dot_beta_hat) - (loss_beta_hat_prime - dot_beta_hat_prime))**2)
            EPE_values[2*(j+1)-1].append(loss_beta_hat)

            mss_values[2*(j+1)].append(((loss_beta_opti - loss_beta_hat) - (loss_beta_opti_prime - loss_beta_hat_prime))**2)
            ls_values[2*(j+1)].append((((loss_beta_opti - dot_beta_opti) - (loss_beta_hat - dot_beta_hat)) - ((loss_beta_opti_prime - dot_beta_opti_prime) - (loss_beta_hat_prime - dot_beta_hat_prime)))**2)
            EPE_values[2*(j+1)].append(loss_beta_opti - loss_beta_hat)
            if i == 0:
                print(beta_hat[0], beta_opti[0], beta_hat_prime[0], beta_opti_prime[0])
                print(beta_hat[-1], beta_opti[-1], beta_hat_prime[-1], beta_opti_prime[-1])
                print(beta_hat)
                print(beta_hat @ beta_hat.T)

    # Monte Carlo means and second moments over the M draws
    for i in range(array_len):
        sigma2_values[i] = np.array(sigma2_values[i])
        sigma2_means[i] = np.mean(sigma2_values[i])
        sigma2_2ndmoments[i] = np.mean(sigma2_values[i]**2)
        mss_values[i] = np.array(mss_values[i])
        mss_means[i] = np.mean(mss_values[i])
        mss_2ndmoments[i] = np.mean(mss_values[i]**2)
        ls_values[i] = np.array(ls_values[i])
        ls_means[i] = np.mean(ls_values[i])
        ls_2ndmoments[i] = np.mean(ls_values[i]**2)
        EPE_values[i] = np.array(EPE_values[i])
        EPE_means[i] = np.mean(EPE_values[i])
        EPE_2ndmoments[i] = np.mean(EPE_values[i]**2)

    return sigma2_means, sigma2_2ndmoments, mss_means, mss_2ndmoments, ls_means, ls_2ndmoments, EPE_means, EPE_2ndmoments


# Script entry point.
# Command-line arguments (positional): METHOD SAMPLE_SIZE K REP RES_FOLDER DENSE_BETA FIG1
# The upper-case names defined below are module-level globals read by the functions above.
if __name__ == '__main__':

    # restart the clock so that the total run time excludes the imports
    start_time = time.time()

    METHOD = str(sys.argv[1])
    SAMPLE_SIZE = int(sys.argv[2])
    K = int(sys.argv[3])
    REP = int(sys.argv[4])
    RES_FOLDER = str(sys.argv[5])
    DENSE_BETA = int(sys.argv[6]) # used as a boolean flag (0 / non-zero)
    FIG1 = int(sys.argv[7]) # used as a boolean flag (0 / non-zero)

    # the reference algorithm (selected lambda) and the same method with lambda shifted by 1
    ALGOS = [f'{METHOD}_opti', f'{METHOD}x1']

    # Lasso outside of the FIG1 setting: lambdas are cross-validated (OVERRIDE = None)
    # with fewer simulations; otherwise lambda is fixed to sqrt(m) (OVERRIDE = 'sqrt')
    # NOTE(review): NB_REPS and M_SIGMA2 are not referenced anywhere else in this file
    if METHOD == 'Lasso' and not(FIG1):
        NB_SIMS = 10
        NB_REPS = 5000
        M_MSS_LS = 200
        M_SIGMA2 = 200
        OVERRIDE = None
    else:
        NB_SIMS = 100
        NB_REPS = 500
        M_MSS_LS = 10000
        M_SIGMA2 = 10000
        OVERRIDE = 'sqrt'

    if DENSE_BETA:
        BETA = [3, 1, -5, 3, 4, -3, 10, 8, 5, 2] # for experiments with ST with fully dense beta
    else:
        BETA = [3, 1, -5, 3, 0, 0, 0, 0, 0, 0]
    
    FEATURES_DIM = len(BETA)

    BETA = np.array(BETA)

    NOISE_SD = 10

    # initial grid (powers of 10 from 1e-12 to 1e6) for the lambda search
    LAMBDAS_GRID = [1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100, 1000, 10000, 100000, 1000000]

    print(METHOD, SAMPLE_SIZE, K, REP, RES_FOLDER)

    create_folders(SAMPLE_SIZE, RES_FOLDER)

    set_seed(SAMPLE_SIZE, REP) # for reproducibility

    current_time = time.time()

    # Monte Carlo quantities, computed once and then appended to every result row
    sigma2_means, sigma2_2ndmoments, mss_means, mss_2ndmoments, ls_means, ls_2ndmoments, EPE_means, EPE_2ndmoments = sigma2_MSS_and_LS_estimation(SAMPLE_SIZE, K, M_MSS_LS)

    print('sigma^2, MSS and LS estimation (MC):', time.time() - current_time)

    # Let's now store all the results we need

    current_time = time.time()

    # keys in the same order as the indices used by sigma2_MSS_and_LS_estimation:
    # first algorithm, then (other algorithm, comparison) pairs
    method_keys = []
    method_keys.append(ALGOS[0])
    for algoB in ALGOS[1:]:
        algoA = ALGOS[0]
        comp = '_'.join([algoA, algoB])
        method_keys.append(algoB)
        method_keys.append(comp)

    # for METHOD = 'ST'
    # we have method_keys = ['ST_opti', 'STx1', 'ST_opti_STx1']

    # one list of result rows (one row per simulation) for each key
    res_array_list_dict = {method: [] for method in method_keys}    

    for b in range(NB_SIMS):

        indiv_err_kfoldCV_dict, cond_fold_err_kfoldCV_dict, cond_fold_std_kfoldCV_dict, R_cond_dict, lambdas_opti = run_one_CV_exper(SAMPLE_SIZE, K)

        for key in method_keys:

            if b == 0:
                print(key)

            R_hat_in, R_cond_in, sigma_in = aggr_ours(indiv_err_kfoldCV_dict[key], cond_fold_err_kfoldCV_dict[key], SAMPLE_SIZE, K, version='in')
            R_hat_out, R_cond_out, sigma_out = aggr_ours(indiv_err_kfoldCV_dict[key], cond_fold_err_kfoldCV_dict[key], SAMPLE_SIZE, K, version='out')
            R_hat_ho, R_cond_ho, sigma_ho = aggr_hold_out(indiv_err_kfoldCV_dict[key], cond_fold_err_kfoldCV_dict[key], SAMPLE_SIZE, K)
            
            # row layout: K conditional fold errors, K conditional fold standard deviations,
            # then the 3 x 3 aggregates ('in', 'out' and hold-out)
            res_array = np.concatenate([cond_fold_err_kfoldCV_dict[key], cond_fold_std_kfoldCV_dict[key]])
            res_array = np.append(res_array, [R_hat_in, R_cond_in, sigma_in, R_hat_out, R_cond_out, sigma_out, R_hat_ho, R_cond_ho, sigma_ho])
            assert len(res_array) == (K + K + 3*3)

            res_array = np.append(res_array, lambdas_opti) # storing CV optimal lambdas to plot histogram

            res_array = np.append(res_array, mss_means) # storing MSS MC means for all algos and comps
            res_array = np.append(res_array, mss_2ndmoments) # storing MSS MC 2nd moments for all algos and comps
            res_array = np.append(res_array, ls_means) # storing LS MC means for all algos and comps
            res_array = np.append(res_array, ls_2ndmoments) # storing LS MC 2nd moments for all algos and comps
            res_array = np.append(res_array, sigma2_means) # storing sigma^2 MC means for all algos and comps
            res_array = np.append(res_array, sigma2_2ndmoments) # storing sigma^2 MC 2nd moments for all algos and comps
            res_array = np.append(res_array, [R_cond_dict[key]]) # storing the fold-averaged conditional risk of this key
            res_array = np.append(res_array, EPE_means) # storing EPE MC means for all algos and comps
            res_array = np.append(res_array, EPE_2ndmoments) # storing EPE MC 2nd moments for all algos and comps

            res_array_list_dict[key].append(res_array)

    # finally we store everything in a DataFrame

    # one HDF5 file per key: <RES_FOLDER>/<key>/n_<SAMPLE_SIZE>/rep_<REP>.h5
    for key in method_keys:

        df = pd.DataFrame(res_array_list_dict[key])
        res_file_name = os.path.join(RES_FOLDER, key, f'n_{SAMPLE_SIZE}', f'rep_{REP}.h5')
        df.to_hdf(res_file_name, key='res')

    print('Final results and storing:', time.time() - current_time)

    print(' ')
    print('Total run time:', time.time() - start_time)