#!/usr/bin/env python
# coding: utf-8
import os, pickle, json, sys, time
from itertools import product
from scipy import stats
import pandas as pd
import numpy as np
from numpy.testing import *
import matplotlib.pyplot as plt

# import packages from /workspace/ (current working directory)
sys.path.append('/workspace/')
from utils import sharpe, estimate_residual_by_time_series_regression, estimate_coef_by_time_series_regression, get_V_matrices

# import packages from /workspace/Estimators/
sys.path.append('/workspace/Estimators')
from rppca_adj import RPPCAadj as RPPCA
from pcaxc import PCA_XC


def cov(a):
    """Return the sample covariance of the columns of ``a`` as a 2-D square array.

    Rows of ``a`` are observations. The reshape keeps the result 2-D even
    when ``a`` has a single column.
    """
    n_cols = a.shape[1]
    return np.cov(a, rowvar=False).reshape(n_cols, n_cols)


def mean(a):
    """Return the column-wise mean of ``a``."""
    return np.mean(a, axis=0)


# Short alias for numpy's diag (extract a diagonal / build a diagonal matrix).
diag = np.diag


def rename_estimator(x:tuple, my_pca_name:str) -> str:
    """
    Turn an (estimator, V_inv_type) pair into a short display label.

    'PCA' and 'RP-PCA' map to themselves.  For 'MY-PCA' the label is
    ``my_pca_name`` followed by a suffix chosen by ``x[1]``; in the suffix,
    'p' is used for population parameters and 's' for the sample estimates.

    Raises AssertionError when the estimator or the V_inv_type is not recognized.
    """
    estimator = x[0]
    if estimator == 'PCA':
        return 'PCA'
    if estimator == 'RP-PCA':
        return 'RP-PCA'
    if estimator != 'MY-PCA':
        raise AssertionError(x[0], x[1])

    # (V_inv_type, suffix) pairs, checked in order with ==.
    suffix_by_v_type = (
        ('population_cov_of_xs_alpha', '_pa'),
        ('population_diag_of_cov_of_xs_alpha', '_par'),
        ('population_cov_of_E', '_pe'),
        ('population_diag_of_cov_of_E', '_per'),
        ('sample_cov_of_E', '_s'),
        ('sample_diag_of_cov_of_E', '_sr'),
    )
    v_inv_type = x[1]
    for candidate, suffix in suffix_by_v_type:
        if v_inv_type == candidate:
            return my_pca_name + suffix

    raise AssertionError(x[1])


def rename_estimator_(x):
    """Apply rename_estimator with the fixed base name 'PCA-XC'."""
    return rename_estimator(x, 'PCA-XC')


def sample_gaussian(n_samples:int, mu:np.ndarray, Sigma:np.ndarray) -> np.ndarray:
    """
    Draw ``n_samples`` rows from N(mu, Sigma) using numpy's global random state.

    ``mu`` must be 1-D and ``Sigma`` a square 2-D array of matching size.
    Standard-normal draws are coloured with the Cholesky factor of ``Sigma``
    and then shifted by ``mu``.  Returns an (n_samples, len(mu)) array.
    """
    assert mu.ndim == 1
    assert Sigma.ndim == 2
    assert len(mu) == Sigma.shape[0] and Sigma.shape[0] == Sigma.shape[1]

    chol_lower = np.linalg.cholesky(Sigma)
    white_noise = np.random.randn(n_samples, len(mu))
    return white_noise @ chol_lower.T + mu[np.newaxis, :]


def sample_from_factor_model(mu_B, Sigma_B, mu_F, Sigma_F, Sigma_U, N, T, alpha=None):
    """
    Data generating process for one simulation experiment, given
    a model := (mu_B, Sigma_B, mu_F, Sigma_F, Sigma_U, alpha).

    Returns the tuple (F, B, U, Y) where Y = F B' + U, plus ``alpha`` added
    to every row of Y when ``alpha`` is supplied.
    """
    has_alpha = alpha is not None

    assert N == len(Sigma_U) # the number of assets
    if has_alpha:
        assert N == len(alpha) and alpha.ndim == 1

    # The draw order (F, then B, then U) determines the random stream; keep it.
    F = sample_gaussian(T, mu_F, Sigma_F) # T by K
    B = sample_gaussian(N, mu_B, Sigma_B) # N by K
    U = sample_gaussian(T, np.zeros(N), Sigma_U) # T by N

    Y = F @ B.T + U # T by N
    if has_alpha:
        Y += alpha[np.newaxis,:] # broadcast over the T rows

    return F, B, U, Y


def compute_performance(estimated_params:dict, V_matrices:dict, Y_trn:np.ndarray, Y_tst:np.ndarray) -> "tuple[pd.Series, pd.Series]":
    """
    Compute in-sample and out-of-sample performance of one estimated model.

    Parameters
    ----------
    estimated_params : dict
        Reads the keys 'F_hat', 'U_hat', 'beta_hat' and 'alpha_hat'.
    V_matrices : dict
        Weighting matrices keyed by V_inv_type.  Every entry except the one
        keyed 'identity' yields an extra 'RMS_a_<V_inv_type>' metric.
    Y_trn, Y_tst : np.ndarray
        Training and test data; Y_trn must be 2-D and its second dimension
        is used as N.

    Returns
    -------
    (perf_ins, perf_oos) : two pd.Series
        perf_ins holds 'SR', 'RMS_a', 'std_a' and the weighted RMS metrics;
        perf_oos holds the same metrics except 'SR'.  Note that 'std_a' is
        trace(cov(.))/N, i.e. an average variance; the key name is kept for
        compatibility with downstream code.
    """
    _, N = Y_trn.shape

    F = estimated_params['F_hat']
    U = estimated_params['U_hat']
    beta = estimated_params['beta_hat']
    alpha = estimated_params['alpha_hat'].reshape(-1,1)

    def _rms(vec, V=None):
        # sqrt(vec' V vec / N); V=None means the identity weighting.
        quad = vec.T @ vec if V is None else vec.T @ V @ vec
        return np.sqrt(quad[0,0]/N)

    ################################################################################
    # In-sample performance, i.e., performance on training data
    ################################################################################
    SDFweights = np.linalg.inv(cov(F)) @ np.mean(F, axis=0).T
    SDF = F @ SDFweights

    perf_ins = {'SR': sharpe(SDF),
                'RMS_a': _rms(alpha),
                'std_a': np.trace(cov(U))/N}
    for V_inv_type, V in V_matrices.items():
        if V_inv_type != 'identity':
            perf_ins[f'RMS_a_{V_inv_type}'] = _rms(alpha, V)

    perf_ins = pd.Series(perf_ins)

    ################################################################################
    # Out-of-sample performance, i.e., performance on test data
    ################################################################################
    # Cross-sectional regression of test data on time-series regression betas:
    # project each test row onto the column space of beta and keep the remainder.
    Y_tst_estimated = Y_tst @ beta @ np.linalg.inv(beta.T @ beta) @ beta.T
    alpha_tst_xs_regr = Y_tst - Y_tst_estimated
    meanalpha = np.mean(alpha_tst_xs_regr, axis=0).reshape(-1,1)

    perf_oos = {'RMS_a': _rms(meanalpha),
                'std_a': np.trace(cov(alpha_tst_xs_regr))/N}
    for V_inv_type, V in V_matrices.items():
        if V_inv_type != 'identity':
            perf_oos[f'RMS_a_{V_inv_type}'] = _rms(meanalpha, V)

    perf_oos = pd.Series(perf_oos)

    return perf_ins, perf_oos


def compute_stats(list_perf) -> pd.DataFrame:
    """
    Summarise performance metrics across simulation runs.

    ``list_perf`` holds one dict per simulation, mapping a key
    (estimator, eta, V_inv_type) to a pd.Series of metrics.  For each key the
    runs are summarised with ``describe`` (count/mean/std/min/percentiles/max)
    and t-based confidence bounds for the mean at the 90/95/99% levels.

    The result has, per key, one column per statistic; the first three rows
    ('estimator', 'eta', 'V_inv_type') carry the key and the remaining rows
    are the metrics.  Blocks for different keys are placed side by side.
    """
    n_simuls = len(list_perf)
    percentile_grid = [0.01,0.05,0.1,0.15,0.2,0.25,0.3,0.35,0.4,0.45,0.5,
                       0.55,0.6,0.65,0.7,0.75,0.8,0.85,0.9,0.95,0.99]

    blocks = []
    for key in list_perf[0].keys():
        # One column per simulation run, one row per metric.
        per_run = pd.concat([run[key] for run in list_perf], axis=1)
        summary = per_run.T.describe(percentiles=percentile_grid).T

        # Two-sided t confidence bounds around the mean.
        lower_bounds, upper_bounds = {}, {}
        for confidence_level in (0.9, 0.95, 0.99):
            t_crit = stats.t.ppf(confidence_level+(1-confidence_level)/2, n_simuls-1)
            half_width = t_crit * summary['std'] / np.sqrt(n_simuls)
            level_tag = f"{int(confidence_level*100)}%"
            lower_bounds[f"conf_intv_lb_{level_tag}"] = summary['mean'] - half_width
            upper_bounds[f"conf_intv_ub_{level_tag}"] = summary['mean'] + half_width

        summary = pd.concat([pd.concat(lower_bounds, axis=1),
                             pd.concat(upper_bounds, axis=1),
                             summary], axis=1)

        # Header rows repeating the key above every statistic column.
        header = pd.DataFrame([key]*summary.shape[1],
                              index=summary.columns,
                              columns=['estimator','eta','V_inv_type']).T
        blocks.append(pd.concat([header, summary]))

    return pd.concat(blocks,axis=1)


if __name__ == "__main__":
    import shutil  # only needed by the script entry point (copying the config file)

    strt_time = time.time()
    ############################################################################################################
    #################################### 1. Read configuration file, etc.  #####################################
    ############################################################################################################
    # 1.1. Configuration file: first CLI argument if given, else the default path.
    if len(sys.argv) > 1:
        path_config_file = sys.argv[1]
    else:
        path_config_file = "/workspace/Config/monte_carlo_simulation.json"

    with open(path_config_file, 'r') as f:
        config = json.load(f)
    reference_period = config["reference_period_for_data_generation"]
    do_sanity_check = config["do_sanity_check"]

    # Save a copy of the config file next to the outputs.
    out_directory = config["outpath_dir"]
    os.makedirs(out_directory, exist_ok=True)

    path_tmp = os.path.join(out_directory, os.path.split(path_config_file)[1])
    try:
        # shutil.copy instead of shelling out to `cp`: portable, and safe for
        # paths containing spaces or shell metacharacters.
        shutil.copy(path_config_file, path_tmp)
    except shutil.SameFileError:
        # The config already lives in the output directory; nothing to copy.
        pass
    print(f'The output file will be saved in {out_directory}.')

    # 1.2. Set random seed for reproducibility.
    np.random.seed(config["random_seed"])

    ############################################################################################################
    ######################################### 2. Read Fama-French data #########################################
    ############################################################################################################
    def restrict_data_date(df:pd.DataFrame, freq:str, left_mth:int, right_mth:int) -> pd.DataFrame:
        """
        Keep the rows of ``df`` whose integer index lies in the requested period.

        For freq='monthly' the bounds are used as given; for freq='daily' they
        are scaled to ``left_mth*100`` and ``(right_mth+1)*100``.  Both bounds
        are inclusive.  Raises ValueError for any other ``freq``.
        """
        assert_array_equal(df.index.map(lambda x: isinstance(x, int)), True)
        if freq == 'daily':
            left = left_mth * 100
            right = (right_mth+1) * 100
        elif freq == 'monthly':
            left = left_mth
            right = right_mth
        else:
            raise ValueError(f"Unknown freq: {freq!r} (expected 'daily' or 'monthly')")

        i = np.logical_and(left <= df.index, df.index <= right)
        return df.loc[i]

    ffc_dat = []
    for key in ['5factors','momentum']:
        config2 = config["ffc_monthly_file"][key]
        tmp = pd.read_csv(config2['path'], skiprows=config2['skiprows'], skipfooter=config2['skipfooter'],
                          index_col=0, encoding='cp1252', engine='python', dtype=float)

        tmp = restrict_data_date(tmp, freq='monthly', left_mth=reference_period[0], right_mth=reference_period[1])

        # Sanity check: look for the missing-value codes on the raw values.
        # After the division by 100 below they could no longer be matched.
        if do_sanity_check:
            assert_array_equal(tmp.isin([-99.99, -999]), False)

        tmp /= 100
        ffc_dat.append(tmp)

    ffc_m = pd.concat(ffc_dat, axis=1)
    ffc_m.columns = ffc_m.columns.map(lambda x: x.strip())

    # Sanity check.  (The former checks on `ffc_d` were dropped: no daily data
    # is loaded in this script, so that name was undefined.)
    if do_sanity_check:
        assert_array_equal(ffc_m.isna(), False)


    ############################################################################################################
    ####################################### 3. Read test assets' returns #######################################
    ############################################################################################################
    # Read data on the LHS
    info = config["test_asset_for_data_gen"]
    df = pd.read_csv(info["path"], skiprows=info["skiprows"], nrows=info["nrows"], index_col=0, encoding='mac-roman', engine="python", dtype=float)
    df = df.loc[reference_period[0] : reference_period[1]]

    # Sanity check: look for the missing-value codes on the raw values.
    # After the division by 100 below they could no longer be matched.
    if do_sanity_check:
        assert_array_equal(df.isin([-99.99, -999]), False)

    df /= 100

    test_asset = {}
    test_asset[info["name"]] = {"rtrn": df, "related_factors": info["related_factors"]}

    # Sanity check: same dates as the factor data, and no missing values.
    if do_sanity_check:
        for key, tst_asst in test_asset.items():
            assert_array_equal(tst_asst["rtrn"].index, ffc_m.index)
            assert_array_equal(tst_asst["rtrn"].isna(), False)


    ############################################################################################################
    #################################### 4. Compute descriptive statistics #####################################
    ############################################################################################################
    # Per factor set: factor Sharpe ratios, signal-to-noise ratios and
    # annualized standard deviations, plus the residual correlation matrix.
    corr = {}
    descriptive_stat = {}
    for factors in [['Mkt-RF','SMB','HML'], ['Mkt-RF','RMW','CMA']]:
        key = ','.join(factors)

        # Sharpe ratio of each factor: mean over sample std (not annualized).
        f = ffc_m.loc[:,factors].copy()
        SR_F = f.mean()/f.std(axis=0, ddof=1)
        descriptive_stat[key, 'SR'] = SR_F.round(3)

        # NOTE(review): the test-asset name is hard-coded; it presumably has to
        # match config["test_asset_for_data_gen"]["name"] -- confirm.
        r = test_asset['5x5 Size-B/M']['rtrn']
        resid = estimate_residual_by_time_series_regression(r, f,
                                                            ffc_m['RF'].copy(),
                                                            intercept=True,
                                                            min_non_missing=0)
        # SNR: factor variance relative to the average residual variance.
        avg_resid_var = resid.var(axis=0, ddof=1).mean()
        descriptive_stat[key, 'SNR'] = f.var(axis=0, ddof=1)/avg_resid_var
        # sqrt(12) scales the sample std to an annual figure.
        descriptive_stat[key, 'ann. stddev'] = np.std(f, ddof=1) * np.sqrt(12)

        corr[key] = np.corrcoef(resid, rowvar=False)

    # Reshape the collected statistics into a table with rows SR / SNR /
    # ann. stddev, then print it.
    descriptive_stat = pd.concat(descriptive_stat).round(3)
    descriptive_stat = descriptive_stat.unstack([0,2])
    descriptive_stat = descriptive_stat.droplevel(0, axis=1)
    descriptive_stat = descriptive_stat.loc[['SR','SNR', 'ann. stddev']]
    print(descriptive_stat)


    ############################################################################################################
    ################################# 5. Second-order statistics of residuals ##################################
    ############################################################################################################
    # For each 3-factor model: normalized residual covariance, split into
    # per-asset standard deviations and a correlation matrix.
    r = test_asset['5x5 Size-B/M']['rtrn']
    stddev_resid, corr_resid = {}, {}
    for factors in (('Mkt-RF','SMB','HML'), ('Mkt-RF','RMW','CMA')):
        key = ','.join(factors)

        # Residuals of the regression of 5x5 size-b/m portfolio returns on the 3-factor model
        f = ffc_m.loc[:,factors].copy()
        resid = estimate_residual_by_time_series_regression(
            r, f, ffc_m['RF'].copy(), intercept=True, min_non_missing=0)

        # Scale the covariance so that its diagonal averages to one.
        avg_resid_var = resid.var(axis=0, ddof=1).mean()
        Sigma_E = np.cov(resid, rowvar=False, ddof=1) / avg_resid_var
        assert_almost_equal(np.mean(diag(Sigma_E)), 1)

        # Sigma_E = D * Corr * D with D = diag(stddev)  =>  Corr = D^-1 Sigma_E D^-1
        stddev_resid[factors] = np.sqrt(diag(Sigma_E))
        inv_stddev = diag(1/stddev_resid[factors])
        corr_resid[factors] = inv_stddev @ Sigma_E @ inv_stddev

        assert_allclose(diag(corr_resid[factors]), 1)



    ############################################################################################################
    # 6. Create DGP scenarios based on empirical data
    ############################################################################################################
    def int_to_date(period_int:list) -> str:
        """
        Format a sequence of YYYYMM integers as 'YYYY:MM' parts joined by '-'.

        Example: [196307, 201912] -> '1963:07-2019:12'.  The argument itself is
        formatted (previously the global reference period was used regardless
        of what was passed in).
        """
        return '-'.join(f"{int(x)//100}:{int(x)%100:02d}" for x in period_int)

    # 6-1. Do the main job
    stats_samples = {'Date range (specified in config)': int_to_date(reference_period)}

    def _fit_factor_loadings(r, factors_rhs):
        """Regress excess returns of ``r`` on ``factors_rhs`` (no intercept); return (r_ff3, excess_r, B)."""
        r_ff3 = ffc_m[factors_rhs]
        excess_r = r.subtract(ffc_m["RF"], axis=0)
        estm_params = estimate_coef_by_time_series_regression(excess_r,
                                                              r_ff3,
                                                              rf=None,
                                                              intercept=False,
                                                              min_non_missing=0)
        B = estm_params.loc[factors_rhs].T.values # N by K
        return r_ff3, excess_r, B

    scenarios = []
    for tst_asst_nm, tst_asst in test_asset.items(): # TODO: needs to be changed.
        r = tst_asst['rtrn']
        T, N = r.shape
        stats_samples[tst_asst_nm, 'mu_R'] = mean(r.values)
        stats_samples[tst_asst_nm, 'Sigma_R'] = cov(r.values)
        stats_samples[tst_asst_nm, 'T'] = T
        stats_samples[tst_asst_nm, 'N'] = N
        stats_samples[tst_asst_nm, 'Date range'] = int_to_date([r.index.min(), r.index.max()])

        # Only the DGP scenarios flagged "include" in the config are built.
        for i, conf_dgp in filter(lambda x: x[1]["include"], config["dgp_scenarios"].items()):
            dgp_nm = conf_dgp["name"]

            #
            if dgp_nm == "weak factor":
                K = 4
                for factors in [('Mkt-RF','SMB','HML'), ('Mkt-RF','RMW','CMA')]:
                    # 1. Residual statistics
                    base = {"dgp_name": dgp_nm, "tst_asst_name": tst_asst_nm, "rhs_factors": factors, "N": N, "K": K}
                    sd = diag(stddev_resid[factors])
                    base['Sigma_U'] = sd @ corr_resid[factors] @ sd
                    assert_almost_equal(np.mean(diag(base['Sigma_U'])), 1)

                    # 2. Loading sampler
                    base['Sigma_B'] = np.identity(K)
                    base['mu_B'] = np.zeros(K)

                    # 3. Factor statistics (the slices select which grid points are active)
                    for var_F, sr_F in product([0.1,0.03][1:], [0.2,0.3,0.5,0.8][2:3]):
                        # A fresh dict per grid point: appending one shared dict
                        # would make every scenario alias the last grid point.
                        dict_tmp = dict(base)
                        dict_tmp['Sigma_F'] = diag([5,0.3,0.1,var_F])
                        SR_F = np.array([0.12,0.1,0.3,sr_F])
                        dict_tmp['mu_F'] = SR_F * np.sqrt(diag(dict_tmp['Sigma_F']))

                        scenarios.append(dict_tmp)

            #
            elif dgp_nm in ["sparse large spike", "sparse small spike"]:
                factors_rhs = ['Mkt-RF','SMB','HML']
                # K is the number of fitted factors (previously it silently
                # reused whatever K the weak-factor branch had set, if any).
                K = len(factors_rhs)
                dict_tmp = {"dgp_name": dgp_nm, "tst_asst_name": tst_asst_nm, "N": N, "K": K}

                # 1. Compute descriptive statistics of B
                # Fit F-F 3-factor model to get estimated loadings.
                r_ff3, excess_r, B = _fit_factor_loadings(r, factors_rhs)

                dict_tmp['Sigma_B'] = cov(B)
                dict_tmp['mu_B'] = np.mean(B, axis=0)

                # 2. Compute descriptive statistics of F
                # Regress Y (excess rtrn) onto B (loadings) in order to get F (factors).
                BTB_inv = np.linalg.inv(B.T @ B)
                BTY = B.T @ excess_r.values.T
                F = (BTB_inv @ BTY).T # T by K

                dict_tmp['Sigma_F'] = cov(F)
                dict_tmp['mu_F'] = np.mean(F, axis=0)

                # 3. Compute descriptive statistics of U (Residual covariance matrix)
                # NOTE(review): 'Sigma_U' is never set for this scenario, but the
                # experiment loop reads scenario['Sigma_U'] -- still to be implemented.

                scenarios.append(dict_tmp)

            #
            elif dgp_nm == 'correlated alphas':
                factors_rhs = ['Mkt-RF','SMB','HML']
                dict_tmp = {"dgp_name": dgp_nm, "tst_asst_name": tst_asst_nm, "N": N, "K": 3}

                # 1. Compute descriptive statistics of B
                # Fit F-F 3-factor model to get estimated loadings.
                r_ff3, excess_r, B = _fit_factor_loadings(r, factors_rhs)

                dict_tmp['Sigma_B'] = cov(B)
                dict_tmp['mu_B'] = np.mean(B, axis=0)

                # 2. Compute descriptive statistics of F
                dict_tmp['Sigma_F'] = cov(r_ff3.values)
                dict_tmp['mu_F'] = mean(r_ff3.values)

                # 3. Compute descriptive statistics of U (Residual covariance matrix)
                resid = estimate_residual_by_time_series_regression(excess_r,
                                                                    r_ff3,
                                                                    rf=None,
                                                                    intercept=False,
                                                                    min_non_missing=0)
                # NOTE(review): stored as 'Sigma_U_base', while the experiment loop
                # reads scenario['Sigma_U'] -- confirm how the two are meant to connect.
                dict_tmp['Sigma_U_base'] = cov(resid)
                scenarios.append(dict_tmp)


    elapsed_time = int(time.time() - strt_time)
    print(f"Done with creating DGP scenarios. -- Elapsed time: {elapsed_time//86400} day {elapsed_time%86400//3600} hr {elapsed_time%86400%3600//60} min {elapsed_time%86400%3600%60} sec")
    print(f"The number of scenarios: {len(scenarios)}")


    ############################################################################################################
    # 7. Define the main job.
    ############################################################################################################
    def main_fn(mu_B, Sigma_B, mu_F, Sigma_F, Sigma_U, N, T, K, T_test, dgp_name, config, do_sanity_check, alpha=None):
        """
        Run one simulation: generate data, fit all estimators, score them.

        Rule #1:
            main_fn must take one model, and generate one pair (training data, test data).

            A "model" is defined as a tuple (mu_B, Sigma_B, mu_F, Sigma_F, Sigma_U, alpha).

        Parameters
        ----------
        mu_B, Sigma_B, mu_F, Sigma_F, Sigma_U : model parameters passed to
            sample_from_factor_model.
        N, T, K : number of assets, training sample length, number of factors.
        T_test : test sample length.
        dgp_name : 'weak factor' or 'correlated alphas'.  The two
            'sparse ... spike' names raise NotImplementedError.
        config : configuration dict (estimator options, eta sweep, V_inv_type list).
        do_sanity_check : forwarded to PCA_XC.run as ``debug``.
        alpha : pricing errors for the 'correlated alphas' DGP (required there,
            ignored for 'weak factor').

        Returns
        -------
        (perf_ins, perf_oos, objval, gradnorm)
            perf_ins / perf_oos: dicts keyed by (estimator, eta, V_inv_type).
            objval / gradnorm: DataFrames of PCA-XC diagnostics across all
            (eta, V_inv_type) combinations; empty unless enabled in config.

        Raises
        ------
        NotImplementedError, ValueError : see ``dgp_name`` and ``alpha`` above.
        """
        ############################################################################################################
        # 7-1. Data generation
        ############################################################################################################
        # Scenario 1: Weak factor
        if dgp_name in ['weak factor']:
            alpha_dgp = None

        # Scenario 2 & 3: Sparse & big non-zero alpha
        elif dgp_name in ["sparse large spike", "sparse small spike"]:
            # The sampler for the sparse alternative does not exist yet.
            raise NotImplementedError(f"{dgp_name} -- not implemented yet")

        # Scenario 4: Significant cross-sectional correlation in alphas
        elif dgp_name in ['correlated alphas']:
            if alpha is None:
                raise ValueError("dgp_name='correlated alphas' requires the `alpha` argument")
            alpha_dgp = alpha

        else:
            raise ValueError(f"Unknown dgp_name: {dgp_name!r}")

        # Training draw first, then test draw (the order fixes the random stream).
        # NOTE(review): the test draw samples its own loadings, independent of the
        # training loadings B -- confirm that this is intended.
        _, B, _, Y = sample_from_factor_model(mu_B, Sigma_B, mu_F, Sigma_F, Sigma_U, N, T, alpha_dgp)
        _, _, _, Y_tst = sample_from_factor_model(mu_B, Sigma_B, mu_F, Sigma_F, Sigma_U, N, T_test, alpha_dgp)

        def _collect(estm):
            # Pick the K-factor results out of an estimator's output dict.
            return {'F_hat': estm['factors'],
                    'B_hat': estm['loadings'],
                    'U_hat': estm['residuals'][K],
                    'alpha_hat': estm['alphas'][:,K-1],
                    'beta_hat': estm['betas'][K]}

        ############################################################################################################
        # 7-2. Declare estimator class and compute sample covariance using PCA residuals
        ############################################################################################################
        rppca = RPPCA(Y, eta=0.0, K=K,
                      orthogonalize_lambda=config["orthogonalize_lambda"],
                      normalization_of_factors=config["normalization_of_factors"],
                      signnormalization=config["signnormalization"])

        pca_xc = PCA_XC(Y,
                        V=np.identity(N),
                        eta=0.0,
                        K=K,
                        orthogonalize_lambda=config["orthogonalize_lambda"],
                        normalization_of_factors=config["normalization_of_factors"],
                        signnormalization=config["signnormalization"],
                        max_iter=config["mypca"]["max_iter"],
                        sanity_check_full_rank=config['do_sanity_check_if_Lambda_F_are_of_full_rank'],
                        compute_objvals=config['do_compute_objvals'],
                        compute_gradient_norm=config['do_compute_grad_norm'])

        # PCA (eta = 0) to obtain the sample covariance of the residuals
        rppca.set_problem_specifiers(eta=0.0)
        sample_Sigma_U = cov(_collect(rppca.run())['U_hat'])


        ############################################################################################################
        # 7-3. Get V matrices
        ############################################################################################################
        V = {}
        for V_inv_type in config['mypca']['V_inv_type']:
            V[V_inv_type] = get_V_matrices(V_inv_type, N=N, Sigma_F=Sigma_F, Lambda=B, Sigma_U=Sigma_U, sample_Sigma_U=sample_Sigma_U, K_min=config['mypca']['K_min'], normalization=0)


        ############################################################################################################
        # 7-4. Estimate model
        ############################################################################################################
        # 7-4-1. Conventional PCA.
        # We run PCA once again for readability of the code.
        rppca.set_problem_specifiers(eta=0.0)
        estm_params = {('PCA',None,None): _collect(rppca.run())}

        # Diagnostics are accumulated over the whole eta sweep.  (They used to be
        # reset inside the loop, so only the last eta was returned.)
        objval = {}
        gradnorm = {}
        for eta in config['params_sweep']['all_etas']:
            ######################################################################
            # 7-4-2. RP-PCA
            ######################################################################
            rppca.set_problem_specifiers(eta=eta)
            estm_params[('RP-PCA',eta,None)] = _collect(rppca.run())

            ######################################################################
            # 7-4-3. PCA-XC
            ######################################################################
            for V_inv_type in config['mypca']['V_inv_type']:
                pca_xc.set_problem_specifiers(V=V[V_inv_type], eta=eta)
                estm = pca_xc.run(debug=do_sanity_check)
                estm_params[('MY-PCA',eta,V_inv_type)] = _collect(estm)

                if config['do_compute_objvals']:
                    objval[('MY-PCA',eta,V_inv_type)] = pca_xc.obj_vals

                if config['do_compute_grad_norm']:
                    gradnorm[('MY-PCA',eta,V_inv_type)] = pca_xc.obj_grad_norm

        # Wrapping in Series lets traces of different lengths share one frame
        # (shorter ones are padded with NaN).
        objval = pd.DataFrame({k: pd.Series(v) for k, v in objval.items()})
        gradnorm = pd.DataFrame({k: pd.Series(v) for k, v in gradnorm.items()})


        ############################################################################################################
        # 7-5. Compute performance
        ############################################################################################################
        perf_ins_tmp = {}
        perf_oos_tmp = {}
        for key2, estimated_params in estm_params.items():
            perf_ins_tmp[key2], perf_oos_tmp[key2] = compute_performance(estimated_params, V, Y, Y_tst)

        return perf_ins_tmp, perf_oos_tmp, objval, gradnorm


    ############################################################################################################
    # 8. Do the main job.
    ############################################################################################################
    # 8-1. Compute the number of loops
    # 8-1. Total number of (scenario, T/N) combinations
    t_over_n_grid = config['params_sweep']['T/N']
    num_all_loops = len(scenarios) * len(t_over_n_grid)

    # 8-2. Run: one summary table per (scenario, T/N) pair, in product order.
    perf_ins, perf_oos, scenarios_exp = [], [], []
    for ii, (scenario, T_over_N) in enumerate(product(scenarios, t_over_n_grid), start=1):
        model = {name: scenario[name] for name in ('mu_B', 'Sigma_B', 'mu_F', 'Sigma_F', 'Sigma_U')}

        dgp_name = scenario['dgp_name']
        tst_asst_name = scenario['tst_asst_name']
        N, K = scenario['N'], scenario['K']

        # Training length implied by the T/N ratio.
        T = int(np.floor(scenario['N'] * T_over_N))
        print((f"Loop#{ii}/{num_all_loops}", f"T/N={T_over_N}", tst_asst_name, dgp_name), end='\t')
        print(f"N={N}, T={T}, K={K}", end='\t')

        ## Repeat the simulation n_simul times for this configuration.
        tmp = [
            main_fn(N=N,
                    T=T,
                    K=K,
                    T_test=config["T_test"],
                    dgp_name=dgp_name,
                    config=config,
                    do_sanity_check=do_sanity_check,
                    **model)
            for _ in range(config['n_simul'])
        ]

        perf_ins.append(compute_stats([run[0] for run in tmp]))
        perf_oos.append(compute_stats([run[1] for run in tmp]))
        scenarios_exp.append((scenario, T_over_N))

        ## Progress report
        elapsed_time = int(time.time() - strt_time)
        days, rem = divmod(elapsed_time, 86400)
        hrs, rem = divmod(rem, 3600)
        mins, secs = divmod(rem, 60)
        print(f"Elapsed time: {days} day {hrs} hr {mins} min {secs} sec")

    elapsed_time = int(time.time() - strt_time)
    days, rem = divmod(elapsed_time, 86400)
    hrs, rem = divmod(rem, 3600)
    mins, secs = divmod(rem, 60)
    print(f"Done with running experiments. -- Elapsed time: {days} day {hrs} hr {mins} min {secs} sec")
    print("Plotting figures is left.")


    ############################################################################################################
    # 9. Plot
    ############################################################################################################
    def rename_1(name):
        """
        Map an estimator label to its legend text (with mathtext subscripts).

        Names without a dedicated legend text are returned unchanged
        (previously they raised UnboundLocalError).
        """
        legend_text = {
            'PCA': 'PCA',
            'RP-PCA': 'RP-PCA',
            'PCA-XC_pa': 'PCA-XC$_p$',
            'PCA-XC_par': 'PCA-XC$_{pr}$',
            'PCA-XC_s': 'PCA-XC$_{s}$',
            'PCA-XC_sr': 'PCA-XC$_{sr}$',
        }
        return legend_text.get(name, name)


    def rename_2(name):
        """
        Map a metric name to its subplot title (mathtext).

        Names without a dedicated title are returned unchanged (previously
        they raised UnboundLocalError).  Raw strings are used so that the
        backslashes reach matplotlib without invalid-escape warnings.
        """
        titles = {
            'RMS_a': r'RMS$_\alpha$',
            'RMS_a_population_cov_of_xs_alpha': r'RMS$_\alpha^{\Sigma_\alpha}$',
            'SR': 'SR',
        }
        return titles.get(name, name)


    ############################################################################################################
    # Per-estimator line styling, indexed by the estimator's position in the plot order.
    colors = ['k','c','r','b','g','m']
    line_styles = ['--','-','-.',':','--','--']
    linewidth = 1
    markersize = 6
    markers = ['','','','','','']


    nrows, ncols = 2, 3
    central_stat = 'mean'
    font_size = {'supfig_title':35, 'subplot_title':16, 'legend':16, 'xylabel':16, 'ticker': 35, 'xytick': 20}
    fig_suptitle_bottom_loc = 1.2
    conf_lev = 95


    fig, axes = plt.subplots(nrows, ncols, sharex=True, figsize=(ncols*3.4, nrows*3))
    fig.subplots_adjust(hspace=0.1, wspace=0.34)

    # i is the row index of plt.subplots.
    # perf_ins has one entry per (scenario, T/N) pair, so it is paired with
    # scenarios_exp (same order and length) rather than with `scenarios`.
    for i, (perf_, (scenario, _)) in enumerate(zip(perf_ins, scenarios_exp)):
        # Replace the (estimator, V_inv_type) header rows by one display label
        # and rebuild the column index as (statistic, estimator, eta).
        df_ = perf_.copy().T.reset_index().T
        df_.loc['estimator'] = df_.loc[['estimator','V_inv_type']].apply(rename_estimator_, axis=0)
        df_ = df_.drop('V_inv_type')
        df_ = df_.T.set_index(['index','estimator','eta']).T

        # j is the column index of plt.subplots
        for j, metric in enumerate(['RMS_a', 'RMS_a_population_cov_of_xs_alpha', 'SR']):
            # Parse central stats: one column per estimator, indexed by eta.
            # PCA has no eta, so its single value is repeated across the eta grid.
            df2 = df_.loc[metric,central_stat].unstack(0).drop('PCA', axis=1).dropna()
            pca = pd.Series(df_.loc[metric, central_stat]['PCA'].iloc[0], index=df2.index, name='PCA')
            cent_stat = pd.concat([df2, pca], axis=1)
            cent_stat = cent_stat[['PCA','RP-PCA','PCA-XC_pa','PCA-XC_par','PCA-XC_sr','PCA-XC_s',][:4]]
            cent_stat = cent_stat.rename(rename_1, axis=1)

            # Parse the lower confidence bound.  compute_stats builds the
            # interval as mean -/+ half-width, so the lower bound alone gives
            # the (symmetric) error-bar length.
            err_lb = df_.loc[metric,f"conf_intv_lb_{conf_lev}%"].unstack(0).drop('PCA', axis=1).dropna()
            err_lb = err_lb[['RP-PCA','PCA-XC_pa','PCA-XC_par','PCA-XC_sr','PCA-XC_s',][:3]]
            err_lb = err_lb.rename(rename_1, axis=1)

            # Plot
            for ii, (estm_nm, cent) in enumerate(cent_stat.items()):
                x = np.array(cent.index) # 1-d array
                y = np.array(cent.values) # 1-d array

                if estm_nm == 'PCA':
                    axes[i,j].plot(
                        x,
                        y,
                        color=colors[ii],
                        linestyle=line_styles[ii],
                        label=estm_nm
                    )
                else:
                    axes[i,j].errorbar(
                        x,
                        y,
                        yerr=(y-err_lb[estm_nm].values),
                        ecolor=colors[ii],
                        color=colors[ii],
                        linestyle=line_styles[ii],
                        capsize=5,
                        marker=markers[ii],
                        markersize=markersize+2,
                        markerfacecolor='none',
                        label=estm_nm
                    )

            # Set the scale on the axes being drawn instead of relying on
            # pyplot's implicit "current axes".
            axes[i,j].set_xscale('log')
            axes[i,j].set_xlabel('')
            axes[i,j].grid(True)
            axes[i,j].xaxis.set_tick_params(labelsize=font_size['xylabel']-2)
            axes[i,j].yaxis.set_tick_params(labelsize=font_size['xylabel']-2)
            if j == 0:
                # Only weak-factor scenarios carry 'rhs_factors'; fall back to
                # the second label instead of raising KeyError for the others.
                if 'SMB' in scenario.get('rhs_factors', ()):
                    axes[i,j].set_ylabel(r'$\Sigma_E=\Sigma_1$', fontsize=font_size['xylabel'])
                else:
                    axes[i,j].set_ylabel(r'$\Sigma_E=\Sigma_2$', fontsize=font_size['xylabel'])

                axes[i,j].set_ylim(0.104,0.15)
            if i == 0:
                axes[i,j].set_title(rename_2(metric), fontsize=font_size['subplot_title'])

    fig.legend(*axes[0,0].get_legend_handles_labels(),
               fontsize=font_size['legend'],
               bbox_to_anchor=(0.9,0.9,1,0),
               loc='upper left',
               ncol=1)

    fig.supxlabel(r'Regularization parameter $\eta$', fontsize=font_size['xylabel'], y=0)

    ############################################################################################################
    # 10. Save the figure.
    ############################################################################################################
    # Write the figure as EPS into the configured output directory.
    filepath = os.path.join(config['outpath_dir'], "economic_performance.eps")
    fig.savefig(filepath, format="eps", dpi=1200, bbox_inches="tight")
    print(f"The figure is saved in {filepath}.")

    # Final wall-clock report, split into days / hours / minutes / seconds.
    elapsed_time = int(time.time() - strt_time)
    days, rem = divmod(elapsed_time, 86400)
    hrs, rem = divmod(rem, 3600)
    mins, secs = divmod(rem, 60)
    print(f"Done! -- Elapsed time: {days} day {hrs} hr {mins} min {secs} sec")

