# -*- coding: utf-8 -*-
from signatures import *
from sklearn.linear_model import LassoLarsCV


##############################################################################################
# Experiment: Brownian motion (Figure A9 in the appendix)


# Fix both random streams so a run is reproducible: the stdlib generator is
# used for random.sample and NumPy's for every randn draw in the experiment.
random.seed(0)
np.random.seed(0)


# ---- Experiment configuration ----
random_experi_num = 1000  # repetitions per (true_factor_num, rho) cell
sample_num = 100          # rows in the training split; the test split is the same size
path_length = 100         # number of increments in each simulated path
T = 1                     # presumably the time horizon; not referenced in the visible script
sig_degree = 4            # truncation degree handed to the signature helpers
d = 2                     # path dimension handed to calculate_sig_num
sig_num = calculate_sig_num(sig_degree, d)  # number of signature features per path

# Parameter grid: number of active coefficients and correlation levels.
true_factor_num_range = [2, 4, 6, 8]
rho_range = np.arange(-0.9, 1, 0.1)


def _new_summary_table():
    """Return an empty table with one row per factor count and one column per rho."""
    return pd.DataFrame(columns=rho_range, index=true_factor_num_range)


# One summary table per metric; the "_I" / "_S" suffixes pair with the
# method="Ito" / method="S" signature variants used in the experiment loop.
correct_rate_I_list, correct_rate_S_list = _new_summary_table(), _new_summary_table()
avg_max_precision_I_list, avg_max_precision_S_list = _new_summary_table(), _new_summary_table()
avg_max_recall_I_list, avg_max_recall_S_list = _new_summary_table(), _new_summary_table()
avg_max_F1_I_list, avg_max_F1_S_list = _new_summary_table(), _new_summary_table()
avg_MSE_I_list, avg_MSE_S_list = _new_summary_table(), _new_summary_table()


def _blank_trial_table():
    """Return an empty table with one row per repetition and one column per rho."""
    return pd.DataFrame(columns=rho_range, index=range(random_experi_num))


def _mean_over_trials(trial_table):
    """Average every rho column of a per-repetition table over the repetitions."""
    return np.sum(trial_table, axis=0) / random_experi_num


# Scalars that do not change anywhere inside the loops.
total_paths = sample_num*2                # first half is the training split, second half the test split
step_scale = np.sqrt(1/path_length)       # scales the cumulative sums of the standard normal draws
noise_scale = np.sqrt(0.00000001)         # standard deviation of the additive noise on the targets

for true_factor_num in true_factor_num_range:
    # Per-repetition results for this number of active coefficients.
    select_result_I_list, select_result_S_list = _blank_trial_table(), _blank_trial_table()
    max_precision_I_list, max_precision_S_list = _blank_trial_table(), _blank_trial_table()
    max_recall_I_list, max_recall_S_list = _blank_trial_table(), _blank_trial_table()
    max_F1_I_list, max_F1_S_list = _blank_trial_table(), _blank_trial_table()
    MSE_I_list, MSE_S_list = _blank_trial_table(), _blank_trial_table()

    for rho in rho_range:

        rho1 = rho2 = rho3 = rho
        # Only rho1 is used below; Omega is built but not referenced in the visible script.
        Omega = np.array([[1, rho1, rho2], [rho1, 1, rho3], [rho2, rho3, 1]])
        ortho_weight = np.sqrt(1-rho1**2)

        for exp in range(random_experi_num):
            print(rho, exp)
            signature_list_I = pd.DataFrame(np.zeros((total_paths, sig_num)))
            signature_list_S = pd.DataFrame(np.zeros((total_paths, sig_num)))

            # Draw the active feature positions first, then their coefficients
            # (the draw order matters for reproducibility under the fixed seeds).
            beta_location = random.sample(range(sig_num), true_factor_num)
            beta_values = np.random.randn(true_factor_num)

            for i in range(total_paths):
                # Two independent scaled random walks, mixed so that the second
                # coordinate is a rho1 / sqrt(1 - rho1^2) combination of both.
                Z1_list = np.cumsum(np.random.randn(path_length)) * step_scale
                Z2_list = np.cumsum(np.random.randn(path_length)) * step_scale
                X_all = np.array([Z1_list, rho1 * Z1_list + ortho_weight * Z2_list]).T

                # Signature features of the same path under both methods
                # ("S" is presumably Stratonovich; confirm in the signatures module).
                signature_list_I.loc[i,:] = calculate_signature_to_K(X_all, sig_degree, method="Ito")
                signature_list_S.loc[i,:] = calculate_signature_to_K(X_all, sig_degree, method="S")

            # Column scale computed from ALL rows (train and test together):
            # sqrt(sum of squares / 2). Both splits are divided by the same factor.
            X_list_I = np.array(signature_list_I)
            X_list_S = np.array(signature_list_S)
            scale_I = np.sqrt(np.sum(X_list_I**2, axis=0)/2)
            scale_S = np.sqrt(np.sum(X_list_S**2, axis=0)/2)

            X_list_I_tra = np.array(signature_list_I.iloc[:sample_num,:]) / scale_I
            X_list_S_tra = np.array(signature_list_S.iloc[:sample_num,:]) / scale_S
            X_list_I_tes = np.array(signature_list_I.iloc[sample_num:,:]) / scale_I
            X_list_S_tes = np.array(signature_list_S.iloc[sample_num:,:]) / scale_S

            # Sparse linear targets plus small Gaussian noise; the four noise
            # draws stay in this order (I train, S train, I test, S test).
            y_list_I_tra = X_list_I_tra[:,beta_location] @ beta_values + np.random.randn(sample_num)*noise_scale
            y_list_S_tra = X_list_S_tra[:,beta_location] @ beta_values + np.random.randn(sample_num)*noise_scale
            y_list_I_tes = X_list_I_tes[:,beta_location] @ beta_values + np.random.randn(sample_num)*noise_scale
            y_list_S_tes = X_list_S_tes[:,beta_location] @ beta_values + np.random.randn(sample_num)*noise_scale

            # Out-of-sample mean squared error of a 5-fold cross-validated Lasso-LARS fit.
            reg_I = LassoLarsCV(cv=5).fit(X_list_I_tra, y_list_I_tra)
            residual_I = reg_I.predict(X_list_I_tes) - y_list_I_tes
            MSE_I = np.mean(residual_I**2)

            reg_S = LassoLarsCV(cv=5).fit(X_list_S_tra, y_list_S_tra)
            residual_S = reg_S.predict(X_list_S_tes) - y_list_S_tes
            MSE_S = np.mean(residual_S**2)

            MSE_I_list.loc[exp, rho] = MSE_I
            MSE_S_list.loc[exp, rho] = MSE_S

            # Full Lasso regularisation paths; only the coefficient paths are kept.
            coefs_I = linear_model.lars_path(X_list_I_tra, y_list_I_tra, method="lasso", verbose=True)[2]
            coefs_S = linear_model.lars_path(X_list_S_tra, y_list_S_tra, method="lasso", verbose=True)[2]

            # Selection outcome along each path (semantics defined by the project helper).
            select_result_I_list.loc[exp, rho] = check_Lasso_select_result(coefs_I, beta_location, beta_values)
            select_result_S_list.loc[exp, rho] = check_Lasso_select_result(coefs_S, beta_location, beta_values)

            # Precision / recall / F1 returned by the project helper for each path.
            max_precision_I, max_recall_I, max_F1_I = check_Lasso_confusion_matrix(coefs_I, beta_location, beta_values)
            max_precision_S, max_recall_S, max_F1_S = check_Lasso_confusion_matrix(coefs_S, beta_location, beta_values)

            max_precision_I_list.loc[exp, rho] = max_precision_I
            max_precision_S_list.loc[exp, rho] = max_precision_S
            max_recall_I_list.loc[exp, rho] = max_recall_I
            max_recall_S_list.loc[exp, rho] = max_recall_S
            max_F1_I_list.loc[exp, rho] = max_F1_I
            max_F1_S_list.loc[exp, rho] = max_F1_S

    # Collapse the repetitions into the row of each summary table for this factor count.
    avg_MSE_I_list.loc[true_factor_num, :] = _mean_over_trials(MSE_I_list)
    avg_MSE_S_list.loc[true_factor_num, :] = _mean_over_trials(MSE_S_list)

    correct_rate_I_list.loc[true_factor_num, :] = _mean_over_trials(select_result_I_list)
    correct_rate_S_list.loc[true_factor_num, :] = _mean_over_trials(select_result_S_list)

    avg_max_precision_I_list.loc[true_factor_num, :] = _mean_over_trials(max_precision_I_list)
    avg_max_precision_S_list.loc[true_factor_num, :] = _mean_over_trials(max_precision_S_list)

    avg_max_recall_I_list.loc[true_factor_num, :] = _mean_over_trials(max_recall_I_list)
    avg_max_recall_S_list.loc[true_factor_num, :] = _mean_over_trials(max_recall_S_list)

    avg_max_F1_I_list.loc[true_factor_num, :] = _mean_over_trials(max_F1_I_list)
    avg_max_F1_S_list.loc[true_factor_num, :] = _mean_over_trials(max_F1_S_list)

    # Rewrite every CSV after each factor count, so partial results are on
    # disk even if a later iteration is interrupted.
    for summary_table, csv_name in (
        (avg_MSE_I_list, 'Experiment_otherdef_MSE_I.csv'),
        (avg_MSE_S_list, 'Experiment_otherdef_MSE_S.csv'),
        (correct_rate_I_list, 'Experiment_otherdef_correct_rate_I.csv'),
        (correct_rate_S_list, 'Experiment_otherdef_correct_rate_S.csv'),
        (avg_max_precision_I_list, 'Experiment_otherdef_avg_max_precision_I.csv'),
        (avg_max_precision_S_list, 'Experiment_otherdef_avg_max_precision_S.csv'),
        (avg_max_recall_I_list, 'Experiment_otherdef_avg_max_recall_I.csv'),
        (avg_max_recall_S_list, 'Experiment_otherdef_avg_max_recall_S.csv'),
        (avg_max_F1_I_list, 'Experiment_otherdef_avg_max_F1_I.csv'),
        (avg_max_F1_S_list, 'Experiment_otherdef_avg_max_F1_S.csv'),
    ):
        summary_table.to_csv(csv_name)





# ---- Figure A9: averaged out-of-sample MSE as a function of rho ----
# Reload the averaged test-set MSE tables from disk (rows: number of true
# factors q, columns: rho), so the figure can be regenerated from the CSVs.
data_I = pd.read_csv('Experiment_otherdef_MSE_I.csv', index_col=0)
data_S = pd.read_csv('Experiment_otherdef_MSE_S.csv', index_col=0)

# Column labels come back from the CSV header as strings; convert them to
# floats so they are plotted on a numeric x-axis.
data_I.columns = [float(x) for x in data_I.columns]
data_S.columns = [float(x) for x in data_S.columns]


plt.figure(figsize=(10,6))

linecolor = ['tab:blue', 'tab:orange', 'tab:green']

# NOTE(review): the last row of each table is left out of the figure, as in
# the original loop bound (rows - 1); confirm this is intended.
# Solid lines with circles: "I" tables; these carry the legend entries.
for color, (q, data) in zip(linecolor, data_I.iloc[:-1].iterrows()):
    plt.plot(data, marker="o", markersize=10, markeredgewidth=2, linewidth=2, markerfacecolor='none',linestyle="-",color=color,label='$q=$'+str(q))
# Dashed lines with triangles: "S" tables, same colour per q, no legend entry.
for color, (_, data) in zip(linecolor, data_S.iloc[:-1].iterrows()):
    plt.plot(data, marker="^", markersize=10, markeredgewidth=2, linewidth=2, markerfacecolor='none',linestyle="--",color=color)

plt.legend(fontsize=18, loc="upper right")
plt.xlabel("$\\rho$", fontsize=20)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.xlim(-1, 1)
# The plotted values are mean squared errors (read from the *_MSE_* files),
# so the axis is labelled as MSE rather than R^2.
plt.ylabel("Out-of-sample MSE", fontsize=20)
plt.grid()
plt.savefig('Figure_A9.pdf', bbox_inches = 'tight' , dpi=150, pad_inches = 0.05)
    




