import sys
sys.path.append("../../../src")

import os
from time import time
import numpy as np
import pandas as pd
import jax.numpy as jnp

from utils.kernel_utils import RBF, ColumnwiseRBF, MaternKernel, ColumnwiseMaternKernel
from causal_models.doubly_robust_pcl import DoublyRobustKernelProxyATE
from utils.experimental_data_functions import generate_synthetic_ATE_data
from utils.ml_utils import data_transform

# Make sure the output directory for the pickled results exists.
# `exist_ok=True` removes the check-then-create race of the former
# `os.path.exists` + `os.mkdir` pair: when several experiment scripts start
# at the same time, the loser of the race no longer dies with FileExistsError.
os.makedirs("../../Results", exist_ok=True)

# Experiment grid. The sweep below runs once per
# (data size, seed, Matern p) combination.
data_size_list = [2000]
seed_list = np.arange(start=0, stop=3000, step=100)  # 0, 100, ..., 2900
matern_p_list = [0, 1, 2, 3, 10]  # forwarded as `p` to the Matern kernels

# Empty results table; one row per (algorithm, data size, seed, p) is
# appended to it by the sweep.
result_columns = [
    "Algorithm",
    "Data_Size",
    "Seed",
    "matern_p",
    "Causal_MSE",
    "Algo_Run_Time",
]
df_results = pd.DataFrame(columns=result_columns)

# Main sweep. For every (data size, seed, Matern p) combination:
#   1. seed NumPy and generate a synthetic ATE data set,
#   2. pass A, Z, W, Y through `data_transform` and reshape to 2-D,
#   3. fit the doubly robust kernel proxy model and predict at `do_A`,
#   4. map the DR prediction and both bridge predictions back through the
#      Y transformer and score each against `EY_do_A` (mean squared error),
#   5. append one row per estimator (DRKPV, KPV, KAP) and re-pickle the table.
for sample_count in data_size_list:
    for run_seed in seed_list:
        for matern_order in matern_p_list:
            np.random.seed(run_seed)

            # The first returned array (U) is not used anywhere below.
            _, W, Z, A, Y, do_A, EY_do_A = generate_synthetic_ATE_data(size=sample_count, seed=run_seed)
            W, Z, A, Y, do_A, EY_do_A = (
                jnp.array(arr) for arr in (W, Z, A, Y, do_A, EY_do_A)
            )

            # Only the A and Y transformers are needed later (to transform the
            # query points and to invert the predictions).
            A_tr, A_transformer = data_transform(A)
            Z_tr, _ = data_transform(Z)
            W_tr, _ = data_transform(W)
            Y_tr, Y_transformer = data_transform(Y)

            # Force every transformed variable into (n_rows, features) layout.
            n_rows = A_tr.shape[0]
            A_tr = jnp.array(A_tr).reshape(n_rows, -1)
            Z_tr = jnp.array(Z_tr).reshape(n_rows, -1)
            W_tr = jnp.array(W_tr).reshape(n_rows, -1)
            Y_tr = jnp.array(Y_tr).reshape(n_rows, -1)

            # Timer covers configuration, fit, predict and inverse transforms.
            t_start = time()

            kernel_kwargs = {"p": matern_order, "use_length_scale_heuristic": True}
            reg_range = (5 * 1e-5, 1.0)  # shared search interval for all regularizers
            grid_points = 25

            # Treatment-bridge configuration.
            # No "kernel_X" entry: fit() below receives only (A, W, Z).
            treatment_bridge_params = {
                "kernel_A": MaternKernel(**kernel_kwargs),
                "kernel_W": ColumnwiseMaternKernel(**kernel_kwargs),
                "kernel_Z": MaternKernel(**kernel_kwargs),
                "lambda_": 1e-3,
                "eta": 1e-3,
                "lambda2_": 1e-3,
                "optimize_lambda_parameters": True,
                "optimize_eta_parameter": True,
                "lambda_optimization_range": reg_range,
                "eta_optimization_range": reg_range,
                "stage1_perc": 0.5,
                "regularization_grid_points": grid_points,
                "make_psd_eps": 1e-9,
                "label_variance_in_lambda_opt": 0.,
                "label_variance_in_eta_opt": 1.0,
            }

            # Outcome-bridge (KPV) configuration; again without "kernel_X".
            outcome_bridge_params = {
                "algorithm_name": "Kernel_Proxy_Variable",
                "kernel_A": MaternKernel(**kernel_kwargs),
                "kernel_W": MaternKernel(**kernel_kwargs),
                "kernel_Z": MaternKernel(**kernel_kwargs),
                "lambda1_": 0.1,
                "lambda2_": 0.1,
                "optimize_lambda1_parameter": True,
                "optimize_lambda2_parameter": True,
                "lambda1_optimization_range": reg_range,
                "lambda2_optimization_range": reg_range,
                "stage1_perc": 0.5,
                "regularization_grid_points": grid_points,
                "make_psd_eps": 1e-9,
            }

            model_DR = DoublyRobustKernelProxyATE(
                treatment_bridge_algo_param_dict=treatment_bridge_params,
                outcome_bridge_algo_param_dict=outcome_bridge_params,
                lambda_DR=1 * 1e-3,
                optimize_lambda_DR_parameter=True,
                lambda_DR_optimization_range=reg_range,
                regularization_grid_points=grid_points,
            )
            model_DR.fit((A_tr, W_tr, Z_tr), Y_tr)

            # Predict at the intervention points, in transformed A-space.
            n_query = do_A.shape[0]
            query_tr = A_transformer.transform(do_A).reshape(n_query, -1)
            pred_dr_tr = model_DR.predict(query_tr)

            # Bring the DR prediction and both bridge predictions (exposed as
            # attributes on the fitted model) back to the original Y scale.
            pred_dr = Y_transformer.inverse_transform(
                pred_dr_tr.reshape(n_query, -1)
            ).reshape(n_query, -1)
            pred_kap = Y_transformer.inverse_transform(
                model_DR.treatment_bridge_algo_pred.reshape(n_query, -1)
            ).reshape(n_query, -1)
            pred_kpv = Y_transformer.inverse_transform(
                model_DR.outcome_bridge_algo_pred.reshape(n_query, -1)
            ).reshape(n_query, -1)

            elapsed = time() - t_start

            # Mean squared error of each estimate against the reference curve.
            target = EY_do_A.reshape(-1, 1)
            mse_dr = np.mean((pred_dr.reshape(-1, 1) - target) ** 2)
            mse_kap = np.mean((pred_kap.reshape(-1, 1) - target) ** 2)
            mse_kpv = np.mean((pred_kpv.reshape(-1, 1) - target) ** 2)

            # One row per estimator. All three share the same run time because
            # they come out of the single fit/predict pass timed above.
            for algo_label, algo_mse in (
                ("DRKPV", mse_dr),
                ("KPV", mse_kpv),
                ("KAP", mse_kap),
            ):
                row = {
                    "Algorithm": algo_label,
                    "Data_Size": sample_count,
                    "Seed": run_seed,
                    "matern_p": matern_order,
                    "Causal_MSE": algo_mse,
                    "Algo_Run_Time": elapsed,
                }
                df_results = pd.concat([df_results, pd.DataFrame([row])], ignore_index=True)

            # Re-written after every configuration so partial results are kept
            # on disk if the sweep is interrupted.
            df_results.to_pickle("../../Results/DoublyRobustKPV_Matern_Ablation_SyntheticLowDim_Experiment.pkl")