# load p_values.pkl, errors.pkl, errors_proxy.pkl, errors_subject.pkl
import pickle
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import scipy.stats as stats

# Dataset identifiers grouped by task; each one names a sub-directory of ./expdata.
dataset_names_regression = ['tereco', 'calculus', 'kin8nm', 'nuscale', 'ad']
dataset_names_classification = ['withdraw', 'higgs', 'grid', 'insurance', 'climate', 'alert',]

# Load the pickled p-values of every regression dataset.
# NOTE(review): pickle.load can execute arbitrary code; only read trusted experiment files.
p_values_regression = {}
p_values_regression_key_names = None
for dataset_name in dataset_names_regression:
    pkl_path = './expdata/' + dataset_name + '/p_values.pkl'
    with open(pkl_path, 'rb') as f:
        p_values_regression[dataset_name] = pickle.load(f)
    # Key view of the most recently loaded dataset.
    p_values_regression_key_names = p_values_regression[dataset_name].keys()
    # For every key containing 'fusion', compute the share of p-values >= 0.05.
    # The ratio is neither stored nor printed here; it only exists for ad-hoc inspection.
    for key in p_values_regression_key_names:
        if 'fusion' in key:
            list_p = p_values_regression[dataset_name][key]
            n_at_least_005 = sum(1 for p_value in list_p if p_value >= 0.05)
            ratio = n_at_least_005 / len(list_p)

# Load the pickled p-values of every classification dataset.
p_values_classification = {}
p_values_classification_key_names = None
for dataset_name in dataset_names_classification:
    pkl_path = './expdata/' + dataset_name + '/p_values.pkl'
    with open(pkl_path, 'rb') as f:
        loaded_p_values = pickle.load(f)
    p_values_classification[dataset_name] = loaded_p_values
    # Key view of the most recently loaded dataset.
    p_values_classification_key_names = loaded_p_values.keys()
#         for key in p_values_classification_key_names:
#         # only keep key with fusion, 
#             if 'fusion' not in key:
#                 continue
#             # get the ratio that pvalue > 0.05,
#             list_p = p_values_classification[dataset_name][key]
#             # print(dataset_name, key, p_values_regression[dataset_name][key])
#             ratio = len([x for x in list_p if x >= 0.05]) / len(list_p)
#             print(dataset_name, key, ratio)
# input()

# Load the pickled errors of every regression dataset.
errors_regression = {}
errors_regression_key_names = None
for dataset_name in dataset_names_regression:
    pkl_path = './expdata/' + dataset_name + '/errors.pkl'
    with open(pkl_path, 'rb') as f:
        loaded_errors = pickle.load(f)
    errors_regression[dataset_name] = loaded_errors
    # Key view of the most recently loaded dataset; later sections iterate
    # these keys for every regression dataset.
    errors_regression_key_names = loaded_errors.keys()

# # check 30 subset bias, 
# p_unbias = {}
# for dataset_name in dataset_names_regression:
#     p_unbias[dataset_name] = {}
#     for key_name in errors_regression_key_names:
#         n_exp = 30
#         p_unbias[dataset_name][key_name] = []
#         for exp_i in range(n_exp):
#             n_group = 30
#             group_means = []
#             for i in range(n_group):
#                 # randomly select half errors from error_list, 
#                 selected_errors = np.random.choice(errors_regression[dataset_name][key_name][exp_i], int(len(errors_regression[dataset_name][key_name][exp_i])/2))
#                 t, p = stats.ttest_1samp(selected_errors, 0) # is the mean of this group 0?
#                 # group_means.append(np.mean(selected_errors))
#                 p_unbias[dataset_name][key_name].append(p)
#             # hypothesis test, null: mean is 0, 
#             # t, p = stats.ttest_1samp(group_means, 0)
#             # p_unbias[dataset_name][key_name].append(p)

# # print ratio that pvalue > 0.05,
# for dataset_name in dataset_names_regression:
#     for key in errors_regression_key_names:
#         # only keep key with fusion,
#         if 'fusion' not in key:
#             continue
#         # get the ratio that pvalue > 0.05,
#         list_p = p_unbias[dataset_name][key]
#         # print(dataset_name, key, p_values_regression[dataset_name][key])
#         ratio = len([x for x in list_p if x >= 0.05]) / len(list_p)
#         print(dataset_name, key, ratio)
# input()

# Load the pickled errors of every classification dataset.
errors_classification = {}
errors_classification_key_names = None
for dataset_name in dataset_names_classification:
    pkl_path = './expdata/' + dataset_name + '/errors.pkl'
    with open(pkl_path, 'rb') as f:
        loaded_errors = pickle.load(f)
    errors_classification[dataset_name] = loaded_errors
    # Key view of the most recently loaded dataset; later sections iterate
    # these keys for every classification dataset.
    errors_classification_key_names = loaded_errors.keys()

# Bias check on the classification errors.
# For each experiment, draw n_group random subsets holding half as many
# errors as the experiment and run a one-sample t-test of the null hypothesis
# "the subset mean is 0". All p-values are collected per dataset and key in
# p_unbias[dataset_name][key_name].
p_unbias = {}
n_group = 30  # number of random subsets drawn per experiment
for dataset_name in dataset_names_classification:
    p_unbias[dataset_name] = {}
    for key_name in errors_classification_key_names:
        experiments = errors_classification[dataset_name][key_name]
        # Iterate over the experiments that were actually saved instead of a
        # hard-coded 30, matching how the rest of this script counts them.
        n_exp = len(experiments)
        p_unbias[dataset_name][key_name] = []
        for exp_i in range(n_exp):
            exp_errors = experiments[exp_i]
            half_size = int(len(exp_errors) / 2)
            for _ in range(n_group):
                # np.random.choice samples WITH replacement by default.
                # NOTE(review): confirm that resampling with replacement is intended.
                selected_errors = np.random.choice(exp_errors, half_size)
                # Is the mean of this group 0?
                _, p = stats.ttest_1samp(selected_errors, 0)
                p_unbias[dataset_name][key_name].append(p)

# # print ratio that pvalue > 0.05,
# for dataset_name in dataset_names_classification:
#     for key in errors_classification_key_names:
#         # only keep key with fusion,
#         if 'fusion' not in key:
#             continue
#         # get the ratio that pvalue > 0.05,
#         list_p = p_unbias[dataset_name][key]
#         # print(dataset_name, key, p_values_regression[dataset_name][key])
#         ratio = len([x for x in list_p if x >= 0.05]) / len(list_p)
#         print(dataset_name, key, ratio)
# input()


# Load the errors_subject.pkl and errors_proxy.pkl files for both task types.
errors_subject_regression = {}
errors_subject_classification = {}
errors_proxy_regression = {}
errors_proxy_classification = {}

# One row per loader: (target dict, dataset names, file name inside the dataset directory).
# The rows are processed in the same order as the files were originally read.
loaded_key_names = []
for target, names, file_name in (
    (errors_subject_regression, dataset_names_regression, '/errors_subject.pkl'),
    (errors_subject_classification, dataset_names_classification, '/errors_subject.pkl'),
    (errors_proxy_regression, dataset_names_regression, '/errors_proxy.pkl'),
    (errors_proxy_classification, dataset_names_classification, '/errors_proxy.pkl'),
):
    last_key_names = None
    for dataset_name in names:
        with open('./expdata/' + dataset_name + file_name, 'rb') as f:
            target[dataset_name] = pickle.load(f)
        last_key_names = target[dataset_name].keys()
    loaded_key_names.append(last_key_names)

# Key view of the last dataset loaded by each loader (None if its name list is empty).
(errors_subject_regression_key_names,
 errors_subject_classification_key_names,
 errors_proxy_regression_key_names,
 errors_proxy_classification_key_names) = loaded_key_names

# ------------------------------------------ regression rmse ------------------------------------------
# Keys shown in the RMSE figure.
rmse_shownames = [
    'linear_rmse_fusion', 'mlp_rmse_fusion', 'svm_rmse_fusion',
    'rf_rmse_fusion', 'lgbm_rmse_fusion', 'xgb_rmse_fusion',
    'catboost_rmse_fusion', 'rmse_holdout100_fusion', 'rmse_holdout50_fusion',
    'rmse_holdout10_fusion', 'rmse_cv5_fusion', 'rmse_cv10_fusion',
    'rmse_bootstrap_fusion',
]

# errors_rmse_regression:      dataset -> key -> mean negative RMSE over experiments
# errors_rmse_regression_list: dataset -> key -> negative RMSE of each experiment
errors_rmse_regression = {}
errors_rmse_regression_list = {}
for dataset_name in dataset_names_regression:
    errors_rmse_regression[dataset_name] = {}
    errors_rmse_regression_list[dataset_name] = {}
    for key_name in errors_regression_key_names:
        experiments = errors_regression[dataset_name][key_name]
        # Negative RMSE of the error array of every experiment, so larger is better.
        negative_rmses = [
            -np.sqrt(np.mean(np.square(experiments[exp_idx])))
            for exp_idx in range(len(experiments))
        ]
        errors_rmse_regression_list[dataset_name][key_name] = negative_rmses
        mean_negative_rmse = np.mean(list(negative_rmses))
        errors_rmse_regression[dataset_name][key_name] = mean_negative_rmse
        print(dataset_name, key_name, mean_negative_rmse)

# Pause until the user presses Enter.
input()

# Relative error change of 'linear_rmse_fusion' with respect to 'rmse_holdout100_fusion'.
reduced_error = {}
for dataset_name in dataset_names_regression:
    per_key = errors_rmse_regression[dataset_name]
    linear_value = per_key['linear_rmse_fusion']
    baseline_value = per_key['rmse_holdout100_fusion']
    reduced_error[dataset_name] = -(linear_value - baseline_value) / baseline_value

print("rmse", reduced_error)

# Min-max normalise the mean negative RMSE inside each dataset, restricted to
# the keys in rmse_shownames, and collect one list of values per key.
errors_rmse_regression_normalized = {}
errors_rmse_regression_normalized_list = {showname: [] for showname in rmse_shownames}
for dataset_name in dataset_names_regression:
    selected = {
        showname: errors_rmse_regression[dataset_name][showname]
        for showname in rmse_shownames
    }
    lowest = min(selected.values())
    highest = max(selected.values())
    errors_rmse_regression_normalized[dataset_name] = {
        showname: (value - lowest) / (highest - lowest)
        for showname, value in selected.items()
    }
    for showname in rmse_shownames:
        errors_rmse_regression_normalized_list[showname].append(
            errors_rmse_regression_normalized[dataset_name][showname]
        )

# One column per key, one row per dataset.
df = pd.DataFrame(errors_rmse_regression_normalized_list)
# Strip the '_fusion' suffix and the '_rmse' / 'rmse_' markers from the column names.
for token in ('_fusion', '_rmse', 'rmse_'):
    df.columns = df.columns.str.replace(token, '')
# Column order used in the figure.
x_names = ['holdout100', 'holdout50', 'holdout10', 'cv5', 'cv10', 'bootstrap',
           'linear', 'mlp', 'svm', 'rf', 'lgbm', 'xgb', 'catboost']
df = df.loc[:, x_names]
df.columns = df.columns.str.replace('svm', 'svr')
# Final display labels, in the same order as x_names.
df.columns = ['Holdout100', 'Holdout50', 'Holdout10', 'CV5', 'CV10', 'Bootstrap',
              'HetEM-Linear', 'HetEM-MLP', 'HetEM-SVR', 'HetEM-RF', 'HetEM-LGBM', 'HetEM-XGB', 'HetEM-CatBoost']

# Bar plot with seaborn's default error bars across the dataset rows.
plt.figure(figsize=(10, 8))
sns.barplot(data=df)
plt.xticks(rotation=90)
plt.xlabel('Evaluation Model')
plt.ylabel('Normalized Negative RMSE')
plt.show()
# 0.108,1.193,0.93,0.905,0.2,0.2
# # # ------------------------------------------------------------------------------------------------------------------------------

# ------------------------------------------ regression r2 ------------------------------------------
# Mean negative RMSE of the per-experiment error arrays, computed for every key
# in errors_regression_key_names (dataset -> key -> value).
errors_r2_regression = {}
for dataset_name in dataset_names_regression:
    errors_r2_regression[dataset_name] = {}
    for key_name in errors_regression_key_names:
        experiments = errors_regression[dataset_name][key_name]
        negative_rmses = [
            -np.sqrt(np.mean(np.square(experiments[exp_idx])))
            for exp_idx in range(len(experiments))
        ]
        mean_negative_rmse = np.mean(negative_rmses)
        errors_r2_regression[dataset_name][key_name] = mean_negative_rmse
        print(dataset_name, key_name, mean_negative_rmse)

# Pause until the user presses Enter.
input()

# Relative error change of 'linear_r2_fusion' with respect to 'r2_holdout100_fusion'.
reduced_error = {}
for dataset_name in dataset_names_regression:
    per_key = errors_r2_regression[dataset_name]
    linear_value = per_key['linear_r2_fusion']
    baseline_value = per_key['r2_holdout100_fusion']
    reduced_error[dataset_name] = -(linear_value - baseline_value) / baseline_value
print("r2", reduced_error)

# Keys shown in the R2 figure.
r2_shownames = ['linear_r2_fusion', 'mlp_r2_fusion', 'svm_r2_fusion',
            'rf_r2_fusion', 'lgbm_r2_fusion', 'xgb_r2_fusion',
            'catboost_r2_fusion','r2_holdout100_fusion','r2_holdout50_fusion',
            'r2_holdout10_fusion','r2_cv5_fusion','r2_cv10_fusion',
            'r2_bootstrap_fusion']

# Min-max normalise the aggregated values inside each dataset, restricted to
# the keys in r2_shownames, and collect one list of values per key.
errors_r2_regression_normalized = {}
errors_r2_regression_normalized_list = {showname: [] for showname in r2_shownames}
for dataset_name in dataset_names_regression:
    selected = {
        showname: errors_r2_regression[dataset_name][showname]
        for showname in r2_shownames
    }
    min_rmse = min(selected.values())
    max_rmse = max(selected.values())
    errors_r2_regression_normalized[dataset_name] = {
        showname: (value - min_rmse) / (max_rmse - min_rmse)
        for showname, value in selected.items()
    }
    for showname in r2_shownames:
        errors_r2_regression_normalized_list[showname].append(
            errors_r2_regression_normalized[dataset_name][showname]
        )

# One column per key, one row per dataset.
df = pd.DataFrame(errors_r2_regression_normalized_list)
# Strip the '_fusion' suffix and the '_r2' / 'r2_' markers from the column
# names; regex=False makes the literal replacement explicit.
for token in ('_fusion', '_r2', 'r2_'):
    df.columns = df.columns.str.replace(token, '', regex=False)
# Column order used in the figure.
x_names = ['holdout100', 'holdout50', 'holdout10', 'cv5', 'cv10', 'bootstrap',
           'linear', 'mlp', 'svm', 'rf', 'lgbm', 'xgb', 'catboost']
df = df[x_names]
# Final display labels, in the same order as x_names. The SVM column is
# labelled 'HetEM-SVR' so this regression figure matches the regression RMSE
# figure (the previous label here was 'HetEM-SVM').
df.columns = ['Holdout100', 'Holdout50', 'Holdout10', 'CV5', 'CV10', 'Bootstrap',
              'HetEM-Linear', 'HetEM-MLP', 'HetEM-SVR', 'HetEM-RF', 'HetEM-LGBM', 'HetEM-XGB', 'HetEM-CatBoost']

# Bar plot with seaborn's default error bars across the dataset rows.
plt.figure(figsize=(10, 8))
sns.barplot(data=df)
plt.xticks(rotation=90)
plt.ylabel('Normalized Negative RMSE')
plt.xlabel('Evaluation Model')
plt.show()
# # ------------------------------------------------------------------------------------------------------------------------------


# ------------------------------------------ classification rocauc ------------------------------------------
# Mean negative RMSE of the per-experiment error arrays, computed for every key
# in errors_classification_key_names (dataset -> key -> value).
errors_rocauc_classification = {}
for dataset_name in dataset_names_classification:
    errors_rocauc_classification[dataset_name] = {}
    for key_name in errors_classification_key_names:
        experiments = errors_classification[dataset_name][key_name]
        negative_rmses = [
            -np.sqrt(np.mean(np.square(experiments[exp_idx])))
            for exp_idx in range(len(experiments))
        ]
        mean_negative_rmse = np.mean(negative_rmses)
        errors_rocauc_classification[dataset_name][key_name] = mean_negative_rmse
        print(dataset_name, key_name, mean_negative_rmse)

# Pause until the user presses Enter.
input()

# Relative error change of 'linear_rocauc_fusion' with respect to 'rocauc_holdout100_fusion'.
reduced_error = {}
for dataset_name in dataset_names_classification:
    per_key = errors_rocauc_classification[dataset_name]
    linear_value = per_key['linear_rocauc_fusion']
    baseline_value = per_key['rocauc_holdout100_fusion']
    reduced_error[dataset_name] = -(linear_value - baseline_value) / baseline_value
print("rocauc", reduced_error)

# Keys used for the ROC-AUC normalisation.
# NOTE(review): 'rocauc_holdout20_fusion' takes part in the min/max below but is
# not among the plotted columns (x_names); confirm this is intended.
rocauc_shownames = [
    'linear_rocauc_fusion', 'mlp_rocauc_fusion', 'svm_rocauc_fusion',
    'rf_rocauc_fusion', 'lgbm_rocauc_fusion', 'xgb_rocauc_fusion',
    'catboost_rocauc_fusion', 'rocauc_holdout100_fusion', 'rocauc_holdout50_fusion',
    'rocauc_holdout20_fusion', 'rocauc_holdout10_fusion', 'rocauc_cv5_fusion', 'rocauc_cv10_fusion',
    'rocauc_bootstrap_fusion',
]

# Min-max normalise the aggregated values inside each dataset, restricted to
# the keys in rocauc_shownames, and collect one list of values per key.
errors_rmse_classification_normalized = {}
errors_rmse_classification_normalized_list = {showname: [] for showname in rocauc_shownames}
for dataset_name in dataset_names_classification:
    selected = {
        showname: errors_rocauc_classification[dataset_name][showname]
        for showname in rocauc_shownames
    }
    lowest = min(selected.values())
    highest = max(selected.values())
    errors_rmse_classification_normalized[dataset_name] = {
        showname: (value - lowest) / (highest - lowest)
        for showname, value in selected.items()
    }
    for showname in rocauc_shownames:
        errors_rmse_classification_normalized_list[showname].append(
            errors_rmse_classification_normalized[dataset_name][showname]
        )

# One column per key, one row per dataset.
df = pd.DataFrame(errors_rmse_classification_normalized_list)
# Strip the '_fusion' suffix and the '_rocauc' / 'rocauc_' markers from the column names.
for token in ('_fusion', '_rocauc', 'rocauc_'):
    df.columns = df.columns.str.replace(token, '')
# Columns kept for the figure, in display order.
x_names = ['holdout100', 'holdout50', 'holdout10', 'cv5', 'cv10', 'bootstrap',
           'linear', 'mlp', 'svm', 'rf', 'lgbm', 'xgb', 'catboost']
df = df.loc[:, x_names]
# Final display labels, in the same order as x_names.
df.columns = ['Holdout100', 'Holdout50', 'Holdout10', 'CV5', 'CV10', 'Bootstrap',
              'HetEM-Linear', 'HetEM-MLP', 'HetEM-SVM', 'HetEM-RF', 'HetEM-LGBM', 'HetEM-XGB', 'HetEM-CatBoost']

# Bar plot with seaborn's default error bars across the dataset rows.
plt.figure(figsize=(10, 8))
sns.barplot(data=df)
plt.xticks(rotation=90)
plt.xlabel('Evaluation Model')
plt.ylabel('Normalized Negative RMSE')
plt.show()
# #------------------------------------------------------------------------------------------------------------------------------


# ------------------------------------------ classification ACC ------------------------------------------
# Mean negative RMSE of the per-experiment error arrays, computed for every key
# in errors_classification_key_names (dataset -> key -> value).
errors_acc_classification = {}
for dataset_name in dataset_names_classification:
    errors_acc_classification[dataset_name] = {}
    for key_name in errors_classification_key_names:
        experiments = errors_classification[dataset_name][key_name]
        negative_rmses = [
            -np.sqrt(np.mean(np.square(experiments[exp_idx])))
            for exp_idx in range(len(experiments))
        ]
        mean_negative_rmse = np.mean(negative_rmses)
        errors_acc_classification[dataset_name][key_name] = mean_negative_rmse
        print(dataset_name, key_name, mean_negative_rmse)

# Pause until the user presses Enter.
input()

# Relative error change of 'linear_acc_fusion' with respect to 'acc_holdout100_fusion'.
reduced_error = {}
for dataset_name in dataset_names_classification:
    per_key = errors_acc_classification[dataset_name]
    linear_value = per_key['linear_acc_fusion']
    baseline_value = per_key['acc_holdout100_fusion']
    reduced_error[dataset_name] = -(linear_value - baseline_value) / baseline_value
print("acc", reduced_error)

# Keys used for the ACC normalisation.
# NOTE(review): 'acc_holdout20_fusion' takes part in the min/max below but is
# not among the plotted columns (x_names); confirm this is intended.
acc_shownames = [
    'linear_acc_fusion', 'mlp_acc_fusion', 'svm_acc_fusion',
    'rf_acc_fusion', 'lgbm_acc_fusion', 'xgb_acc_fusion',
    'catboost_acc_fusion', 'acc_holdout100_fusion', 'acc_holdout50_fusion',
    'acc_holdout20_fusion', 'acc_holdout10_fusion', 'acc_cv5_fusion', 'acc_cv10_fusion',
    'acc_bootstrap_fusion',
]

# Min-max normalise the aggregated values inside each dataset, restricted to
# the keys in acc_shownames, and collect one list of values per key.
errors_acc_classification_normalized = {}
errors_acc_classification_normalized_list = {showname: [] for showname in acc_shownames}
for dataset_name in dataset_names_classification:
    selected = {
        showname: errors_acc_classification[dataset_name][showname]
        for showname in acc_shownames
    }
    lowest = min(selected.values())
    highest = max(selected.values())
    errors_acc_classification_normalized[dataset_name] = {
        showname: (value - lowest) / (highest - lowest)
        for showname, value in selected.items()
    }
    for showname in acc_shownames:
        errors_acc_classification_normalized_list[showname].append(
            errors_acc_classification_normalized[dataset_name][showname]
        )

# One column per key, one row per dataset.
df = pd.DataFrame(errors_acc_classification_normalized_list)
# Strip the '_fusion' suffix and the '_acc' / 'acc_' markers from the column names.
for token in ('_fusion', '_acc', 'acc_'):
    df.columns = df.columns.str.replace(token, '')
# Columns kept for the figure, in display order.
x_names = ['holdout100', 'holdout50', 'holdout10', 'cv5', 'cv10', 'bootstrap',
           'linear', 'mlp', 'svm', 'rf', 'lgbm', 'xgb', 'catboost']
df = df.loc[:, x_names]
# Final display labels, in the same order as x_names.
df.columns = ['Holdout100', 'Holdout50', 'Holdout10', 'CV5', 'CV10', 'Bootstrap',
              'HetEM-Linear', 'HetEM-MLP', 'HetEM-SVM', 'HetEM-RF', 'HetEM-LGBM', 'HetEM-XGB', 'HetEM-CatBoost']

# Bar plot with seaborn's default error bars across the dataset rows.
plt.figure(figsize=(10, 8))
sns.barplot(data=df)
plt.xticks(rotation=90)
plt.xlabel('Evaluation Model')
plt.ylabel('Normalized Negative RMSE')
plt.title('Evaluate Classification Model by ACC')
plt.show()
# ------------------------------------------------------------------------------------------------------------------------------


# ----------------------------------------------------------------------------------------
# Shapley value of union (subject, proxy),
# get outcomes, 
# regression, rmse,
# Shapley values of subject ('s') and proxy ('p') for the regression RMSE keys.
# Each outcome is the mean negative RMSE over experiments:
#   's'    -> errors_subject_regression, 'linear_rmse_fusion'
#   'p'    -> errors_proxy_regression,   'linear_rmse_fusion'
#   'sp'   -> errors_regression,         'linear_rmse_fusion'
#   'zero' -> errors_regression,         'rmse_holdout100_fusion'
shapleys_rmse = {name: {'s': 0, 'p': 0} for name in dataset_names_regression}
outcomes_rmse = {
    name: {'s': 0, 'p': 0, 'sp': 0, 'zero': 0} for name in dataset_names_regression
}
for dataset_name in dataset_names_regression:
    # The number of experiments is taken from the subject-only errors.
    n_runs = len(errors_subject_regression[dataset_name]['linear_rmse_fusion'])
    outcome = outcomes_rmse[dataset_name]
    for coalition, source, error_key in (
        ('s', errors_subject_regression, 'linear_rmse_fusion'),
        ('p', errors_proxy_regression, 'linear_rmse_fusion'),
        ('sp', errors_regression, 'linear_rmse_fusion'),
        ('zero', errors_regression, 'rmse_holdout100_fusion'),
    ):
        outcome[coalition] = np.mean([
            -np.sqrt(np.mean(np.square(source[dataset_name][error_key][run_idx])))
            for run_idx in range(n_runs)
        ])
    print(dataset_name, outcome)

    # Average marginal contribution of each player over both join orders.
    shapley = shapleys_rmse[dataset_name]
    shapley['s'] = 0.5 * (outcome['sp'] - outcome['p'] + outcome['s'] - outcome['zero'])
    shapley['p'] = 0.5 * (outcome['sp'] - outcome['s'] + outcome['p'] - outcome['zero'])
    print(dataset_name, shapley)

# Shapley values of subject ('s') and proxy ('p') for the regression R2 keys.
# Each outcome is the mean negative RMSE over experiments:
#   's'    -> errors_subject_regression, 'linear_r2_fusion'
#   'p'    -> errors_proxy_regression,   'linear_r2_fusion'
#   'sp'   -> errors_regression,         'linear_r2_fusion'
#   'zero' -> errors_regression,         'r2_holdout100_fusion'
shapleys_r2 = {name: {'s': 0, 'p': 0} for name in dataset_names_regression}
outcomes_r2 = {
    name: {'s': 0, 'p': 0, 'sp': 0, 'zero': 0} for name in dataset_names_regression
}
for dataset_name in dataset_names_regression:
    # The number of experiments is taken from the subject-only errors.
    n_runs = len(errors_subject_regression[dataset_name]['linear_r2_fusion'])
    outcome = outcomes_r2[dataset_name]
    for coalition, source, error_key in (
        ('s', errors_subject_regression, 'linear_r2_fusion'),
        ('p', errors_proxy_regression, 'linear_r2_fusion'),
        ('sp', errors_regression, 'linear_r2_fusion'),
        ('zero', errors_regression, 'r2_holdout100_fusion'),
    ):
        outcome[coalition] = np.mean([
            -np.sqrt(np.mean(np.square(source[dataset_name][error_key][run_idx])))
            for run_idx in range(n_runs)
        ])
    print(dataset_name, outcome)

    # Average marginal contribution of each player over both join orders.
    shapley = shapleys_r2[dataset_name]
    shapley['s'] = 0.5 * (outcome['sp'] - outcome['p'] + outcome['s'] - outcome['zero'])
    shapley['p'] = 0.5 * (outcome['sp'] - outcome['s'] + outcome['p'] - outcome['zero'])
    print(dataset_name, shapley)

# Shapley values of subject ('s') and proxy ('p') for the classification ROC-AUC keys.
# Each outcome is the mean negative RMSE over experiments:
#   's'    -> errors_subject_classification, 'linear_rocauc_fusion'
#   'p'    -> errors_proxy_classification,   'linear_rocauc_fusion'
#   'sp'   -> errors_classification,         'linear_rocauc_fusion'
#   'zero' -> errors_classification,         'rocauc_holdout100_fusion'
shapleys_rocauc = {name: {'s': 0, 'p': 0} for name in dataset_names_classification}
outcomes_rocauc = {
    name: {'s': 0, 'p': 0, 'sp': 0, 'zero': 0} for name in dataset_names_classification
}
for dataset_name in dataset_names_classification:
    # The number of experiments is taken from the subject-only errors.
    n_runs = len(errors_subject_classification[dataset_name]['linear_rocauc_fusion'])
    outcome = outcomes_rocauc[dataset_name]
    for coalition, source, error_key in (
        ('s', errors_subject_classification, 'linear_rocauc_fusion'),
        ('p', errors_proxy_classification, 'linear_rocauc_fusion'),
        ('sp', errors_classification, 'linear_rocauc_fusion'),
        ('zero', errors_classification, 'rocauc_holdout100_fusion'),
    ):
        outcome[coalition] = np.mean([
            -np.sqrt(np.mean(np.square(source[dataset_name][error_key][run_idx])))
            for run_idx in range(n_runs)
        ])
    print(dataset_name, outcome)

    # Average marginal contribution of each player over both join orders.
    shapley = shapleys_rocauc[dataset_name]
    shapley['s'] = 0.5 * (outcome['sp'] - outcome['p'] + outcome['s'] - outcome['zero'])
    shapley['p'] = 0.5 * (outcome['sp'] - outcome['s'] + outcome['p'] - outcome['zero'])
    print(dataset_name, shapley)

# Shapley values of subject ('s') and proxy ('p') for the classification ACC keys.
# Each outcome is the mean negative RMSE over experiments:
#   's'    -> errors_subject_classification, 'linear_acc_fusion'
#   'p'    -> errors_proxy_classification,   'linear_acc_fusion'
#   'sp'   -> errors_classification,         'linear_acc_fusion'
#   'zero' -> errors_classification,         'acc_holdout100_fusion'
shapleys_acc = {name: {'s': 0, 'p': 0} for name in dataset_names_classification}
outcomes_acc = {
    name: {'s': 0, 'p': 0, 'sp': 0, 'zero': 0} for name in dataset_names_classification
}
for dataset_name in dataset_names_classification:
    # The number of experiments is taken from the subject-only errors.
    n_runs = len(errors_subject_classification[dataset_name]['linear_acc_fusion'])
    outcome = outcomes_acc[dataset_name]
    for coalition, source, error_key in (
        ('s', errors_subject_classification, 'linear_acc_fusion'),
        ('p', errors_proxy_classification, 'linear_acc_fusion'),
        ('sp', errors_classification, 'linear_acc_fusion'),
        ('zero', errors_classification, 'acc_holdout100_fusion'),
    ):
        outcome[coalition] = np.mean([
            -np.sqrt(np.mean(np.square(source[dataset_name][error_key][run_idx])))
            for run_idx in range(n_runs)
        ])
    print(dataset_name, outcome)

    # Average marginal contribution of each player over both join orders.
    shapley = shapleys_acc[dataset_name]
    shapley['s'] = 0.5 * (outcome['sp'] - outcome['p'] + outcome['s'] - outcome['zero'])
    shapley['p'] = 0.5 * (outcome['sp'] - outcome['s'] + outcome['p'] - outcome['zero'])
    print(dataset_name, shapley)

# Bar chart of the RMSE-based Shapley values (subject vs. proxy) per regression dataset.
# Display label of each dataset key; a key without an entry is shown as-is.
# Deriving the labels from dataset_names_regression keeps the rows aligned
# with the values even if the dataset list changes.
dataset_labels = {'tereco': 'TERECO', 'calculus': 'Calculus', 'kin8nm': 'kin8nm',
                  'nuscale': 'Nuscale', 'ad': 'Ad'}
shapley_rows = []
for dataset_name in dataset_names_regression:
    label = dataset_labels.get(dataset_name, dataset_name)
    for player_label, player_key in (('Subject', 's'), ('Proxy', 'p')):
        shapley_rows.append({
            'shapley': shapleys_rmse[dataset_name][player_key],
            'type': player_label,
            'dataset': label,
        })
df = pd.DataFrame(shapley_rows, columns=['shapley', 'type', 'dataset'])

plt.figure(figsize=(10, 8))
# The deprecated `ci=95` keyword is omitted: a 95% confidence interval is
# seaborn's default error bar in both older and newer releases.
sns.barplot(data=df, x='dataset', y='shapley', hue='type')
plt.xticks(rotation=90)
plt.ylabel('Shapley Value')
plt.xlabel('Dataset')
plt.show()

# Bar chart of the R2-based Shapley values (subject vs. proxy) per regression dataset.
# Display label of each dataset key; a key without an entry is shown as-is.
# Deriving the labels from dataset_names_regression keeps the rows aligned
# with the values even if the dataset list changes.
dataset_labels = {'tereco': 'TERECO', 'calculus': 'Calculus', 'kin8nm': 'kin8nm',
                  'nuscale': 'Nuscale', 'ad': 'Ad'}
shapley_rows = []
for dataset_name in dataset_names_regression:
    label = dataset_labels.get(dataset_name, dataset_name)
    for player_label, player_key in (('Subject', 's'), ('Proxy', 'p')):
        shapley_rows.append({
            'shapley': shapleys_r2[dataset_name][player_key],
            'type': player_label,
            'dataset': label,
        })
df = pd.DataFrame(shapley_rows, columns=['shapley', 'type', 'dataset'])
print(df)

plt.figure(figsize=(10, 8))
# The deprecated `ci=95` keyword is omitted: a 95% confidence interval is
# seaborn's default error bar in both older and newer releases.
sns.barplot(data=df, x='dataset', y='shapley', hue='type')
plt.xticks(rotation=90)
plt.ylabel('Shapley Value')
plt.xlabel('Dataset')
plt.show()

# Bar chart of the ROC-AUC-based Shapley values (subject vs. proxy) per classification dataset.
# Display label of each dataset key; a key without an entry is shown as-is.
# Deriving the labels from dataset_names_classification keeps the rows aligned
# with the values even if the dataset list changes.
dataset_labels = {'withdraw': 'Withdraw', 'higgs': 'Higgs', 'grid': 'Grid',
                  'insurance': 'Insurance', 'climate': 'Climate', 'alert': 'Alert'}
shapley_rows = []
for dataset_name in dataset_names_classification:
    label = dataset_labels.get(dataset_name, dataset_name)
    for player_label, player_key in (('Subject', 's'), ('Proxy', 'p')):
        shapley_rows.append({
            'shapley': shapleys_rocauc[dataset_name][player_key],
            'type': player_label,
            'dataset': label,
        })
df = pd.DataFrame(shapley_rows, columns=['shapley', 'type', 'dataset'])
print(df)

plt.figure(figsize=(10, 8))
# The deprecated `ci=95` keyword is omitted: a 95% confidence interval is
# seaborn's default error bar in both older and newer releases.
sns.barplot(data=df, x='dataset', y='shapley', hue='type')
plt.xticks(rotation=90)
plt.ylabel('Shapley Value')
plt.xlabel('Dataset')
plt.show()

# Bar chart of the ACC-based Shapley values (subject vs. proxy) per classification dataset.
# Display label of each dataset key; a key without an entry is shown as-is.
# Deriving the labels from dataset_names_classification keeps the rows aligned
# with the values even if the dataset list changes.
dataset_labels = {'withdraw': 'Withdraw', 'higgs': 'Higgs', 'grid': 'Grid',
                  'insurance': 'Insurance', 'climate': 'Climate', 'alert': 'Alert'}
shapley_rows = []
for dataset_name in dataset_names_classification:
    label = dataset_labels.get(dataset_name, dataset_name)
    for player_label, player_key in (('Subject', 's'), ('Proxy', 'p')):
        shapley_rows.append({
            'shapley': shapleys_acc[dataset_name][player_key],
            'type': player_label,
            'dataset': label,
        })
df = pd.DataFrame(shapley_rows, columns=['shapley', 'type', 'dataset'])

plt.figure(figsize=(10, 8))
# The deprecated `ci=95` keyword is omitted: a 95% confidence interval is
# seaborn's default error bar in both older and newer releases.
sns.barplot(data=df, x='dataset', y='shapley', hue='type')
plt.xticks(rotation=90)
plt.ylabel('Shapley Value')
plt.xlabel('Dataset')
plt.show()
# ----------------------------------------------------------------------------------------