# Directory holding the dataset CSV files read by the loading code below.
# NOTE(review): the "/content/drive" prefix looks like a Colab Google Drive
# mount -- confirm before running elsewhere.
DATA_PATH = "/content/drive/MyDrive/research/fair_dpsgd/data/"
# Directory that the generated PDF figures are written to via savefig.
FIG_PATH = "/content/drive/MyDrive/research/Pate_GroupDP/figures/fig1/"

from sklearn.preprocessing import StandardScaler

# Single StandardScaler instance shared by the dataset loading code below;
# it is re-fitted with fit_transform for each dataset whose features are scaled.
scaler = StandardScaler()

import torch, copy, pandas as pd, numpy as np
from torch import nn
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import accuracy_score

dataset_list = ['abalone', 'income', 'bank', 'credit_card', 'parkinsons', 'wine', 'stroke', 'churn']

# Columns that are never used as model features, for every dataset.
_COMMON_NON_FEATURES = ['label', 'z', 'is_train', 'y']

# Per-dataset loading rules:
#   (CSV file name, extra non-feature columns, standardize features?)
# Only 'bank' and 'income' are standardized here; the other files are used
# as stored.
_DATASET_SPECS = {
    'bank': ('bank.csv', ['age'], True),
    'income': ('income.csv', ['race'], True),
    'abalone': ('formated_abalone.csv', [], False),
    'parkinsons': ('formated_parkinsons.csv', [], False),
    'wine': ('formated_wine.csv', [], False),
    'stroke': ('formated_stroke.csv', [], False),
    'credit_card': ('formated_credit_card.csv', [], False),
    'churn': ('formated_churn.csv', [], False),
}
# Any unknown dataset name falls back to the churn file, as the original
# if/elif chain did in its final `else` branch.
_FALLBACK_SPEC = _DATASET_SPECS['churn']


def _mean_metric(results, column):
    """Return the mean of ``column`` in one results table."""
    return np.mean(results[column].values)


# For every dataset: fit a logistic regression on the training split, compute
# the per-group mean of p * (1 - p) over all rows, then draw one figure per
# metric with one panel per K showing each group's metric change relative to
# the baseline sigma. Figures are saved as PDFs under FIG_PATH.
#
# Relies on names defined outside this section: `scaler`, `LogisticRegression`,
# `plt` and `res_dict` (indexed as res_dict[data][K][sigma] -> table with
# 'group_<metric>_<g>' columns).
for data in dataset_list:
    sub_K_list = [10, 30, 60]

    # --- Dataset-level work: independent of the metric, so done once. ---
    try:
        csv_name, extra_excluded, standardize = _DATASET_SPECS.get(data, _FALLBACK_SPEC)
        pd00 = pd.read_csv(DATA_PATH + csv_name)
        excluded = set(_COMMON_NON_FEATURES) | set(extra_excluded)
        features = [x for x in pd00.columns if x not in excluded]
        if standardize:
            pd00[features] = scaler.fit_transform(pd00[features])

        train_pd = pd00[pd00['is_train'] == 1]
        x_train, y_train = train_pd[features].values, train_pd['y'].values
        logreg = LogisticRegression().fit(x_train, y_train)
        # Predict on plain arrays, matching how the model was fitted.
        y_pred = logreg.predict_proba(pd00[features].values)[:, 1]

        pd00['pred'] = y_pred
        # p * (1 - p) is largest when the predicted probability is near 0.5.
        pd00['inv_dist'] = pd00['pred'] * (1 - pd00['pred'])
        # Mean per protected group, ordered by group id `z`.
        dist_values = pd00.groupby('z')['inv_dist'].mean().sort_index().values
    except Exception as exc:
        # Best-effort: a dataset that cannot be prepared is skipped, but the
        # reason is reported instead of being silently discarded.
        print("Skipping dataset {!r}: {!r}".format(data, exc))
        continue

    for metric in ['acc', 'loss']:
        col_0 = 'group_{}_0'.format(metric)
        col_1 = 'group_{}_1'.format(metric)
        try:
            # Gather every curve before creating the figure so that a missing
            # result does not leave a half-drawn figure behind.
            curves = []
            for K in sub_K_list:
                by_sigma = res_dict[data][K]
                # Baseline run: sigma = 1e-10 when available, otherwise sigma = 1.
                baseline_sigma = 1e-10 if 1e-10 in by_sigma else 1
                non_priv_group_0 = _mean_metric(by_sigma[baseline_sigma], col_0)
                non_priv_group_1 = _mean_metric(by_sigma[baseline_sigma], col_1)

                sigma_list = list(by_sigma.keys())
                perf_group_0 = [_mean_metric(by_sigma[s], col_0) - non_priv_group_0
                                for s in sigma_list]
                perf_group_1 = [_mean_metric(by_sigma[s], col_1) - non_priv_group_1
                                for s in sigma_list]
                curves.append((sigma_list, perf_group_0, perf_group_1))

            fig, axes = plt.subplots(nrows=1, ncols=len(sub_K_list), sharex=True, sharey=True,
                                     figsize=(18, 6), squeeze=False)
            for ax, (sigma_list, perf_group_0, perf_group_1) in zip(axes[0], curves):
                ax.plot(sigma_list, perf_group_0, label='Group-0({})'.format(np.round(dist_values[0], 3)))
                ax.plot(sigma_list, perf_group_1, label='Group-1({})'.format(np.round(dist_values[1], 3)))
                ax.legend(fontsize=20)

            file_name = FIG_PATH + "motivation_{}_{}.pdf".format(data, metric)
            fig.savefig(file_name, dpi=300, bbox_inches='tight')
        except Exception as exc:
            # Best-effort: keep going with the remaining metrics/datasets, but
            # say why this figure was not produced.
            print("Skipping motivation figure for {!r}/{!r}: {!r}".format(data, metric, exc))

FIG_PATH = "/content/drive/MyDrive/research/Pate_GroupDP/figures/fig1/"
# NOTE(review): sigm_list is not referenced anywhere in this section; it is
# kept in case other code reads it.
sigm_list = [1, 30, 50, 60, 70, 80, 90, 100]

# For every dataset and metric, draw a 2 x (number of common K) grid:
# row 0 uses `org_res_dict`, row 1 uses `soft_res_dict`. Each panel shows both
# groups' metric change relative to the sigma = 1 run. Figures are saved as
# PDFs under FIG_PATH.
#
# Relies on names defined outside this section: `dataset_list`, `plt`,
# `org_res_dict` and `soft_res_dict` (indexed as d[data][K][sigma] -> table
# with 'group_<metric>_<g>' columns).
for data in dataset_list:
    if data not in org_res_dict or data not in soft_res_dict:
        print("Skipping comparison for {!r}: results missing".format(data))
        continue

    # Only K values present in both result sets can be compared; sorting
    # makes the column order deterministic.
    sub_K_list = sorted(set(org_res_dict[data].keys()) & set(soft_res_dict[data].keys()))
    n_cols = len(sub_K_list)
    if n_cols == 0:
        print("Skipping comparison for {!r}: no common K values".format(data))
        continue

    for metric in ['acc', 'loss']:
        col_0 = 'group_{}_0'.format(metric)
        col_1 = 'group_{}_1'.format(metric)
        try:
            # Compute every panel's curves before creating the figure so that
            # a missing result does not leave a half-drawn figure behind.
            panels = []
            for r, row_results in enumerate((org_res_dict[data], soft_res_dict[data])):
                for c, K in enumerate(sub_K_list):
                    by_sigma = row_results[K]
                    non_priv_group_0 = np.mean(by_sigma[1][col_0].values)
                    non_priv_group_1 = np.mean(by_sigma[1][col_1].values)

                    # Take the sigma values from the dictionary being plotted,
                    # so every looked-up key is guaranteed to exist.
                    sigma_list = list(by_sigma.keys())
                    perf_group_0 = [np.mean(by_sigma[s][col_0].values) - non_priv_group_0
                                    for s in sigma_list]
                    perf_group_1 = [np.mean(by_sigma[s][col_1].values) - non_priv_group_1
                                    for s in sigma_list]
                    panels.append((r, c, sigma_list, perf_group_0, perf_group_1))

            # Two rows (org / soft); squeeze=False keeps `axes` two-dimensional
            # even when there is only a single common K.
            fig, axes = plt.subplots(nrows=2, ncols=n_cols, sharex=True, sharey=True,
                                     figsize=(n_cols * 5, 10), squeeze=False)
            for r, c, sigma_list, perf_group_0, perf_group_1 in panels:
                ax = axes[r][c]
                ax.plot(sigma_list, perf_group_0, label='Group-0')
                ax.plot(sigma_list, perf_group_1, label='Group-1')
                ax.legend(fontsize=20)

            file_name = FIG_PATH + "compare_soft_hard_{}_{}.pdf".format(data, metric)
            fig.savefig(file_name, dpi=300, bbox_inches='tight')
        except Exception as exc:
            # Best-effort: keep going with the remaining metrics/datasets, but
            # say why this figure was not produced.
            print("Skipping comparison figure for {!r}/{!r}: {!r}".format(data, metric, exc))

