import pickle
import numpy as np
import random
from sklearn.utils import shuffle
from sklearn import preprocessing
from sklearn.datasets import load_iris
import pandas as pd
import matplotlib.pyplot as plt
import sys

# Privacy budgets swept by the experiment: 0.1, 0.2, ..., 1.0.
epsilon = [k / 10 for k in range(1, 11)]

# G and C are multiplicative factors in the noise standard deviation;
# L is the coefficient of the `L * w` term added to every gradient.
G, C, L = 1, 4, 1

# Number of independent training runs averaged for each privacy budget.
experiments = 5

def sigmoid(N, data, weight):
    """Return the logistic function of the first N entries of ``data . weight``.

    The computation is vectorized and numerically stable: only
    ``exp(-|x|)`` is ever evaluated, so large positive or negative scores
    cannot overflow.

    Args:
        N: Number of leading scores to evaluate.
        data: Sample matrix (anything ``np.dot`` accepts); the product with
            ``weight`` is expected to be one-dimensional.
        weight: Weight vector.

    Returns:
        A float array of length N holding ``1 / (1 + exp(-x))`` for each of
        the first N scores ``x``.

    Raises:
        IndexError: If the product has fewer than N entries.
    """
    x = np.asarray(np.dot(data, weight), dtype=float)
    if x.shape[0] < N:
        raise IndexError(
            'sigmoid: requested {} values but only {} scores are available'
            .format(N, x.shape[0]))
    x = x[:N]
    # exp(-|x|) equals exp(-x) for x >= 0 and exp(x) for x < 0, which are
    # exactly the two terms the stable formulation needs.
    decay = np.exp(-np.abs(x))
    return np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

def sigmoid2(data, weight):
    """Return the logistic function of the scalar score ``data . weight``.

    The branch on the sign of the score keeps the exponential's argument
    non-positive, so it cannot overflow.
    """
    z = np.dot(data, weight)
    if z >= 0:
        return 1. / (1 + np.exp(-z))
    exp_z = np.exp(z)
    return exp_z / (1 + exp_z)

def classifier(x, weights):
    """Predict the label of sample ``x``: 1.0 if its probability exceeds 0.5, else 0.0."""
    return 1.0 if sigmoid2(x, weights) > 0.5 else 0.0

def cost(N, prob, labels):
    """Return the cross-entropy between ``prob`` and ``labels``, divided by N.

    A small constant is added inside each logarithm so that probabilities
    of exactly 0 or 1 do not produce ``log(0)``.
    """
    tiny = 1e-10
    positive_term = labels * np.log(prob + tiny)
    negative_term = (1 - labels) * np.log(1 - prob + tiny)
    return -1 / N * np.sum(positive_term + negative_term)


def _load_pickled_xy(path):
    """Read an ``(X, y)`` pair from the pickle file at ``path``."""
    # NOTE: unpickling can execute arbitrary code; only load dataset files
    # that come from a trusted source.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def _load_dataset(name):
    """Load dataset ``name`` and its hyper-parameters.

    Returns:
        A tuple ``(X, y, train_data_num, seed, eta, T)``: features, labels
        remapped to {0, 1} where the dataset needs it, number of training
        rows, random seed, learning rate and number of gradient steps.

    Raises:
        ValueError: If ``name`` is not one of the supported datasets.
    """
    if name == 'Iris':
        data_iris = load_iris()
        X, y = data_iris.data, data_iris.target
        # Merge class 2 into class 1 so the task becomes binary.
        y[y.astype(int) == 2] = 1
        train_data_num, seed, eta, T = 120, 10, 0.1, 30
        X = preprocessing.scale(X)
    elif name == 'BC':
        X, y = _load_pickled_xy('BreastCancer_data.p')
        # Label 2 becomes 1, every other label becomes 0.
        is_two = y.astype(int) == 2
        y[is_two] = 1
        y[~is_two] = 0
        train_data_num, seed, eta, T = 600, 10, 0.1, 100
        X = preprocessing.scale(X)
    elif name == 'CC':
        X, y = _load_pickled_xy('CreditCard_data.p')
        train_data_num, seed, eta, T = 800, 1, 0.1, 100
        X = preprocessing.scale(X)
    elif name == 'Bank':
        X, y = _load_pickled_xy('bank_data.p')
        # Unlike the other datasets, Bank features are used without scaling.
        train_data_num, seed, eta, T = 25000, 1, 0.05, 100
    elif name == 'Adult':
        X, y = _load_pickled_xy('adult_data.p')
        # Labels arrive as -1/1; map -1 to 0.
        y[y == -1] = 0
        train_data_num, seed, eta, T = 30162, 10, 0.1, 500
        X = preprocessing.scale(X)
    else:
        raise ValueError(
            "unknown dataset {!r}; expected one of 'Iris', 'BC', 'CC', "
            "'Bank', 'Adult'".format(name))
    return X, y, train_data_num, seed, eta, T


def _run_gradient_perturbation(train_X, train_y, test_X, test_y, eta, T,
                               normalize):
    """Train with noisy gradient descent for every privacy budget.

    For each value in ``epsilon`` the model is trained ``experiments`` times
    from all-ones weights for ``T`` steps, adding Gaussian noise to every
    gradient, and is then scored on the test split.

    Args:
        train_X, train_y: Training features and 0/1 labels.
        test_X, test_y: Held-out features and labels.
        eta: Learning rate.
        T: Number of gradient steps per run.
        normalize: When true, gradients whose L2 norm is below 1 are rescaled
            to unit norm before the noisy update.

    Returns:
        ``(means, variances)``: two lists aligned with ``epsilon`` holding the
        mean and variance of test accuracy over the repeated runs.
    """
    n, d = train_X.shape[0], train_X.shape[1]
    test_N = test_X.shape[0]
    delta = 1 / n
    means = []
    variances = []
    for e in epsilon:
        std = C * G * (T ** 0.5) * (np.log(1 / delta) ** 0.5) / (n * e)
        accuracies = []
        for _ in range(experiments):
            w = np.ones(d)
            for _ in range(T):
                h = sigmoid(n, train_X, w)
                error = (h - train_y) / n
                # Draw the noise before touching the gradient so the random
                # stream is consumed in a fixed order for a given seed.
                noise = np.random.normal(loc=0.0, scale=std, size=d)
                gradient = np.matmul(train_X.transpose(), error) + L * w
                if normalize:
                    gradient_l2 = np.linalg.norm(gradient, ord=2)
                    # NOTE(review): only gradients with norm below 1 are
                    # rescaled (up to unit norm); larger ones pass through
                    # unchanged -- confirm this is the intended m-NGP rule.
                    # The lower bound avoids dividing by a zero norm.
                    if 0 < gradient_l2 < 1:
                        gradient = gradient / gradient_l2

                w = w - eta * (gradient + noise)

            mistakes = sum(
                1 for j in range(test_N)
                if int(classifier(test_X[j], w)) != int(test_y[j]))
            accuracies.append(1 - mistakes / test_N)
        means.append(np.mean(accuracies))
        variances.append(np.var(accuracies))
    return means, variances


def classify(argv):
    """Compare traditional and normalized gradient perturbation on a dataset.

    Loads the dataset named by ``argv`` ('Iris', 'BC', 'CC', 'Bank' or
    'Adult'), shuffles it with a fixed random state, splits it into train
    and test parts, trains logistic regression with both noisy-gradient
    variants for every privacy budget in ``epsilon``, prints the mean and
    variance of the test accuracy, and plots accuracy against the budget.

    Args:
        argv: Dataset name.

    Raises:
        ValueError: If ``argv`` is not a supported dataset name.
    """
    X, y, train_data_num, seed, eta, T = _load_dataset(argv)

    X, y = shuffle(X, y, random_state=0)

    train_X, train_y = X[0:train_data_num], y[0:train_data_num]
    test_X, test_y = X[train_data_num:], y[train_data_num:]

    # Traditional Gradient Perturbation
    random.seed(seed)
    np.random.seed(seed)
    print('Traditional Gradient Perturbation......')
    acc_tra_final, var_tra_final = _run_gradient_perturbation(
        train_X, train_y, test_X, test_y, eta, T, normalize=False)
    print('################################')

    # Normalized Gradient Perturbation (re-seeded so both methods see the
    # same random stream).
    random.seed(seed)
    np.random.seed(seed)
    print('Normalized Gradient Perturbation......')
    acc_nor_final, var_nor_final = _run_gradient_perturbation(
        train_X, train_y, test_X, test_y, eta, T, normalize=True)
    print('################################')

    print('TGP mean: ', acc_tra_final)
    print('TGP variance: ', var_tra_final)
    print('m-NGP mean: ', acc_nor_final)
    print('m-NGP variance: ', var_nor_final)

    plt.plot(epsilon, acc_tra_final, color='black', linestyle='-', lw=3,
             markersize=10, marker='o', label='TGP')
    plt.plot(epsilon, acc_nor_final, color='red', linestyle='--', lw=3,
             markersize=10, marker='s', label='m-NGP')
    # Raw string: '\e' is not a valid escape sequence in a normal literal.
    plt.xlabel(r'Privacy Budget $\epsilon$')
    plt.ylabel('Accuracy')
    plt.legend(['TGP', 'm-NGP'])
    plt.xticks(epsilon)
    plt.show()

if __name__ == "__main__":
    # Exit with a usage hint instead of an IndexError traceback when the
    # dataset name is missing from the command line.
    if len(sys.argv) < 2:
        sys.exit('usage: {} <Iris|BC|CC|Bank|Adult>'.format(sys.argv[0]))
    classify(sys.argv[1])





