import os
# import pandas
from sklearn.metrics import precision_score
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import cross_val_predict
from sklearn import model_selection
from sklearn import metrics
from art.attacks.evasion import FastGradientMethod
from sklearn.datasets import make_classification
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.gaussian_process.kernels import RBF
from sklearn.model_selection import cross_val_score

from sklearn.utils import shuffle
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
import random
import numpy as np
import statistics
import collections
from sklearn import datasets, linear_model
import numpy.polynomial.polynomial as poly
from sklearn.pipeline import make_pipeline
from sklearn.base import BaseEstimator, TransformerMixin

from sklearn.gaussian_process import GaussianProcessClassifier





# range_ = [0.1,0.5,1,5,10, 100, 1000]
# for i in range_:
#     models.append((str(i), SVC(gamma=1e-3, C=i)))

# models.append(('LR', linear_model.LinearRegression()))
# models.append(('Rid', linear_model.Ridge(alpha=0.5)))
# models.append(('KNN', KNeighborsClassifier()))
# models.append(('DT', DecisionTreeClassifier()))
# models.append(('NB', GaussianNB()))
# models.append(('GBDT5',GradientBoostingClassifier(max_depth=5)))
# models.append(('DecisionTreeM5', DecisionTreeClassifier(max_depth=5)))
# models.append(('SVM',SVC()))

# models.append(('LS', SVC(kernel="linear", C=0.025)))
# models.append(('RS', SVC(gamma=0.5, C=1)))
# #models.append(('GS',GaussianProcessClassifier(1.0 * RBF(1.0))))
# models.append(('DT', DecisionTreeClassifier(max_depth=10)))
# models.append(('RF', RandomForestClassifier(max_depth=10, n_estimators=10, max_features=1)))
# models.append(('AB', AdaBoostClassifier()))
# models.append(('NB', GaussianNB()))
# models.append(('LR', LogisticRegression()))

# models = [
#     SVC(kernel="linear", C=0.025),
#     SVC(gamma=2, C=1),
#     GaussianProcessClassifier(1.0 * RBF(1.0)),
#     DecisionTreeClassifier(max_depth=10),
#     RandomForestClassifier(max_depth=10, n_estimators=10, max_features=1),
#     AdaBoostClassifier(),
#     GaussianNB()]

def get_PV_onlyPV_deep(create_deepclassifier, X_train, y_train, X_hold, y_hold, epoch, dropout, resultfilepath):
    '''
    Description
        Trains a freshly created classifier on label-perturbed training data
        (fixed noise degree 0.2) and appends the training accuracy measured
        against the perturbed labels to the result file.

        Label perturbation: for every distinct label value, the first
        ceil(count * noisedegree) training samples carrying that label are
        relabelled with the next label value (the last label wraps around to
        the first one).

    Parameters
        create_deepclassifier: factory for the classifier; called as
            create_deepclassifier(dropout) when epoch < 20 (MNIST set-up
            according to the original comments) and as
            create_deepclassifier(dropout, X_train) otherwise (CIFAR set-up).
            The returned object must offer fit(...) and evaluate(...);
            element [1] of evaluate's result is used as the accuracy
            (presumably the Keras [loss, accuracy] convention - confirm).
        X_train: the training data instances (without labels); type: numpy.ndarray
        y_train: training data labels; type: numpy.ndarray
        X_hold: unused in this variant; kept for signature compatibility
        y_hold: unused in this variant; kept for signature compatibility
        epoch: number of epochs passed to model.fit
        dropout: dropout rate handed to the factory; also written as the
            first column of every result line
        resultfilepath: path of the result file, opened in append mode

    Returns
        dic_metric_value: always an empty dict in this variant (no PV, CV or
        test value is computed here; results are only written to the file)
    '''
    # Only a single noise level is evaluated by this variant.
    noisedegreelist = [0.2]

    # Neither validation nor test accuracy is measured here; 0 is written
    # into the corresponding columns as a placeholder.
    cvaccuracy = 0
    testaccuracy = 0

    # Map consecutive integer ids to the distinct label values found in y_train.
    labeldic = dict(enumerate(unique_rows(y_train)))
    num_labels = len(labeldic)

    # Number of training instances per label id.
    dic_label_num = {}
    for label, labelvalue in labeldic.items():
        dic_label_num[label] = sum(1 for eachitem in y_train if np.array_equal(labelvalue, eachitem))

    # The context manager guarantees the file is closed even if training fails.
    with open(resultfilepath, 'a') as writefile:
        writefile.write('algorithm,degree,trainaccuracy,cvaccuracy, testaccuracy\n')

        for noisedegree in noisedegreelist:
            if epoch < 20:
                model = create_deepclassifier(dropout)  # create model for mnist
            else:
                model = create_deepclassifier(dropout, X_train)  # create model for cifar

            Y_changed = np.copy(y_train)
            # number of samples already perturbed per label id
            dic_label_newnum = dict.fromkeys(labeldic, 0)

            for i in range(y_train.shape[0]):
                for label, labelvalue in labeldic.items():
                    quota = float(dic_label_num[label]) * noisedegree
                    if np.array_equal(Y_changed[i], labelvalue) and dic_label_newnum[label] < quota:
                        # Relabel with the next label; the modulo wraps the
                        # last label around to the first one.
                        Y_changed[i] = labeldic[(label + 1) % num_labels]
                        dic_label_newnum[label] += 1
                        break

            # train the model with the perturbed labels
            model.fit(X_train, Y_changed, batch_size=128, epochs=epoch, verbose=1)
            trainaccuracy_perturbed = model.evaluate(X_train, Y_changed, verbose=0)[1]

            writefile.write(str(dropout) + ',' + str(noisedegree) + ',' + str(trainaccuracy_perturbed) + ',' + str(
                cvaccuracy) + ',' + str(testaccuracy) + '\n')

    dic_metric_value = {}
    return dic_metric_value
def get_MV_CV_and_test_deep(create_deepclassifier, X_train_o, y_train_o, X_hold, y_hold, epoch, dropout, resultfilepath):
    '''
    Description
        Splits the given training data 80/20 into a training and a
        validation part and then computes the metric values (PV, validation
        accuracy, training accuracy, test accuracy) by delegating to
        get_MV_CV_and_test_deep_sametrain, which performs exactly the steps
        this function needs on an explicit train/validation split.

    Parameters
        create_deepclassifier: the function used to initialise the classifier
        X_train_o: the full training data instances (without labels) before
            the train/validation split; type: numpy.ndarray
        y_train_o: the full training data labels before the split;
            type: numpy.ndarray
        X_hold: the hold-out data instances (without labels); type: numpy.ndarray
        y_hold: the hold-out data labels; type: numpy.ndarray
        epoch: epochs
        dropout: dropout rate
        resultfilepath: path of the result file the metric lines are appended to

    Returns
        dic_metric_value: the dic of metric values
        key (string) of the dic:
            pv: PV
            cv: validation accuracy
            train: training accuracy
            test: test accuracy
    '''
    # Random 80/20 split (no fixed random_state, so it differs between calls).
    X_train, X_valid, y_train, y_valid = model_selection.train_test_split(X_train_o, y_train_o, test_size=0.2)

    return get_MV_CV_and_test_deep_sametrain(
        create_deepclassifier, X_train, y_train, X_valid, y_valid,
        X_hold, y_hold, epoch, dropout, resultfilepath)


def get_MV_CV_and_test_deep_sametrain(create_deepclassifier, X_train, y_train, X_valid,y_valid, X_hold, y_hold, epoch, dropout, resultfilepath):
    '''
    Description
        Computes the metric values (PV, validation accuracy, training
        accuracy, test accuracy) on a caller-supplied train/validation split
        with a fixed label-noise degree of 0.2.

        This is get_MV_CV_and_test_deep_sametrain_diffnoise with noise=0.2:
        the PV weighting 0.6 / 0.2 / 0.2 used for this function equals
        (1 - 2 * noise) / noise / noise for noise = 0.2, so the work is
        delegated instead of being duplicated.

    Parameters
        create_deepclassifier: the function used to initialise the classifier
        X_train: the training data instances (without labels); type: numpy.ndarray
        y_train: training data labels; type: numpy.ndarray
        X_valid: the validation data instances (without labels)
        y_valid: the validation data labels
        X_hold: the hold-out data instances (without labels); type: numpy.ndarray
        y_hold: the hold-out data labels; type: numpy.ndarray
        epoch: epochs
        dropout: dropout rate
        resultfilepath: path of the result file the metric lines are appended to

    Returns
        dic_metric_value: the dic of metric values
        key (string) of the dic:
            pv: PV
            cv: validation accuracy
            train: training accuracy
            test: test accuracy
    '''
    return get_MV_CV_and_test_deep_sametrain_diffnoise(
        create_deepclassifier, X_train, y_train, X_valid, y_valid,
        X_hold, y_hold, epoch, dropout, resultfilepath, 0.2)
def get_MV_forefficiency(create_deepclassifier, X_train, y_train, X_valid,y_valid, X_hold, y_hold, epoch, dropout, resultfilepath):
    '''
    Description
        Computes only the PV value, without validation/test evaluation and
        without writing a result file.

        Two classifiers are trained: one on the original labels and one on
        labels perturbed with noise degree 0.2 (for every distinct label
        value the first ceil(count * 0.2) samples are relabelled with the
        next label value; the last label wraps around to the first). A line
        is fitted through the two (noise degree, training accuracy) points
        and the result is

            0.6 * acc_original + 0.2 * (-slope) + 0.2

        where acc_original is the accuracy of the noise-trained classifier
        measured against the original labels.

    Parameters
        create_deepclassifier: factory for the classifier; called as
            create_deepclassifier(dropout) when epoch < 20 (MNIST set-up
            according to the original comments) and as
            create_deepclassifier(dropout, X_train) otherwise (CIFAR set-up).
            Element [1] of model.evaluate(...) is used as the accuracy
            (presumably the Keras [loss, accuracy] convention - confirm).
        X_train: the training data instances (without labels); type: numpy.ndarray
        y_train: training data labels; type: numpy.ndarray
        X_valid, y_valid, X_hold, y_hold, resultfilepath: unused; kept so the
            signature matches get_MV_CV_and_test_deep_sametrain
        epoch: number of epochs passed to model.fit
        dropout: dropout rate handed to the factory

    Returns
        pv: the PV value (a number)
    '''
    noisedegreelist = [0, 0.2]

    def build_model():
        if epoch < 20:
            return create_deepclassifier(dropout)  # create model for mnist
        return create_deepclassifier(dropout, X_train)  # create model for cifar

    # Baseline: train on the original labels (noise degree 0).
    model = build_model()
    model.fit(X_train, y_train, batch_size=16, epochs=epoch, verbose=1)
    trainingacculist = [model.evaluate(X_train, y_train, verbose=0)[1]]

    # Map consecutive integer ids to the distinct label values found in y_train.
    labeldic = dict(enumerate(unique_rows(y_train)))
    num_labels = len(labeldic)

    # Number of training instances per label id.
    dic_label_num = {}
    for label, labelvalue in labeldic.items():
        dic_label_num[label] = sum(1 for eachitem in y_train if np.array_equal(labelvalue, eachitem))

    for noisedegree in noisedegreelist[1:]:
        model = build_model()
        Y_changed = np.copy(y_train)
        # number of samples already perturbed per label id
        dic_label_newnum = dict.fromkeys(labeldic, 0)

        for i in range(y_train.shape[0]):
            for label, labelvalue in labeldic.items():
                quota = float(dic_label_num[label]) * noisedegree
                if np.array_equal(Y_changed[i], labelvalue) and dic_label_newnum[label] < quota:
                    # Relabel with the next label; the modulo wraps the last
                    # label around to the first one.
                    Y_changed[i] = labeldic[(label + 1) % num_labels]
                    dic_label_newnum[label] += 1
                    break

        # train the model with the perturbed labels
        model.fit(X_train, Y_changed, batch_size=128, epochs=epoch, verbose=1)

        trainaccuracy_perturbed = model.evaluate(X_train, Y_changed, verbose=0)[1]
        trainaccuracy_withoriginal = model.evaluate(X_train, y_train, verbose=0)[1]
        trainingacculist.append(trainaccuracy_perturbed)

    # numpy.polynomial.polynomial.polyfit returns coefficients from lowest
    # to highest degree, i.e. (intercept, slope) for a degree-1 fit.
    _intercept, slope = poly.polyfit(noisedegreelist, trainingacculist, 1)
    pv = -slope

    pv = 0.6 * trainaccuracy_withoriginal + 0.2 * pv + 0.2

    return pv

def get_MV_CV_and_test_deep_sametrain_diffnoise(create_deepclassifier, X_train, y_train, X_valid,y_valid, X_hold, y_hold, epoch, dropout, resultfilepath, noise):
    '''
    Description
        Computes the metric values (PV, validation accuracy, training
        accuracy, test accuracy) on a caller-supplied train/validation split
        for a caller-supplied label-noise degree, and appends one line per
        trained model to the result file.

        Two classifiers are trained: one on the original labels and one on
        labels perturbed with the given noise degree (for every distinct
        label value the first ceil(count * noise) samples are relabelled
        with the next label value; the last label wraps around to the
        first). A line is fitted through the two (noise degree, training
        accuracy) points and PV is

            (1 - 2 * noise) * acc_original + noise * (-slope) + noise

        where acc_original is the accuracy of the noise-trained classifier
        measured against the original labels.

    Parameters
        create_deepclassifier: factory for the classifier; called as
            create_deepclassifier(dropout) when epoch < 20 (MNIST set-up
            according to the original comments) and as
            create_deepclassifier(dropout, X_train) otherwise (CIFAR set-up).
            Element [1] of model.evaluate(...) is used as the accuracy
            (presumably the Keras [loss, accuracy] convention - confirm).
        X_train: the training data instances (without labels); type: numpy.ndarray
        y_train: training data labels; type: numpy.ndarray
        X_valid: the validation data instances (without labels)
        y_valid: the validation data labels
        X_hold: the hold-out data instances (without labels); type: numpy.ndarray
        y_hold: the hold-out data labels; type: numpy.ndarray
        epoch: number of epochs passed to model.fit
        dropout: dropout rate handed to the factory; also written as the
            first column of every result line
        resultfilepath: path of the result file, opened in append mode
        noise: label-noise degree (ratio of labels perturbed per class).
            NOTE(review): noise == 0 gives two identical x values for the
            line fit, so the slope is not meaningful - confirm callers
            always pass a positive value.

    Returns
        dic_metric_value: the dic of metric values
        key (string) of the dic:
            pv: PV
            cv: validation accuracy of the model trained on original labels
            train: training accuracy of the model trained on original labels
            test: hold-out accuracy of the model trained on original labels
    '''
    noisedegreelist = [0, noise]

    def build_model():
        if epoch < 20:
            return create_deepclassifier(dropout)  # create model for mnist
        return create_deepclassifier(dropout, X_train)  # create model for cifar

    # The context manager guarantees the file is closed even if training fails.
    with open(resultfilepath, 'a') as writefile:
        writefile.write('algorithm,degree,trainaccuracy,cvaccuracy, testaccuracy\n')

        # Baseline: train on the original labels (noise degree 0).
        model = build_model()
        model.fit(X_train, y_train, batch_size=16, epochs=epoch, verbose=1)
        cvaccuracy = model.evaluate(X_valid, y_valid, verbose=0)[1]
        testaccuracy = model.evaluate(X_hold, y_hold, verbose=0)[1]
        trainaccuracyoriginal = model.evaluate(X_train, y_train, verbose=0)[1]
        trainingacculist = [trainaccuracyoriginal]
        writefile.write(
            str(dropout) + ',0.0,' + str(trainaccuracyoriginal) + ',' + str(cvaccuracy) + ',' + str(
                testaccuracy) + '\n')

        # Map consecutive integer ids to the distinct label values found in y_train.
        labeldic = dict(enumerate(unique_rows(y_train)))
        num_labels = len(labeldic)

        # Number of training instances per label id.
        dic_label_num = {}
        for label, labelvalue in labeldic.items():
            dic_label_num[label] = sum(1 for eachitem in y_train if np.array_equal(labelvalue, eachitem))

        for noisedegree in noisedegreelist[1:]:
            model = build_model()
            Y_changed = np.copy(y_train)
            # number of samples already perturbed per label id
            dic_label_newnum = dict.fromkeys(labeldic, 0)

            for i in range(y_train.shape[0]):
                for label, labelvalue in labeldic.items():
                    quota = float(dic_label_num[label]) * noisedegree
                    if np.array_equal(Y_changed[i], labelvalue) and dic_label_newnum[label] < quota:
                        # Relabel with the next label; the modulo wraps the
                        # last label around to the first one.
                        Y_changed[i] = labeldic[(label + 1) % num_labels]
                        dic_label_newnum[label] += 1
                        break

            # train the model with the perturbed labels
            model.fit(X_train, Y_changed, batch_size=128, epochs=epoch, verbose=1)

            trainaccuracy_perturbed = model.evaluate(X_train, Y_changed, verbose=0)[1]
            trainaccuracy_withoriginal = model.evaluate(X_train, y_train, verbose=0)[1]
            trainingacculist.append(trainaccuracy_perturbed)

            # cv/test columns repeat the values of the clean model
            writefile.write(str(dropout) + ',' + str(noisedegree) + ',' + str(trainaccuracy_perturbed) + ',' + str(
                cvaccuracy) + ',' + str(testaccuracy) + '\n')

    # numpy.polynomial.polynomial.polyfit returns coefficients from lowest
    # to highest degree, i.e. (intercept, slope) for a degree-1 fit.
    _intercept, slope = poly.polyfit(noisedegreelist, trainingacculist, 1)
    pv = -slope

    pv = (1 - 2 * noise) * trainaccuracy_withoriginal + noise * pv + noise

    print('PV: ' + str(round(pv, 2)))
    print('training accuracy: ' + str(round(trainingacculist[0], 2)))
    print('test accuracy: ' + str(round(testaccuracy, 2)))
    print('----------')

    dic_metric_value = {}
    dic_metric_value['pv'] = pv
    dic_metric_value['cv'] = cvaccuracy
    dic_metric_value['train'] = trainingacculist[0]
    dic_metric_value['test'] = testaccuracy

    return dic_metric_value

def get_CV_for_efficiency(create_deepclassifier, X_train, y_train, epoch,dropout):
    '''
    Description
        Trains a classifier on a random 80% of the training data and returns
        its accuracy on the remaining 20%.

    Parameters
        create_deepclassifier: factory for the classifier; called as
            create_deepclassifier(dropout) when epoch < 20 (MNIST set-up
            according to the original comments) and as
            create_deepclassifier(dropout, X_train) otherwise (CIFAR set-up;
            note that the complete, unsplit X_train is handed over)
        X_train: the training data instances (without labels)
        y_train: training data labels
        epoch: number of epochs passed to model.fit
        dropout: dropout rate handed to the factory

    Returns
        cvaccuracy: element [1] of model.evaluate on the validation part
        (presumably the accuracy - confirm against the model's metrics)
    '''
    fit_x, valid_x, fit_y, valid_y = model_selection.train_test_split(
        X_train, y_train, test_size=0.2)

    # mnist models take only the dropout rate, cifar models also the data
    factory_args = (dropout,) if epoch < 20 else (dropout, X_train)
    classifier = create_deepclassifier(*factory_args)

    # train on the 80% part with the original labels
    classifier.fit(fit_x, fit_y, batch_size=16, epochs=epoch, verbose=1)

    return classifier.evaluate(valid_x, valid_y, verbose=0)[1]



def get_CV_and_test_deep(create_deepclassifier, X_train, y_train, X_hold, y_hold,epoch,dropout,resultfilepath):
    '''
    Description
        Trains a classifier on a random 80% of the training data and reports
        its accuracy on the remaining 20% (validation) and on the hold-out
        data (test).

    Parameters
        create_deepclassifier: factory for the classifier; called as
            create_deepclassifier(dropout) when epoch < 20 (MNIST set-up
            according to the original comments) and as
            create_deepclassifier(dropout, X_train) otherwise (CIFAR set-up;
            note that the complete, unsplit X_train is handed over)
        X_train: the training data instances (without labels); type: numpy.ndarray
        y_train: training data labels; type: numpy.ndarray
        X_hold: the hold-out data instances (without labels); type: numpy.ndarray
        y_hold: the hold-out data labels; type: numpy.ndarray
        epoch: number of epochs passed to model.fit
        dropout: dropout rate handed to the factory
        resultfilepath: not used by this function

    Returns
        dic_metric_value: the dic of metric values
        key (string) of the dic:
            test: test accuracy
            cv: validation accuracy
    '''
    fit_x, valid_x, fit_y, valid_y = model_selection.train_test_split(
        X_train, y_train, test_size=0.2)

    # mnist models take only the dropout rate, cifar models also the data
    factory_args = (dropout,) if epoch < 20 else (dropout, X_train)
    classifier = create_deepclassifier(*factory_args)

    # train on the 80% part with the original labels
    classifier.fit(fit_x, fit_y, batch_size=16, epochs=epoch, verbose=1)

    # validation is evaluated before the hold-out data
    validation_score = classifier.evaluate(valid_x, valid_y, verbose=0)[1]
    holdout_score = classifier.evaluate(X_hold, y_hold, verbose=0)[1]

    return {'test': holdout_score, 'cv': validation_score}

def get_noisydata_labelperturbation(y, noisedegree):
    '''
    Description
        This function perturbs a noisedegree ratio of the labels of every
        class and returns the noisy labels. The input is not modified.

        For each class, the first ceil(count * noisedegree) samples (in
        their order of appearance in y) are relabelled with the class's
        right neighbour in the sorted list of distinct labels; the last
        label wraps around to the first one. Every sample is perturbed at
        most once: a sample that was just moved into a class is never
        treated as a member of that class and moved again.

    Parameters
        y: training data labels, one scalar label per sample (1-D);
            type: numpy.ndarray
        noisedegree: the ratio of labels per class that are going to be
            perturbed; values <= 0 perturb nothing, values >= 1 perturb
            every sample

    Returns
        y_noisy: perturbed copy of the training labels; type: numpy.ndarray
    '''
    y = np.asarray(y)
    y_noisy = np.copy(y)

    # Sorted distinct labels: gives a well-defined "right neighbour" for
    # every class, independent of set iteration order.
    label_list = np.unique(y)
    num_labels = len(label_list)

    for position, label in enumerate(label_list):
        # Members are looked up in the ORIGINAL labels, so samples that were
        # relabelled into this class do not count against its quota.
        members = np.flatnonzero(y == label)

        # Smallest number of flips n with n >= count * noisedegree.
        num_to_flip = int(np.ceil(float(members.size) * noisedegree))
        if num_to_flip <= 0:
            continue

        # replace the label with its right neighbour (wrapping at the end)
        y_noisy[members[:num_to_flip]] = label_list[(position + 1) % num_labels]

    return y_noisy


def get_PV_metrics_includingrobustness_deep(create_deepclassifier, X_train, y_train, X_hold, y_hold, epoch, dropout, resultfilepath):
    '''
    Description
        This function calculates the values of all metrics
        (training accuracy, test accuracy, validation accuracy, attacked accuracy, and PV)
        and appends one CSV row per label-noise degree to resultfilepath.

    Parameters
        create_deepclassifier: the function used to initilise classifier
        X_train: the training data instances (without labels); type: numpy.ndarray
        y_train: training data labels, one row per sample (2-D); type: numpy.ndarray
        X_hold: the hold-out data instances (without labels); type: numpy.ndarray
        y_hold: the hold-out data labels; type: numpy.ndarray
        epoch: epochs
        dropout: dropout rate
        resultfilepath: CSV file the per-degree results are appended to

    Returns
        dic_metric_value: the dic of metric values
        key (string) of the dic:
            pv: PV
            cv: validation accuracy (60/40 split of the training data)
            train: training accuracy at noise degree 0
            test: hold-out accuracy of the model trained at the last noise degree
            attacked: hold-out accuracy on FGSM (eps=0.2) adversarial examples
    '''
    # noisedegreelist = np.arange(0.0,0.31,0.1) # label noise sequence [0,0.1,0.2,0.3]
    noisedegreelist = [0.0, 0.1, 0.2]

    def build_model():
        # the epoch count doubles as the dataset switch used by the callers
        if epoch < 20:
            return create_deepclassifier(dropout)  # create model for mnist
        return create_deepclassifier(dropout, X_train)  # create model for cifar

    # The file is opened in append mode by several runs; only a new/empty file
    # gets the header so that readers skipping just the first line keep working.
    needs_header = (not os.path.exists(resultfilepath)) or os.path.getsize(resultfilepath) == 0

    with open(resultfilepath, 'a') as writefile:
        if needs_header:
            writefile.write('algorithm,degree,trainaccuracy,cvaccuracy, testaccuracy,attackedaccuracy\n')

        x_train2, x_valid, y_train2, y_valid = model_selection.train_test_split(X_train, y_train, test_size=0.4)

        # get robustness: train on the full training data, then attack the hold-out set
        # NOTE(review): the same object is passed to ART as `estimator` and used
        # through fit/predict here; confirm create_deepclassifier returns a type
        # that supports both.
        model = build_model()
        model.fit(X_train, y_train, batch_size=128, epochs=epoch, verbose=1)
        attack = FastGradientMethod(estimator=model, eps=0.2)
        X_hold_adv = attack.generate(x=X_hold)

        predictions = model.predict(X_hold_adv)
        attackedaccuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_hold, axis=1)) / len(y_hold)

        # get validation accuracy with a FRESH model: the attacked model above has
        # already been fitted on all of X_train, which contains x_valid.
        model = build_model()
        model.fit(x_train2, y_train2, batch_size=128, epochs=epoch, verbose=1)
        cvaccuracy = model.evaluate(x_valid, y_valid, verbose=0)[1]

        # distinct label rows and, for each of them, the indices of its samples
        label_rows = list(unique_rows(y_train))
        n_labels = len(label_rows)
        members_by_label = [np.flatnonzero(np.all(y_train == row, axis=1)) for row in label_rows]

        trainingacculist = []  # list to put perturbed training accuracy
        testaccuracy = 0

        for noisedegree in noisedegreelist:
            model = build_model()

            Y_changed = np.copy(y_train)
            for label_id, members in enumerate(members_by_label):
                limit = float(len(members)) * noisedegree
                # number of counters c = 0, 1, 2, ... that satisfy c < limit
                n_flip = min(len(members), max(0, int(np.ceil(limit))))
                # relabel with the next label row; the last label wraps to the first
                Y_changed[members[:n_flip]] = label_rows[(label_id + 1) % n_labels]

            model.fit(X_train, Y_changed, batch_size=128, epochs=epoch,
                      verbose=1)  # retrain the model with perturbed labels

            trainaccuracy_perturbed = model.evaluate(X_train, Y_changed, verbose=0)[1]
            trainingacculist.append(trainaccuracy_perturbed)

            # NOTE(review): overwritten on every degree, so the returned 'test'
            # value belongs to the last (noisiest) model.
            testaccuracy = model.evaluate(X_hold, y_hold, verbose=0)[1]

            row = (dropout, noisedegree, trainaccuracy_perturbed, cvaccuracy, testaccuracy, attackedaccuracy)
            writefile.write(','.join(str(value) for value in row) + '\n')

    # polyfit returns coefficients from low to high order: (intercept, slope)
    intercept, slope = poly.polyfit(noisedegreelist, trainingacculist, 1)
    pv = 1 - abs(1 - abs(slope))  # mirror PV by one, so that PV increases up to 1, then worsen afterwards

    # return dic
    dic_metric_value = {}
    dic_metric_value['pv'] = pv
    dic_metric_value['train'] = trainingacculist[0]
    dic_metric_value['test'] = testaccuracy
    dic_metric_value['cv'] = cvaccuracy
    dic_metric_value['attacked'] = attackedaccuracy

    return dic_metric_value


def fit_string_data(dataset, headlist):
    '''
    Description
        Replaces every column named in headlist by the integer codes that
        sklearn's LabelEncoder produces for it.

    Parameters
        dataset: container read and assigned by column name (dataset[column])
        headlist: iterable with the names of the columns to encode

    Returns
        dataset: the same object that was passed in, modified in place
    '''
    from sklearn.preprocessing import LabelEncoder

    encoder = LabelEncoder()
    for column in headlist:
        encoded = encoder.fit_transform(dataset[column])
        dataset[column] = encoded
    return dataset

def apply_validation_known_distribution(modelnames,modelss, dataset,poslabe,neglabel,resultfilepath,noisedegreelist):
    '''
    Description
        For every model and every noise degree, flips that ratio of positive and
        negative labels in a small training split, then writes training accuracy,
        5-fold CV accuracy and hold-out accuracy to resultfilepath (overwritten).

    Parameters
        modelnames: names written to the first CSV column, one per model
        modelss: the classifiers (fit / predict interface), parallel to modelnames
        dataset: pair (X0, Y0) of instances and labels
        poslabe: the positive label value
        neglabel: the negative label value
        resultfilepath: path of the CSV file to write
        noisedegreelist: the label noise ratios to apply

    Returns
        an empty list (kept for backward compatibility)
    '''
    seed = 5

    # shuffle stays at its default (False), so the folds are contiguous and no
    # random_state is needed; recent scikit-learn raises if one is passed anyway.
    kfold = model_selection.KFold(n_splits=5)
    X0, Y0 = dataset

    X, X_test, Y, Y_test = model_selection.train_test_split(X0, Y0, test_size=0.99502488, random_state=seed)

    poslabelnum = (Y == poslabe).sum()
    neslabelnum = (Y == neglabel).sum()

    with open(resultfilepath, 'w') as writefile:
        writefile.write('algorithm, degree, self-modify,cv-accuracy,test-accuracy\n')

        for name, model in zip(modelnames, modelss):
            print(name)
            for degree in noisedegreelist:
                poscount = 0
                negcount = 0
                Y_changed = np.copy(Y)

                # swap the first `degree` share of each class to the other class
                for i in range(0, Y_changed.size):
                    if Y_changed[i] == poslabe and poscount < float(poslabelnum) * degree:
                        Y_changed[i] = neglabel
                        poscount += 1
                    elif Y_changed[i] == neglabel and negcount < float(neslabelnum) * degree:
                        Y_changed[i] = poslabe
                        negcount += 1

                cvresults = model_selection.cross_val_score(model, X, Y_changed, cv=kfold)
                cvaccuracy = cvresults.mean()

                model.fit(X, Y_changed)
                predictions_changed = model.predict(X)
                trainaccuracy = accuracy_score(Y_changed, predictions_changed)

                predictions_test = model.predict(X_test)
                testaccuracy_holdout = accuracy_score(Y_test, predictions_test)

                writefile.write(name+','+str(degree)+','+str(trainaccuracy)+','+str(cvaccuracy)+','+str(testaccuracy_holdout)+'\n')

    return []


def analyse_slope_differentnoise_connect(resultfilepath,noisedegreelist):
    '''
    Description
        Reads a results CSV (name, degree, training accuracy, CV accuracy, test accuracy),
        fits a line through the training accuracies of every model over noisedegreelist
        and writes the absolute slope plus the noise-free CV / delta / test values to
        '<resultfilepath without .csv>-slope.csv' (overwritten).

    Parameters
        resultfilepath: path of the results CSV; its first line is skipped as header
        noisedegreelist: the noise degrees, in the order the rows of each model appear

    Returns
        dic_model_slope: dic mapping model name to the absolute slope
    '''
    newresultfilepath = resultfilepath.replace('.csv', '-slope.csv')

    # Read the input completely before the output is opened, so the input is
    # never truncated when both paths happen to be identical.
    with open(resultfilepath) as readfile:
        lines = readfile.readlines()[1:]

    dic_name_valuelist = {}
    dic_name_cvaccu = {}
    dic_name_traincvdelta = {}
    dic_name_testaccu = {}

    for eachline in lines:
        if not eachline.strip():
            continue
        splits = eachline.split(',')
        name = splits[0]
        degree = float(splits[1])
        trainingaccu = float(splits[2])
        cvaccu = float(splits[3])
        testaccu = float(splits[4])

        dic_name_valuelist.setdefault(name, []).append(trainingaccu)

        # numeric comparison: the degree may have been written as '0' or '0.0'
        if degree == 0.0:
            dic_name_cvaccu[name] = round(cvaccu, 2)
            dic_name_traincvdelta[name] = round(trainingaccu - cvaccu, 2)
            dic_name_testaccu[name] = round(testaccu, 2)

    dic_model_slope = {}
    with open(newresultfilepath, 'w') as writefile:
        writefile.write('model,slope,CV-results,delta,test-accuracy\n')
        for name, accuracies in dic_name_valuelist.items():
            # polyfit returns coefficients from low to high order: (intercept, slope)
            intercept, slope = poly.polyfit(noisedegreelist, accuracies, 1)

            dic_model_slope[name] = abs(slope)
            writefile.write(name.replace('DT(','').replace(')','') + ',' + str(round(abs(slope),2)) +','+str(dic_name_cvaccu[name])+','+str(dic_name_traincvdelta[name])+','+str(dic_name_testaccu[name])+ '\n')

    return dic_model_slope

def analyse_slope_differentnoise_deeplearning(resultfilepath,noisedegreelist):
    '''
    Description
        Reads a deep-learning results CSV whose rows are
        (numeric model id, degree, training accuracy, CV accuracy, test accuracy, ...),
        fits a line through the training accuracies of every model id over
        noisedegreelist and appends the absolute slope and the noise-free test
        accuracy to '<resultfilepath without .csv>-slope.csv'.

    Parameters
        resultfilepath: path of the results CSV; the first line is skipped as header,
            further header lines (containing 'algorithm') and blank lines are ignored
        noisedegreelist: the noise degrees, in the order the rows of each model appear

    Returns
        dic_model_slope: dic mapping the model id (float) to the absolute slope
    '''
    newresultfilepath = resultfilepath.replace('.csv', '-slope.csv')

    # read the input completely before the output file is touched
    with open(resultfilepath) as readfile:
        lines = readfile.readlines()[1:]

    dic_name_valuelist = {}
    dic_name_testaccu = {}

    for eachline in lines:
        # results files are appended to run after run, so the header can recur
        if not eachline.strip() or 'algorithm' in eachline:
            continue
        splits = eachline.split(',')
        name = float(splits[0])
        degree = float(splits[1])
        trainingaccu = float(splits[2])
        # column 3 holds the CV accuracy; the test accuracy is column 4
        testaccu = float(splits[4])

        dic_name_valuelist.setdefault(name, []).append(trainingaccu)

        if degree == 0.0:
            dic_name_testaccu[name] = round(testaccu, 2)

    dic_model_slope = {}
    with open(newresultfilepath, 'a') as writefile:
        writefile.write('model,slope,test-accuracy\n')
        for name, accuracies in dic_name_valuelist.items():
            # polyfit returns coefficients from low to high order: (intercept, slope)
            intercept, slope = poly.polyfit(noisedegreelist, accuracies, 1)

            dic_model_slope[name] = abs(slope)
            writefile.write(str(name) + ',' + str(round(abs(slope),2))+','+str(dic_name_testaccu[name])+ '\n')

    return dic_model_slope


def analyse_slope_differentnoise(resultfilepath,noisedegreelist):
    '''
    Description
        Reads a results CSV (name, degree, training accuracy, CV accuracy, test accuracy),
        fits a line through the training accuracies of every model over noisedegreelist
        and appends the absolute slope plus the noise-free CV / delta / test values to
        '<resultfilepath without .csv>-slope.csv'.

    Parameters
        resultfilepath: path of the results CSV; its first line is skipped as header
        noisedegreelist: the noise degrees, in the order the rows of each model appear

    Returns
        dic_model_slope: dic mapping model name to the absolute slope
    '''
    newresultfilepath = resultfilepath.replace('.csv', '-slope.csv')

    # read the input completely before the output file is touched
    with open(resultfilepath) as readfile:
        lines = readfile.readlines()[1:]

    dic_name_valuelist = {}
    dic_name_cvaccu = {}
    dic_name_traincvdelta = {}
    dic_name_testaccu = {}

    for eachline in lines:
        if not eachline.strip():
            continue
        splits = eachline.split(',')
        name = splits[0]
        degree = float(splits[1])
        trainingaccu = float(splits[2])
        cvaccu = float(splits[3])
        testaccu = float(splits[4])

        dic_name_valuelist.setdefault(name, []).append(trainingaccu)

        # numeric comparison: the degree may have been written as '0' or '0.0'
        if degree == 0.0:
            dic_name_cvaccu[name] = round(cvaccu, 2)
            dic_name_traincvdelta[name] = round(trainingaccu - cvaccu, 2)
            dic_name_testaccu[name] = round(testaccu, 2)

    dic_model_slope = {}
    # append mode: repeated runs accumulate in one file, each with its own header line
    with open(newresultfilepath, 'a') as writefile:
        writefile.write('model,slope,CV-results,delta,test-accuracy\n')
        for name, accuracies in dic_name_valuelist.items():
            # polyfit returns coefficients from low to high order: (intercept, slope)
            intercept, slope = poly.polyfit(noisedegreelist, accuracies, 1)

            dic_model_slope[name] = abs(slope)
            writefile.write(name.replace('DT(','').replace(')','') + ',' + str(round(abs(slope),2)) +','+str(dic_name_cvaccu[name])+','+str(dic_name_traincvdelta[name])+','+str(dic_name_testaccu[name])+ '\n')

    return dic_model_slope


def apply_model_different_noise_degree_regression(noisedegreelist, X_use, X_hold, y_use, y_hold, resultpath):
    '''
    Description
        For every (name, model) pair in the module-level `models` list and every noise
        degree, adds Gaussian noise to the first noisedegree share of the training
        targets, fits the model and writes the R^2 score on the perturbed training
        data, the 5-fold cross-validated R^2 score and the hold-out R^2 score to
        resultpath (overwritten).

    Parameters
        noisedegreelist: the ratios of training targets to perturb
        X_use: the training data instances
        X_hold: the hold-out data instances
        y_use: the training targets
        y_hold: the hold-out targets
        resultpath: path of the CSV file to write

    Returns
        None
    '''
    X = X_use
    Y = y_use

    with open(resultpath, 'w') as writefile:
        writefile.write('algorithm, degree, self-modify,cv-accuracy,test-accuracy\n')

        # NOTE: `models` is not a parameter; it is looked up at module level.
        for name, model in models:
            print(name)
            for noisedegree in noisedegreelist:
                # float copy, so the added noise is not truncated for integer targets
                Y_changed = np.array(Y, dtype=float)

                limit = float(len(y_use)) * noisedegree
                # number of counters c = 0, 1, 2, ... that satisfy c < limit
                n_noisy = min(Y_changed.size, max(0, int(np.ceil(limit))))
                for i in range(n_noisy):
                    Y_changed[i] += np.random.normal(-50, 50)

                model.fit(X, Y_changed)
                y_changed_predictions = model.predict(X)
                # the *RMSE names are historical; the values are R^2 scores
                trainRMSE_perturbed = metrics.r2_score(Y_changed, y_changed_predictions)

                # get validation results
                predicted = cross_val_predict(model, X, Y_changed, cv=5)
                cvRMSE = metrics.r2_score(Y_changed, predicted)

                # get test accuracy
                y_test_predictions = model.predict(X_hold)
                testRMSE = metrics.r2_score(y_hold, y_test_predictions)

                writefile.write(
                    name + ',' + str(noisedegree) + ',' + str(trainRMSE_perturbed) +','+ str(cvRMSE)+','+str(testRMSE)+ '\n')

def unison_shuffled_copies(a, b):
    '''
    Description
        Applies one and the same random permutation to a and b.

    Parameters
        a: first array (indexable with an integer index array)
        b: second array, same length as a

    Returns
        the pair (shuffled a, shuffled b)
    '''
    length = len(a)
    assert length == len(b)
    order = np.random.permutation(length)
    shuffled_a = a[order]
    shuffled_b = b[order]
    return shuffled_a, shuffled_b

def apply_model_different_noise_degree(models, noisedegreelist, X_use, X_hold, y_use, y_hold, resultpath,label_list):
    '''
    Description
        For every model and every noise degree, relabels that ratio of each class
        with the next label in label_list (the last label wraps to the first), fits
        the model on the perturbed labels and writes training accuracy, 2-fold CV
        accuracy and hold-out accuracy to resultpath (overwritten).

    Parameters
        models: list of (name, classifier) pairs
        noisedegreelist: the ratios of labels per class to perturb
        X_use: the training data instances
        X_hold: the hold-out data instances
        y_use: the training labels (1-D)
        y_hold: the hold-out labels
        resultpath: path of the CSV file to write
        label_list: the class labels, in the order used for the "next label" shift

    Returns
        None
    '''
    X = X_use
    Y = y_use

    n_labels = len(label_list)
    original_labels = np.asarray(Y)
    # indices of the samples of every class, taken from the unperturbed labels
    members_by_label = [np.flatnonzero(original_labels == eachlabel) for eachlabel in label_list]

    def perturb_labels(noisedegree):
        # Membership comes from the original labels, so a sample relabelled for
        # one class can never be picked up and shifted again by the next class.
        Y_changed = np.copy(Y)
        for position, members in enumerate(members_by_label):
            limit = float(len(members)) * noisedegree
            # number of counters c = 0, 1, 2, ... that satisfy c < limit
            n_flip = min(len(members), max(0, int(np.ceil(limit))))
            Y_changed[members[:n_flip]] = label_list[(position + 1) % n_labels]
        return Y_changed

    with open(resultpath, 'w') as writefile:
        writefile.write('algorithm, degree, self-modify,cv-accuracy,test-accuracy\n')

        for name, model in models:
            print(name)
            for noisedegree in noisedegreelist:
                print('noise degree:'+str(noisedegree))
                Y_changed = perturb_labels(noisedegree)

                model.fit(X, Y_changed)
                y_changed_predictions = model.predict(X)
                trainaccuracy_perturbed = accuracy_score(Y_changed, y_changed_predictions)

                # get test accuracy
                y_test_predictions = model.predict(X_hold)
                testaccuracy = accuracy_score(y_hold, y_test_predictions)

                # get validation results
                cvresults = model_selection.cross_val_score(model, X, Y_changed, cv=2)
                valiationaccuracy = cvresults.mean()

                writefile.write(
                    name + ',' + str(noisedegree) + ',' + str(trainaccuracy_perturbed) +','+ str(valiationaccuracy)+','+str(testaccuracy)+ '\n')

def get_mean_sqr_regression(datasetname, filepath):
    '''
    Description
        Reads a slope CSV (model, PV, CV, ...), computes mean and standard deviation
        of the PV and CV values per model and plots both as mean lines with a
        +/- one standard deviation band. The figure is saved to
        './../results-ICML/<datasetname>.pdf' and shown.

    Parameters
        datasetname: used as plot title and as file name of the PDF
        filepath: path of the slope CSV; lines containing 'model' (headers) and
            blank lines are skipped

    Returns
        None
    '''
    dic_model_pvlist = {}
    dic_model_cvlist = {}

    with open(filepath) as readfile:
        for eachline in readfile:
            if 'model' in eachline or not eachline.strip():
                continue
            splits = eachline.strip().split(',')
            model = splits[0]
            dic_model_pvlist.setdefault(model, []).append(float(splits[1]))
            dic_model_cvlist.setdefault(model, []).append(float(splits[2]))

    print(dic_model_pvlist)

    def mean_and_band(samples):
        # a single run has no spread; statistics.stdev would raise for it
        center = statistics.mean(samples)
        spread = statistics.stdev(samples) if len(samples) > 1 else 0.0
        return center, center - spread, center + spread

    modellist = list(dic_model_pvlist)

    pvmeanlist, pvmean_subs_std, pvmean_add_std = [], [], []
    for eachmodel in modellist:
        center, lower, upper = mean_and_band(dic_model_pvlist[eachmodel])
        pvmeanlist.append(center)
        pvmean_subs_std.append(lower)
        pvmean_add_std.append(upper)

    cvmeanlist, cvmean_subs_std, cvmean_add_std = [], [], []
    for eachmodel in dic_model_cvlist:
        center, lower, upper = mean_and_band(dic_model_cvlist[eachmodel])
        cvmeanlist.append(center)
        cvmean_subs_std.append(lower)
        cvmean_add_std.append(upper)

    plt.figure(figsize=(2, 4))

    # upper panel: PV
    ax = plt.subplot(211)
    ax.set_ylim([-0.05, 50.05])
    plt.title(datasetname)
    plt.ylabel("PV score")
    plt.grid()
    plt.plot(modellist, pvmeanlist, 'o-', color="r")

    plt.fill_between(modellist, pvmean_subs_std,
                     pvmean_add_std, alpha=0.3,
                     color="orange")

    # lower panel: CV
    bx = plt.subplot(212)
    bx.set_ylim([-0.05, 100.05])
    plt.xlabel("maximum tree-depth")
    plt.ylabel("CV score")
    plt.grid()
    plt.fill_between(modellist, cvmean_subs_std,
                     cvmean_add_std, alpha=0.3, color="orange")
    plt.plot(modellist, cvmeanlist, 'o-', color="g")
    plt.savefig('./../results-ICML/'+datasetname+'.pdf')

    plt.show()

def get_mean_sqr(datasetname, filepath):
    '''
    Description
        Reads a slope CSV (model, PV, CV, ...), computes mean and standard deviation
        of the PV and CV values per model and plots both on a [0, 1] scale as mean
        lines with a +/- one standard deviation band. The figure is saved to
        './../results-ICML/<datasetname>.pdf' and shown.

    Parameters
        datasetname: used as plot title and as file name of the PDF
        filepath: path of the slope CSV; lines containing 'model' (headers) and
            blank lines are skipped

    Returns
        None
    '''
    dic_model_pvlist = {}
    dic_model_cvlist = {}

    with open(filepath) as readfile:
        for eachline in readfile:
            if 'model' in eachline or not eachline.strip():
                continue
            splits = eachline.strip().split(',')
            model = splits[0]
            dic_model_pvlist.setdefault(model, []).append(float(splits[1]))
            dic_model_cvlist.setdefault(model, []).append(float(splits[2]))

    print(dic_model_pvlist)

    def mean_and_band(samples):
        # a single run has no spread; statistics.stdev would raise for it
        center = statistics.mean(samples)
        spread = statistics.stdev(samples) if len(samples) > 1 else 0.0
        return center, center - spread, center + spread

    modellist = list(dic_model_pvlist)

    pvmeanlist, pvmean_subs_std, pvmean_add_std = [], [], []
    for eachmodel in modellist:
        center, lower, upper = mean_and_band(dic_model_pvlist[eachmodel])
        pvmeanlist.append(center)
        pvmean_subs_std.append(lower)
        pvmean_add_std.append(upper)

    cvmeanlist, cvmean_subs_std, cvmean_add_std = [], [], []
    for eachmodel in dic_model_cvlist:
        center, lower, upper = mean_and_band(dic_model_cvlist[eachmodel])
        cvmeanlist.append(center)
        cvmean_subs_std.append(lower)
        cvmean_add_std.append(upper)

    plt.figure(figsize=(2.5, 4))

    # upper panel: PV
    ax = plt.subplot(211)
    ax.set_ylim([-0.05, 1.05])
    plt.title(datasetname)
    plt.grid()
    plt.plot(modellist, pvmeanlist, 'o-', color="r")

    plt.fill_between(modellist, pvmean_subs_std,
                     pvmean_add_std, alpha=0.3,
                     color="orange")

    # lower panel: CV
    bx = plt.subplot(212)
    bx.set_ylim([-0.05, 1.05])
    plt.xlabel("maximum tree-depth")
    plt.grid()
    plt.fill_between(modellist, cvmean_subs_std,
                     cvmean_add_std, alpha=0.3, color="orange")
    plt.plot(modellist, cvmeanlist, 'o-', color="g")

    plt.savefig('./../results-ICML/'+datasetname+'.pdf',bbox_inches='tight')

    plt.show()


def get_mean_sqr_mnist(datasetname, filepath):
    '''
    Description
        Reads a slope CSV (model, PV, ...), computes mean and standard deviation of
        the PV values per model and plots them on a [0, 1] scale as a mean line with
        a +/- one standard deviation band. The figure is saved to
        './../results-ICML/<datasetname>.pdf' and shown.

    Parameters
        datasetname: used as plot title and as file name of the PDF
        filepath: path of the slope CSV; lines containing 'model' (headers) and
            blank lines are skipped

    Returns
        None
    '''
    dic_model_pvlist = {}

    with open(filepath) as readfile:
        for eachline in readfile:
            if 'model' in eachline or not eachline.strip():
                continue
            splits = eachline.strip().split(',')
            dic_model_pvlist.setdefault(splits[0], []).append(float(splits[1]))

    print(dic_model_pvlist)

    modellist = []
    pvmeanlist = []
    pvmean_add_std = []
    pvmean_subs_std = []
    for eachmodel, pvlist in dic_model_pvlist.items():
        modellist.append(eachmodel)
        pvmean = statistics.mean(pvlist)
        # a single run has no spread; statistics.stdev would raise for it
        pvstd = statistics.stdev(pvlist) if len(pvlist) > 1 else 0.0
        pvmeanlist.append(pvmean)
        pvmean_add_std.append(pvmean + pvstd)
        pvmean_subs_std.append(pvmean - pvstd)

    plt.figure(figsize=(2.2, 2.5))

    ax = plt.subplot(111)

    ax.set_ylim([-0.05, 1.05])
    plt.title(datasetname)
    plt.xlabel("dropout rate")
    plt.ylabel("PV score")
    plt.plot(modellist, pvmeanlist, 'o-', color="r")

    plt.fill_between(modellist, pvmean_subs_std,
                     pvmean_add_std, alpha=0.3,
                     color="orange")

    plt.savefig('./../results-ICML/'+datasetname+'.pdf',bbox_inches='tight')

    plt.show()
    
    
def unique_rows(a):
    '''
    Description
        Returns the distinct rows of a 2-D array.

    Parameters
        a: 2-D array-like

    Returns
        2-D array with one row per distinct row of a, as ordered by np.unique
    '''
    rows = np.ascontiguousarray(a)
    n_columns = rows.shape[1]
    # view every row as one structured element so np.unique compares whole rows
    row_dtype = [('', rows.dtype)] * n_columns
    distinct = np.unique(rows.view(row_dtype))
    n_distinct = distinct.shape[0]
    return distinct.view(rows.dtype).reshape((n_distinct, n_columns))



def get_PV_deep(create_deepclassifier, X, y,epoch):
    '''
    Description
        This function calculates the PV (model fit) evaluation result of a deep learning classifier and a training data set.
        A fresh classifier is trained for every label noise degree; the value returned
        is the accuracy, measured against the unperturbed labels y, of the classifier
        trained at the last noise degree.

    Parameters
        create_deepclassifier: function to create a classifier;
        X: the training data instances (without labels); type: numpy.ndarray
        y: training data labels, one row per sample (2-D); type: numpy.ndarray
        epoch: epoch used to train the model

    Returns
        PV: the degree of fit between classifier and data; type: float

    Examples:
        see readme

    '''
    noisedegreelist = [0,0.2] # label noise sequence
    print(y)

    # distinct label rows and, for each of them, the indices of its samples
    label_rows = list(unique_rows(y))
    n_labels = len(label_rows)
    members_by_label = [np.flatnonzero(np.all(y == row, axis=1)) for row in label_rows]

    trainingacculist = [] # list to put perturbed training accuracy

    for noisedegree in noisedegreelist:
        model = create_deepclassifier()

        Y_changed = np.copy(y)
        for label_id, members in enumerate(members_by_label):
            limit = float(len(members)) * noisedegree
            # number of counters c = 0, 1, 2, ... that satisfy c < limit
            n_flip = min(len(members), max(0, int(np.ceil(limit))))
            # relabel with the next label row; the last label wraps to the first
            Y_changed[members[:n_flip]] = label_rows[(label_id + 1) % n_labels]

        model.fit(X, Y_changed,batch_size=128,epochs=epoch, verbose=1) #retrain the model with perturbed labels

        # accuracy is measured against the clean labels y, not Y_changed
        trainaccuracy_perturbed = model.evaluate(X,y,verbose=0)[1]
        trainingacculist.append(trainaccuracy_perturbed)

    # m, b = poly.polyfit(Xtest, Ytest, 1) # conduct linear regression; b is the coefficient
    # pv = 1-abs(1-abs(b)) # mirror PV by one, so that PV increases up to 1, then worsen afterwards
    pv = trainaccuracy_perturbed
    print('Perturbation results:')
    # report exactly the degrees that were evaluated
    for noisedegree, accuracy in zip(noisedegreelist, trainingacculist):
        print('Label noise degree: '+str(round(noisedegree,2))+'  Training accuracy: '+str(round(accuracy,3)))
    print('PV: ' + str(round(pv, 2)))

    return pv


def get_testlist(filepath):
    '''
    Description
        Collects the test accuracy (column 4) of every noise-free row (degree 0 in
        column 1) of a results CSV, prints the list and returns it.

    Parameters
        filepath: path of the results CSV; header lines (containing 'algorithm'),
            blank lines and rows without a numeric degree are skipped

    Returns
        testlist: list of float test accuracies, in file order
    '''
    testlist = []

    with open(filepath) as readfile:
        for line in readfile:
            if 'algorithm' in line or not line.strip():
                continue
            splits = line.split(',')
            try:
                degree = float(splits[1])
            except (IndexError, ValueError):
                continue
            # numeric comparison: the degree may have been written as '0' or '0.0'
            if degree == 0.0:
                testlist.append(float(splits[4]))
    print(testlist)
    return testlist


def get_slope_learningrate(filepath):
    '''
    Description
        Reads a results CSV (name, degree, training accuracy, CV accuracy, test accuracy)
        and derives, per pair of degree-0 / degree-0.2 rows (paired in file order),
        the drop in training accuracy divided by 0.2.

    Parameters
        filepath: path of the results CSV; header lines (containing 'algorithm') and
            blank lines are skipped

    Returns
        pvlist: (training accuracy at degree 0 - training accuracy at degree 0.2) / 0.2
        cvlist: CV accuracies (column 3) of the degree-0 rows
        testlist: test accuracies (column 4) of the degree-0 rows
    '''
    trainzerolist = []
    trainnoisylist = []

    cvlist = []
    testlist = []

    with open(filepath) as readfile:
        for line in readfile:
            if 'algorithm' in line or not line.strip():
                continue
            splits = line.split(',')
            degree = float(splits[1])
            if degree == 0.0:
                trainzerolist.append(float(splits[2]))
                cvlist.append(float(splits[3]))
                # test accuracy lives in column 4; column 3 is the CV accuracy
                testlist.append(float(splits[4]))
            elif abs(degree - 0.2) < 1e-9:
                trainnoisylist.append(float(splits[2]))

    pvlist = [(clean - noisy) / 0.20 for clean, noisy in zip(trainzerolist, trainnoisylist)]

    return pvlist,cvlist,testlist





