import os.path
import sys
import h5py
import math
import gc
import time
import numpy as np
#from numba import cuda
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Flatten, Dense, Input, Conv1D, MaxPooling1D, ReLU, Dropout, Concatenate, Activation, Multiply, BatchNormalization #, AveragePooling1D, Add, GlobalAveragePooling1D, GlobalMaxPooling1D
from tensorflow.keras.utils import plot_model   #, get_source_inputs
from tensorflow.keras.utils import get_file
from tensorflow.keras import backend as K
#from keras_applications.imagenet_utils import decode_predictions
#from keras_applications.imagenet_utils import preprocess_input
#from keras_applications.imagenet_utils import _obtain_input_shape
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
from tensorflow.keras.utils import to_categorical
#from tensorflow.keras.models import load_model
import tf_keras as keras
from keras.models import load_model
import tensorflow as tf
import matplotlib.pyplot as plt
import argparse
#from tensorflow.python.keras.layers import Lambda
#from sklearn.model_selection import train_test_split
#K-center: https://github.com/google/active-learning/blob/master/sampling_methods/kcenter_greedy.py
# Trace and metadata parameters
from pathlib import Path
from sklearn.cluster import KMeans
#from sklearn_extra.cluster import KMedoid
from sklearn.utils.class_weight import compute_class_weight

def parse_arguments():
    """Build the command-line parser for this script.

    Only the parser is built here; the caller is responsible for invoking
    ``parse_args()`` on the returned object.

    Returns:
        argparse.ArgumentParser: parser with every option of the script
        registered. Options without an explicit default resolve to ``None``.
    """
    parser = argparse.ArgumentParser(description='')

    # Experiment / training mode
    parser.add_argument('--train_type', type=str, help='baseline or active')
    parser.add_argument('--sampling', type=str, help='random or uncertainty')
    parser.add_argument('--xType', type=str,
                        help='model input type: wave, wavebp0, wavebp1, wavebp01, wavebp01next0 or wavebp01next01')
    parser.add_argument('--start_trace', type=int, help='start trace')
    parser.add_argument('--end_trace', type=int, help='end trace')
    parser.add_argument('--batch_size', type=int, help='batch_size', default=256)
    # The help text used to be a copy of the --batch_size one.
    parser.add_argument('--num_epoch', type=int, help='number of training epochs', default=256)

    # Paths and identifiers
    parser.add_argument('--trained_model_path', type=str)
    parser.add_argument('--eval_path', type=str)
    parser.add_argument('--all_ids', type=str)

    # Iteration / sampling schedule
    parser.add_argument('--num_iteration', type=int, help='iteration_num', default=5)
    parser.add_argument('--num_sample', type=int, default=100)
    parser.add_argument('--schedule_iteration', type=int, nargs='+', help='when to train with sampling data')
    parser.add_argument('--resume_it', type=int)
    # The help texts below used to be copies of the --num_iteration one; they
    # now simply restate the option they belong to.
    parser.add_argument('--eval_interval', type=int, help='eval interval', default=1)
    parser.add_argument('--update_sampling', type=str, help='update sampling')
    parser.add_argument('--subtrain_interval', type=int, help='subtrain interval', default=1)
    parser.add_argument('--name', type=str, help='experiment name', default='test')
    parser.add_argument('--medoids_path', type=str)
    parser.add_argument('--num_trace', type=int, help='number of traces', default=5)
    parser.add_argument('--start_key', type=int, help='start key', default=0)
    parser.add_argument('--end_key', type=int, help='end key', default=300)
    parser.add_argument('--sim_metric', type=str)
    parser.add_argument('--sampling_file', type=str)

    # Strategy selectors and miscellaneous switches
    parser.add_argument('--update_type', type=str, default='none')
    parser.add_argument('--union_type', type=str, default='none')
    parser.add_argument('--loss_type', type=str, default='none')
    parser.add_argument('--loss_alpha', type=float, default=0.7)
    parser.add_argument('--normalize', type=int, default=0)
    parser.add_argument('--transformer', type=int, default=0)
    parser.add_argument('--cummulative', type=int, default=0)
    parser.add_argument('--seed', type=int, default=2024)
    parser.add_argument('--add_num', type=int, default=0)

    return parser

# ---------------------------------------------------------------------------
# Command line
# ---------------------------------------------------------------------------
parser = parse_arguments()
args = parser.parse_args()

# ---------------------------------------------------------------------------
# Value ranges (inclusive [min, max] pairs) and the sizes derived from them
# ---------------------------------------------------------------------------
bp_range = [0, 3329]
skpv_range = [0, 3328]
fqmul_range = [-1828, 1664]
tracelen = 600


def _inclusive_count(bounds):
    """Return how many integers lie in the inclusive range ``[bounds[0], bounds[1]]``."""
    low, high = bounds
    return high - low + 1


NumFQMULclasses = _inclusive_count(fqmul_range)  # number of classes for fqmul(skpv, bp)
NumSKPVclasses = _inclusive_count(skpv_range)    # number of classes for skpv
NumBPinput = _inclusive_count(bp_range)          # number of input for bp (ciphertext)
noClasses = NumSKPVclasses
noHypoKeys = NumSKPVclasses

# Note: sKeyNo is in range 0 to 3; which sub-keys these are is decided by the
# code running on the m4 (NOT by the code on the PC).
sKeyNo = 0
work = 'train'  # 'train' or 'attack'

# Training data files. Further files of the same set that are currently not
# used: ...data200000to299999..., ...data300000to399999..., ...data400000to499999...
training_file_list = [
    'Kyber512_indcpa_dec_poly_frombytes_mul_skpv0_1_bp0_1_100kDatax5_h5_data000000to099999_600samples.h5',
    'Kyber512_indcpa_dec_poly_frombytes_mul_skpv0_1_bp0_1_100kDatax5_h5_data100000to199999_600samples.h5',
]

trained_model_path = args.trained_model_path
data_path = 'data.npz'
nruns_default = 10
maxtrc_default = 200
testPortion = 1
attack_byModel_epNo = 232

# ---------------------------------------------------------------------------
# Training parameters
# ---------------------------------------------------------------------------
train_batch_size = args.batch_size
period = 8
maxEpochs = args.num_epoch
attack_byModel_fileNo = int(attack_byModel_epNo / period)
N_TRACE = 20000
Threshold_Save = 200

# ---------------------------------------------------------------------------
# Model hyper-parameters
# ---------------------------------------------------------------------------
noConv1Dbranch = 1
noLayers = 6    # if newly train
noClassificationLayer = 1
GPU_clear = True

# ---------------------------------------------------------------------------
# Training data type
# ---------------------------------------------------------------------------
xType = args.xType  # 'wave', 'wavebp0', 'wavebp1', 'wavebp01', 'wavebp01next0' or 'wavebp01next01'
yType = 'skpv'      # alternatives noted by the author: 'fqmul0', 'fqmul1'
trainPortion = 0.8

# Database and logs for model and training progress (epochs)
attackModel = 'Kyber512_indcpa_dec_poly_frombytes_mul_skpv0_1_bp0_1'
device = 'm4_CWLite'
attackModel_dev = attackModel + '_' + device
attackModel_dev_folder = '../' + attackModel_dev + '/'

# Short tag and expanded description of the network layout.
MLmodelStruct = '4C4FC_2BP4FC4FC_J4FCSM'
MLmodel_detail = '4C/512_256_128_64/_2BP4FC/1024_512_256_128/4FC/1024_512_256_128/_J4FC/1024_512_256_128/SM'

# hyper-parameter contains 5 groups: Conv1D, FC for Conv1D, BP0, BP1, FC for joined BPs
hyper_ver = 'hy0001010101_skpv0'
dataFile_train_folder = '100kDatax5_train'
dataFile_attack = '100kDatax1_test'
model_input_type = '_in[[][]]_tf2'
data_type = '100kDataxN' + str(len(training_file_list)) + model_input_type

# Output folder of this experiment; its name encodes the command-line settings.
save_path = 'Phase_3_{}_{}_{}_{}_{}_{}_{}_{}_key_{}_{}_trace_{}_{}_{}_loss_{}'.format(
    args.name, args.train_type, xType, args.loss_type,
    args.start_trace, args.end_trace, args.update_sampling, args.num_sample,
    args.start_key, args.end_key,
    args.num_trace, args.union_type, args.update_type,
    args.loss_alpha,
)
print(save_path)
database_folder_train = os.path.join('multi_attack_trained_models', save_path)
Path(database_folder_train).mkdir(parents=True, exist_ok=True)
database_folder_attack = attackModel_dev_folder + attackModel + '_' + dataFile_attack + '_h5/'

logFilename = MLmodelStruct + '_' + hyper_ver
DLmodel_name = logFilename
DLmodel_folder = 'models/'
modelLogFolder = DLmodel_folder + 'log' + DLmodel_name + '/'
logTrainedModel_byFile_folder = DLmodel_folder + 'trained' + DLmodel_name + '_byDataFile/'
# Per-epoch snapshots share the per-data-file folder.
logTrainedModel_byEp_folder = logTrainedModel_byFile_folder
attackLogFolder = DLmodel_folder + 'log' + DLmodel_name + '_attack/'

# Create the log folders. exist_ok avoids the check-then-create race of a
# separate isdir()/mkdir() pair and matches how database_folder_train is
# created above. attackLogFolder is intentionally not created here (it never was).
for _log_dir in (DLmodel_folder, modelLogFolder, logTrainedModel_byFile_folder, logTrainedModel_byEp_folder):
    Path(_log_dir).mkdir(parents=True, exist_ok=True)

print('DLmodel_folder =', DLmodel_folder)
print('modelLogFolder =', modelLogFolder)
print('logTrainedModel_byFile_folder =', logTrainedModel_byFile_folder)
print('logTrainedModel_byEp_folder =', logTrainedModel_byEp_folder)


################################################################################################
####################################### MODELS STRUCTURE #######################################
################################################################################################
# Every table below is a flat list. The per-layer tables are indexed by layer
# number (layer0 .. layer5) by the model builder; a 0 entry means "not used
# for that layer".

# Input BatchNormalization flag, one entry per sub-model (subMod0 .. subMod5).
# NOTE(review): not read by the model builder visible in this file - confirm
# whether it is still needed.
subMods_inputBNorms = [1, 0, 0, 0, 0, 0]

# ----------------------------- convolutional stage -----------------------------
# Number of Conv1D filters per layer.
subMods_NoConvNodes = [512, 256, 128, 64, 0, 0]
# Conv1D kernel size per layer.
subMods_convKernelSizes = [3, 3, 3, 3, 0, 0]
# MaxPooling1D pool size per layer.
subMods_convPoolSizes = [2, 2, 2, 2, 0, 0]
# MaxPooling1D stride per layer.
subMods_convPoolStrides = [3, 3, 3, 3, 0, 0]
# BatchNormalization after the layer (non-zero = enabled).
subMods_convBNorms = [1, 1, 1, 1, 0, 0]
# Dropout rate after the layer (0 = no dropout).
subMods_convDrops = [0, 0, 0, 0, 0, 0]

# ------------------ fully-connected stage on the conv features ------------------
# Flatten the convolutional feature map before the fully-connected layers,
# one entry per sub-model (subMod0 .. subMod5).
subMods_convFeatFlat = [1, 0, 0, 0, 0, 0]
# Dense units per layer, applied before the ciphertext inputs are added.
subMods_FCs = [1024, 512, 256, 128, 0, 0]
# BatchNormalization after each of those Dense layers (non-zero = enabled).
subMods_FC_BNorms = [1, 1, 1, 1, 0, 0]
# Dropout rate after each of those Dense layers (0 = no dropout).
subMods_FC_Drops = [0.2, 0, 0.2, 0, 0, 0]

###################### MULTI_CONVOLUTIONAL-SIZE + PLAINTEXT-EXTENSION ######################
# Per input type (xType), choose:
#   noBPbranch   - how many ciphertext (bp) branches exist, and
#   subMods_Pext - one flag per branch; in subModels_gen a non-zero flag makes
#                  that branch concatenate its flattened one-hot bp input with
#                  the CNN features.
# NOTE(review): noBPbranch is assigned only for 'wave', 'wavebp01' and
# 'wavebp01next01'. For 'wavebp0', 'wavebp1' and 'wavebp01next0' it is left
# undefined by this block, and for any other xType (including None when
# --xType is omitted) subMods_Pext is undefined as well. Confirm whether those
# cases are handled elsewhere before relying on them.
if xType == 'wave':
    noBPbranch = 0
    #                       sub0    sub1    sub2    sub3    sub4    sub5
    subMods_Pext =  [  0,      0,      0,      0,      0,      0]  # conv1D branch 0
elif xType == 'wavebp0':
    # NOTE(review): noBPbranch is not set in this branch.
    subMods_Pext =  [  0,      0,      0,      0,      0,      0] # conv1D branch 0
elif xType == 'wavebp1':
    # NOTE(review): noBPbranch is not set in this branch.
    subMods_Pext =  [  0,      0,      0,      0,      0,      0]  # conv1D branch 0
elif xType == 'wavebp01':
    noBPbranch = 2
    subMods_Pext =  [  1,      1,      0,      0,      0,      0]  # conv1D branch 0
elif xType == 'wavebp01next0':
    # NOTE(review): noBPbranch is not set in this branch.
    subMods_Pext =  [  0,      0,      0,      0,      0,      0]  # conv1D branch 0
elif xType == 'wavebp01next01':
    noBPbranch = 4
    subMods_Pext =  [  1,      1,      1,      1,      0,      0]  # conv1D branch 0


###################### (MULTI CONVOLUTIONAL-SIZE + PLAINTEXT-EXTENDED) FULLY-CONNECTED ######################
# The three tables below are indexed [branch][layer]: one row per ciphertext
# (bp) branch / sub-model (subModel0 .. subModel5), one column per layer
# (layer0 .. layer5). Rows 0-3 share the same settings; rows 4 and 5 are
# all-zero, i.e. unused.


def _per_branch(active_row, n_active=4, n_total=6):
    """Return ``n_total`` independent rows: ``n_active`` copies of ``active_row``, then all-zero rows."""
    width = len(active_row)
    active = [list(active_row) for _ in range(n_active)]
    unused = [[0] * width for _ in range(n_total - n_active)]
    return active + unused


# Dense units per layer, applied after the ciphertext input has been added.
subMods_Pext_FCs = _per_branch([1024, 1024, 512, 256, 128, 0])

# BatchNormalization after each of those Dense layers (non-zero = enabled).
subMods_Pext_FC_BNorms = _per_branch([1, 1, 1, 1, 1, 0])

# Dropout rate after each of those Dense layers (0 = no dropout).
subMods_Pext_FC_Drops = _per_branch([0.2, 0.2, 0, 0.1, 0, 0])

# Softmax head per sub-model (subMod0 .. subMod5); non-zero = enabled.
subMods_classification = [0] * 6

# Join flag for the branch outputs: disabled for the trace-only input type,
# enabled for every other xType.
subMods_join = [0] if xType == 'wave' else [1]

# Settings of the fully-connected stack that follows the join, one entry per
# layer (layer0 .. layer5); a 0 entry means "not used for that layer".
# Dense units per layer.
subMods_join_FCs = [1024, 1024, 512, 256, 128, 0]
# BatchNormalization after each Dense layer (non-zero = enabled).
subMods_join_FC_BNorms = [1, 1, 1, 1, 1, 0]
# Dropout rate after each Dense layer (0 = no dropout).
subMods_join_FC_Drops = [0.2, 0.2, 0, 0.1, 0, 0]

# Softmax for the joined model, if available.
subMods_join_classification = [1]

################################################################################################
##################################### MODELS STRUCTURE END #####################################
################################################################################################


def check_file_exists(file_path):
    """Terminate the program unless ``file_path`` exists.

    Args:
        file_path: path of a file or directory to test with ``os.path.exists``.

    Returns:
        None when the path exists.

    Raises:
        SystemExit: with exit code -1, after printing an error message, when
            the path does not exist.
    """
    if os.path.exists(file_path):
        return
    print("Error: provided file path '%s' does not exist!" % file_path)
    sys.exit(-1)

def listDirWithExt(directory, extension):
    """Lazily yield the entries of ``directory`` whose name ends with ``'.' + extension``.

    The directory is listed immediately (so a missing directory fails at call
    time); the filtering happens while the returned generator is consumed.

    Args:
        directory: directory to list with ``os.listdir``.
        extension: file extension without the leading dot, e.g. ``'h5'``.

    Returns:
        A generator of matching entry names (names only, not full paths).
    """
    names = os.listdir(directory)
    return (name for name in names if name.endswith('.' + extension))

def subModels_gen(xType,noConv1Dbranch, noBPbranch, noLayers, tracelen, NumBPinput, MLmodel_detail, modelLogFolder, logFilename, classes=noClasses):
    """Build, summarise, plot and compile the Keras model described by the module-level ``subMods_*`` tables.

    Layout of the network:
      1. BatchNormalization on the trace input.
      2. Conv1D + MaxPooling1D blocks (``subMods_NoConvNodes`` and friends).
      3. Flatten, then Dense blocks (``subMods_FCs`` and friends).
      4. One Dense stack per ciphertext (bp) branch (``subMods_Pext*``); a
         branch whose ``subMods_Pext`` flag is set is fed with the CNN features
         concatenated with its flattened one-hot input.
      5. Concatenation of the branch outputs (when ``subMods_join[0]`` is set),
         followed by the joined Dense stack (``subMods_join_*``).
      6. Softmax over ``classes``.

    Args:
        xType: input layout. Only ``'wave'`` (trace only) and ``'wavebp01'``
            (trace plus two one-hot bp inputs) are supported.
        noConv1Dbranch: number of trace ``Input`` layers to create; only the
            last one is connected to the network.
        noBPbranch: number of bp branches to build.
        noLayers: number of columns of the ``subMods_*`` tables to walk in
            every stage.
        tracelen: length of the trace input; its shape is ``(tracelen, 1)``.
        NumBPinput: length of each one-hot bp input; its shape is
            ``(NumBPinput, 1)``.
        MLmodel_detail: unused; kept for interface compatibility.
        modelLogFolder: unused; kept for interface compatibility.
        logFilename: unused; kept for interface compatibility.
        classes: number of units of the final softmax layer.

    Returns:
        The compiled ``Model`` (categorical cross-entropy loss, RMSprop
        optimizer, accuracy metric).

    Raises:
        ValueError: if ``noConv1Dbranch`` is smaller than 1 or ``xType`` is
            not one of the supported layouts.

    Side effects:
        Prints debug output and the model summary, and writes ``model.png``
        into the current working directory.
    """
    if noConv1Dbranch < 1:
        raise ValueError('subModels_gen: noConv1Dbranch must be >= 1, got %r' % (noConv1Dbranch,))

    input_trace_shape = (tracelen,1)
    input_Ptext1hot_shape = (NumBPinput,1)

    # A fresh Input is created per conv branch so that they are distinct
    # tensors; the network below is wired to the last one only.
    m_traceinputs = [Input(shape=input_trace_shape) for _ in range(noConv1Dbranch)]
    trace_input = m_traceinputs[-1]

    if xType == 'wave':
        inputs = [trace_input]
    elif xType == 'wavebp01':
        Ptext_input1 = Input(shape=input_Ptext1hot_shape)
        Ptext_input2 = Input(shape=input_Ptext1hot_shape)
        inputs = [trace_input, Ptext_input1, Ptext_input2]
    else:
        # Previously this fell through with an empty input list and failed
        # later with an IndexError that did not mention xType.
        raise ValueError("subModels_gen: unsupported xType %r (expected 'wave' or 'wavebp01')" % (xType,))

    # ---------------- convolutional stage ----------------
    x = BatchNormalization()(inputs[0])
    for layerNo in range(noLayers):
        conv_enabled = (subMods_NoConvNodes[layerNo] != 0
                        and subMods_convKernelSizes[layerNo] != 0
                        and subMods_convPoolSizes[layerNo] != 0
                        and subMods_convPoolStrides[layerNo] != 0)
        if conv_enabled:
            x = Conv1D(subMods_NoConvNodes[layerNo], subMods_convKernelSizes[layerNo], activation='relu', padding='same', name='ConvBlock_subModels_conv' + str(layerNo))(x)
            x = MaxPooling1D(subMods_convPoolSizes[layerNo], strides=subMods_convPoolStrides[layerNo], name='ConvBlock_subModels_pool' + str(layerNo))(x)
        # BatchNormalization / Dropout are driven by their own tables and are
        # therefore independent of whether a conv layer was added above.
        if subMods_convBNorms[layerNo] != 0:
            x = BatchNormalization(trainable=True)(x)
        if subMods_convDrops[layerNo] != 0:
            x = Dropout(subMods_convDrops[layerNo])(x)

    # ---------------- fully-connected stage on the CNN features ----------------
    for layerNo in range(noLayers):
        # subMods_convFeatFlat is a per-sub-model list; entry 0 belongs to this
        # (single) sub-model. The old test compared the whole list with 0,
        # which is always true.
        if layerNo == 0 and subMods_convFeatFlat[0] != 0:
            x = Flatten()(x)
        if subMods_FCs[layerNo] != 0:
            # NOTE(review): no activation here, unlike the bp and joined stacks
            # below which use relu - confirm this is intended.
            x = Dense(subMods_FCs[layerNo])(x)
        if subMods_FC_BNorms[layerNo] != 0:
            x = BatchNormalization(trainable=True)(x)
        if subMods_FC_Drops[layerNo] != 0:
            x = Dropout(subMods_FC_Drops[layerNo])(x)
    output_cnn = x

    # ---------------- ciphertext (bp) branches ----------------
    BPbranchOuts_list = []
    for BPbranchNo in range(noBPbranch):
        if subMods_Pext[BPbranchNo] != 0:
            Ptext_flatten = Flatten()(inputs[1 + BPbranchNo])
            x = Concatenate()([output_cnn, Ptext_flatten])
        # When the flag is 0 the stack below continues from whatever x
        # currently is (the previous branch output or the CNN features).
        for layerNo in range(noLayers):
            if subMods_Pext_FCs[BPbranchNo][layerNo] != 0:
                x = Dense(subMods_Pext_FCs[BPbranchNo][layerNo], activation='relu')(x)
            if subMods_Pext_FC_BNorms[BPbranchNo][layerNo] != 0:
                x = BatchNormalization(trainable=True)(x)
            if subMods_Pext_FC_Drops[BPbranchNo][layerNo] != 0:
                x = Dropout(subMods_Pext_FC_Drops[BPbranchNo][layerNo])(x)

        # Optional per-branch classification (softmax) head.
        if subMods_classification[BPbranchNo] != 0:
            x = Dense(classes, activation='softmax')(x)
        print(x)
        BPbranchOuts_list.append(x)

    print(BPbranchOuts_list)
    # subMods_join is a one-element list; test its entry rather than the list
    # itself (list != 0 is always true).
    if BPbranchOuts_list and subMods_join[0] != 0:
        x = Concatenate()(BPbranchOuts_list)

    # ---------------- joined fully-connected stage ----------------
    for layerNo in range(noLayers):
        if subMods_join_FCs[layerNo] != 0:
            x = Dense(subMods_join_FCs[layerNo], activation='relu')(x)
        if subMods_join_FC_BNorms[layerNo] != 0:
            x = BatchNormalization(trainable=True)(x)
        if subMods_join_FC_Drops[layerNo] != 0:
            x = Dropout(subMods_join_FC_Drops[layerNo])(x)

    # Final softmax classifier
    outputs = Dense(classes, activation='softmax')(x)

    sModel = Model(inputs, outputs, name='model')
    sModel.summary()
    tf.keras.utils.plot_model(sModel, show_shapes=True, to_file='model.png')
    optimizer = RMSprop(learning_rate=0.00001)
    sModel.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return sModel

# make a prediction with a stacked model
# https://machinelearningmastery.com/stacking-ensemble-for-deep-learning-neural-networks/
def predict_stacked_model(model, inputX):
    """Run ``model.predict`` with ``inputX`` supplied once for every entry of ``model.input``.

    Based on
    https://machinelearningmastery.com/stacking-ensemble-for-deep-learning-neural-networks/

    Args:
        model: object exposing ``input`` (a sized collection of inputs) and
            ``predict(X, verbose=...)``.
        inputX: the data to feed to every input; the same object is reused,
            not copied.

    Returns:
        Whatever ``model.predict`` returns (called with ``verbose=0``).
    """
    n_inputs = len(model.input)
    replicated = [inputX] * n_inputs
    return model.predict(replicated, verbose=0)

def load_sca_model(model_file):
    """Load a saved Keras model, terminating the program on failure.

    Args:
        model_file: path of the saved model.

    Returns:
        The object returned by ``load_model(model_file)``.

    Raises:
        SystemExit: with exit code -1 when the file does not exist (via
            ``check_file_exists``) or when loading it fails.
    """
    check_file_exists(model_file)
    try:
        model = load_model(model_file)
    except Exception as err:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # SystemExit are not turned into a "can't load" message; the cause is
        # reported as well so the failure can be diagnosed.
        print("Error: can't load Keras model file '%s'" % model_file)
        print("  cause: %r" % (err,))
        sys.exit(-1)
    return model

####### THESE FUNCTIONS ARE SPECIALIZED FOR KYBER    #######
####### Loading traces and metadata from file ############
#def load_meta_trace_file(database_file, sKeyNo, load_metadata=False):
def load_meta_trace_files(data_path, start_trace, end_trace):
    """Load a slice of traces, ciphertext values and labels from an ``.npz`` archive.

    The archive must contain the arrays ``'data'``, ``'bp'`` and ``'label'``.
    ``'data'`` and ``'label'`` are sliced along their first axis; ``'bp'`` is
    sliced along its second axis (its first axis is kept whole).

    Args:
        data_path: path of the ``.npz`` file.
        start_trace: first index of the slice (inclusive).
        end_trace: end index of the slice (exclusive); ``None`` means "to the end".

    Returns:
        Tuple ``(trace_profiling, bp_profiling, skpv_profiling)`` holding
        ``data[start:end]``, ``bp[:, start:end]`` and ``label[start:end]``.
    """
    # The archive is opened in a ``with`` block so that the underlying file
    # handle is released; the arrays are fully read into memory on access and
    # stay valid after the archive is closed.
    with np.load(data_path) as data:
        trace_profiling = data['data'][start_trace:end_trace]
        bp_profiling = data['bp'][:, start_trace:end_trace]
        skpv_profiling = data['label'][start_trace:end_trace]

    return (trace_profiling, bp_profiling, skpv_profiling)

def load_meta_trace_file_from_test(database_file, sKeyNo, load_metadata=False):
    """Load traces, ciphertext (bp) values and the skpv label from a Kyber HDF5 database.

    Only the datasets that contribute to the return value are read:
    ``'wave'``, ``'skpv_a_vec0_evenCoeff0'``, ``'bp_b_vec0_evenCoeff0'`` and
    ``'bp_b_vec0_oddCoeff1'``. The file is closed before returning.

    Args:
        database_file: path of the HDF5 file.
        sKeyNo: column index selecting the sub-key; column ``sKeyNo + 1`` is
            read as well for the "next" bp values.
        load_metadata: unused; kept for interface compatibility.

    Returns:
        Tuple ``(trace_profiling, bp_profiling, skpv_profiling)`` where
        ``trace_profiling`` is the ``'wave'`` dataset as a float array,
        ``bp_profiling`` is the list
        ``[bp even @ sKeyNo, bp odd @ sKeyNo, bp even @ sKeyNo+1, bp odd @ sKeyNo+1]``
        of integer arrays, and ``skpv_profiling`` is column ``sKeyNo`` of
        ``'skpv_a_vec0_evenCoeff0'`` as an integer array.

    Raises:
        SystemExit: with exit code -1 when the file does not exist or cannot
            be opened.
    """
    print('\nLoad database_file =', database_file)
    check_file_exists(database_file)
    # Open the Kyber database HDF5 for reading
    try:
        in_file = h5py.File(database_file, "r")
    except OSError as err:
        print("Error: can't open HDF5 file '%s' for reading (it might be malformed) ..." % database_file)
        print("  cause: %r" % (err,))
        sys.exit(-1)

    def _int_column(dataset_name, column):
        """Read one column of a 2-D dataset as an integer numpy array."""
        return np.array(in_file[dataset_name][:, column].astype(int))

    # ``with`` guarantees the HDF5 handle is released even if a dataset is
    # missing; everything returned is copied into numpy arrays first.
    with in_file:
        trace_profiling = np.array(in_file['wave'], dtype=float)
        skpv_profiling = _int_column('skpv_a_vec0_evenCoeff0', sKeyNo)
        bp_profiling = [
            _int_column('bp_b_vec0_evenCoeff0', sKeyNo),
            _int_column('bp_b_vec0_oddCoeff1', sKeyNo),
            _int_column('bp_b_vec0_evenCoeff0', sKeyNo + 1),
            _int_column('bp_b_vec0_oddCoeff1', sKeyNo + 1),
        ]

    return (trace_profiling, bp_profiling, skpv_profiling)
#### Converting traces and metadata to training format
# inputs = [[list of traces], [list of bp]]
#def create_training_data_form(database_folder_train_file, sKeyNo, trainPortion, xType, yType):

def create_training_data_form(data_path, sKeyNo, trainPortion, xType, yType, is_test, start_trace, end_trace):
    #(trace_profiling, bp_profiling, skpv_profiling, fqmul_profiling) = load_meta_trace_file(database_folder_train_file, sKeyNo)
    #(trace_profiling, bp_profiling, skpv_profiling) = load_meta_trace_file(database_folder_train_file, sKeyNo)
    if is_test:
        (trace_profiling, bp_profiling, skpv_profiling) = load_meta_trace_file_from_test(data_path, sKeyNo)
    else:
        (trace_profiling, bp_profiling, skpv_profiling) = load_meta_trace_files(data_path, start_trace, end_trace)

    Reshaped_trace_profiling= trace_profiling.reshape((trace_profiling.shape[0], trace_profiling.shape[1], 1))
    dataSize = Reshaped_trace_profiling.shape[0]
    trainSize = math.floor(dataSize * trainPortion)
    valLoc = trainSize
    if valLoc == dataSize:
        valLoc = dataSize - 1

    lineNo = list(range(0, bp_profiling[0].shape[0]))
    #bp0_1hot_profiling = np.zeros((bp_profiling[0].shape[0], NumBPinput)).astype(np.int)
    print('2')
    print((bp_profiling[0].shape[0], NumBPinput))
    bp0_1hot_profiling = np.zeros((bp_profiling[0].shape[0], NumBPinput)).astype(int)
    print('bp0_1hot_profiling.shape =', bp0_1hot_profiling.shape, '                bp_profiling[0] =', bp_profiling[0])
    bp0_1hot_profiling[lineNo,bp_profiling[0]] = 1
    Reshaped_bp0_1hot_profiling = bp0_1hot_profiling.reshape((bp0_1hot_profiling.shape[0], NumBPinput, 1))
    #print('Reshaped_bp0_1hot_profiling.shape = ', Reshaped_bp0_1hot_profiling.shape)
   
    lineNo = list(range(0, bp_profiling[1].shape[0]))
    #bp1_1hot_profiling = np.zeros((bp_profiling[1].shape[0], NumBPinput)).astype(np.int)
    bp1_1hot_profiling = np.zeros((bp_profiling[1].shape[0], NumBPinput)).astype(int)
    print('bp1_1hot_profiling.shape =', bp1_1hot_profiling.shape, '                bp_profiling[1] =', bp_profiling[1])
    #input()
    bp1_1hot_profiling[lineNo,bp_profiling[1]] = 1
    Reshaped_bp1_1hot_profiling = bp1_1hot_profiling.reshape((bp1_1hot_profiling.shape[0], NumBPinput, 1))
    
    lineNo = list(range(0, bp_profiling[2].shape[0]))
    #bp0_1hot_profiling_next_sKeyNo = np.zeros((bp_profiling[2].shape[0], NumBPinput)).astype(np.int)
    bp0_1hot_profiling_next_sKeyNo = np.zeros((bp_profiling[2].shape[0], NumBPinput)).astype(int)
    print('bp0_1hot_profiling_next_sKeyNo.shape =', bp0_1hot_profiling_next_sKeyNo.shape, '    bp_profiling[2] =', bp_profiling[2])
    bp0_1hot_profiling_next_sKeyNo[lineNo,bp_profiling[2]] = 1
    Reshaped_bp0_1hot_profiling_next_sKeyNo = bp0_1hot_profiling_next_sKeyNo.reshape((bp0_1hot_profiling_next_sKeyNo.shape[0], NumBPinput, 1))
    
    lineNo = list(range(0, bp_profiling[3].shape[0]))
    #bp1_1hot_profiling_next_sKeyNo = np.zeros((bp_profiling[3].shape[0], NumBPinput)).astype(np.int)
    bp1_1hot_profiling_next_sKeyNo = np.zeros((bp_profiling[3].shape[0], NumBPinput)).astype(int)
    print('bp1_1hot_profiling_next_sKeyNo.shape =', bp1_1hot_profiling_next_sKeyNo.shape, '    bp_profiling[3] =', bp_profiling[3])
    #input()
    bp1_1hot_profiling_next_sKeyNo[lineNo,bp_profiling[3]] = 1
    Reshaped_bp1_1hot_profiling_next_sKeyNo = bp1_1hot_profiling_next_sKeyNo.reshape((bp1_1hot_profiling_next_sKeyNo.shape[0], NumBPinput, 1))
    
    #y_train_fqmul0 = to_categorical(fqmul_profiling[0], num_classes=NumFQMULclasses)
    #y_train_fqmul1 = to_categorical(fqmul_profiling[1], num_classes=NumFQMULclasses)
    y_train_skpv = to_categorical(skpv_profiling, num_classes=NumSKPVclasses)

    #xTrain_wave = [Reshaped_trace_profiling[0:trainSize,:,:]]
    #xTrain_wavebp0 = [[Reshaped_trace_profiling[0:trainSize,:,:]], [Reshaped_bp0_1hot_profiling[0:trainSize,:]]]
    #xTrain_wavebp1 = [[Reshaped_trace_profiling[0:trainSize,:,:]], [Reshaped_bp1_1hot_profiling[0:trainSize,:]]]
    #xTrain_wavebp01 = [[Reshaped_trace_profiling[0:trainSize,:,:]], [Reshaped_bp0_1hot_profiling[0:trainSize,:], Reshaped_bp1_1hot_profiling[0:trainSize,:]]]
    #xTrain_wavebp01next0 = [[Reshaped_trace_profiling[0:trainSize,:,:]], [Reshaped_bp0_1hot_profiling[0:trainSize,:], Reshaped_bp1_1hot_profiling[0:trainSize,:], Reshaped_bp0_1hot_profiling_next_sKeyNo[0:trainSize,:]]]
    #xTrain_wavebp01next01 = [[Reshaped_trace_profiling[0:trainSize,:,:]], [Reshaped_bp0_1hot_profiling[0:trainSize,:], Reshaped_bp1_1hot_profiling[0:trainSize,:], Reshaped_bp0_1hot_profiling_next_sKeyNo[0:trainSize,:], Reshaped_bp1_1hot_profiling_next_sKeyNo[0:trainSize,:]]]
    #xTrain_wave = Reshaped_trace_profiling[0:trainSize,:,:]
    #xTrain_wavebp0 = [Reshaped_trace_profiling[0:trainSize,:,:], Reshaped_bp0_1hot_profiling[0:trainSize,:]]
    #xTrain_wavebp1 = [Reshaped_trace_profiling[0:trainSize,:,:], Reshaped_bp1_1hot_profiling[0:trainSize,:]]
    #xTrain_wavebp01 = [Reshaped_trace_profiling[0:trainSize,:,:], Reshaped_bp0_1hot_profiling[0:trainSize,:], Reshaped_bp1_1hot_profiling[0:trainSize,:]]
    #yTrain_fqmul0 = y_train_fqmul0[0:trainSize,:]
    #yTrain_fqmul1 = y_train_fqmul1[0:trainSize,:]
    #yTrain_skpv = y_train_skpv[0:trainSize,:]

    #xVal_wave = [Reshaped_trace_profiling[valLoc:,:,:]]
    #xVal_wavebp0 = [[Reshaped_trace_profiling[valLoc:,:,:]], [Reshaped_bp0_1hot_profiling[valLoc:,:]]]
    #xVal_wavebp1 = [[Reshaped_trace_profiling[valLoc:,:,:]], [Reshaped_bp1_1hot_profiling[valLoc:,:]]]
    #xVal_wavebp01 = [[Reshaped_trace_profiling[valLoc:,:,:]], [Reshaped_bp0_1hot_profiling[valLoc:,:], Reshaped_bp1_1hot_profiling[valLoc:,:]]]
    #xVal_wavebp01next0 = [[Reshaped_trace_profiling[valLoc:,:,:]], [Reshaped_bp0_1hot_profiling[valLoc:,:], Reshaped_bp1_1hot_profiling[valLoc:,:], Reshaped_bp0_1hot_profiling_next_sKeyNo[valLoc:,:]]]
    #xVal_wavebp01next01 = [[Reshaped_trace_profiling[valLoc:,:,:]], [Reshaped_bp0_1hot_profiling[valLoc:,:], Reshaped_bp1_1hot_profiling[valLoc:,:], Reshaped_bp0_1hot_profiling_next_sKeyNo[valLoc:,:], Reshaped_bp1_1hot_profiling_next_sKeyNo[valLoc:,:]]]
    #xVal_wave = Reshaped_trace_profiling[valLoc:,:,:]
    #xVal_wavebp0 = [Reshaped_trace_profiling[valLoc:,:,:], Reshaped_bp0_1hot_profiling[valLoc:,:]]
    #xVal_wavebp1 = [Reshaped_trace_profiling[valLoc:,:,:], Reshaped_bp1_1hot_profiling[valLoc:,:]]
    #xVal_wavebp01 = [Reshaped_trace_profiling[valLoc:,:,:], Reshaped_bp0_1hot_profiling[valLoc:,:], Reshaped_bp1_1hot_profiling[valLoc:,:]]
    #yVal_fqmul0 = y_train_fqmul0[valLoc:,:]
    #yVal_fqmul1 = y_train_fqmul1[valLoc:,:]
    #yVal_skpv = y_train_skpv[valLoc:,:]

    # Input data creation
    if xType == 'wave':
        xTrain = [[Reshaped_trace_profiling[0:trainSize,:,:]]]#xTrain_wave
        xVal = [[Reshaped_trace_profiling[valLoc:,:,:]]]#xVal_wave
    elif xType == 'wavebp0':
        xTrain = [[Reshaped_trace_profiling[0:trainSize,:,:]], [Reshaped_bp0_1hot_profiling[0:trainSize,:]]]#xTrain_wavebp0
        xVal = [[Reshaped_trace_profiling[valLoc:,:,:]], [Reshaped_bp0_1hot_profiling[valLoc:,:]]]#xVal_wavebp0
    elif xType == 'wavebp1':
        xTrain = [[Reshaped_trace_profiling[0:trainSize,:,:]], [Reshaped_bp1_1hot_profiling[0:trainSize,:]]]#xTrain_wavebp1
        xVal = [[Reshaped_trace_profiling[valLoc:,:,:]], [Reshaped_bp1_1hot_profiling[valLoc:,:]]]#xVal_wavebp1
    elif xType == 'wavebp01':
        xTrain = [[Reshaped_trace_profiling[0:trainSize,:,:]], [Reshaped_bp0_1hot_profiling[0:trainSize,:], Reshaped_bp1_1hot_profiling[0:trainSize,:]]]#xTrain_wavebp01
        xVal = [[Reshaped_trace_profiling[valLoc:,:,:]], [Reshaped_bp0_1hot_profiling[valLoc:,:], Reshaped_bp1_1hot_profiling[valLoc:,:]]]#xVal_wavebp01
    elif xType == 'wavebp01next0':
        xTrain = [[Reshaped_trace_profiling[0:trainSize,:,:]], [Reshaped_bp0_1hot_profiling[0:trainSize,:], Reshaped_bp1_1hot_profiling[0:trainSize,:], Reshaped_bp0_1hot_profiling_next_sKeyNo[0:trainSize,:]]]#xTrain_wavebp01next0
        xVal = [[Reshaped_trace_profiling[valLoc:,:,:]], [Reshaped_bp0_1hot_profiling[valLoc:,:], Reshaped_bp1_1hot_profiling[valLoc:,:], Reshaped_bp0_1hot_profiling_next_sKeyNo[valLoc:,:]]]#xVal_wavebp01next0
    elif xType == 'wavebp01next01':
        xTrain = [[Reshaped_trace_profiling[0:trainSize,:,:]], [Reshaped_bp0_1hot_profiling[0:trainSize,:], Reshaped_bp1_1hot_profiling[0:trainSize,:], Reshaped_bp0_1hot_profiling_next_sKeyNo[0:trainSize,:], Reshaped_bp1_1hot_profiling_next_sKeyNo[0:trainSize,:]]]#xTrain_wavebp01next01
        xVal = [[Reshaped_trace_profiling[valLoc:,:,:]], [Reshaped_bp0_1hot_profiling[valLoc:,:], Reshaped_bp1_1hot_profiling[valLoc:,:], Reshaped_bp0_1hot_profiling_next_sKeyNo[valLoc:,:], Reshaped_bp1_1hot_profiling_next_sKeyNo[valLoc:,:]]]#xVal_wavebp01next01
        #print('Created xType, len(xTrain) :', xType, len(xTrain))
        #print('len(xTrain[0]) =', len(xTrain[0]), ';    len(xTrain[1]) =', len(xTrain[1]), 'PRESS ENTER TO CONTINUE')
        #print('PRESS ENTER TO CONTINUE')
        #input()
    # Category creation
    if yType == 'fqmul0':
        yTrain = yTrain_fqmul0
        yTrain_value = fqmul_profiling[0][0:trainSize]
        yVal = yVal_fqmul0
        yVal_value = fqmul_profiling[0][valLoc:]
    elif yType == 'fqmul1':
        yTrain = yTrain_fqmul1
        yTrain_value = fqmul_profiling[1][0:trainSize]
        yVal = yVal_fqmul1
        yVal_value = fqmul_profiling[1][valLoc:]
    elif yType == 'skpv':
        yTrain = y_train_skpv[0:trainSize,:]#yTrain_skpv
        yTrain_value = skpv_profiling[0:trainSize]
        yVal = y_train_skpv[valLoc:,:]#yVal_skpv
        yVal_value = skpv_profiling[valLoc:]

    return xTrain, yTrain, xVal, yVal, yTrain_value, yVal_value

def get_subset(idxs, X_train, y_train):
    """Return the rows ``idxs`` of a trace + bp0 + bp1 training set.

    Note: a later definition of ``get_subset`` in this file (taking an extra
    ``xType`` argument) rebinds the name, so this version is only reachable
    before that definition is executed.

    Args:
        idxs: Row selector applied to every array (anything numpy accepts
            as a first-axis index).
        X_train: Nested list ``[[traces], [bp0, bp1]]``; ``traces`` is
            indexed with three axes, ``bp0``/``bp1`` with two.
        y_train: Two-axis label array aligned with the traces.

    Returns:
        ``(sub_X_train, sub_y_train)`` with the same nesting as the inputs.
    """
    traces = X_train[0][0]
    # Size of the full pool, printed for progress tracking.
    print(len(traces))
    aux_inputs = X_train[1]
    bp0, bp1 = aux_inputs[0], aux_inputs[1]

    picked_traces = traces[idxs, :, :]
    picked_aux = [bp0[idxs, :], bp1[idxs, :]]
    return [[picked_traces], picked_aux], y_train[idxs, :]

def _mk_rankmat_inputs(xTest, samp, x_type):
    """Build the positional input list passed to ``model.predict`` for one run."""
    # Positions inside xTest[1] of the auxiliary inputs used by each layout.
    # NOTE(review): 'wavebp1' reads xTest[1][1]; the training-side builder in
    # this file stores bp1 at [1][0] for that layout -- confirm the test-side
    # layout really has two auxiliary entries.
    aux_slots = {
        'wave': (),
        'wavebp0': (0,),
        'wavebp1': (1,),
        'wavebp01': (0, 1),
        'wavebp01next0': (0, 1, 2),
        'wavebp01next01': (0, 1, 2, 3),
    }
    if x_type not in aux_slots:
        raise ValueError('Unsupported xType: %r' % (x_type,))
    inputs = [xTest[0][0][samp, :, :]]
    inputs.extend(xTest[1][slot][samp, :] for slot in aux_slots[x_type])
    return inputs


def mk_rankmat(model, nruns, maxtrc, batches, xTest, yTest_value, noHypoKeys, noClasses):
    """Compute per-trace rank matrices of the true key over ``nruns`` runs.

    For each run the model predicts on the test rows listed in
    ``batches[krun, :]``; log-probabilities are accumulated trace by trace and
    the rank of the true key is the number of hypotheses whose accumulated
    log-probability is strictly larger.

    Reads the module-level ``xType`` to decide which inputs are fed to the
    model.

    Args:
        model: Object exposing ``predict(list_of_arrays)``.
        nruns: Number of runs (rows of ``batches`` that are used).
        maxtrc: Number of traces per run.
        batches: Array indexed as ``batches[krun, :]`` giving test-row indices.
        xTest: Nested list ``[[traces], [aux0, aux1, ...]]``.
        yTest_value: Sequence whose first entry, converted to ``int``, is the
            true key / class.
        noHypoKeys: Number of key hypotheses; the accumulation
            ``lpsums += lps[i]`` requires it to be broadcast-compatible with
            the model's output width.
        noClasses: Width of the model output.

    Returns:
        Tuple ``(rankmat_byKey, rankmat_byClass, ps_AllClasses_Nruns,
        lps_AllClasses_Nruns, lps_AllHypoKeys_Nruns, lpsums_AllHypoKeys_Nruns)``.

    Raises:
        ValueError: If the module-level ``xType`` is not a supported layout.
    """
    realkey = int(yTest_value[0])
    rankmat_byKey = np.zeros((nruns, maxtrc), dtype=int)
    rankmat_byClass = np.zeros((nruns, maxtrc), dtype=int)
    ps_AllClasses_Nruns = np.zeros((maxtrc, noClasses, nruns))
    lps_AllClasses_Nruns = np.zeros((maxtrc, noClasses, nruns))
    lps_AllHypoKeys_Nruns = np.zeros((maxtrc, noHypoKeys, nruns))
    lpsums_AllHypoKeys_Nruns = np.zeros((maxtrc, noHypoKeys, nruns))
    print("-------------------------------------------")
    print(len(xTest[0][0]))

    # The class of interest is the key itself (no S-box / leakage mapping).
    realClass = realkey

    for krun in range(nruns):
        samp = batches[krun, :]
        ps = model.predict(_mk_rankmat_inputs(xTest, samp, xType))

        # A probability of exactly 0 yields -inf here (numpy emits a warning).
        lps = np.log(ps)
        # Hypotheses map one-to-one onto classes, so the per-hypothesis
        # log-probabilities are the per-class ones.
        lpsAllHypoKeys = lps
        lpsums = np.zeros(noHypoKeys)
        for i in range(maxtrc):
            lpsums += lps[i]
            lpsums_AllHypoKeys_Nruns[i, :, krun] = lpsums
            # Rank = number of hypotheses strictly better than the true one.
            rankmat_byKey[krun, i] = np.count_nonzero(lpsums > lpsums[realkey])
            rankmat_byClass[krun, i] = np.count_nonzero(lps[i, :] > lps[i, realClass])
        ps_AllClasses_Nruns[:, :, krun] = ps
        lps_AllClasses_Nruns[:, :, krun] = lps
        lps_AllHypoKeys_Nruns[:, :, krun] = lpsAllHypoKeys
    return rankmat_byKey, rankmat_byClass, ps_AllClasses_Nruns, lps_AllClasses_Nruns, lps_AllHypoKeys_Nruns, lpsums_AllHypoKeys_Nruns

def eval_model(model, nruns, maxtrc, batches, xTest, yTest_value, noHypoKeys, noClasses):
    """Return the mean key rank after the last trace, averaged over runs.

    All arguments are forwarded unchanged to ``mk_rankmat``; only its first
    result (the by-key rank matrix, one row per run) is used.

    Returns:
        Mean over runs of the rank in the last trace column.
    """
    rankmat_byKey = mk_rankmat(model, nruns, maxtrc, batches, xTest, yTest_value, noHypoKeys, noClasses)[0]
    # Average over runs (axis 0), then keep the value for the final trace.
    mean_rank_per_trace = np.mean(rankmat_byKey, 0)
    return mean_rank_per_trace[-1]

import pandas as pd

class CustomCallback(tf.keras.callbacks.Callback):
    """Keras callback that tracks attack mean rank and checkpoints the best model.

    Every ``eval_interval`` epochs the current model is evaluated with
    ``eval_model`` on random trace batches; the model is saved to
    ``save_model_name + '.keras'`` whenever the rank is the best seen so far.
    The rank history is written to ``attack_rank.csv`` when training ends.

    Reads the module-level ``nruns_default``, ``maxtrc_default``,
    ``noHypoKeys`` and ``noClasses``.
    """

    def __init__(self, xTest, yTest_value, save_model_name, database_folder_train, eval_interval):
        """Store the evaluation data and output locations.

        Args:
            xTest: Nested test inputs; ``xTest[0][0]`` is the trace array.
            yTest_value: Test labels forwarded to ``eval_model``.
            save_model_name: Path prefix of the checkpoint (``.keras`` is appended).
            database_folder_train: Folder receiving ``attack_rank.csv``.
            eval_interval: Evaluate on epochs where ``epoch % eval_interval == 0``.
        """
        super().__init__()
        self.xTest = xTest
        self.yTest_value = yTest_value
        self.save_model_name = save_model_name
        self.database_folder_train = database_folder_train
        self.all_rank = []
        # NOTE(review): Keras attaches the model after construction, so this
        # is presumably None here; the attribute is kept for compatibility.
        self.best_model = self.model
        self.eval_interval = eval_interval

    def on_epoch_end(self, epoch, logs=None):
        """Evaluate the attack rank and checkpoint the model if it improved."""
        if epoch % self.eval_interval != 0:
            return

        # One row of distinct, randomly chosen test indices per run.
        batches = np.zeros((nruns_default, maxtrc_default), 'int')
        for i in range(nruns_default):
            batches[i, :] = np.random.choice(len(self.xTest[0][0]), maxtrc_default, False)
        test_rank = eval_model(self.model, nruns_default, maxtrc_default, batches, self.xTest, self.yTest_value, noHypoKeys, noClasses)

        # The first evaluation is by definition the best so far; saving it
        # guarantees a checkpoint exists even if no later epoch beats it.
        if not self.all_rank or test_rank < np.min(np.array(self.all_rank)):
            print(test_rank)
            self.model.save(self.save_model_name + '.keras')
        self.all_rank.append(test_rank)

    def on_train_end(self, logs=None):
        """Write the recorded rank history to ``attack_rank.csv``."""
        df = pd.DataFrame({'Attack Mean Rank': self.all_rank})
        df.to_csv(os.path.join(self.database_folder_train, 'attack_rank.csv'))

class MultiKeyCallback(tf.keras.callbacks.Callback):
    """Keras callback that tracks the attack rank for several keys at once.

    Every ``eval_interval`` epochs the model is evaluated once per key with
    ``eval_model``; the model is saved to ``save_model_name + '_overall.keras'``
    whenever the rank averaged over keys is the best seen so far. At the end
    of training the final model and the per-key rank history are saved.

    Reads the module-level ``args.num_trace``, ``noHypoKeys`` and ``noClasses``.
    """

    def __init__(self, xTest_multi, yTest_vals, save_model_name, database_folder_train, eval_interval):
        """Store the evaluation data and output locations.

        Args:
            xTest_multi: Per-key test trace arrays, indexed like ``yTest_vals``.
            yTest_vals: Per-key test labels.
            save_model_name: Path prefix for saved models.
            database_folder_train: Folder receiving ``attack_rank_multi.csv``.
            eval_interval: Evaluate on epochs where ``epoch % eval_interval == 0``.
        """
        super().__init__()
        self.xTest_multi = xTest_multi
        self.yTest_vals = yTest_vals
        self.save_model_name = save_model_name
        self.database_folder_train = database_folder_train
        self.all_rank = []
        # NOTE(review): Keras attaches the model after construction, so this
        # is presumably None here; the attribute is kept for compatibility.
        self.best_model = self.model
        self.maxtrc = args.num_trace  # Max trace num for multi label
        self.eval_interval = eval_interval

    def on_epoch_end(self, epoch, logs=None):
        """Evaluate every key and checkpoint the model if the average rank improved."""
        if epoch % self.eval_interval != 0:
            return

        # Data for multi-label is limited, so a single run over the first
        # ``maxtrc`` traces (in order) is used for every key.
        nruns = 1
        batches = np.zeros((nruns, self.maxtrc), 'int')
        batches[0] = np.arange(self.maxtrc)

        multi_rank = []
        for key_no in range(len(self.yTest_vals)):
            # Only the trace input is supplied, wrapped in the nested layout
            # that eval_model indexes as xTest[0][0].
            test_rank = eval_model(self.model, nruns, self.maxtrc, batches, [[self.xTest_multi[key_no]]], [self.yTest_vals[key_no]], noHypoKeys, noClasses)
            multi_rank.append(test_rank)

        if len(self.all_rank) > 0:
            all_rank_np = np.array(self.all_rank)
            print(all_rank_np.shape)
            # Per-key best-model checkpointing is intentionally disabled;
            # only the rank averaged over keys decides the checkpoint.
            all_rank_avg = np.mean(all_rank_np, axis = 1)
            print(all_rank_avg.shape)
            print('ZZZZZZZZZZZZZZZZZZZZ')
            is_best = np.mean(np.array(multi_rank)) < np.min(all_rank_avg)
        else:
            # The first evaluation is by definition the best so far; saving it
            # guarantees an '_overall' checkpoint exists.
            is_best = True

        if is_best:
            self.model.save(self.save_model_name + '_overall.keras')
        self.all_rank.append(multi_rank)

    def on_train_end(self, logs=None):
        """Save the final model and write the per-key rank history to CSV."""
        all_cols = ['Mean Rank Key No.' + str(i) for i in range(len(self.yTest_vals))]
        df = pd.DataFrame(self.all_rank, columns=all_cols)
        self.model.save(self.save_model_name + '_end.keras')
        df.to_csv(os.path.join(self.database_folder_train, 'attack_rank_multi.csv'))

def get_subset(idxs, X_train, y_train, xType):
    """Return the rows ``idxs`` of a training set laid out according to ``xType``.

    Args:
        idxs: Row selector applied to every array (anything numpy accepts
            as a first-axis index).
        X_train: Nested list ``[[traces]]`` for ``'wave'`` or
            ``[[traces], [aux0, aux1, ...]]`` for the other layouts.
        y_train: Two-axis label array aligned with the traces.
        xType: Input layout name: ``'wave'``, ``'wavebp0'``, ``'wavebp1'``,
            ``'wavebp01'``, ``'wavebp01next0'`` or ``'wavebp01next01'``.

    Returns:
        ``(sub_X_train, sub_y_train)`` with the same nesting as the inputs.

    Raises:
        ValueError: If ``xType`` is not one of the supported layouts.
    """
    # Number of auxiliary arrays stored in X_train[1] for each layout.
    aux_counts = {
        'wave': 0,
        'wavebp0': 1,
        'wavebp1': 1,
        'wavebp01': 2,
        'wavebp01next0': 3,
        'wavebp01next01': 4,
    }
    if xType not in aux_counts:
        raise ValueError('Unsupported xType: %r' % (xType,))

    profile_data = X_train[0][0]
    sub_X_train = [[profile_data[idxs, :, :]]]
    n_aux = aux_counts[xType]
    if n_aux:
        sub_X_train.append([X_train[1][k][idxs, :] for k in range(n_aux)])
    sub_y_train = y_train[idxs, :]

    return sub_X_train, sub_y_train

from sklearn.cluster import KMeans
import copy

def min_max_sampling(data, num_cluster, num_sample):
    """Pick the ``num_sample`` samples lying farthest from their nearest centroid.

    Each sample is summarised by the sum over its axis 1 (duplicated into two
    identical feature columns), the summaries are clustered with k-means, and
    the samples with the largest minimum distance to any centroid are
    returned.

    NOTE(review): the centroids live in the 2-column "sum" feature space while
    the distance below is taken against the raw sample (relying on numpy
    broadcasting). That metric is kept unchanged here; confirm it is intended
    rather than a distance in the clustered feature space.

    Args:
        data: Array with samples along axis 0.
        num_cluster: Number of k-means clusters. ``None`` falls back to 200,
            the value that was previously hard-coded.
        num_sample: Number of sample positions to return.

    Returns:
        Integer array of positions into ``data`` (unordered); empty when
        ``num_sample`` is not positive.
    """
    if num_sample <= 0:
        # Guard: slicing with [-0:] would otherwise return every index.
        return np.array([], dtype=np.intp)

    sum_data = np.sum(data, axis = 1)
    exp_data = np.expand_dims(sum_data, axis=1)
    exp_data = np.squeeze(np.hstack((exp_data, exp_data)))

    n_clusters = 200 if num_cluster is None else num_cluster
    kmeans = KMeans(n_clusters=n_clusters, random_state=0, n_init="auto").fit(exp_data)
    centroids = kmeans.cluster_centers_

    # Distance of every sample to its closest centroid.
    min_dist = [
        min(np.linalg.norm(centroid - sample) for centroid in centroids)
        for sample in data
    ]
    max_idxs = np.argpartition(min_dist, -num_sample)[-num_sample:]

    return max_idxs

import random

def random_sampling(data, exclude_ids, num_sample):
    """Draw distinct random row positions of ``data``, skipping ``exclude_ids``.

    Args:
        data: Sized collection; positions ``0 .. len(data) - 1`` form the pool.
        exclude_ids: Positions that must not be drawn (may be empty or None).
        num_sample: Number of positions requested. If fewer positions are
            available, all available positions are returned.

    Returns:
        Integer array of distinct positions, in random order.
    """
    all_ids = np.arange(len(data))
    print(len(all_ids))
    if exclude_ids is not None and len(exclude_ids) > 0:
        # Set difference removes every excluded id regardless of how many
        # there are (an element-wise == only works for equal shapes).
        available_ids = np.setdiff1d(all_ids, exclude_ids)
    else:
        available_ids = all_ids
    print(len(available_ids))
    # Sample without replacement so the same row is never selected twice.
    num_sample = min(num_sample, len(available_ids))
    rand_ids = np.random.choice(available_ids, num_sample, replace=False)
    return rand_ids

def unceratainty_sampling_by_label(model, xTrain_in, yTrain_in, num_sample, chunk_size = 20000):
    """Select the samples whose true label received the lowest predicted probability.

    Args:
        model: Object exposing ``predict``; called on a batched
            ``tf.data.Dataset`` built from ``xTrain_in``.
        xTrain_in: Candidate inputs.
        yTrain_in: Integer label (or one-element label array) per candidate,
            used to index the prediction row.
        num_sample: Number of positions to return.
        chunk_size: Unused.

    Returns:
        Array of ``num_sample`` positions into the candidates (unordered).
    """
    with tf.device("CPU"):
        dataset = tf.data.Dataset.from_tensor_slices(xTrain_in).batch(192)
    preds = model.predict(dataset)
    print(preds.shape)

    # Probability the model assigned to the true label of each candidate.
    true_label_probs = np.squeeze(
        np.array([row[yTrain_in[pos]] for pos, row in enumerate(preds)])
    )

    # The first ``num_sample`` entries of the partition are the smallest ones.
    partitioned = np.argpartition(true_label_probs, num_sample)
    return partitioned[:num_sample]

from tqdm import tqdm as tqdm

'''
def unceratainty_sampling_by_balance(model, xTrain_in, yTrain_in, exist_labels, num_sample):
    with tf.device("CPU"):
        val = tf.data.Dataset.from_tensor_slices(xTrain_in).batch(192)

    preds = model.predict(val)    
    exist_labels = np.squeeze(exist_labels)
    all_label_count = np.zeros(3329)
    alpha = 0.0
    smooth = 5
    sample_indexes = []
    sampling_rate = 10 #Take top 10 samples
    num_iter = int(num_sample/sampling_rate)
    print(exist_labels.shape)
    for i in tqdm(range(num_iter)):
        unique, counts = np.unique(exist_labels, return_counts=True) #Get the counts for sample in S
        all_label_count[unique] = counts
        print(np.std(all_label_count))
        print('----')
        range_x2 = (np.max(all_label_count) - np.min(all_label_count) ) / len(exist_labels)
        #print(range_x1)
        #print(range_x2)
        min_prob = np.min(all_label_count)
        
        pred_probs = 1- np.min(preds, axis = 1)
        sample_scores = []
        idx = 0
        for sample in preds:
            label_idx = yTrain_in[idx]
            pred_prob = 1- np.min(preds[idx]) #Get reversed prob
            idx += 1
            #pred_probs.append(pred_prob)
            sample_prob = all_label_count[label_idx] / len(exist_labels)
            x2 = 1-(sample_prob- min_prob)/ range_x2


            sample_score = alpha * math.exp( - pred_prob * smooth) + (1-alpha) * (1-math.exp(-x2[0] * smooth))
            if sample_indexes is not None and idx in sample_indexes:
                sample_scores.append(0) #Assign min score to exist indexes
            else:
                sample_scores.append(sample_score)
                #pred_probs = np.min(preds, axis = 1)
        sample_index = np.argpartition(sample_scores, -sampling_rate)[-sampling_rate:]
        
        exist_labels = np.concatenate((exist_labels, np.squeeze(yTrain_in[sample_index])))
    #pred_probs = np.squeeze(np.array(pred_probs))
    #max_idxs = np.argpartition(pred_probs, -num_sample)[-num_sample:]
    #min_idxs = np.argpartition(pred_probs, num_sample)[:num_sample]
        #sample_indexes.append(sample_index)
        sample_indexes = np.hstack((sample_indexes, sample_index))
    #print(exist_labels.shape)
    #print(sample_indexes.shape)

    return sample_indexes
'''

def unceratainty_sampling_by_balance(model, xTrain_in, yTrain_in, exist_labels, num_sample):
    """Greedily select candidates by a mix of prediction score and label rarity.

    Candidates are taken ten at a time. Each round scores every remaining
    candidate as

        alpha * exp(-(1 - min(pred)) * smooth)
        + (1 - alpha) * (1 - exp(-rarity * smooth))

    where ``rarity`` is 1 for the least frequent label among the labels
    collected so far and 0 for the most frequent one. The labels of the chosen
    candidates are added to the collected labels before the next round, and
    chosen candidates are never offered again.

    Args:
        model: Object exposing ``predict``; called on a batched
            ``tf.data.Dataset`` built from ``xTrain_in``.
        xTrain_in: Candidate inputs.
        yTrain_in: Integer label per candidate, shape ``(n,)`` or ``(n, 1)``.
        exist_labels: Integer labels already in the training set.
        num_sample: Requested number of candidates; rounded down to a
            multiple of 10.

    Returns:
        Integer array of distinct positions into the candidates.
    """
    with tf.device("CPU"):
        val = tf.data.Dataset.from_tensor_slices(xTrain_in).batch(192)

    preds = model.predict(val)
    num_candidates = len(preds)

    # One integer label per candidate (column 0 of a 2-D label array).
    labels = np.asarray(yTrain_in)
    if labels.ndim > 1:
        labels = labels.reshape(labels.shape[0], -1)[:, 0]
    labels = labels[:num_candidates].astype(np.intp)
    exist_labels = np.atleast_1d(np.squeeze(exist_labels)).astype(np.intp)

    all_label_count = np.zeros(3329)
    alpha = 0.5
    smooth = 5
    sampling_rate = 10  # Take top 10 samples per round
    num_iter = int(num_sample / sampling_rate)
    print(exist_labels.shape)

    # The prediction term does not change between rounds, so compute it once.
    uncertainty_term = alpha * np.exp(-(1 - np.min(preds, axis=1)) * smooth)

    available = np.ones(num_candidates, dtype=bool)
    picked = []
    for _ in tqdm(range(num_iter)):
        candidates = np.flatnonzero(available)
        if candidates.size == 0:
            break

        # Label counts of the samples collected so far.
        if exist_labels.size:
            unique, counts = np.unique(exist_labels, return_counts=True)
            all_label_count[unique] = counts
        lowest = np.min(all_label_count)
        spread = np.max(all_label_count) - lowest
        if spread > 0:
            # Same quantity as 1 - (p_label - p_min) / (p_max - p_min); the
            # common division by the number of collected labels cancels out.
            rarity = 1 - (all_label_count[labels[candidates]] - lowest) / spread
        else:
            # All labels equally frequent: every candidate is equally rare.
            rarity = np.ones(candidates.size)

        cand_scores = uncertainty_term[candidates] + (1 - alpha) * (1 - np.exp(-rarity * smooth))
        take = min(sampling_rate, candidates.size)
        chosen = candidates[np.argpartition(cand_scores, -take)[-take:]]

        # Remove the chosen candidates immediately so they cannot be re-selected.
        available[chosen] = False
        exist_labels = np.concatenate((exist_labels, labels[chosen]))
        picked.append(chosen)

    if not picked:
        return np.array([], dtype=np.intp)
    return np.concatenate(picked)

def unceratainty_sampling_by_balance_label(model, xTrain_in, yTrain_in, exist_labels, num_sample):
    """Greedily select candidates by true-label probability and label rarity.

    Candidates are taken ten at a time. Each round scores every remaining
    candidate as

        alpha * exp(-(1 - p_true) * smooth)
        + (1 - alpha) * (1 - exp(-rarity * smooth))

    where ``p_true`` is the probability predicted for the candidate's own
    label and ``rarity`` is 1 for the least frequent label among the labels
    collected so far and 0 for the most frequent one. The labels of the chosen
    candidates are added to the collected labels before the next round, and
    chosen candidates are never offered again.

    Args:
        model: Object exposing ``predict``; called on a batched
            ``tf.data.Dataset`` built from ``xTrain_in``.
        xTrain_in: Candidate inputs.
        yTrain_in: Integer label per candidate, shape ``(n,)`` or ``(n, 1)``.
        exist_labels: Integer labels already in the training set.
        num_sample: Requested number of candidates; rounded down to a
            multiple of 10.

    Returns:
        Integer array of distinct positions into the candidates.
    """
    with tf.device("CPU"):
        val = tf.data.Dataset.from_tensor_slices(xTrain_in).batch(192)

    pred_vector = model.predict(val)
    print(pred_vector.shape)
    num_candidates = len(pred_vector)

    # One integer label per candidate (column 0 of a 2-D label array).
    labels = np.asarray(yTrain_in)
    if labels.ndim > 1:
        labels = labels.reshape(labels.shape[0], -1)[:, 0]
    labels = labels[:num_candidates].astype(np.intp)

    # Probability the model assigned to each candidate's own label.
    preds = pred_vector[np.arange(num_candidates), labels]
    print(np.min(preds))
    print(np.max(preds))

    exist_labels = np.atleast_1d(np.squeeze(exist_labels)).astype(np.intp)
    all_label_count = np.zeros(3329)
    alpha = 0.5
    smooth = 5
    sampling_rate = 10  # Take top 10 samples per round
    num_iter = int(num_sample / sampling_rate)
    print(exist_labels.shape)

    # The prediction term does not change between rounds, so compute it once.
    uncertainty_term = alpha * np.exp(-(1 - preds) * smooth)

    available = np.ones(num_candidates, dtype=bool)
    picked = []
    for _ in tqdm(range(num_iter)):
        candidates = np.flatnonzero(available)
        if candidates.size == 0:
            break

        # Label counts of the samples collected so far.
        if exist_labels.size:
            unique, counts = np.unique(exist_labels, return_counts=True)
            all_label_count[unique] = counts
        lowest = np.min(all_label_count)
        spread = np.max(all_label_count) - lowest
        if spread > 0:
            # Same quantity as 1 - (p_label - p_min) / (p_max - p_min); the
            # common division by the number of collected labels cancels out.
            rarity = 1 - (all_label_count[labels[candidates]] - lowest) / spread
        else:
            # All labels equally frequent: every candidate is equally rare.
            rarity = np.ones(candidates.size)

        cand_scores = uncertainty_term[candidates] + (1 - alpha) * (1 - np.exp(-rarity * smooth))
        take = min(sampling_rate, candidates.size)
        chosen = candidates[np.argpartition(cand_scores, -take)[-take:]]

        # Remove the chosen candidates immediately so they cannot be re-selected.
        available[chosen] = False
        exist_labels = np.concatenate((exist_labels, labels[chosen]))
        picked.append(chosen)

    if not picked:
        return np.array([], dtype=np.intp)
    return np.concatenate(picked)

def unceratainty_sampling_by_balance_medoids(model, xTrain, yTrain, all_active_ids, num_sample, medoids_path):
    """Greedily select rows of ``xTrain[all_active_ids]`` by prediction score and label rarity.

    Candidates are taken ten at a time. Each round scores every remaining
    candidate as

        alpha * exp(-(1 - min(pred)) * smooth)
        + (1 - alpha) * (1 - exp(-rarity * smooth))

    where ``rarity`` is 1 for the least frequent label among the labels
    collected so far and 0 for the most frequent one.

    NOTE(review): both the candidate pool and the initial "collected" labels
    are derived from ``all_active_ids``; confirm that is the intended pool.
    The medoid file is not used by the scoring, so it is no longer loaded;
    ``medoids_path`` is accepted for interface compatibility only.

    Args:
        model: Object exposing ``predict``; called on a batched
            ``tf.data.Dataset`` built from ``xTrain[all_active_ids]``.
        xTrain: Input array indexable by ``all_active_ids``.
        yTrain: Integer label array indexable by ``all_active_ids``.
        all_active_ids: Row ids defining the candidate pool.
        num_sample: Requested number of candidates; rounded down to a
            multiple of 10.
        medoids_path: Unused.

    Returns:
        Integer array of distinct positions into ``all_active_ids``.
    """
    xTrain_in = xTrain[all_active_ids]
    yTrain_in = yTrain[all_active_ids]
    with tf.device("CPU"):
        val = tf.data.Dataset.from_tensor_slices(xTrain_in).batch(192)

    preds = model.predict(val)
    num_candidates = len(preds)

    # One integer label per candidate (column 0 of a 2-D label array).
    labels = np.asarray(yTrain_in)
    if labels.ndim > 1:
        labels = labels.reshape(labels.shape[0], -1)[:, 0]
    labels = labels[:num_candidates].astype(np.intp)

    # yTrain_in is already restricted to all_active_ids; indexing it with the
    # ids a second time would pick the wrong rows or run out of bounds.
    exist_labels = np.atleast_1d(np.squeeze(yTrain_in)).astype(np.intp)

    all_label_count = np.zeros(3329)
    alpha = 0.5
    smooth = 5
    sampling_rate = 10  # Take top 10 samples per round
    num_iter = int(num_sample / sampling_rate)
    print(exist_labels.shape)

    # The prediction term does not change between rounds, so compute it once.
    uncertainty_term = alpha * np.exp(-(1 - np.min(preds, axis=1)) * smooth)

    available = np.ones(num_candidates, dtype=bool)
    picked = []
    for _ in tqdm(range(num_iter)):
        candidates = np.flatnonzero(available)
        if candidates.size == 0:
            break

        # Label counts of the samples collected so far.
        if exist_labels.size:
            unique, counts = np.unique(exist_labels, return_counts=True)
            all_label_count[unique] = counts
        lowest = np.min(all_label_count)
        spread = np.max(all_label_count) - lowest
        if spread > 0:
            # Same quantity as 1 - (p_label - p_min) / (p_max - p_min); the
            # common division by the number of collected labels cancels out.
            rarity = 1 - (all_label_count[labels[candidates]] - lowest) / spread
        else:
            # All labels equally frequent: every candidate is equally rare.
            rarity = np.ones(candidates.size)

        cand_scores = uncertainty_term[candidates] + (1 - alpha) * (1 - np.exp(-rarity * smooth))
        take = min(sampling_rate, candidates.size)
        chosen = candidates[np.argpartition(cand_scores, -take)[-take:]]

        # Remove the chosen candidates immediately so they cannot be re-selected.
        available[chosen] = False
        exist_labels = np.concatenate((exist_labels, labels[chosen]))
        picked.append(chosen)

    if not picked:
        return np.array([], dtype=np.intp)
    return np.concatenate(picked)

#def GL_sampling(model, xTrain_in, yTrain_in, exist_labels, num_sample):
    #Sample by gradient length #https://papers.nips.cc/paper_files/paper/2007/file/a1519de5b5d44b31a01de013b9b51a80-Paper.pdf
    #I leave it here for later implementation
    #This is somewhat similar to uncertainty sampling, so I won't include it here for now

#I need some representative/density-based sampling here -> K_Medoids

def K_Medoids_sampling(model, xTrain, yTrain, non_active_ids, num_sample, medoids_path):
    """Rank non-active samples by the least frequent label of their cluster.

    For each of the 2000 clusters in ``200k_2000cluster/clara_labels.npy`` the
    least frequent label value is determined; every non-active sample is then
    assigned the value of its cluster, and the ``num_sample`` samples with the
    smallest values are selected.

    NOTE(review): the result contains positions into ``non_active_ids``, not
    global sample ids (the sibling ``K_Medoids_sampling_balance`` returns
    global ids) -- confirm which one the caller expects.

    Args:
        model: Unused.
        xTrain: Unused.
        yTrain: Label array indexable by sample id.
        non_active_ids: Integer ids of the samples not yet selected.
        num_sample: Number of positions to return (capped at the pool size).
        medoids_path: Unused; the cluster-assignment file path is fixed.

    Returns:
        Integer array of positions into ``non_active_ids`` (unordered).
    """
    medoid_labels = np.load('200k_2000cluster/clara_labels.npy')

    # Least frequent label value inside each cluster.
    least_dom_labels = []
    for i in tqdm(range(2000)):
        indexes = np.where(medoid_labels == i)[0]
        unique, counts = np.unique(yTrain[indexes], return_counts=True)
        least_dom_labels.append(unique[np.argmin(counts)])
    least_dom_labels = np.array(least_dom_labels)

    # One pass over the pool is enough: repeating it (as the former loop over
    # an undefined ``num_iter`` did) would only duplicate entries.
    non_active_ids = np.asarray(non_active_ids)
    all_dom_labels = least_dom_labels[medoid_labels[non_active_ids]]

    num_sample = min(num_sample, len(all_dom_labels))
    if num_sample <= 0:
        return np.array([], dtype=np.intp)
    # Partitioning at num_sample - 1 places the num_sample smallest first and
    # stays in bounds when the whole pool is requested.
    min_idxs = np.argpartition(all_dom_labels, num_sample - 1)[:num_sample]

    return min_idxs

def K_Medoids_sampling_balance(model, xTrain, yTrain, all_active_ids, non_active_ids, num_sample, medoids_path):
    """Greedily select non-active samples by cluster label share and label rarity.

    Cluster assignments are read from ``200k_2000cluster/clara_labels.npy``.
    Samples are taken ten at a time; each round scores every remaining
    non-active sample as

        alpha * exp(-(1 - share) * smooth)
        + (1 - alpha) * (1 - exp(-rarity * smooth))

    where ``share`` is the fraction of the sample's cluster carrying the
    sample's label, and ``rarity`` is 1 for the least frequent label among the
    active samples and 0 for the most frequent one. Chosen samples move from
    the non-active to the active set before the next round.

    Assumes ``yTrain`` is a 2-D integer array with the label in column 0
    (it is read as ``yTrain[idx][0]``) -- TODO confirm.

    Args:
        model: Unused.
        xTrain: Unused.
        yTrain: Label array indexable by sample id.
        all_active_ids: Integer ids of the samples already selected.
        non_active_ids: Integer ids of the samples still available.
        num_sample: Requested number of samples; rounded down to a multiple
            of 10.
        medoids_path: Unused; the cluster-assignment file path is fixed.

    Returns:
        Integer array of the selected global sample ids.
    """
    medoid_labels = np.load('200k_2000cluster/clara_labels.npy')
    label_col = np.asarray(yTrain)[:, 0]

    # Fraction of each sample's cluster that carries the sample's own label.
    cluster_share = np.zeros(len(label_col))
    for i in tqdm(range(2000)):
        indexes = np.where(medoid_labels == i)[0]
        if len(indexes) == 0:
            continue
        unique, inverse, counts = np.unique(label_col[indexes], return_inverse=True, return_counts=True)
        cluster_share[indexes] = counts[inverse] / len(indexes)

    alpha = 0.5
    smooth = 2
    all_label_count = np.zeros(3329)
    sampling_rate = 10  # Take top 10 samples per round
    num_iter = int(num_sample / sampling_rate)

    # The cluster term does not change between rounds, so compute it once.
    medoid_term = alpha * np.exp(-(1 - cluster_share) * smooth)

    non_active_ids = np.asarray(non_active_ids)
    picked = []
    for _ in tqdm(range(num_iter)):
        if len(non_active_ids) == 0:
            break

        # Label counts of the currently active samples.
        exist_labels = yTrain[all_active_ids]
        unique, counts = np.unique(exist_labels, return_counts=True)
        all_label_count[unique] = counts
        lowest = np.min(all_label_count)
        spread = np.max(all_label_count) - lowest
        if spread > 0:
            # Same quantity as 1 - (p_label - p_min) / (p_max - p_min); the
            # common division by the number of active samples cancels out.
            rarity = 1 - (all_label_count[label_col[non_active_ids]] - lowest) / spread
        else:
            # All labels equally frequent: every candidate is equally rare.
            rarity = np.ones(len(non_active_ids))

        score = medoid_term[non_active_ids] + (1 - alpha) * (1 - np.exp(-rarity * smooth))
        take = min(sampling_rate, len(non_active_ids))
        sample_index = non_active_ids[np.argpartition(score, -take)[-take:]]

        non_active_ids = np.setdiff1d(non_active_ids, sample_index)
        all_active_ids = np.hstack((all_active_ids, sample_index))
        picked.append(sample_index)

    # Concatenating integer arrays keeps the ids usable as indices.
    sample_indexes = np.concatenate(picked) if picked else np.array([], dtype=np.intp)
    print(len(sample_indexes))
    return sample_indexes

def K_Medoids_sampling_uncertainty_balance(model, xTrain, yTrain, all_active_ids, non_active_ids, num_sample, medoids_path):
    """Greedily select non-active samples by cluster share, label rarity and prediction score.

    Cluster assignments are read from ``200k_2000cluster/clara_labels.npy``.
    Samples are taken ten at a time; each round scores every remaining
    non-active sample as

        alpha_medoids * exp(-(1 - share) * smooth)
        + alpha_balance * (1 - exp(-rarity * smooth))
        + alpha_uncertain * exp(-(1 - min(pred)) * smooth)

    where ``share`` is the fraction of the sample's cluster carrying the
    sample's label, ``rarity`` is 1 for the least frequent label among the
    active samples and 0 for the most frequent one, and ``min(pred)`` is the
    smallest entry of the model's prediction for the sample. Chosen samples
    move from the non-active to the active set before the next round.

    Assumes ``yTrain`` is a 2-D integer array with the label in column 0
    (it is read as ``yTrain[idx][0]``) -- TODO confirm.

    Args:
        model: Object exposing ``predict``; called on a batched
            ``tf.data.Dataset`` built from ``xTrain[non_active_ids]``.
        xTrain: Input array indexable by ``non_active_ids``.
        yTrain: Label array indexable by sample id.
        all_active_ids: Integer ids of the samples already selected.
        non_active_ids: Integer ids of the samples still available.
        num_sample: Requested number of samples; rounded down to a multiple
            of 10.
        medoids_path: Unused; the cluster-assignment file path is fixed.

    Returns:
        Integer array of the selected global sample ids.
    """
    medoid_labels = np.load('200k_2000cluster/clara_labels.npy')
    label_col = np.asarray(yTrain)[:, 0]

    # Fraction of each sample's cluster that carries the sample's own label.
    cluster_share = np.zeros(len(label_col))
    for i in tqdm(range(2000)):
        indexes = np.where(medoid_labels == i)[0]
        if len(indexes) == 0:
            continue
        unique, inverse, counts = np.unique(label_col[indexes], return_inverse=True, return_counts=True)
        cluster_share[indexes] = counts[inverse] / len(indexes)

    non_active_ids = np.asarray(non_active_ids)
    with tf.device("CPU"):
        val = tf.data.Dataset.from_tensor_slices(xTrain[non_active_ids]).batch(192)

    preds = model.predict(val)

    # Smallest predicted probability per candidate, stored by global id.
    pred_scores = np.zeros(len(yTrain))
    pred_scores[non_active_ids] = np.min(preds, axis=1)

    alpha_balance = 0.3
    alpha_medoids = 0.5
    alpha_uncertain = 0.2
    smooth = 2
    all_label_count = np.zeros(3329)
    sampling_rate = 10  # Take top 10 samples per round
    num_iter = int(num_sample / sampling_rate)

    # Cluster and prediction terms do not change between rounds.
    static_term = (
        alpha_medoids * np.exp(-(1 - cluster_share) * smooth)
        + alpha_uncertain * np.exp(-(1 - pred_scores) * smooth)
    )

    picked = []
    for _ in tqdm(range(num_iter)):
        if len(non_active_ids) == 0:
            break

        # Label counts of the currently active samples.
        exist_labels = yTrain[all_active_ids]
        unique, counts = np.unique(exist_labels, return_counts=True)
        all_label_count[unique] = counts
        lowest = np.min(all_label_count)
        spread = np.max(all_label_count) - lowest
        if spread > 0:
            # Same quantity as 1 - (p_label - p_min) / (p_max - p_min); the
            # common division by the number of active samples cancels out.
            rarity = 1 - (all_label_count[label_col[non_active_ids]] - lowest) / spread
        else:
            # All labels equally frequent: every candidate is equally rare.
            rarity = np.ones(len(non_active_ids))

        score = static_term[non_active_ids] + alpha_balance * (1 - np.exp(-rarity * smooth))
        take = min(sampling_rate, len(non_active_ids))
        sample_index = non_active_ids[np.argpartition(score, -take)[-take:]]

        non_active_ids = np.setdiff1d(non_active_ids, sample_index)
        all_active_ids = np.hstack((all_active_ids, sample_index))
        picked.append(sample_index)

    # Concatenating integer arrays keeps the ids usable as indices.
    sample_indexes = np.concatenate(picked) if picked else np.array([], dtype=np.intp)
    print(len(sample_indexes))
    return sample_indexes

def K_Medoids_Active(model, xTrain, yTrain, all_active_ids, non_active_ids, num_sample, medoids_path):
    """Pick the candidates whose (cluster, label) cell is least covered by the labelled set.

    A count matrix ``chosen[cluster, label]`` is built from the samples in
    ``all_active_ids``. Every candidate in ``non_active_ids`` is scored with
    the count stored in its own (cluster, label) cell and the ``num_sample``
    lowest-scoring candidates are returned.

    Args:
        model: Not used by this sampler.
        xTrain: Not used by this sampler.
        yTrain: Integer labels, read as ``yTrain[i][0]``.
        all_active_ids: Indices of the samples that are already selected.
        non_active_ids: NumPy array with the candidate indices.
        num_sample: How many candidates to return. ``np.argpartition``
            requires it to be smaller than ``len(non_active_ids)``.
        medoids_path: Currently ignored; the cluster assignment is always read
            from the hard-coded file below.
            NOTE(review): presumably this should be the path that is loaded -
            confirm with the callers before switching.

    Returns:
        ``num_sample`` entries of ``non_active_ids`` (in no particular order).
    """
    num_clusters = 2000
    num_labels = 3329
    medoid_labels = np.load('200k_2000cluster/clara_labels.npy')

    # 1 for every sample that already belongs to the labelled set.
    masked_index = np.zeros(len(yTrain))
    masked_index[all_active_ids] = 1
    print(medoid_labels.shape)
    print(masked_index.shape)

    # chosen_label_wrt_medoids[c, l] = number of labelled samples of label l in cluster c.
    chosen_label_wrt_medoids = np.zeros((num_clusters, num_labels))
    for cluster in tqdm(range(num_clusters)):
        indexes = np.where(medoid_labels == cluster)[0]
        chosen_indexes = indexes[masked_index[indexes] == 1]
        # Empty clusters (or clusters without labelled members) are skipped;
        # the previous version crashed on np.argmax of an empty count array.
        if len(chosen_indexes) > 0:
            unique_active, counts_active = np.unique(yTrain[chosen_indexes], return_counts=True)
            chosen_label_wrt_medoids[cluster][unique_active] = counts_active

    # Score every candidate with the coverage of its own (cluster, label) cell.
    cand_labels = yTrain[non_active_ids][:, 0]
    cand_clusters = medoid_labels[non_active_ids]
    sample_scores = chosen_label_wrt_medoids[cand_clusters, cand_labels]
    min_idxs = np.argpartition(sample_scores, num_sample)[:num_sample]

    return non_active_ids[min_idxs]

def K_Medoids_Active_Balance(model, xTrain, yTrain, all_active_ids, non_active_ids, num_sample, medoids_path):
    """Greedy sampler combining a cluster-coverage term with a label-balance term.

    Samples are picked in rounds of ``sampling_rate`` (10). In every round each
    candidate gets the score

        alpha * exp(-(1 - chosen[cluster, label]) * smooth)
        + (1 - alpha) * (1 - exp(-x2 * smooth))

    where ``chosen`` counts the labelled samples per (cluster, label) cell and
    ``x2`` is 1 for the rarest label in the labelled set and 0 for the most
    frequent one. The highest-scoring candidates are moved from the pool to
    the labelled set and ``chosen`` is updated.

    NOTE(review): the first term grows with ``chosen[cluster, label]``, i.e.
    candidates from already well covered cells score higher, whereas
    ``K_Medoids_Active`` prefers the least covered cells - confirm this is
    intended.

    Args:
        model: Not used by this sampler.
        xTrain: Not used by this sampler.
        yTrain: Integer labels, read as ``yTrain[i][0]``.
        all_active_ids: Indices of the samples that are already selected.
        non_active_ids: NumPy array with the candidate indices.
        num_sample: Requested number of samples; ``num_sample // 10`` rounds run.
        medoids_path: Currently ignored; the cluster assignment is always read
            from the hard-coded file below.

    Returns:
        Array with the selected indices, in the dtype of ``non_active_ids``
        (the previous version returned them as float64). Fewer than requested
        are returned if the candidate pool runs out.
    """
    num_clusters = 2000
    num_labels = 3329
    medoid_labels = np.load('200k_2000cluster/clara_labels.npy')

    # 1 for every sample that already belongs to the labelled set.
    masked_index = np.zeros(len(yTrain))
    masked_index[all_active_ids] = 1
    print(medoid_labels.shape)
    print(masked_index.shape)

    # chosen_label_wrt_medoids[c, l] = number of labelled samples of label l in cluster c.
    chosen_label_wrt_medoids = np.zeros((num_clusters, num_labels))
    for cluster in tqdm(range(num_clusters)):
        indexes = np.where(medoid_labels == cluster)[0]
        chosen_indexes = indexes[masked_index[indexes] == 1]
        # Empty clusters are skipped; np.argmax on their empty counts used to crash.
        if len(chosen_indexes) > 0:
            unique_active, counts_active = np.unique(yTrain[chosen_indexes], return_counts=True)
            chosen_label_wrt_medoids[cluster][unique_active] = counts_active

    alpha = 0.5
    smooth = 2
    sampling_rate = 10  # samples taken per round
    num_iter = int(num_sample / sampling_rate)
    all_label_count = np.zeros(num_labels)
    picked = []

    for _ in tqdm(range(num_iter)):
        take = min(sampling_rate, len(non_active_ids))
        if take == 0:
            break  # candidate pool exhausted

        # Label histogram of the current labelled set.
        exist_labels = yTrain[all_active_ids]
        unique, counts = np.unique(exist_labels, return_counts=True)
        all_label_count[unique] = counts
        total = len(exist_labels)
        label_spread = np.max(all_label_count) - np.min(all_label_count)

        cand_labels = yTrain[non_active_ids][:, 0]
        cand_clusters = medoid_labels[non_active_ids]

        if label_spread > 0:
            range_x2 = label_spread / total
            min_prob = np.min(all_label_count) / total
            sample_prob = all_label_count[cand_labels] / total
            x2 = 1 - (sample_prob - min_prob) / range_x2
        else:
            # All labels equally frequent (or nothing labelled yet): treat
            # every label as equally rare instead of producing 0/0 = NaN.
            x2 = np.ones(len(non_active_ids))

        label_perc = 1 - chosen_label_wrt_medoids[cand_clusters, cand_labels]
        score = alpha * np.exp(-label_perc * smooth) + (1 - alpha) * (1 - np.exp(-x2 * smooth))

        top = np.argpartition(score, -take)[-take:]
        sample_index = non_active_ids[top]

        # Record every chosen sample under ITS OWN label (the old code reused
        # the label of the last scored candidate for all of them).
        np.add.at(
            chosen_label_wrt_medoids,
            (medoid_labels[sample_index], yTrain[sample_index][:, 0]),
            1,
        )

        non_active_ids = np.setdiff1d(non_active_ids, sample_index)
        all_active_ids = np.hstack((all_active_ids, sample_index))
        picked.append(sample_index)

    sample_indexes = np.concatenate(picked) if picked else np.array([], dtype=int)
    print(np.sum(chosen_label_wrt_medoids))
    print(len(sample_indexes))
    return sample_indexes

def K_Medoids_Active_Uncertain_Balance(model, xTrain, yTrain, all_active_ids, non_active_ids, num_sample, medoids_path):
    """Greedy sampler combining cluster coverage, model uncertainty and label balance.

    Samples are picked in rounds of ``sampling_rate`` (10). In every round each
    candidate gets the score

        alpha_medoids   * exp(-(1 - chosen[cluster, label]) * smooth)
        + alpha_uncertain * exp(-(1 - min_class_prob) * smooth)
        + alpha_balance   * (1 - exp(-x2 * smooth))

    where ``chosen`` counts the labelled samples per (cluster, label) cell,
    ``min_class_prob`` is the smallest value of the model output for that
    candidate and ``x2`` is 1 for the rarest label in the labelled set and 0
    for the most frequent one. The highest-scoring candidates are moved from
    the pool to the labelled set and ``chosen`` is updated.

    NOTE(review): the first term grows with ``chosen[cluster, label]``, so
    well covered cells score higher - confirm this is intended.

    Args:
        model: Object with a Keras-style ``predict``; run on ``xTrain[non_active_ids]``.
        xTrain: Input array indexed by sample id.
        yTrain: Integer labels, read as ``yTrain[i][0]``.
        all_active_ids: Indices of the samples that are already selected.
        non_active_ids: NumPy array with the candidate indices.
        num_sample: Requested number of samples; ``num_sample // 10`` rounds run.
        medoids_path: Currently ignored; the cluster assignment is always read
            from the hard-coded file below.

    Returns:
        Array with the selected indices, in the dtype of ``non_active_ids``
        (the previous version returned them as float64). Fewer than requested
        are returned if the candidate pool runs out.
    """
    num_clusters = 2000
    num_labels = 3329
    medoid_labels = np.load('200k_2000cluster/clara_labels.npy')

    # 1 for every sample that already belongs to the labelled set.
    masked_index = np.zeros(len(yTrain))
    masked_index[all_active_ids] = 1
    print(medoid_labels.shape)
    print(masked_index.shape)

    # Model output for every candidate; the dataset is built on the CPU.
    with tf.device("CPU"):
        val = tf.data.Dataset.from_tensor_slices(xTrain[non_active_ids]).batch(192)
    preds = model.predict(val)

    # pred_scores[i] = smallest class probability predicted for sample i.
    pred_scores = np.zeros(len(yTrain))
    pred_scores[non_active_ids] = np.min(preds, axis=1)

    # chosen_label_wrt_medoids[c, l] = number of labelled samples of label l in cluster c.
    chosen_label_wrt_medoids = np.zeros((num_clusters, num_labels))
    for cluster in tqdm(range(num_clusters)):
        indexes = np.where(medoid_labels == cluster)[0]
        chosen_indexes = indexes[masked_index[indexes] == 1]
        # Empty clusters are skipped; np.argmax on their empty counts used to crash.
        if len(chosen_indexes) > 0:
            unique_active, counts_active = np.unique(yTrain[chosen_indexes], return_counts=True)
            chosen_label_wrt_medoids[cluster][unique_active] = counts_active

    alpha_balance = 0.3
    alpha_medoids = 0.5
    alpha_uncertain = 0.2
    smooth = 2
    sampling_rate = 10  # samples taken per round
    num_iter = int(num_sample / sampling_rate)
    all_label_count = np.zeros(num_labels)
    picked = []

    for _ in tqdm(range(num_iter)):
        take = min(sampling_rate, len(non_active_ids))
        if take == 0:
            break  # candidate pool exhausted

        # Label histogram of the current labelled set.
        exist_labels = yTrain[all_active_ids]
        unique, counts = np.unique(exist_labels, return_counts=True)
        all_label_count[unique] = counts
        total = len(exist_labels)
        label_spread = np.max(all_label_count) - np.min(all_label_count)

        cand_labels = yTrain[non_active_ids][:, 0]
        cand_clusters = medoid_labels[non_active_ids]

        if label_spread > 0:
            range_x2 = label_spread / total
            min_prob = np.min(all_label_count) / total
            sample_prob = all_label_count[cand_labels] / total
            x2 = 1 - (sample_prob - min_prob) / range_x2
        else:
            # All labels equally frequent (or nothing labelled yet): treat
            # every label as equally rare instead of producing 0/0 = NaN.
            x2 = np.ones(len(non_active_ids))

        label_perc = 1 - chosen_label_wrt_medoids[cand_clusters, cand_labels]
        uncertain_score = 1 - pred_scores[non_active_ids]
        score = (
            alpha_medoids * np.exp(-label_perc * smooth)
            + alpha_uncertain * np.exp(-uncertain_score * smooth)
            + alpha_balance * (1 - np.exp(-x2 * smooth))
        )

        top = np.argpartition(score, -take)[-take:]
        sample_index = non_active_ids[top]

        # Record every chosen sample under ITS OWN label (the old code reused
        # the label of the last scored candidate for all of them).
        np.add.at(
            chosen_label_wrt_medoids,
            (medoid_labels[sample_index], yTrain[sample_index][:, 0]),
            1,
        )

        non_active_ids = np.setdiff1d(non_active_ids, sample_index)
        all_active_ids = np.hstack((all_active_ids, sample_index))
        picked.append(sample_index)

    sample_indexes = np.concatenate(picked) if picked else np.array([], dtype=int)
    print(np.sum(chosen_label_wrt_medoids))
    print(len(sample_indexes))
    return sample_indexes

def K_Medoids_Update(all_active_ids, sampled_ids, medoids_path):
    """Return the labelled samples that share a cluster with any newly sampled id.

    Args:
        all_active_ids: NumPy array with the indices of the labelled samples.
        sampled_ids: Integer indices of the newly sampled samples.
        medoids_path: Currently ignored; the cluster assignment is always read
            from the hard-coded file below.

    Returns:
        Entries of ``all_active_ids`` whose cluster appears among the clusters
        of ``sampled_ids``, grouped by ascending cluster id. Empty when
        ``sampled_ids`` is empty (this used to raise ``AttributeError``).
    """
    medoid_labels = np.load('200k_2000cluster/clara_labels.npy')
    # Clusters touched by the new samples.
    unique_labels = np.unique(medoid_labels[sampled_ids])

    # Cluster of every labelled sample; invariant across the loop below.
    active_clusters = medoid_labels[all_active_ids]
    per_cluster = [np.where(active_clusters == label)[0] for label in unique_labels]
    if per_cluster:
        update_indexes = np.concatenate(per_cluster).astype(int)
    else:
        update_indexes = np.array([], dtype=int)
    print(update_indexes.shape)

    return all_active_ids[update_indexes]

def K_Medoids_Label_Update(all_active_ids, sampled_ids, yTrain, medoids_path):
    """Collect labelled samples that match a sampled id in both label and cluster.

    For every id in ``sampled_ids`` the still-unmatched labelled samples with
    the same label and the same cluster are collected; matched samples are
    removed from the pool before the next sampled id is processed.

    Args:
        all_active_ids: Indices of the labelled samples.
        sampled_ids: Integer indices of the newly sampled samples.
        yTrain: Labels with one value per sample (shape ``(N,)`` or ``(N, 1)``).
        medoids_path: Currently ignored; the cluster assignment is always read
            from the hard-coded file below.

    Returns:
        The matched labelled indices followed by ``sampled_ids``. The matched
        part is always integer typed (it used to degrade to float64 when
        nothing matched).
    """
    medoid_labels = np.load('200k_2000cluster/clara_labels.npy')
    # One scalar label per sample, whatever the trailing singleton dimensions.
    labels = np.asarray(yTrain).reshape(len(yTrain), -1)[:, 0]
    remaining = np.asarray(all_active_ids, dtype=int)

    matched_parts = []
    for sampled_id in tqdm(sampled_ids):
        is_match = (labels[remaining] == labels[sampled_id]) & (
            medoid_labels[remaining] == medoid_labels[sampled_id]
        )
        hits = remaining[is_match]
        matched_parts.append(hits)
        # Matched samples are not considered again (setdiff1d also sorts and
        # de-duplicates the pool, as before).
        remaining = np.setdiff1d(remaining, hits)

    matched = np.concatenate(matched_parts) if matched_parts else np.array([], dtype=int)
    print(len(matched))
    update_indexes = np.concatenate((matched, sampled_ids))
    print(len(update_indexes))
    return update_indexes

#(model, xTrain_original, yTrain_original,all_active_ids, non_active_ids, num_sample = args.num_sample)
def margin_sampling_by_balance(model, xTrain, yTrain, all_active_ids, non_active_ids, num_sample):
    """Greedy sampler combining the top-2 prediction margin with a label-balance term.

    The margin of a candidate is the absolute difference between its two
    largest model outputs. Samples are picked in rounds of ``sampling_rate``
    (10); in every round each candidate gets the score

        (1 - alpha) * (1 - exp(-x2 * smooth)) + alpha * exp(-(1 - margin) * smooth)

    where ``x2`` is 1 for the rarest label in the labelled set and 0 for the
    most frequent one, and the highest-scoring candidates are selected.

    The predictions are computed for ``non_active_ids``. The previous version
    predicted on ``all_active_ids`` but stored the margins under the
    candidate ids, so every candidate was scored with another sample's margin.

    NOTE(review): the margin term grows with the margin, i.e. confidently
    classified candidates score higher - confirm this is intended.

    Args:
        model: Object with a Keras-style ``predict``.
        xTrain: Input array indexed by sample id.
        yTrain: Integer labels, read as ``yTrain[i][0]``.
        all_active_ids: Indices of the samples that are already selected.
        non_active_ids: NumPy array with the candidate indices.
        num_sample: Requested number of samples; ``num_sample // 10`` rounds run.

    Returns:
        Array with the selected indices, in the dtype of ``non_active_ids``
        (the previous version returned them as float64). Fewer than requested
        are returned if the candidate pool runs out.
    """
    with tf.device("CPU"):
        val = tf.data.Dataset.from_tensor_slices(xTrain[non_active_ids]).batch(192)

    preds = model.predict(val)
    print(preds[0])

    # Margin between the two most likely classes of every candidate.
    top_two = np.partition(preds, -2, axis=1)[:, -2:]
    margins = np.abs(top_two[:, 0] - top_two[:, 1])

    pred_scores = np.zeros(len(yTrain))
    pred_scores[non_active_ids] = margins

    alpha = 0.5
    smooth = 2
    sampling_rate = 10  # samples taken per round
    num_iter = int(num_sample / sampling_rate)
    all_label_count = np.zeros(3329)
    picked = []

    for _ in tqdm(range(num_iter)):
        take = min(sampling_rate, len(non_active_ids))
        if take == 0:
            break  # candidate pool exhausted

        # Label histogram of the current labelled set.
        exist_labels = yTrain[all_active_ids]
        unique, counts = np.unique(exist_labels, return_counts=True)
        all_label_count[unique] = counts
        total = len(exist_labels)
        label_spread = np.max(all_label_count) - np.min(all_label_count)

        cand_labels = yTrain[non_active_ids][:, 0]
        if label_spread > 0:
            range_x2 = label_spread / total
            min_prob = np.min(all_label_count) / total
            sample_prob = all_label_count[cand_labels] / total
            x2 = 1 - (sample_prob - min_prob) / range_x2
        else:
            # All labels equally frequent (or nothing labelled yet): treat
            # every label as equally rare instead of producing 0/0 = NaN.
            x2 = np.ones(len(non_active_ids))

        uncertain_score = 1 - pred_scores[non_active_ids]
        score = (1 - alpha) * (1 - np.exp(-x2 * smooth)) + alpha * np.exp(-uncertain_score * smooth)

        top = np.argpartition(score, -take)[-take:]
        sample_index = non_active_ids[top]

        non_active_ids = np.setdiff1d(non_active_ids, sample_index)
        all_active_ids = np.hstack((all_active_ids, sample_index))
        picked.append(sample_index)

    sample_indexes = np.concatenate(picked) if picked else np.array([], dtype=int)
    print(len(sample_indexes))
    unique, counts = np.unique(sample_indexes, return_counts=True)
    print(len(unique))
    return sample_indexes

def margin_label_sampling_by_balance(model, xTrain, yTrain, all_active_ids, non_active_ids, num_sample):
    """Greedy sampler combining a label-aware prediction margin with a label-balance term.

    The margin of a candidate is the absolute difference between its two
    largest model outputs when the output for the true label equals one of
    them; otherwise it is the absolute difference between the true-label
    output and the largest output. Samples are picked in rounds of
    ``sampling_rate`` (10); in every round each candidate gets the score

        (1 - alpha) * (1 - exp(-x2 * smooth)) + alpha * exp(-(1 - margin) * smooth)

    where ``x2`` is 1 for the rarest label in the labelled set and 0 for the
    most frequent one, and the highest-scoring candidates are selected.

    The predictions and true labels are taken for ``non_active_ids``. The
    previous version used ``all_active_ids`` for both but stored the margins
    under the candidate ids, so every candidate was scored with another
    sample's margin.

    Args:
        model: Object with a Keras-style ``predict``.
        xTrain: Input array indexed by sample id.
        yTrain: Integer labels, read as ``yTrain[i][0]``.
        all_active_ids: Indices of the samples that are already selected.
        non_active_ids: NumPy array with the candidate indices.
        num_sample: Requested number of samples; ``num_sample // 10`` rounds run.

    Returns:
        Array with the selected indices, in the dtype of ``non_active_ids``
        (the previous version returned them as float64). Fewer than requested
        are returned if the candidate pool runs out.
    """
    with tf.device("CPU"):
        val = tf.data.Dataset.from_tensor_slices(xTrain[non_active_ids]).batch(192)

    preds = model.predict(val)
    print(preds[0])

    # Output for the true label and the two largest outputs of every candidate.
    true_labels = yTrain[non_active_ids][:, 0]
    true_prob = preds[np.arange(len(preds)), true_labels]
    top_two = np.partition(preds, -2, axis=1)[:, -2:]
    label_in_top_two = (true_prob == top_two[:, 0]) | (true_prob == top_two[:, 1])
    margins = np.where(
        label_in_top_two,
        np.abs(top_two[:, 0] - top_two[:, 1]),
        np.abs(true_prob - np.max(top_two, axis=1)),
    )

    pred_scores = np.zeros(len(yTrain))
    pred_scores[non_active_ids] = margins

    alpha = 0.5
    smooth = 2
    sampling_rate = 10  # samples taken per round
    num_iter = int(num_sample / sampling_rate)
    all_label_count = np.zeros(3329)
    picked = []

    for _ in tqdm(range(num_iter)):
        take = min(sampling_rate, len(non_active_ids))
        if take == 0:
            break  # candidate pool exhausted

        # Label histogram of the current labelled set.
        exist_labels = yTrain[all_active_ids]
        unique, counts = np.unique(exist_labels, return_counts=True)
        all_label_count[unique] = counts
        total = len(exist_labels)
        label_spread = np.max(all_label_count) - np.min(all_label_count)

        cand_labels = yTrain[non_active_ids][:, 0]
        if label_spread > 0:
            range_x2 = label_spread / total
            min_prob = np.min(all_label_count) / total
            sample_prob = all_label_count[cand_labels] / total
            x2 = 1 - (sample_prob - min_prob) / range_x2
        else:
            # All labels equally frequent (or nothing labelled yet): treat
            # every label as equally rare instead of producing 0/0 = NaN.
            x2 = np.ones(len(non_active_ids))

        uncertain_score = 1 - pred_scores[non_active_ids]
        score = (1 - alpha) * (1 - np.exp(-x2 * smooth)) + alpha * np.exp(-uncertain_score * smooth)

        top = np.argpartition(score, -take)[-take:]
        sample_index = non_active_ids[top]

        non_active_ids = np.setdiff1d(non_active_ids, sample_index)
        all_active_ids = np.hstack((all_active_ids, sample_index))
        picked.append(sample_index)

    sample_indexes = np.concatenate(picked) if picked else np.array([], dtype=int)
    print(len(sample_indexes))
    unique, counts = np.unique(sample_indexes, return_counts=True)
    print(len(unique))
    print(sample_indexes[:10])
    return sample_indexes

from scipy.special import rel_entr
from math import log2
from scipy import stats
 
# calculate the kl divergence
def kl_divergence(p, q):
    """Return the Kullback-Leibler divergence KL(p || q) in bits.

    Terms with ``p[i] == 0`` contribute nothing (the usual ``0 * log 0 = 0``
    convention); previously they raised a math domain error in ``log2``.

    Args:
        p: Sequence of probabilities.
        q: Sequence of probabilities, at least as long as ``p``.

    Returns:
        ``sum(p[i] * log2(p[i] / q[i]))`` over the entries with ``p[i] != 0``.
    """
    return sum(p_i * log2(p_i / q[i]) for i, p_i in enumerate(p) if p_i != 0)

# calculate the js divergence
def js_divergence(p, q):
    """Return the Jensen-Shannon divergence of ``p`` and ``q``.

    It is the average of the KL divergences of ``p`` and ``q`` from their
    midpoint ``0.5 * (p + q)``. The inputs must support element-wise ``+``
    (e.g. NumPy arrays).
    """
    midpoint = 0.5 * (p + q)
    p_to_mid = kl_divergence(p, midpoint)
    q_to_mid = kl_divergence(q, midpoint)
    return 0.5 * p_to_mid + 0.5 * q_to_mid

def epsilon_onehot(label, epsilon):
    """Build a smoothed one-hot vector of length ``NumSKPVclasses``.

    Every position different from ``label`` holds ``epsilon``; the position
    equal to ``label`` holds ``1 - (NumSKPVclasses - 1) * epsilon``, so the
    entries sum to one whenever ``label`` is a valid class id.
    """
    class_ids = np.arange(NumSKPVclasses)
    g_x = np.zeros(NumSKPVclasses)
    g_x[class_ids != label] = epsilon
    g_x[class_ids == label] = 1 - (NumSKPVclasses - 1) * epsilon
    return g_x

def KL_Update(model, xTrain, yTrain, all_active_ids, sampled_ids, use_medoids=True, KL_STD = 1):
    """Select labelled samples whose prediction diverges strongly from their label.

    For every id the KL divergence between the model output and the
    epsilon-smoothed one-hot encoding of its label is computed. Ids whose
    divergence exceeds ``mean + KL_STD * std`` of all divergences are returned.

    Args:
        model: Object with a Keras-style ``predict``.
        xTrain: Input array indexed by sample id.
        yTrain: Labels indexed by sample id.
        all_active_ids: Indices of the labelled samples.
        sampled_ids: Newly sampled indices; removed from ``all_active_ids``
            first when ``use_medoids`` is true.
        use_medoids: Whether to exclude ``sampled_ids`` from the evaluation.
        KL_STD: Number of standard deviations above the mean used as threshold.

    Returns:
        The subset of the evaluated ids above the threshold.
    """
    epsilon = 1e-10
    print(len(np.unique(all_active_ids)))
    if use_medoids:
        # Evaluate only T \ S: the labelled samples that were not just sampled.
        all_active_ids = np.setdiff1d(all_active_ids, sampled_ids)

    with tf.device("CPU"):
        val = tf.data.Dataset.from_tensor_slices(xTrain[all_active_ids]).batch(192)
    preds = model.predict(val)

    # Row ``row`` of ``preds`` belongs to the id at position ``row``.
    divergences = np.array([
        kl_divergence(preds[row], epsilon_onehot(yTrain[idx], epsilon))
        for row, idx in enumerate(tqdm(all_active_ids))
    ])

    threshold = np.mean(divergences) + KL_STD * np.std(divergences)
    above_threshold = np.where(divergences > threshold)[0]
    return all_active_ids[above_threshold]

#Phase 3 strategies
from scipy.spatial import distance

def get_similar_samples(xTrain, yTrain, target_samples, target_labels, num_sample, num_samekey):
    """Select training indices for every target trace based on Euclidean distance.

    For each target trace the distance to every row of ``xTrain`` is computed.
    ``num_samekey`` indices are taken from the rows whose label equals the
    target's key and ``num_sample - num_samekey`` from the remaining rows.

    NOTE(review): both selections take the LARGEST distances
    (``argpartition(..., -k)[-k:]``), i.e. the least similar rows, although
    the function name suggests the nearest ones - confirm which is intended.

    Args:
        xTrain: Training traces, iterated row by row.
        yTrain: Training labels compared against the target key.
        target_samples: Target traces; squeezed, then iterated as
            ``[key][trace]``.
        target_labels: Key for every entry of the first axis of ``target_samples``.
        num_sample: Indices selected per target trace.
        num_samekey: How many of them come from rows with the target's key.

    Returns:
        Array built from the per-key lists of per-trace index arrays.
    """
    print(target_samples.shape)
    target_samples = np.squeeze(target_samples)
    print(target_samples.shape)

    num_otherkey = num_sample - num_samekey
    every_row = np.arange(len(yTrain))
    pooled = [np.array([])]  # only used for the diagnostic counts below
    sample_index_disjoint = []

    for key_pos, key_sample in enumerate(tqdm(target_samples)):
        target_key = target_labels[key_pos]
        picks_for_key = []
        for target_sample in key_sample:
            distances = np.array([distance.euclidean(row, target_sample) for row in xTrain])

            # Rows sharing the target's key versus all other rows.
            same_rows = np.where(yTrain == target_key)[0]
            other_rows = np.setdiff1d(every_row, same_rows)

            same_top = np.argpartition(distances[same_rows], -num_samekey)[-num_samekey:]
            other_top = np.argpartition(distances[other_rows], -num_otherkey)[-num_otherkey:]

            sample_index = np.concatenate((same_rows[same_top], other_rows[other_top]))
            pooled.append(sample_index)
            picks_for_key.append(sample_index)
        sample_index_disjoint.append(picks_for_key)

    sample_indexes = np.concatenate(pooled)
    print(len(sample_indexes))
    sample_indexes = np.unique(sample_indexes)
    sample_index_disjoint = np.array(sample_index_disjoint)
    print(len(sample_indexes))
    print(len(sample_index_disjoint))
    print(sample_index_disjoint.shape)
    return sample_index_disjoint

def get_similar_alpha_KL_GT(model, xTrain_original, yTrain_original, xTest_multi, yTest_multi, num_sample):
    """For every attack trace, pick the training samples with the closest model output.

    The model output of every training sample is computed once. For each trace
    of each key in ``xTest_multi`` the KL divergence between the trace's
    prediction and every training prediction is evaluated with
    ``scipy.stats.entropy`` and the ``num_sample`` training indices with the
    smallest divergence are kept.

    The former ground-truth loop was removed: its result (``KL_GT``) was never
    used, it allocated an ``N x 3329`` float matrix, and it overwrote whole
    rows indexed by the label value instead of one entry per sample.

    Args:
        model: Object with a Keras-style ``predict``.
        xTrain_original: Training inputs.
        yTrain_original: Not used by the current scoring.
        xTest_multi: Attack traces iterated per key; every entry is expanded
            with a trailing axis before prediction.
        yTest_multi: Not used.
        num_sample: Training indices kept per attack trace; must be smaller
            than the number of training samples (np.argpartition requirement).

    Returns:
        Array built from the per-key lists of per-trace index arrays.
    """
    with tf.device("CPU"):
        val = tf.data.Dataset.from_tensor_slices(xTrain_original).batch(192)

    input_preds = model.predict(val)

    print(xTest_multi.shape)
    sample_index_disjoint = []
    # Collected across ALL keys for the diagnostic counts printed at the end
    # (it used to be reset per key and was undefined for an empty input).
    sample_indexes = []

    for key_sample in tqdm(xTest_multi):
        sample = np.expand_dims(key_sample, axis = 2)
        print(sample.shape)
        preds = model.predict(sample)
        print(preds.shape)
        sample_key = []
        for l_x in preds:
            KL_score = np.array([
                stats.entropy(l_x, input_preds[i])
                for i in tqdm(range(len(input_preds)))
            ])
            # Smallest divergence = most similar training prediction.
            sample_index = np.argpartition(KL_score, num_sample)[:num_sample]
            sample_key.append(sample_index)
            sample_indexes.append(sample_index)
        sample_index_disjoint.append(sample_key)

    print(len(sample_indexes))
    sample_indexes = np.unique(sample_indexes)
    sample_index_disjoint = np.array(sample_index_disjoint)
    print(len(sample_indexes))
    print(len(sample_index_disjoint))
    print(sample_index_disjoint.shape)
    return sample_index_disjoint

def load_multi_attack(data_path):
    """Load the ``data`` and ``label`` arrays from an ``.npz`` archive.

    The archive is opened in a ``with`` block so its file handle is closed
    once both arrays have been read.

    Args:
        data_path: Path of the ``.npz`` file.

    Returns:
        Tuple ``(data, labels)``.
    """
    with np.load(data_path) as infile:
        data = infile['data']
        labels = infile['label']

    return data, labels

def unceratainty_sampling(model, xTrain_in, yTrain_in, num_sample):
    """Return the positions of the ``num_sample`` samples with the smallest minimum class output.

    Args:
        model: Object with a ``predict`` method returning one row per sample.
        xTrain_in: Inputs passed to ``model.predict``.
        yTrain_in: Not used.
        num_sample: Number of positions to return; must be smaller than the
            number of samples (np.argpartition requirement).

    Returns:
        ``num_sample`` row positions (in no particular order).

    NOTE(review): the score is the MINIMUM over the classes, not the
    probability of the predicted or true class - confirm this is intended.
    """
    class_outputs = model.predict(xTrain_in)
    lowest_output = np.min(class_outputs, axis = 1)
    partitioned = np.argpartition(lowest_output, num_sample)
    return partitioned[:num_sample]

from sklearn.preprocessing import StandardScaler
from scipy import stats

def normalize(timeseries):
    """Min-max scale ``timeseries`` to the range [0, 1].

    A constant series (maximum equal to minimum) is mapped to all zeros;
    previously it produced NaN through a 0/0 division.

    Args:
        timeseries: Array-like of numbers.

    Returns:
        Array of the same shape with the scaled values.
    """
    timeseries = np.asarray(timeseries)
    lowest = timeseries.min()
    span = timeseries.max() - lowest
    if span == 0:
        return np.zeros(timeseries.shape, dtype=float)
    return (timeseries - lowest) / span

def z_norm(timeseries):
    """Return the z-score of ``timeseries`` as computed by ``scipy.stats.zscore``."""
    standardized = stats.zscore(timeseries)
    return standardized

def normalize_data_per_trace(data):
    """Apply ``normalize`` to every trace of ``data`` in place and return ``data``.

    The shape of ``data`` is printed first. Each row is overwritten with its
    scaled version, so the caller's array is modified.
    """
    print(data.shape)
    for row_no, trace in enumerate(data):
        data[row_no] = normalize(trace)

    return data

def z_normalize_data_per_trace(data):
    """Apply ``z_norm`` to every trace of ``data`` in place and return ``data``.

    The shape of ``data`` is printed first. Each row is overwritten with its
    standardized version, so the caller's array is modified.
    """
    print(data.shape)
    for row_no, trace in enumerate(data):
        data[row_no] = z_norm(trace)

    return data

def cummulative_transform(data):
    """Replace every trace of ``data`` with its cumulative sum, in place, and return ``data``.

    The sums are accumulated as floats (``dtype=float``).
    """
    for row_no, trace in enumerate(data):
        data[row_no] = np.cumsum(trace, dtype=float)
    return data


def create_training_data_optimize(args, data_path, sKeyNo, trainPortion, xType, yType, is_test, start_trace, end_trace, add_num):
    """Load traces and build the training and validation arrays.

    The first ``end_trace - start_trace + add_num`` traces become the training
    set. They are optionally min-max (``args.normalize == 1``) or z-score
    (any other non-zero value) normalized, combined with a cumulative sum
    applied before (``args.cummulative == 1``) or after
    (``args.cummulative == 2``) the normalization. The validation set is the
    slice after the training traces when ``is_test`` is true, otherwise the
    traces listed in ``val_ids.npz``.

    NOTE(review): the validation traces are taken from ``trace_profiling``
    without the normalization/cumulative transform applied to the training
    slice (the per-trace helpers modify their input in place, so only traces
    inside the training slice would be affected, and only if the slice is a
    view) - confirm that this mismatch is intended.

    Args:
        args: Namespace providing ``normalize`` and ``cummulative``.
        data_path: Trace file passed to the loader.
        sKeyNo: Key number passed to the test loader.
        trainPortion: Not used.
        xType: Not used.
        yType: Not used.
        is_test: Selects the loader and how the validation set is built.
        start_trace: First trace index.
        end_trace: Last trace index (exclusive) before the validation block.
        add_num: Extra traces appended to the training set size.

    Returns:
        Tuple ``(xTrain, yTrain, xVal, yVal, yTrain_value, yVal_value)`` where
        the ``y*`` arrays are one-hot encoded and the ``*_value`` arrays hold
        the raw labels.
    """
    val_num = 300000
    # Close the archive as soon as the ids are read.
    with np.load('val_ids.npz') as val_file:
        val_ids = val_file['ids']
    print(len(val_ids))
    print(val_ids[:10])
    end_val_trace = end_trace + val_num
    if is_test:
        (trace_profiling, bp_profiling, skpv_profiling) = load_meta_trace_file_from_test(data_path, sKeyNo)
    else:
        (trace_profiling, bp_profiling, skpv_profiling) = load_meta_trace_files(data_path, start_trace, end_val_trace)

    print(trace_profiling.shape)

    dataSize = end_trace - start_trace + add_num
    y_train_skpv = to_categorical(skpv_profiling, num_classes=NumSKPVclasses)

    train_traces = trace_profiling[:dataSize]
    if args.normalize == 0:
        prepared = train_traces
    else:
        if args.normalize == 1:
            scale = normalize_data_per_trace
        else:
            scale = z_normalize_data_per_trace
        if args.cummulative == 1:
            prepared = scale(cummulative_transform(train_traces))
        elif args.cummulative == 2:
            prepared = cummulative_transform(scale(train_traces))
        else:
            prepared = scale(train_traces)
    # Add the trailing channel axis expected by the model input.
    xTrain = np.expand_dims(prepared, axis = 2)
    print(xTrain[0][:10])

    if is_test:
        xVal = np.expand_dims(trace_profiling[dataSize:end_val_trace], axis = 2)
        yVal = y_train_skpv[dataSize:end_val_trace]
        yVal_value = skpv_profiling[dataSize:end_val_trace]
    else:
        xVal = np.expand_dims(trace_profiling[val_ids], axis = 2)
        yVal = y_train_skpv[val_ids]
        yVal_value = skpv_profiling[val_ids]

    yTrain = y_train_skpv[:dataSize]
    yTrain_value = skpv_profiling[:dataSize]

    return xTrain, yTrain, xVal, yVal, yTrain_value, yVal_value
def combine_loss(y_true, y_pred):
    """Weighted sum of two categorical cross-entropies against the same prediction.

    ``y_true`` carries two target blocks side by side: columns ``[:3329]`` and
    columns ``[3329:]``. Each block is compared with ``y_pred`` and the two
    losses are mixed as ``alpha * first + (1 - alpha) * second`` with
    ``alpha = args.loss_alpha`` (read from the module-level ``args``).
    """
    weight = args.loss_alpha
    cross_entropy = tf.keras.losses.CategoricalCrossentropy()
    first_loss = cross_entropy(y_true[:,:3329], y_pred)
    second_loss = cross_entropy(y_true[:,3329:], y_pred)
    return weight * first_loss + (1-weight)*second_loss


def train_model_multiEpochs(xType, database_folder_train, modelLogFolder, logTrainedModel_byFile_folder, logTrainedModel_byEp_folder, logFilename, MLmodel_detail, sKeyNo, class_weight, period, maxEpochs, train_batch_size, args):
    """Fine-tune the saved model once per row of pre-computed sampled trace ids.

    The function loads the profiling data, a single-key test set and two
    multi-key attack sets, then loops over the rows of the id matrix stored in
    ``args.sampling_file`` (restricted to ``args.start_key:args.end_key``).
    For every row it reloads the model from the module-level
    ``trained_model_path``, optionally freezes layers (``args.update_type``),
    compiles it with RMSprop, builds ``tf.data`` pipelines from the selected
    traces and calls ``fit`` for ``args.subtrain_interval`` epochs.  Per-iteration
    artefacts are written below ``database_folder_train/it_<n>``.

    Besides its parameters the function reads many module-level names that are
    defined outside this block (e.g. ``trained_model_path``, ``data_path``,
    ``yType``, ``testPortion``, ``noClasses``, ``NumSKPVclasses``,
    ``create_training_data_optimize``, ``load_multi_attack``,
    ``CustomCallback``, ``MultiKeyCallback``, ``K_Medoids_Update``).

    Args:
        xType: forwarded to ``subModels_gen`` and ``create_training_data_optimize``.
        database_folder_train: output directory; holds ``log.csv`` and one
            ``it_<n>`` sub-directory per iteration.
        modelLogFolder, logFilename, MLmodel_detail: only forwarded to
            ``subModels_gen`` when ``args.train_type == 'baseline'``.
        logTrainedModel_byFile_folder, logTrainedModel_byEp_folder, period,
            maxEpochs: not read anywhere in this function.
        sKeyNo: forwarded to ``create_training_data_optimize``.
        class_weight: never read; it is rebound inside the ``'weighted'``
            branch and the resulting dict is not passed to ``fit``.
        train_batch_size: batch size used for the training/validation datasets.
        args: parsed command-line namespace (see the individual uses below).

    Returns:
        None.  Results are produced through prints, saved ``.npy`` files and
        whatever the callbacks write.
    """
    if args.train_type == 'baseline':
        model = subModels_gen(xType, noConv1Dbranch, noBPbranch, noLayers, tracelen, NumBPinput, MLmodel_detail, modelLogFolder, logFilename, classes=noClasses)
    else:
        model = load_model(trained_model_path)
    gc.collect()
    model.summary()
    print('load success')

    # NOTE(review): this list is never read; base_model is rebound at the top
    # of every loop iteration below.
    base_model = model.layers[:-1]

    trainPortion = 1.0

    # Profiling data (is_test=False).  The one-hot labels returned here are
    # replaced just below by the integer label values.
    xTrain_original, yTrain_original, xVal, yVal, yTrain_value, yVal_value = create_training_data_optimize(args, data_path,sKeyNo, trainPortion, xType, yType,False, args.start_trace, args.end_trace, args.add_num)
    #xTrain_Pool, yTrain_Pool, _, _, _, _ = create_training_data_form(data_path,sKeyNo, trainPortion, xType, yType,False, args.end_trace, 100000)
    # Keep labels as an (N, 1) column of class values; one-hot encoding is done
    # later when the tf.data pipelines are built.
    yTrain_original = np.expand_dims(yTrain_value, axis = 1)
    yVal = yVal_value
    # Test data from "KYBER51.H5" (is_test=True).  This rebinds yVal_value, but
    # yVal above already holds the validation labels of the profiling set.
    xTest, yTest, _xVal_, yVal_ , yTest_value, yVal_value = create_training_data_optimize(args, "KYBER51.H5", sKeyNo, testPortion, xType, yType,True, args.start_trace, args.end_trace, args.add_num)
    # Wrapped in two list levels; passed in this form to CustomCallback below.
    xTest = [[xTest]]
    print(xTest[0][0].shape)
    print(yTest_value)
    print(yTest.shape)
    xTest_multi, yTest_multi = load_multi_attack('attack_multi_data.npz')
    #xTest_300, yTest_300 = load_multi_attack('attack_multi_data_300key.npz')
    # Evaluation set used for the multi-key callback inside the loop.
    xTest_300, yTest_300 = load_multi_attack(args.eval_path)
    print(yTest_value)
    print(yTest_multi)
    xTest_multi = np.expand_dims(xTest_multi, axis = 3)
    print(xTest_multi.shape)
    # One CSV log shared by all iterations (append mode, ';' separated).
    csv_logger = CSVLogger(filename=database_folder_train+'/log.csv', append=True, separator=';')
    all_active_ids = np.load(args.all_ids) #Load existing samples
    unique = np.unique(all_active_ids)
    print(len(unique))

    # Ids in [start_trace, end_trace) that are not part of the loaded id set.
    all_sample_ids = np.arange(args.start_trace, args.end_trace)
    non_active_ids = np.setdiff1d(all_sample_ids, all_active_ids)
    print(all_active_ids.shape)
    print(non_active_ids.shape)
    #xTrain_Pool_original, yTrain_Pool_original = copy.deepcopy(xTrain), copy.deepcopy(yTrain) #Need them for correct index

    #xTrain_Pool = [[xTrain_Pool]]
    # Pool = traces not yet selected; only its shapes are printed further down.
    xTrain_Pool, yTrain_Pool = xTrain_original[non_active_ids], yTrain_original[non_active_ids]
    # Initial training subset = traces listed in args.all_ids.
    xTrain, yTrain = xTrain_original[all_active_ids], yTrain_original[all_active_ids]
    # NOTE(review): xTrainset / yTrainset are not read again in this function.
    xTrainset, yTrainset = np.copy(xTrain), np.copy(yTrain)
    yTrainset = np.squeeze(yTrainset)
    print(len(xTrain))
    print('Len training:')
    print(xTrain.shape)
    print(yTrain.shape)
    #Set seed for fair comparison
    #np.random.seed(0)
    # NOTE(review): all_random_ids is not read again in this function; the call
    # still draws from NumPy's global RNG and needs at least
    # num_sample * num_iteration pool ids because replace=False.
    all_random_ids = np.random.choice(non_active_ids, args.num_sample * args.num_iteration, replace=False)

    start_time = time.time()
    # The sampled ids are loaded from disk instead of being recomputed; the
    # commented calls below show how they were presumably produced.
    #if args.sim_metric == 'euclidean':
    #    disjoint_sampled_ids = get_similar_samples(xTrain_original[:,:,0],yTrain_original, xTest_300[args.start_key:args.end_key,:args.num_trace,:], yTest_300, args.num_sample, args.num_samekey)
    #else:
    #disjoint_sampled_ids = get_similar_alpha_KL_GT(model, xTrain_original[:,:,0], yTrain_original, xTest_300[args.start_key:args.end_key,:args.num_trace,:], yTest_300, args.num_sample)
    disjoint_sampled_ids = np.load(args.sampling_file)
    disjoint_sampled_ids = disjoint_sampled_ids[args.start_key:args.end_key,:]
    print(disjoint_sampled_ids.shape)
    #Training with different keys
    # One fine-tuning run per row of disjoint_sampled_ids.
    for it in range(len(disjoint_sampled_ids)):
        print('Iteration: ', it)
        # Start every iteration from the same saved weights.
        model = load_model(trained_model_path)
        base_model = model
        #Freeze layers
        if args.update_type == 'freeze':
            # Rebuild as a Sequential from all layers but the last, append a
            # new 3329-way softmax head, and freeze everything except that head.
            base_model=model.layers[:-1]
            base_model=tf.keras.Sequential(base_model)
            base_model.build((None,600,1))

            #Setting trainable = False will make them non-trainable.
            #base_model.trainable = False
            base_model.add(tf.keras.layers.Dense(3329,activation='softmax', name='dense_9'))
            for i in range(len(base_model.layers) - 1):
                base_model.layers[i].trainable = False

            base_model.summary()
        elif args.update_type == 'freeze_keep_last':
            # Keep the loaded model (including its existing last layer) and
            # freeze every layer except the last one.
            for i in range(len(base_model.layers) - 1):
                base_model.layers[i].trainable = False

            base_model.summary()

        optimizer = RMSprop(learning_rate=0.00001) #learning_rate=0.000001
        base_model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

        # Per-iteration output directory and artefact names.
        database_folder_train_it = os.path.join(database_folder_train, 'it_'+str(it))
        Path(database_folder_train_it).mkdir(parents=True, exist_ok=True)
        save_model_name = (database_folder_train_it+'/model_best')
        save_ids_name = database_folder_train_it+'/all_ids.npy'
        attack_callback = CustomCallback(xTest, yTest_value, save_model_name, database_folder_train_it, args.eval_interval)
        xTest_curr = np.expand_dims(xTest_300, axis = 3)
        print(xTest_multi.shape)
        print(xTest_300.shape)
        # Multi-key evaluation restricted to the same key range as the sampled ids.
        attack_multi_callback = MultiKeyCallback(xTest_curr[args.start_key:args.end_key,:,:], yTest_300[args.start_key:args.end_key], save_model_name, database_folder_train_it, args.eval_interval)

        callbacks=[csv_logger, attack_callback, attack_multi_callback]
        #callbacks=[csv_logger, attack_callback]

        # NOTE(review): max_ids and test_index are assigned but not read below.
        max_ids = [] #Init all ids, the ids depend on the sampling method
        #Need to fix so they don't choose already chosen samples
        print(xTrain_original.shape)
        print(xTest_300.shape)
        test_index = 100
        sampled_trace_ids = disjoint_sampled_ids[it].astype(int)
        if args.union_type == 'union':
            sampled_trace_ids = np.unique(sampled_trace_ids)
        elif args.union_type == 'intersect':
            #Half is KNN and half is KL
            data_size = int(len(sampled_trace_ids)/2)
            sampled_trace_ids = np.intersect1d(sampled_trace_ids[:data_size], sampled_trace_ids[data_size:])
        print(sampled_trace_ids.shape)
        # Take up to num_smp ids from the start of each half of the id vector.
        num_smp = int(args.num_trace * 200)
        KL_num = int(len(sampled_trace_ids)/2)
        sampled_ids = np.concatenate((sampled_trace_ids[:num_smp], sampled_trace_ids[KL_num:KL_num+num_smp]))
        print(sampled_ids.shape)
        print(args.sampling)
        print(xTrain_Pool.shape)
        print(yTrain_Pool.shape)
        if args.update_sampling == 'k_medoids':
            # NOTE(review): xTrain / yTrain are not reassigned in this branch, so
            # training below uses whatever they already held (the all_active_ids
            # subset on the first iteration) -- confirm this is intended.
            update_ids = K_Medoids_Update(all_active_ids, sampled_ids, args.medoids_path)
            np.save(database_folder_train_it+'/medoids_ids.npy', update_ids)
        else:
            print('ZZZZ')
            # Train only on the traces selected for this iteration.
            xTrain, yTrain = xTrain_original[sampled_ids], yTrain_original[sampled_ids]
        #TODO: Save labels
        #sub_xTrain, sub_yTrain = get_subset(max_ids.astype(int), xTrain_Pool, yTrain_Pool, xType)
        #non_active_ids = np.setdiff1d(non_active_ids, sampled_ids) #np.delete(non_active_ids, sampled_ids)
        #all_active_ids = np.hstack((all_active_ids, sampled_ids))
        print(non_active_ids[0])
        print(all_active_ids[0])
        print(len(non_active_ids))
        print(len(all_active_ids))
        #xTrain_Pool[0][0], yTrain_Pool = np.delete(xTrain_Pool[0][0], max_ids, 0), np.delete(yTrain_Pool, max_ids, 0) #---> If you want to remove samples in pool each iter
        #xTrain, yTrain = np.concatenate((xTrain, sub_xTrain)), np.concatenate((yTrain, sub_yTrain))

        #xTrain[0][0] = np.concatenate((xTrain[0][0], sub_xTrain[0][0]))
        #yTrain = np.concatenate((yTrain, sub_yTrain))

        # Drop the trailing singleton axis so labels can be one-hot encoded.
        yTrain = np.squeeze(yTrain)
        print(yTrain.shape)

        if args.loss_type == 'weighted':
            # Build a {class: weight} dict: zero for every class index, then
            # the "balanced" weight for each class present in yTrain.
            # NOTE(review): the fit() call below does not pass class_weight
            # (it is commented out there), so this dict currently has no effect.
            y = yTrain
            class_weights = compute_class_weight(class_weight="balanced", classes=np.unique(y), y=y)
            #print(class_weight)
            classWeights = np.zeros(noClasses).astype(int)
            class_weight = dict(enumerate(classWeights))
            idx = 0
            all_key = []
            all_num = []
            all_weight = []
            for key in np.unique(y):
                all_key.append(key)
                all_num.append(len(np.where(y == key)[0]))
                all_weight.append(class_weights[idx])
                class_weight[key] = class_weights[idx]
                idx += 1
            # The string literal below is disabled analysis code kept as-is.
            '''
            df = pd.DataFrame({
                'Key':all_key,
                'Num_sample': all_num,
                'weight': all_weight
                })
            df.to_csv('weight_analysis.csv')
            exit()
            '''
        elif args.loss_type == 'combine':
            # Reload the saved model and use its predictions on the training
            # and validation traces as the second half of the targets consumed
            # by combine_loss.
            model = load_model(trained_model_path)
            model.summary()
            with tf.device("CPU"):
                train = tf.data.Dataset.from_tensor_slices(xTrain).batch(192)
                val = tf.data.Dataset.from_tensor_slices(xVal).batch(192)

            yTrain_m2 = model.predict(train)
            yVal_m2 = model.predict(val)
            # Re-compile so the combined loss replaces plain cross-entropy.
            base_model.compile(loss=combine_loss, 
                                optimizer=optimizer, 
                                metrics=['accuracy'])

        print(xTrain.shape)
        print(yTrain.shape)
        print(yVal.shape)
        print(yTrain[-10:])


        #epoch_step = int(args.num_epoch/args.train_interval)
        # Build the input pipelines on the CPU device.
        with tf.device("CPU"):
            #train = tf.data.Dataset.from_tensor_slices(({"input_1": xTrain[0][0], "input_2": xTrain[1][0], "input_3": xTrain[1][1]}, yTrain)).shuffle(4*64).batch(64)
            #val = tf.data.Dataset.from_tensor_slices(({"input_1": xVal[0][0], "input_2": xVal[1][0], "input_3": xVal[1][1]}, yVal)).shuffle(4*64).batch(64)
            if args.loss_type == 'combine':
                # Targets = [one-hot labels | reloaded-model predictions].
                yTrain_combine = np.hstack((tf.one_hot(yTrain, NumSKPVclasses),yTrain_m2)) #
                yVal_combine = np.hstack((tf.one_hot(yVal, NumSKPVclasses),yVal_m2)) #
                train = tf.data.Dataset.from_tensor_slices((xTrain,yTrain_combine)).shuffle(4*64).batch(train_batch_size)
                val = tf.data.Dataset.from_tensor_slices((xVal,yVal_combine)).shuffle(4*64).batch(train_batch_size)
            else:
                train = tf.data.Dataset.from_tensor_slices((xTrain, tf.one_hot(yTrain, NumSKPVclasses))).shuffle(4*64).batch(train_batch_size)
                val = tf.data.Dataset.from_tensor_slices((xVal, tf.one_hot(yVal, NumSKPVclasses))).shuffle(4*64).batch(train_batch_size)


            #if args.schedule_iteration is not None:
                #if it in args.schedule_iteration: #I made the train here to be eval to assess and avoid writing a bunch of code for verification                        #np.save(save_ids_name, sampled_ids)
                #else:
                    #np.save(save_ids_name, all_active_ids)
            #else:
        # NOTE(review): this stores the ids loaded from args.all_ids, which are
        # never modified in this function -- not the ids sampled this iteration.
        np.save(save_ids_name, all_active_ids)


        train_ep = args.subtrain_interval
        base_model.fit(train, batch_size=train_batch_size, verbose = 1, epochs=train_ep, callbacks=callbacks, validation_data=val) #, class_weight=class_weight
        #else:
        #    sub_train = tf.data.Dataset.from_tensor_slices((sub_xTrain, tf.one_hot(sub_yTrain, NumSKPVclasses))).shuffle(4*64).batch(train_batch_size)
        #    model.fit(sub_train, batch_size=train_batch_size, verbose = 1, epochs=train_ep, callbacks=callbacks, class_weight=class_weight, validation_data=val)
        #test_rank = eval_model(model, nruns_default, maxtrc_default, batches, xTest, yTest_value, noHypoKeys, noClasses)
        #print('Test Rank:, ', test_rank)


    # Wall-clock time covering all iterations of the loop above.
    print('Training took {}s'.format(time.time() - start_time))
        #model.save(database_folder_train+'/model_iteration_{}.keras'.format(it))

# Script entry for the training workflow: report the environment, build the
# default (all-ones) class-weight mapping and launch the training routine.
if work == 'train':
    gpu_devices = tf.config.list_physical_devices('GPU')
    print("Num GPUs Available: ", len(gpu_devices))
    print('Work =', work)

    # Every class index starts with an integer weight of 1.
    classWeights = np.ones(noClasses).astype(int)
    class_weight = {class_idx: weight for class_idx, weight in enumerate(classWeights)}

    train_model_multiEpochs(
        xType,
        database_folder_train,
        modelLogFolder,
        logTrainedModel_byFile_folder,
        logTrainedModel_byEp_folder,
        logFilename,
        MLmodel_detail,
        sKeyNo,
        class_weight,
        period,
        maxEpochs,
        train_batch_size,
        args,
    )