import os
import platform
import sys
import time
import pickle
from IPython.display import clear_output
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random

import sklearn
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

import tensorflow as tf
import tensorflow
from tensorflow import keras
from tensorflow.keras.models import Sequential, Model, model_from_json, load_model
from tensorflow.keras.layers import Dense, Input, Add, Lambda, Dropout, Subtract, Multiply, Concatenate, Dot, BatchNormalization, Activation, LeakyReLU, ReLU
from tensorflow.keras.losses import mse
from keras import backend as K
from tensorflow.keras import regularizers

import keras_tuner
from keras_tuner import Hyperband, BayesianOptimization



def fix_seed(seed):
    """Seed every random number source used by this module.

    NumPy, TensorFlow and the built-in ``random`` module are seeded, in that
    order, and ``str(seed)`` is stored in the ``PYTHONHASHSEED`` environment
    variable.

    Args:
        seed: Value handed unchanged to each seeding function.
    """
    # NumPy, TensorFlow, then the built-in generator.
    for seeder in (np.random.seed, tf.random.set_seed, random.seed):
        seeder(seed)

    # Hash seed.
    # NOTE(review): presumably this only influences interpreters started
    # after this point, not the hashing of the running process -- confirm.
    os.environ["PYTHONHASHSEED"] = str(seed)

def build_model_reg(hp):
    """Build and compile the tunable dense regression network.

    The searched hyperparameters are the number (0 to 3 each) of 128-, 64-
    and 32-unit hidden layers, stacked in that order.  Every hidden layer is
    a ``Dense`` layer followed by a ReLU ``Activation``; the head is a single
    linear ``Dense`` unit.  The model is compiled with Adagrad
    (``learning_rate=0.01``), MSE loss and MAE/MSE metrics.

    Relies on the module-level ``seed``, which is passed to ``fix_seed`` and
    to every Glorot-uniform kernel initializer.

    Args:
        hp: Hyperparameter container supplied by the tuner; only ``hp.Int``
            is used here.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    fix_seed(seed)

    # Register the depth hyperparameters up front, widest block first.
    depth_by_width = (
        (128, hp.Int('num_layers128', 0, 3)),
        (64, hp.Int('num_layers64', 0, 3)),
        (32, hp.Int('num_layers32', 0, 3)),
    )

    def glorot():
        # A fresh initializer object per layer, all sharing the same seed.
        return keras.initializers.glorot_uniform(seed=seed)

    net = keras.Sequential()
    for width, depth in depth_by_width:
        for _ in range(depth):  # a depth of 0 simply skips this width
            net.add(Dense(
                units=width,
                kernel_initializer=glorot(),
                bias_initializer=keras.initializers.Zeros()))
            net.add(Activation('relu'))

    # Linear single-unit head.
    net.add(Dense(
        1,
        kernel_initializer=glorot(),
        bias_initializer=keras.initializers.Zeros()))
    net.compile(
        optimizer=keras.optimizers.Adagrad(learning_rate=0.01, epsilon=1e-7),
        loss='mse',
        metrics=['mae', 'mse'])
    return net

def build_model_cls(hp):
    """Build and compile the tunable dense binary classifier.

    The searched hyperparameters are the number (0 to 3 each) of 128-, 64-
    and 32-unit hidden layers, stacked in that order.  Every hidden layer is
    a ``Dense`` layer followed by a ReLU ``Activation``; the head is a single
    sigmoid ``Dense`` unit.  The model is compiled with Adagrad
    (``learning_rate=0.01``), binary cross-entropy loss and an accuracy
    metric.

    Relies on the module-level ``seed``, which is passed to ``fix_seed`` and
    to every Glorot-uniform kernel initializer.

    Args:
        hp: Hyperparameter container supplied by the tuner; only ``hp.Int``
            is used here.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    fix_seed(seed)

    # Register the depth hyperparameters up front, widest block first.
    depth_by_width = (
        (128, hp.Int('num_layers128', 0, 3)),
        (64, hp.Int('num_layers64', 0, 3)),
        (32, hp.Int('num_layers32', 0, 3)),
    )

    def glorot():
        # A fresh initializer object per layer, all sharing the same seed.
        return keras.initializers.glorot_uniform(seed=seed)

    net = keras.Sequential()
    for width, depth in depth_by_width:
        for _ in range(depth):  # a depth of 0 simply skips this width
            net.add(Dense(
                units=width,
                kernel_initializer=glorot(),
                bias_initializer=keras.initializers.Zeros()))
            net.add(Activation('relu'))

    # Single sigmoid unit producing the decision probability.
    net.add(Dense(
        1,
        activation='sigmoid',
        kernel_initializer=glorot(),
        bias_initializer=keras.initializers.Zeros()))
    net.compile(
        optimizer=keras.optimizers.Adagrad(learning_rate=0.01, epsilon=1e-7),
        loss='binary_crossentropy',
        metrics=['accuracy'])
    return net

def build_model_reg_Affine(hp):
    """Build and compile the tunable two-input "affine" regression model.

    The ``f`` input runs through a ReLU dense tower (0 to 3 layers each of
    128, 64 and 32 units, in that order) and is projected by the ``g1_g2``
    layer to two values per sample.  The ``x`` input is mapped by the linear
    ``g3`` layer to one value.  The model output is ``g1 + g2 * g3``.

    The kernel of ``g1_g2`` carries an L1 penalty of strength ``10 ** p``
    where ``p`` is the ``'L1 lambda power'`` hyperparameter in ``[-3, 1]``.

    Relies on the module-level names ``seed``, ``dim_x`` and ``dim_f``.

    Args:
        hp: Hyperparameter container supplied by the tuner (``hp.Int`` and
            ``hp.Float`` are used).

    Returns:
        The compiled ``keras.models.Model`` taking ``[x, f]`` as inputs,
        with MSE loss and MAE/MSE metrics.
    """
    fix_seed(seed)

    # Register hyperparameters in a fixed order: depths first, then lambda.
    depth_by_width = (
        (128, hp.Int('num_layers128', 0, 3)),
        (64, hp.Int('num_layers64', 0, 3)),
        (32, hp.Int('num_layers32', 0, 3)),
    )
    l1_strength = 10**hp.Float('L1 lambda power', min_value=-3, max_value=1)

    # Input layers
    input_x = keras.layers.Input(shape=(dim_x,), name='x')
    input_f = keras.layers.Input(shape=(dim_f,), name='f')

    # Network for f
    hidden = input_f
    for width, depth in depth_by_width:
        for _ in range(depth):  # a depth of 0 simply skips this width
            hidden = keras.layers.Dense(
                units=width,
                kernel_initializer=keras.initializers.glorot_uniform(seed=seed),
                bias_initializer=keras.initializers.Zeros(),
                activation='relu')(hidden)
    g1_g2 = keras.layers.Dense(
        units=2,
        kernel_regularizer=regularizers.l1(l1_strength),
        name='g1_g2')(hidden)

    # Network for x
    g3 = keras.layers.Dense(units=1, name='g3')(input_x)

    # Merge the networks: g1 + g2 * g3
    g1 = g1_g2[:, 0]
    g2 = g1_g2[:, 1]
    g2_g3 = keras.layers.Multiply(name='g2_g3')([g2, g3])
    output = keras.layers.Add(name='output')([g1, g2_g3])

    model = keras.models.Model(inputs=[input_x, input_f], outputs=output)
    model.compile(
        optimizer=keras.optimizers.Adagrad(learning_rate=0.01, epsilon=1e-7),
        loss='mse',
        metrics=['mae', 'mse'])
    return model

def build_model_cls_Affine(hp):
    """Build and compile the tunable two-input "affine" binary classifier.

    The ``f`` input runs through a ReLU dense tower (0 to 3 layers each of
    128, 64 and 32 units, in that order) and is projected by the ``g1_g2``
    layer to two values per sample.  The ``x`` input is mapped by the linear
    ``g3`` layer to one value.  The logit is ``g1 + g2 * g3`` and is squashed
    by a sigmoid to give the decision probability.

    The kernel of ``g1_g2`` carries an L1 penalty of strength ``10 ** p``
    where ``p`` is the ``'L1 lambda power'`` hyperparameter in ``[-3, 1]``.

    Relies on the module-level names ``seed``, ``dim_x`` and ``dim_f``.

    Args:
        hp: Hyperparameter container supplied by the tuner (``hp.Int`` and
            ``hp.Float`` are used).

    Returns:
        The compiled ``keras.models.Model`` taking ``[x, f]`` as inputs,
        with binary cross-entropy loss and an accuracy metric.
    """
    fix_seed(seed)
    num_layers128 = hp.Int('num_layers128', 0, 3)
    num_layers64 = hp.Int('num_layers64', 0, 3)
    num_layers32 = hp.Int('num_layers32', 0, 3)
    lmbd = 10**hp.Float('L1 lambda power', min_value=-3, max_value=1)

    # Define input layers
    input_x = keras.layers.Input(shape=(dim_x,), name='x')
    input_f = keras.layers.Input(shape=(dim_f,), name='f')

    # Network for f: widest layers first; a count of 0 skips that width.
    f = input_f
    for units, count in ((128, num_layers128), (64, num_layers64), (32, num_layers32)):
        for _ in range(count):
            f = keras.layers.Dense(
                    units=units,
                    kernel_initializer=keras.initializers.glorot_uniform(seed=seed),
                    bias_initializer=keras.initializers.Zeros(), activation='relu')(f)
    g1_g2 = keras.layers.Dense(units=2, kernel_regularizer=regularizers.l1(lmbd), name='g1_g2')(f)

    # Network for x
    g3 = keras.layers.Dense(units=1, name='g3')(input_x)

    # Merge the networks
    output = keras.layers.Add(name='output')([
        g1_g2[:,0],
        keras.layers.Multiply(name='g2_g3')([g1_g2[:,1], g3])
    ])
    # The merged output holds a single value per sample, so a softmax over it
    # would normalise every prediction to exactly 1.0.  A sigmoid is the
    # activation that pairs with binary cross-entropy on one output (and is
    # what build_model_cls uses).  The layer keeps its historical name so
    # lookups by name continue to resolve.
    output = keras.layers.Activation('sigmoid', name='softmax')(output)

    # Define the model
    model = keras.models.Model(inputs=[input_x, input_f], outputs=output)

    model.compile(
        optimizer=keras.optimizers.Adagrad(learning_rate=0.01, epsilon=1e-7),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    return model

def map_to_RA(x):
    """Threshold a scalar at 0.5: return 1 when ``x >= 0.5``, otherwise 0."""
    return 1 if x >= 0.5 else 0
    
def fun_Tuning(f, score, decision, ProjectName='tmp'):
    """Run Bayesian hyperparameter searches for the single-input models.

    The regression search (``build_model_reg`` against ``score``) runs
    first, then the classification search (``build_model_cls`` against
    ``decision``).  Both minimise ``val_loss`` on a 10% validation split and
    use the module-level ``early_stop`` callback.  ``fix_seed(seed)`` is
    called immediately before each search.

    Args:
        f: Training features; its ``.values`` are fed to the tuner.
        score: Regression targets; ``.values`` is used.
        decision: Classification targets; ``.values`` is used.
        ProjectName: Prefix of the tuner project names; ``'_reg'`` and
            ``'_cls'`` are appended.  Existing projects of the same name are
            overwritten.

    Returns:
        Tuple ``(tuner_reg, tuner_cls)`` of the two searched tuners.
    """
    def run_search(builder, suffix, targets):
        # Create one tuner and run its search with the shared settings.
        tuner = BayesianOptimization(
            builder,
            objective='val_loss',
            project_name=ProjectName+suffix,
            alpha=0.0001,
            beta=2.6,
            overwrite=True
        )
        fix_seed(seed)
        tuner.search(
            f.values,
            targets.values,
            batch_size=4,
            epochs=1000,
            validation_split=0.1,
            verbose=0,
            callbacks=[early_stop]
        )
        return tuner

    ## Regression, then classification
    tuner_reg = run_search(build_model_reg, '_reg', score)
    tuner_cls = run_search(build_model_cls, '_cls', decision)

    return tuner_reg, tuner_cls

def fun_TuningAffine(x, f, score, decision, ProjectName='tmp'):
    """Run Bayesian hyperparameter searches for the two-input affine models.

    The regression search (``build_model_reg_Affine`` against ``score``)
    runs first, then the classification search (``build_model_cls_Affine``
    against ``decision``).  Both minimise ``val_loss`` on a 10% validation
    split and use the module-level ``early_stop`` callback.
    ``fix_seed(seed)`` is called immediately before each search.

    Args:
        x: Training data for the ``x`` input; ``.values`` is used.
        f: Training data for the ``f`` input; ``.values`` is used.
        score: Regression targets; ``.values`` is used.
        decision: Classification targets; ``.values`` is used.
        ProjectName: Prefix of the tuner project names; ``'_reg'`` and
            ``'_cls'`` are appended.  Existing projects of the same name are
            overwritten.

    Returns:
        Tuple ``(tuner_reg, tuner_cls)`` of the two searched tuners.
    """
    def run_search(builder, suffix, targets):
        # Create one tuner and run its search with the shared settings.
        tuner = BayesianOptimization(
            builder,
            objective='val_loss',
            project_name=ProjectName+suffix,
            alpha=0.0001,
            beta=2.6,
            overwrite=True
        )
        fix_seed(seed)
        tuner.search(
            [x.values, f.values],
            targets.values,
            batch_size=4,
            epochs=1000,
            validation_split=0.1,
            verbose=0,
            callbacks=[early_stop]
        )
        return tuner

    ## Regression, then classification
    tuner_reg = run_search(build_model_reg_Affine, '_reg', score)
    tuner_cls = run_search(build_model_cls_Affine, '_cls', decision)

    return tuner_reg, tuner_cls

def fun_Training(f, score, decision, tuner_reg, tuner_cls):
    """Train the best single-input regression and classification models.

    For each tuner the best hyperparameters are fetched, a fresh model is
    built from them and fitted on the training features (10% validation
    split, module-level ``early_stop`` plus a ``PrintDot`` callback), and
    predictions are produced for both the training and the test features.
    Regression outputs are passed through the module-level
    ``fun_invNormScale(..., ScalParams)``; classification probabilities are
    additionally thresholded with ``map_to_RA``.

    Args:
        f: Indexable pair ``(train_features, test_features)``.  Both items
            must expose ``.index``, which labels the returned series
            (presumably pandas DataFrames -- confirm against the caller).
        score: Regression targets for the training features.
        decision: Classification targets for the training features.
        tuner_reg: Searched tuner for the regression model.
        tuner_cls: Searched tuner for the classification model.

    Returns:
        Tuple ``(model_reg, model_cls, results)`` where ``results`` maps
        ``'Score fits'``, ``'Score pred'``, ``'DecisionP fits'``,
        ``'DecisionP pred'``, ``'Decision fits'`` and ``'Decision pred'`` to
        ``pd.Series`` indexed like the training ("fits") or test ("pred")
        features.
    """
    f_tr = f[0]
    f_te = f[1]
    s_tr = score
    d_tr = decision

    ## Regression
    best_hps = tuner_reg.get_best_hyperparameters(num_trials=1)[0]
    model_reg = tuner_reg.hypermodel.build(best_hps)

    fix_seed(seed)
    model_reg.fit(
        f_tr,
        s_tr,
        batch_size=4,
        epochs=1000,
        validation_split=0.1,
        verbose=0,
        callbacks=[early_stop, PrintDot()])

    fits = model_reg.predict(f_tr).reshape(-1)
    pred = model_reg.predict(f_te).reshape(-1)
    s_fits = pd.Series(fun_invNormScale(fits, ScalParams), f_tr.index)
    s_pred = pd.Series(fun_invNormScale(pred, ScalParams), f_te.index)

    ## Classification
    best_hps = tuner_cls.get_best_hyperparameters(num_trials=1)[0]
    model_cls = tuner_cls.hypermodel.build(best_hps)

    fix_seed(seed)
    model_cls.fit(
        f_tr,
        d_tr,
        batch_size=4,
        epochs=1000,
        validation_split=0.1,
        verbose=0,
        callbacks=[early_stop, PrintDot()])

    d_fits = model_cls.predict(f_tr).reshape(-1)
    d_pred = model_cls.predict(f_te).reshape(-1)
    ra_fits = np.array([map_to_RA(p) for p in d_fits])
    ra_pred = np.array([map_to_RA(p) for p in d_pred])

    # Label with the indices of the features that were actually passed in,
    # exactly as the regression outputs above do, rather than depending on
    # unrelated module-level names.
    d_fits = pd.Series(d_fits, index=f_tr.index)
    d_pred = pd.Series(d_pred, index=f_te.index)
    ra_fits = pd.Series(ra_fits, index=f_tr.index)
    ra_pred = pd.Series(ra_pred, index=f_te.index)

    return model_reg, model_cls, {
        'Score fits' : s_fits,
        'Score pred' : s_pred,
        'DecisionP fits' : d_fits,
        'DecisionP pred' : d_pred,
        'Decision fits' : ra_fits,
        'Decision pred' : ra_pred
    }

def fun_TrainingAffine(x, f, score, decision, tuner_reg, tuner_cls):
    """Train the best two-input affine regression and classification models.

    For each tuner the best hyperparameters are fetched, a fresh model is
    built from them and fitted on ``[x_train, f_train]`` (10% validation
    split, module-level ``early_stop`` plus a ``PrintDot`` callback), and
    predictions are produced for both the training and the test inputs.
    Regression outputs are passed through the module-level
    ``fun_invNormScale(..., ScalParams)``; classification probabilities are
    additionally thresholded with ``map_to_RA``.

    Args:
        x: Indexable pair ``(train, test)`` of data for the ``x`` input.
        f: Indexable pair ``(train, test)`` of data for the ``f`` input.
            Both items must expose ``.index``, which labels the returned
            series (presumably pandas DataFrames -- confirm against the
            caller).
        score: Regression targets for the training inputs.
        decision: Classification targets for the training inputs.
        tuner_reg: Searched tuner for the regression model.
        tuner_cls: Searched tuner for the classification model.

    Returns:
        Tuple ``(model_reg, model_cls, results)`` where ``results`` maps
        ``'Score fits'``, ``'Score pred'``, ``'DecisionP fits'``,
        ``'DecisionP pred'``, ``'Decision fits'`` and ``'Decision pred'`` to
        ``pd.Series`` indexed like the training ("fits") or test ("pred")
        ``f`` data.
    """
    x_tr = x[0]
    x_te = x[1]
    f_tr = f[0]
    f_te = f[1]
    s_tr = score
    d_tr = decision

    ## Regression
    best_hps = tuner_reg.get_best_hyperparameters(num_trials=1)[0]
    model_reg = tuner_reg.hypermodel.build(best_hps)

    fix_seed(seed)
    model_reg.fit(
        [x_tr, f_tr],
        s_tr,
        batch_size=4,
        epochs=1000,
        validation_split=0.1,
        verbose=0,
        callbacks=[early_stop, PrintDot()])

    fits = model_reg.predict([x_tr, f_tr]).reshape(-1)
    pred = model_reg.predict([x_te, f_te]).reshape(-1)
    s_fits = pd.Series(fun_invNormScale(fits, ScalParams), f_tr.index)
    s_pred = pd.Series(fun_invNormScale(pred, ScalParams), f_te.index)

    ## Classification
    best_hps = tuner_cls.get_best_hyperparameters(num_trials=1)[0]
    model_cls = tuner_cls.hypermodel.build(best_hps)

    fix_seed(seed)
    model_cls.fit(
        [x_tr, f_tr],
        d_tr,
        batch_size=4,
        epochs=1000,
        validation_split=0.1,
        verbose=0,
        callbacks=[early_stop, PrintDot()])

    d_fits = model_cls.predict([x_tr, f_tr]).reshape(-1)
    d_pred = model_cls.predict([x_te, f_te]).reshape(-1)
    ra_fits = np.array([map_to_RA(p) for p in d_fits])
    ra_pred = np.array([map_to_RA(p) for p in d_pred])

    # Label with the indices of the features that were actually passed in,
    # exactly as the regression outputs above do, rather than depending on
    # unrelated module-level names.
    d_fits = pd.Series(d_fits, index=f_tr.index)
    d_pred = pd.Series(d_pred, index=f_te.index)
    ra_fits = pd.Series(ra_fits, index=f_tr.index)
    ra_pred = pd.Series(ra_pred, index=f_te.index)

    return model_reg, model_cls, {
        'Score fits' : s_fits,
        'Score pred' : s_pred,
        'DecisionP fits' : d_fits,
        'DecisionP pred' : d_pred,
        'Decision fits' : ra_fits,
        'Decision pred' : ra_pred
    }