import numpy as np
import pickle, gzip
import mlp_combined
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston
import pdb
import time
from mpl_toolkits.axes_grid.inset_locator import (inset_axes, InsetPosition,
                                                  mark_inset, zoomed_inset_axes)

class MLPExperiment:
    """Train three MLPs (L1, L2 and Lyapunov loss) side by side and compare them.

    One ``mlp_combined.Mlp`` instance is created per loss type with otherwise
    identical hyper-parameters.  ``train`` runs all three for a fixed number
    of iterations while recording loss and cumulative training time,
    ``plot_combined`` plots the recorded histories and
    ``convergence_analysis`` measures how long each network needs until its
    training loss stops changing.

    The losses evaluated by this class are, with ``e = y_hat - y``:

    * ``'l1'``:       ``mean(|e|)``
    * ``'l2'``:       ``mean(e ** 2) / 2``
    * ``'lyapunov'``: ``mean(|e| ** (alpha + 1)) / (alpha + 1)``

    Per-loss state lives in attributes whose names end in the loss type
    (``<t>`` is ``l1``, ``l2`` or ``lyapunov``):

    * ``mlp_classifier_<t>``       -- the network
    * ``loss_<t>``                 -- training loss per iteration (index 0 is
      the loss before any training)
    * ``loss_test_<t>``            -- test loss per iteration
    * ``time_<t>``                 -- cumulative training time per iteration
    * ``cumulative_train_time_<t>`` -- running total of training time

    Args:
        size_layers: Layer sizes, forwarded to ``mlp_combined.Mlp``.
        dataset_type: Name of the dataset; only ``'boston'`` triggers a load.
            For any other value ``train_X``/``train_y``/``test_X``/``test_y``
            are left unset and must be assigned by the caller.
        act_funct: Activation name, forwarded to ``mlp_combined.Mlp``.
        iterations: Number of iterations run by ``train``.
        learning_rate: Initial learning rate of every network.
        alpha: Exponent parameter of the Lyapunov loss (see above); also
            forwarded to ``mlp_combined.Mlp``.
        reg_lambda: Forwarded to ``mlp_combined.Mlp``.
        bias_flag: Forwarded to ``mlp_combined.Mlp``.
        batch_size: Forwarded to ``mlp_combined.Mlp``.
    """

    # Loss identifiers, in the order in which they are created and trained.
    _LOSS_TYPES = ('l1', 'l2', 'lyapunov')
    # Human-readable names used in every printed message.
    _LOSS_LABELS = {'l1': 'L1', 'l2': 'L2', 'lyapunov': 'Lyapunov'}
    # matplotlib format strings for the curves in ``plot_combined``.
    _PLOT_STYLES = {'l1': '--k', 'l2': '-.b', 'lyapunov': '-r'}
    # The learning rate is halved every this many iterations/epochs.
    _LR_DECAY_PERIOD = 6000
    # Convergence criterion: loss change below the tolerance (10e-9 == 1e-8)
    # for this many consecutive epochs.
    _CONVERGENCE_TOL = 10e-9
    _CONVERGENCE_PATIENCE = 20

    def __init__(self, size_layers, dataset_type, act_funct='sigmoid',
                 iterations=4000, learning_rate=0.01, alpha=0.82,
                 reg_lambda=0, bias_flag=True, batch_size=None):
        self.size_layers = size_layers
        self.dataset_type = dataset_type
        self.act_funct = act_funct
        self.iterations = iterations
        self.learning_rate = learning_rate
        self.alpha = alpha
        self.reg_lambda = reg_lambda
        self.bias_flag = bias_flag
        self.batch_size = batch_size
        self.setup()
        if dataset_type == 'boston':
            self.load_boston_dataset()

    def load_boston_dataset(self, n_train=405):
        """Load the Boston data, min-max scale it and split it into train/test.

        Features are scaled per column and the target as a whole to [0, 1].
        The first ``n_train`` rows become the training set, the remaining
        rows the test set (no shuffling).  Targets are stored as column
        vectors.

        Args:
            n_train: Number of leading rows used for training.
        """
        # NOTE(review): ``load_boston`` was removed in scikit-learn 1.2; this
        # call needs an older scikit-learn or a replacement data source.
        X, y = load_boston(return_X_y=True)

        # Broadcasting handles the per-column scaling; no tiling required.
        col_min = X.min(axis=0)
        col_max = X.max(axis=0)
        X = (X - col_min) / (col_max - col_min)
        y_min = y.min()
        y_max = y.max()
        y = (y - y_min) / (y_max - y_min)

        # Training data
        self.train_X = X[:n_train, :]
        self.train_y = y[:n_train, np.newaxis]
        print('Shape of training set: ' + str(self.train_X.shape))
        # Test data
        self.test_X = X[n_train:, :]
        self.test_y = y[n_train:, np.newaxis]
        print('Shape of test set: ' + str(self.test_X.shape))

    def setup(self):
        """Reset all recorded histories and create one fresh network per loss."""
        history_len = self.iterations + 1  # slot 0 holds the pre-training loss
        for type_loss in self._LOSS_TYPES:
            setattr(self, 'cumulative_train_time_' + type_loss, 0)
            setattr(self, 'loss_' + type_loss, [0] * history_len)
            setattr(self, 'loss_test_' + type_loss, [0] * history_len)
            setattr(self, 'time_' + type_loss, [0] * history_len)
        # Networks are created last and in a fixed order (l1, l2, lyapunov).
        for type_loss in self._LOSS_TYPES:
            setattr(self, 'mlp_classifier_' + type_loss,
                    self.initialize_mlp(type_loss=type_loss))

    def initialize_mlp(self, type_loss):
        """Build an ``mlp_combined.Mlp`` for ``type_loss`` with this experiment's settings.

        Args:
            type_loss: Loss identifier passed through to ``mlp_combined.Mlp``.

        Returns:
            The newly constructed network.
        """
        return mlp_combined.Mlp(size_layers=self.size_layers,
                                type_loss=type_loss,
                                act_funct=self.act_funct,
                                learning_rate=self.learning_rate,
                                alpha=self.alpha,
                                reg_lambda=self.reg_lambda,
                                bias_flag=self.bias_flag,
                                batch_size=self.batch_size)

    def calculate_theoretical_time_constraints(self):
        """Compute and print the theoretical convergence-time bound.

        With ``gamma`` the smallest strictly positive entry of ``train_X``,
        ``c = learning_rate * k_min * gamma ** (alpha + 1)`` (``k_min = 1``)
        and ``V0 = loss_lyapunov[0]``, the bound stored in
        ``self.theoretical_time_constraint`` is
        ``V0 ** (1 - alpha) / (c * (1 - alpha))``.

        ``loss_lyapunov[0]`` is only filled in by ``train``; before that it
        is 0 and so is the bound.
        """
        k_min = 1
        gamma = self.train_X[self.train_X > 0].min()
        c = self.learning_rate * k_min * np.power(gamma, self.alpha + 1)
        denom = c * (1 - self.alpha)
        self.theoretical_time_constraint = (
            np.power(self.loss_lyapunov[0], 1 - self.alpha) / denom)
        print("theoretical_time_constraint")
        # The original printed an undefined local name here (NameError).
        print(self.theoretical_time_constraint)

    def convergence_analysis(self):
        """Run ``convergence_training`` for every loss type in turn."""
        for type_loss in self._LOSS_TYPES:
            self.convergence_training(type_loss)

    def convergence_training(self, type_loss):
        """Train one network from fresh weights until its training loss stalls.

        The network's weights are re-initialised, then it is trained epoch by
        epoch until the training loss changes by less than
        ``_CONVERGENCE_TOL`` for ``_CONVERGENCE_PATIENCE`` consecutive epochs.
        The time spent in training plus training-set prediction is summed and
        printed.  Every ``_LR_DECAY_PERIOD`` epochs the network's
        ``learning_rate`` is halved; that change persists on the network.

        Args:
            type_loss: ``'l1'``, ``'l2'`` or ``'lyapunov'``.

        Raises:
            ValueError: If ``type_loss`` is not one of the supported values.
        """
        mlp_classifier = self._classifier_for(type_loss)
        print(self._LOSS_LABELS[type_loss] + " convergence training")

        mlp_classifier.initialize_theta_weights()

        epochs = 0
        cumulative_train_time = 0
        stalled_epochs = 0
        prev_cost_train = self._compute_loss(
            type_loss, mlp_classifier.predict(self.train_X), self.train_y)

        # Convergence analysis
        while stalled_epochs < self._CONVERGENCE_PATIENCE:
            # time.clock() no longer exists (removed in Python 3.8).
            start_time = time.perf_counter()
            mlp_classifier.train(self.train_X, self.train_y)
            cost_train = self._compute_loss(
                type_loss, mlp_classifier.predict(self.train_X), self.train_y)
            cumulative_train_time += time.perf_counter() - start_time
            epochs += 1

            # A NaN/inf loss can never satisfy the tolerance test below, so
            # without this guard the loop would spin forever.
            if not np.isfinite(cost_train):
                print("Convergence of " + str(type_loss) +
                      ": diverged after " + str(epochs) +
                      " epochs (non-finite loss)")
                return

            if np.abs(cost_train - prev_cost_train) < self._CONVERGENCE_TOL:
                stalled_epochs += 1
            else:
                stalled_epochs = 0
            prev_cost_train = cost_train

            if epochs % self._LR_DECAY_PERIOD == 0:
                print("Epochs: " + str(epochs))
                mlp_classifier.learning_rate = mlp_classifier.learning_rate / 2.0

        print("Convergence analysis")
        print("Convergence of " + str(type_loss) + ": " +
              str(cumulative_train_time) + " seconds")

    def plot_combined(self, filename="combined_boston_dataset.png"):
        """Plot training and test loss against training time and save the figure.

        The left panel shows the training losses, the right panel the test
        losses, one curve per loss type.  The x axis is cut at the shortest
        of the three total training times so that all curves span the full
        visible range.

        Args:
            filename: Path the figure is written to.
        """
        plt.rcParams.update({'font.size': 18})
        # A single subplots() call is enough; the extra plt.figure() the
        # original issued only left an empty figure behind.
        f, axes = plt.subplots(1, 2, figsize=(26, 9))
        xmax = min(max(getattr(self, 'time_' + t)) for t in self._LOSS_TYPES)

        panels = (
            (axes[0], 'loss_', "Time (in seconds) \n (a)", "Training Loss"),
            (axes[1], 'loss_test_', "Time (in seconds) \n (b)", "Test Loss"),
        )
        for ax, prefix, xlabel, ylabel in panels:
            ymax = max(max(getattr(self, prefix + t))
                       for t in self._LOSS_TYPES)
            for t in self._LOSS_TYPES:
                ax.plot(getattr(self, 'time_' + t), getattr(self, prefix + t),
                        self._PLOT_STYLES[t], linewidth=2.0,
                        label=self._LOSS_LABELS[t] + ' Loss')
            ax.set_xlabel(xlabel)
            ax.set_ylabel(ylabel)
            ax.grid(True)
            ax.set_ylim((0, ymax))
            ax.set_xlim((0, xmax))
            ax.legend(framealpha=1, frameon=True)
        f.savefig(filename)

    def train(self):
        """Train all three networks for ``self.iterations`` iterations.

        Before training, the initial training/test losses are stored at
        index 0 of the histories.  Each iteration then trains the L1, L2 and
        Lyapunov networks once (in that order) and records their losses and
        cumulative training time at index ``ix + 1``.  Progress is printed
        every 100 iterations, the learning rates are halved every
        ``_LR_DECAY_PERIOD`` iterations, and the final losses are printed at
        the end.
        """
        # Initial (pre-training) losses.
        for type_loss in self._LOSS_TYPES:
            train_loss, test_loss = self._evaluate(type_loss)
            getattr(self, 'loss_' + type_loss)[0] = train_loss
            getattr(self, 'loss_test_' + type_loss)[0] = test_loss

        for ix in range(self.iterations):
            print("Iteration number: " + str(ix))
            for type_loss in self._LOSS_TYPES:
                self._train_one_iteration(type_loss, ix + 1)
                if ix % 100 == 0:
                    label = self._LOSS_LABELS[type_loss]
                    print(label + ' training loss: ' +
                          str(getattr(self, 'loss_' + type_loss)[ix + 1]))
                    print(label + ' test loss: ' +
                          str(getattr(self, 'loss_test_' + type_loss)[ix + 1]))

            if ix % self._LR_DECAY_PERIOD == 0 and ix > 0:
                for type_loss in self._LOSS_TYPES:
                    classifier = self._classifier_for(type_loss)
                    classifier.learning_rate = classifier.learning_rate / 2.0
                print(self.mlp_classifier_l1.learning_rate)

        print('Training complete!')
        for type_loss in self._LOSS_TYPES:
            label = self._LOSS_LABELS[type_loss]
            # The test loss is computed from test predictions and test
            # targets; the original L1 report reused the training values.
            train_loss, test_loss = self._evaluate(type_loss)
            print('Final Training Loss ' + label + ': ' + str(train_loss))
            print('Final Test Loss ' + label + ': ' + str(test_loss))

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _classifier_for(self, type_loss):
        """Return the network trained with ``type_loss``; ValueError if unknown."""
        if type_loss not in self._LOSS_LABELS:
            raise ValueError("Unknown type_loss " + repr(type_loss) +
                             "; expected one of " + repr(self._LOSS_TYPES))
        return getattr(self, 'mlp_classifier_' + type_loss)

    def _compute_loss(self, type_loss, y_hat, y):
        """Return the scalar ``type_loss`` loss between predictions and targets."""
        residual = y_hat - y
        if type_loss == 'l1':
            return np.abs(residual).mean()
        if type_loss == 'l2':
            return np.power(residual, 2).mean() / 2
        if type_loss == 'lyapunov':
            return (np.power(np.abs(residual), self.alpha + 1).mean()
                    / (self.alpha + 1))
        raise ValueError("Unknown type_loss " + repr(type_loss) +
                         "; expected one of " + repr(self._LOSS_TYPES))

    def _evaluate(self, type_loss):
        """Return ``(train_loss, test_loss)`` of the ``type_loss`` network."""
        classifier = self._classifier_for(type_loss)
        train_loss = self._compute_loss(
            type_loss, classifier.predict(self.train_X), self.train_y)
        test_loss = self._compute_loss(
            type_loss, classifier.predict(self.test_X), self.test_y)
        return train_loss, test_loss

    def _train_one_iteration(self, type_loss, slot):
        """Train the ``type_loss`` network once and record results at ``slot``.

        Only the training call and the training-set prediction are timed;
        the test-set evaluation is excluded from the recorded time.
        """
        classifier = self._classifier_for(type_loss)

        start_time = time.perf_counter()
        classifier.train(self.train_X, self.train_y)
        y_hat_train = classifier.predict(self.train_X)
        elapsed = time.perf_counter() - start_time

        total_name = 'cumulative_train_time_' + type_loss
        total_time = getattr(self, total_name) + elapsed
        setattr(self, total_name, total_time)

        getattr(self, 'loss_' + type_loss)[slot] = self._compute_loss(
            type_loss, y_hat_train, self.train_y)
        getattr(self, 'time_' + type_loss)[slot] = total_time
        getattr(self, 'loss_test_' + type_loss)[slot] = self._compute_loss(
            type_loss, classifier.predict(self.test_X), self.test_y)

def main():
    """Run the Boston experiment: train, plot the histories, then time convergence.

    Builds an ``MLPExperiment`` with a 13-100-50-1 layer layout and ReLU
    activations, trains it for 4000 iterations, saves the combined loss
    plot and finally runs the convergence analysis.
    """
    hyperparams = dict(
        act_funct='relu',
        iterations=4000,
        learning_rate=0.11,
        alpha=0.93,
        reg_lambda=0.0001,
        bias_flag=False,
    )
    layer_sizes = [13, 100, 50, 1]

    experiment = MLPExperiment(layer_sizes, 'boston', **hyperparams)
    experiment.train()
    experiment.plot_combined()
    experiment.convergence_analysis()


# Run the experiment only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
