### MVRSM uses a piece-wise linear surrogate model for optimization of
### expensive cost functions with mixed-integer variables.
###
### MVRSM_minimize(obj, x0, lb, ub, num_int, max_evals, rand_evals) solves the minimization problem
###
### min f(x)
### st. lb<=x<=ub, the first num_int variables of x are integer
###
### where obj is the objective function, x0 the initial guess,
### lb and ub are the bounds, num_int is the number of integer variables,
### and max_evals is the maximum number of objective evaluations (rand_evals of these
### are random evaluations).
###
### Laurens Bliek, 06-03-2019

import os
import math
import random
import time
import numpy as np

from scipy.optimize import minimize, Bounds


def MVRSM_minimize(obj, x0, lb, ub, num_int, max_evals, rand_evals=0):
    d = len(x0)  # dimension, number of variables
    current_time = time.time()  # time when starting the algorithm
    next_X = []  # candidate solution presented by the algorithm

    ## Initialize the surrogate model
    def initializeModel():
        next_X = np.copy(x0)

        def ReLU(x):  # Rectified Linear Unit
            return np.maximum(0, x)

        def ReLUderiv(x):  # Derivative of Rectified Linear Unit
            return (x > 0) + 0.5 * (x == 0)

        # Define basis functions Z=ReLU(W*x+B)
        W = []  # Basis function weights
        B = []  # Basis function bias

        # Add a constant basis function independent on the variable x, giving the model an offset
        W.append([0] * d)
        B.append([1])

        # Add basis functions dependent on one integer variable
        for k in range(num_int):
            for i in range(int(lb[k]), int(ub[k]) + 1):
                if i == lb[k]:
                    temp = [0] * d
                    temp[k] = 1
                    W.append(np.copy(temp))
                    B.append([-i])
                elif i == ub[k]:
                    temp = [0] * d
                    temp[k] = -1
                    W.append(np.copy(temp))
                    B.append([i])
                else:
                    temp = [0] * d
                    temp[k] = -1
                    W.append(np.copy(temp))
                    B.append([i])
                    temp = [0] * d
                    temp[k] = 1
                    W.append(np.copy(temp))
                    B.append([-i])

        # Add basis functions dependent on two subsequent integer variables
        for k in range(1, num_int):
            for i in range(int(lb[k]) - int(ub[k - 1]), int(ub[k]) - int(lb[k - 1]) + 1):
                if i == lb[k] - ub[k - 1]:
                    temp = [0] * d
                    temp[k] = 1
                    temp[k - 1] = -1
                    W.append(np.copy(temp))
                    B.append([-i])
                elif i == ub[k] - lb[k - 1]:
                    temp = [0] * d
                    temp[k] = -1
                    temp[k - 1] = 1
                    W.append(np.copy(temp))
                    B.append([i])
                else:
                    temp = [0] * d
                    temp[k] = -1
                    temp[k - 1] = 1
                    W.append(np.copy(temp))
                    B.append([i])
                    temp = [0] * d
                    temp[k] = 1
                    temp[k - 1] = -1
                    W.append(np.copy(temp))
                    B.append([-i])

        num_discr_basisfunctions = len(B) - 1  # number of basis functions only related to the discrete variables

        # Add dx random linearly independent basis functions (and parallel ones)
        # which depend on both integer and continuous variables,
        # where dx is the number of continuous variables
        dx = d - num_int
        tempW = np.random.random((dx, d))
        tempW = (2 * tempW - 1) / d  # normalize between -1/d and 1/d
        for k in range(dx):

            # Check for the range in which B needs to lie by moving orthogonal to W

            signs = np.sign(tempW[k, :])

            # Find relevant corner points of the [lb, ub] hypercube
            cornerpoint1 = np.copy(lb)
            cornerpoint2 = np.copy(ub)
            for j in range(d):
                if signs[j] < 0:
                    cornerpoint1[j] = ub[j]
                    cornerpoint2[j] = lb[j]

            # Calculate minimal distance from hyperplane to corner points
            b1 = np.dot(tempW[k, :], cornerpoint1)
            b2 = np.dot(tempW[k, :], cornerpoint2)

            if b1 > b2:
                print('Warning: b1>b2. This may lead to problems.')

            # Add the same number of basis functions as for the discrete variables
            for j in range(math.ceil(num_discr_basisfunctions / num_int)):
                # or just add 1000 of them
                # for j in range(1000):
                b = (b2 - b1) * np.random.random() + b1
                W.append(np.copy(tempW[k, :]))
                B.append([-float(b)])

        W = np.asarray(W)
        B = np.asarray(B)

        # Transform input into model basis functions Z=ReLU(W*x+B)
        def Z(x):
            x = np.asarray(x)
            x = x.reshape(-1, 1)
            temp = np.matmul(W, x)
            temp = temp + B
            temp = np.asarray(temp)
            Zx = ReLU(temp)
            return Zx

        # Derivative of basis functions w.r.t. x
        def Zderiv(x):
            x = np.asarray(x)
            x = x.reshape(-1, 1)
            temp = np.matmul(W, x)
            temp = temp + B
            temp = np.asarray(temp)
            dZx = ReLUderiv(temp)
            return dZx

        D = len(B)  # Number of basis functions
        c = np.zeros((D, 1))  # Model weights, to be trained with recursive least squares (RLS)
        for i in range(1, num_discr_basisfunctions):
            c[i] = 1  # model weights start out as 1 for the discrete basis functions to stimulate convexity
        reg = 1e-8  # Regularization parameter. 1e-8 is good for the noiseless case, change to something like 1e-3 if there is noise.
        P = np.diag(np.ones(D)) / reg  # RLS covariance matrix
        model = {'W': W, 'B': B, 'ReLU': ReLU, 'ReLUderiv': ReLUderiv, 'Z': Z, 'Zderiv': Zderiv, 'D': D, 'c': c,
                 'reg': reg, 'P': P}  # Store model variables in a dictionary

        return next_X, model

    ## Update the model when a new data point (x,y) comes in
    def updateModel(x, y, model):
        Zx = model['Z'](x)
        # Recursive least squares algorithm
        temp = np.matmul(model['P'], Zx)
        g = temp / (1 + np.matmul(np.transpose(Zx), temp))
        model['P'] = model['P'] - np.matmul(g, np.transpose(temp))
        model['c'] = model['c'] + (y - np.matmul(np.transpose(Zx), model[
            'c'])) * g  # Only here, output y is used (to update the model weights)

        # Define model output for any new input x2
        def out(x2):
            return np.matmul(np.transpose(model['c']), model['Z'](x2)).item(0, 0)

        # Define model output derivative for any new input x2 (used in the optimization step)
        def deriv(x2):
            c = np.transpose(model['c'])
            temp = np.reshape(model['Zderiv'](x2), (model['Zderiv'](x2)).shape[0])
            temp = np.matmul(np.diag(temp), model['W'])
            result = np.transpose(np.matmul(c, temp))
            return result

        model['out'] = out
        model['outderiv'] = deriv
        return model

    ### Start actual algorithm
    next_X, model = initializeModel()
    best_X = np.copy(next_X)  # Best candidate solution found so far
    best_y = 9999999  # Best objective function value found so far

    ## Iteratively evaluate the objective, update the model, find the minimum of the model, and explore the search space
    time_start1 = time.time()

    run_data = []

    for ii in range(0, max_evals):
        print(f"Starting MVRSM iteration {ii}/{max_evals}")
        x = np.copy(next_X).astype(float)
        if ii == 0:
            y = obj(x)  # Evaluate the objective

            # Scale with respect to initial y value, causing the optimum to lie below 0.
            # This is better for exploitation and prevents the algorithm from getting stuck at the boundary.
            y0 = y

            def scale(y):
                if abs(y0) > 1e-8:
                    y = (y - y0) / abs(y0)
                else:
                    y = (y - y0)
                return y

            def inv_scale(y):
                if abs(y0) > 1e-8:
                    y = y * abs(y0) + y0
                else:
                    y = y + y0
                return y

            model['inv_scale'] = inv_scale
            y = scale(y)
        else:
            y = scale(obj(x))  # Evaluate the objective and scale it

        # Keep track of the best found objective value and candidate solution so far
        if y < best_y:
            best_X = np.copy(x)
            best_y = y

        ## Update the surrogate model
        time_start = time.time()
        model = updateModel(x, y, model)
        update_time = time.time() - time_start  # Time used to update the model

        ## Minimization of the surrogate model
        time_start = time.time()
        temp = minimize(model['out'], best_X, method='L-BFGS-B', bounds=Bounds(lb, ub), jac=model['outderiv'],
                        options={'maxiter': 20, 'maxfun': 20})
        minimization_time = time.time() - time_start  # Time used to find the minimum of the model
        next_X = np.copy(temp.x)
        # print('minimum of surrogate: ', next_X)
        next_X_before_rounding = np.copy(next_X)
        for j in range(num_int):
            next_X[j] = np.round(next_X[j])  # Round integer variables to nearest integer point
        # next_X = [int(x) for x in next_X]

        ## Visualize model

        # if ii > max_evals/2:

        # import matplotlib.pyplot as plt
        # # print('Hoi', len(xxxx))
        # # print(len(toplot))
        # # plt.plot(xxxx,toplot)
        # # #plt.plot(jjjj,toplot[iiii],'*')
        # # titlestr = ['Dimension ', iiii]
        # # plt.title(titlestr)
        # # plt.show()

        # from mpl_toolkits.mplot3d import Axes3D
        # from matplotlib import cm
        # from matplotlib.ticker import LinearLocator, FormatStrFormatter
        # #XX = np.arange(next_X[0]-0.5, next_X[0]+0.5, 0.01)
        # #XX = np.arange(lb[0], ub[0], 0.05)
        # #YY = np.arange(lb[1], ub[1], 0.05)
        # XX = np.arange(-2, 4, 0.05)
        # YY = np.arange(-2, 4, 0.05)
        # XXX, YYY = np.meshgrid(XX, YY)
        # R = []
        # for XXXX in XX:
        # temp = []
        # for YYYY in YY:
        # #print(next_X)
        # temp.append(model['out']([XXXX,YYYY]))
        # R.append(temp)
        # R = np.copy(R)

        # fig = plt.figure()
        # ax = fig.gca(projection='3d')
        # surf = ax.plot_surface(XXX, YYY, R, cmap=cm.coolwarm,
        # linewidth=0, antialiased=False)
        # fig.colorbar(surf, shrink=0.5, aspect=5)
        # plt.show()

        # Just to be sure, clip to the bounds
        np.clip(next_X, lb, ub)

        # Check if minimizer really gives better result
        # if model['out'](next_X) > model['out'](x) + 1e-8:
        # print('Warning: minimization of the surrogate model in MVRSM yielded a worse solution, maybe something went wrong.')

        ## Exploration step (else the algorithm gets stuck in the local minimum of the surrogate model)
        next_X_before_exploration = np.copy(next_X)
        next_X = np.copy(next_X)
        if ii < max_evals - 2:  # Skip exploration before the last iteration, to end at the exact minimum of the surrogate model.
            # Discrete exploration
            for j in range(0, num_int):
                r = random.random()
                r2 = random.random()  # Used to choose whether to explore left or right
                a = next_X[j]
                prob = 1 / d  # Probability for each variable to increase or decrease
                while r < prob:
                    if a == lb[j] and a < ub[j]:
                        a += 1  # Explore to the right
                    elif a == ub[j] and a > lb[j]:
                        a -= 1  # Explore to the left
                    elif a > lb[j] and a < ub[j]:
                        if r2 < 0.5:
                            a += 1
                        else:
                            a -= 1
                    r = r * 2
                next_X[j] = a
            # Continuous exploration
            for j in range(num_int, d):
                r = np.random.normal() * (ub[j] - lb[j]) * 0.1 * 1 / (
                    math.sqrt(d))  # choose a variance that scales inversely with the total number of variables
                a = next_X[j]
                while a + r > ub[j] or a + r < lb[j]:
                    r = np.random.normal() * (ub[j] - lb[j]) * 0.1 * 1 / (math.sqrt(d))  # out of bounds so repeat
                a += r
                next_X[j] = a

        # Just to be sure, clip to the bounds again
        np.clip(next_X, lb, ub)

        # For the first few iterations, just do random search instead (overwrites next_X)
        if ii < rand_evals:
            next_X[0:num_int] = np.round(
                np.random.rand(num_int) * (ub[0:num_int] - lb[0:num_int]) + lb[0:num_int])  # Random guess (integer)
            next_X[num_int:d] = np.random.rand(d - num_int) * (ub[num_int:d] - lb[num_int:d]) + lb[
                                                                                                num_int:d]  # Random guess (continuous)

        time_per_iteration = time.time() - time_start1

        # Save data to log file
        filename = 'log_MVRSM_' + str(current_time) + ".log"
        with open(filename, 'a') as f:
            print('\n\n MVRSM iteration: ', ii, file=f)
            print('Time spent training the model:				 ', update_time, file=f)
            print('Time spent finding the minimum of the model: ', minimization_time, file=f)
            print('Total computation time for this iteration:	', time_per_iteration, file=f)
            print('Current time: ', time.time(), file=f)
            print('Evaluated data point and evaluation:						   ', np.copy(x).astype(float), ', ',
                  inv_scale(y), file=f)
            print('Predicted value at evaluated data point (after learning)       ', np.copy(x).astype(float), ', ',
                  inv_scale(model['out'](x)), file=f)
            print('Best found data point and evaluation so far:				   ', np.copy(best_X).astype(float),
                  ', ', inv_scale(best_y), file=f)
            print('Best data point according to the model and predicted value:	   ', next_X_before_rounding, ', ',
                  inv_scale(model['out'](next_X_before_rounding)), file=f)
            print('Best rounded	 point according to the model and predicted value:', next_X_before_exploration, ', ',
                  inv_scale(model['out'](next_X_before_exploration)), file=f)
            print('Suggested next data point and predicted value:				   ', next_X, ', ',
                  inv_scale(model['out'](next_X)), file=f)
            if ii >= max_evals - 1:
                np.set_printoptions(threshold=np.inf)
                print('Model c parameters: ', np.transpose(model['c']), file=f)
                print('Model W parameters: ', np.transpose(model['W']), file=f)
                print('Model B parameters: ', np.transpose(model['B']), file=f)
                np.set_printoptions(threshold=1000)

        run_data.append(inv_scale(best_y))

        time_start1 = time.time()

    return best_X, inv_scale(best_y), model, filename, run_data


# Read data from log file (this reads the best found objective values at each iteration)
def read_log(filename):
    with open(filename, 'r') as f:
        MVRSMfile = f.readlines()
        MVRSM_best = []
        for i, lines in enumerate(MVRSMfile):
            searchterm = 'Best data point according to the model and predicted value'
            if searchterm in lines:
                # print('Hello', MVRSMfile)
                temp = MVRSMfile[i - 1]
                temp = temp.split('] , ')
                temp = temp[1]
                MVRSM_best.append(float(temp))
    return MVRSM_best


# Plot the best found objective values at each iteration
def plot_results(filename):
    import matplotlib.pyplot as plt
    fig = plt.figure(1)
    MVRSM_ev = read_log(filename)
    plt.plot(MVRSM_ev)
    plt.xlabel('Iteration')
    plt.ylabel('Objective')
    plt.grid()
    plt.show()


# fig.show()

def visualise_model(model, obj, x0, lb, ub, num_int):
    ## Plot in 'one dimension' (first integer and first continuous variable)
    # print(num_int)
    # print(ub[num_int]-lb[num_int])
    # print((ub[num_int]-lb[num_int])/0.05)
    print('W parameters: ', model['W'])
    print('B parameters: ', model['B'])
    int_range = np.arange(lb[0], ub[0], 0.05)  # range of the integer variable
    cont_range = np.arange(lb[num_int], ub[num_int], 0.05)  # range of the continuous variable
    model_output = np.zeros((len(int_range), len(cont_range)))
    obj_output = np.zeros((len(int_range), len(cont_range)))
    x = np.copy(x0)  # For the other variables, use x0 as the value
    correctint = 999
    for i in range(len(int_range)):
        x[0] = int_range[i]
        if abs(x[0] + 10) <= 0.05:
            correctint = i
        for j in range(len(cont_range)):
            x[num_int] = cont_range[j]
            model_output[i, j] = model['inv_scale'](model['out'](x))
            obj_output[i, j] = obj(x)
    X, Y = np.meshgrid(int_range, cont_range)
    R = np.sqrt(X ** 2 + Y ** 2)
    R = np.sin(R)
    # print(R.shape)
    # print(model_output.shape)
    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D
    from matplotlib import cm
    fig = plt.figure(2)
    ax = fig.add_subplot(121, projection='3d')
    ax.plot_surface(X, Y, np.transpose(model_output), cmap=cm.coolwarm)
    ax.set_title('Model output')
    ax.set_xlabel('Discrete variable')
    ax.set_ylabel('Continuous variable')
    ax2 = fig.add_subplot(122, projection='3d')
    ax2.plot_surface(X, Y, np.transpose(obj_output), cmap=cm.coolwarm)
    ax2.set_title('Function output')
    ax2.set_xlabel('Discrete variable')
    ax2.set_ylabel('Continuous variable')
    fig.show()

    fig2 = plt.figure(3)
    ax3 = fig2.add_subplot(121)
    ax3.plot(cont_range, np.transpose(model_output[correctint, :]))
    ax3.set_title('Model output')
    # ax3.set_xlabel('Discrete variable')
    ax3.set_xlabel('Continuous variable')
    ax4 = fig2.add_subplot(122)
    ax4.plot(cont_range, np.transpose(obj_output[correctint, :]))
    ax4.set_title('Function output')
    ax4.set_xlabel('Continuous variable')
    fig2.show()

    plt.show()

# fig2 = plt.figure()
# ax3 = fig2.add_subplot(121)
# cs3 = ax3.contourf(X, Y, np.transpose(model_output),cmap=cm.coolwarm)
# ax3.contour(cs3)
# ax3.set_title('Model output')
# ax3.set_xlabel('Discrete variable')
# ax3.set_ylabel('Continuous variable')
# ax4 = fig2.add_subplot(122)
# cs4 = ax4.contourf(X, Y, np.transpose(obj_output),cmap=cm.coolwarm)
# ax4.contour(cs4)
# ax4.set_title('Function output')
# ax4.set_xlabel('Discrete variable')
# ax4.set_ylabel('Continuous variable')
# plt.show()
