import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.image as mpimage
import numpy as np
import os
from glob import glob
import random
import scipy
from sklearn.metrics import confusion_matrix
import pickle

#os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices'
# Presumably a workaround for the "duplicate OpenMP runtime" abort seen with
# some numpy/TensorFlow builds.
# NOTE(review): this is set after the imports above; confirm it still takes
# effect, or move it before the tensorflow/numpy imports.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

# Not referenced anywhere in the visible part of this file.
d = 20

# Feature dimension: row count of the V/Q/K pattern matrices built in
# generate_data, last axis of the model input, and the number of units of the
# in_v / in_q / in_k projection layers.
ma = 20
# Number of units of the hidden Dense layers fc1 and fc2 of the model.
m_half = 500



def generate_data(mu, omega1, flag, L, alpha_nd, M, sigma0, sigma, delta, P_m1, P_m2):
    """Generate one noisy synthetic (V, Q, K) sequence of ``L`` positions.

    The ``L`` positions are split into ``M`` contiguous segments. Every
    position of segment ``i`` receives column ``i`` of ``P_m1`` as its V
    feature and column ``i`` of ``P_m2`` as both its Q and its K feature;
    noise is then added to all three.

    Segment boundaries are ``0``, the length of the first segment,
    ``L - int(L * alpha_nd)`` and ``L``, plus ``M - 3`` extra cut points drawn
    uniformly (with ``random.randint``) from the last ``int(L * alpha_nd)``
    positions. The first segment has length ``omega1`` when ``flag == 1`` and
    ``L - int(L * alpha_nd) - omega1`` otherwise.

    :param mu: accepted for interface compatibility; not used.
    :param omega1: number of positions that selects the first segment length.
    :param flag: label; ``flag == 1`` selects ``omega1`` directly.
    :param L: sequence length.
    :param alpha_nd: fraction of positions that form the randomly cut tail.
    :param M: number of segments (columns of ``P_m1`` / ``P_m2`` used).
    :param sigma0: accepted for interface compatibility; not used.
    :param sigma: upper bound on the Euclidean norm of the per-position V
        noise; ``0.1 * sigma * sigma`` is the scale passed to the sampler.
    :param delta: ``delta * delta`` is the scale passed to
        ``np.random.normal`` for the Q and K noise.
    :param P_m1: array whose columns are the V patterns (``ma`` rows).
    :param P_m2: array whose columns are the shared Q/K patterns (``ma`` rows).
    :return: tuple ``(V, Q, K)`` of arrays, each of shape ``(L, ma)``.
    """
    num_nd = int(L * alpha_nd)
    first_len = omega1 if flag == 1 else L - num_nd - omega1

    boundaries = [0, first_len, L - num_nd, L]
    boundaries.extend(random.randint(L - num_nd, L) for _ in range(M - 3))
    boundaries.sort()

    V = np.zeros((ma, L))
    Q = np.zeros((ma, L))
    K = np.zeros((ma, L))
    for i in range(M):
        start, stop = boundaries[i], boundaries[i + 1]
        # A (ma, 1) column broadcasts across every position of the segment.
        V[:, start:stop] = P_m1[:, i].reshape(ma, 1)
        Q[:, start:stop] = P_m2[:, i].reshape(ma, 1)
        K[:, start:stop] = P_m2[:, i].reshape(ma, 1)

    noise_sigma = np.zeros((ma, L))
    for col in range(L):
        ns = np.random.normal(0, 0.1 * sigma * sigma, (ma,))
        norm = np.linalg.norm(ns)
        # Rescale only when the sample is actually too long. Testing
        # ``norm > sigma`` (instead of rescaling whenever ``norm >= sigma``)
        # avoids the 0/0 division that turned V into NaN for sigma == 0.
        if norm > sigma:
            ns = ns * sigma / norm
        noise_sigma[:, col] = ns

    V = V + noise_sigma
    Q = Q + np.random.normal(0, delta * delta, (ma, L))
    K = K + np.random.normal(0, delta * delta, (ma, L))
    return np.transpose(V), np.transpose(Q), np.transpose(K)

def batch_pred(model, data, index, L):
    """Run ``model`` on the selected samples one at a time.

    :param model: callable taking an array of shape ``(1, 3, L, ma)``. It may
        return the prediction directly or a list/tuple whose first element is
        the prediction (as the model class in this file does).
    :param data: array indexed by sample; each ``data[i]`` must reshape to
        ``(1, 3, L, ma)``.
    :param index: sequence of sample indices to evaluate.
    :param L: sequence length of each sample.
    :return: array of shape ``(len(index), 1)``; row ``r`` is the prediction
        for sample ``index[r]``.
    """
    y_pred = []
    # Iterate ``index`` itself: going through ``set(index)`` reordered and
    # de-duplicated the samples, so rows no longer matched ``index``.
    for i in index:
        out = model(data[i].reshape(1, 3, L, ma))
        if isinstance(out, (list, tuple)):
            # Models returning [prediction, extras...]: keep the prediction.
            out = out[0]
        y_pred.append(out)
    return np.array(y_pred).reshape(len(index), 1)

def hg_loss(y_true, y_pred):
    """Return the mean hinge loss ``mean(relu(1 - y_pred * y_true))``.

    :param y_true: target values, multiplied element-wise with ``y_pred``.
    :param y_pred: predicted scores.
    :return: scalar tensor holding the mean over all elements.
    """
    margin_violation = tf.nn.relu(1 - y_pred * y_true)
    return tf.reduce_mean(margin_violation)

class neural_netowrk(tf.keras.Model):
    """Dot-product attention block followed by two parallel ReLU Dense heads.

    The input stacks three feature sequences along axis 1 (index 0 is fed to
    ``in_v``, index 1 to ``in_q``, index 2 to ``in_k``). The scalar output per
    sample is the mean of ``fc1`` activations minus the mean of ``fc2``
    activations, averaged over sequence positions.
    """

    def __init__(self, seed=1):
        """Create the layers.

        :param seed: value passed to ``tf.random.set_seed`` so that weight
            initialization is repeatable.
        """
        super(neural_netowrk, self).__init__()
        # use random seed to make the initialization repeat
        tf.random.set_seed(seed)

        # Hidden Dense layers applied to the attention output.
        self.fc1 = tf.keras.layers.Dense(
            m_half,
            kernel_initializer=tf.keras.initializers.RandomNormal(0, 1e-1),
            activation='relu')
        self.fc2 = tf.keras.layers.Dense(
            m_half,
            kernel_initializer=tf.keras.initializers.RandomNormal(0, 1e-1),
            activation='relu')

        # Linear projections of the value / query / key inputs; the kernels
        # start as the identity matrix.
        self.in_v = tf.keras.layers.Dense(
            units=ma,
            kernel_initializer=tf.keras.initializers.Constant(tf.eye(ma)),
            activation=None, name='in_v')
        self.in_q = tf.keras.layers.Dense(
            units=ma,
            kernel_initializer=tf.keras.initializers.Constant(tf.eye(ma)),
            activation=None, name='in_q')
        self.in_k = tf.keras.layers.Dense(
            units=ma,
            kernel_initializer=tf.keras.initializers.Constant(tf.eye(ma)),
            activation=None, name='in_k')

        # Built once here instead of on every forward pass.
        self.attention = tf.keras.layers.Attention()

        # Not used by call(). This is the definition that was effective
        # before: an earlier ``fc3`` assignment was overwritten by this one.
        self.fc3 = tf.keras.layers.Dense(10, activation='softmax')

    def call(self, input):
        '''
        here we define the forward function
        :param input: the input data; axis 1 holds the V, Q and K sequences
            at indices 0, 1 and 2 (main passes shape (batch, 3, L, ma))
        :return: list [output, distribution] where output has shape
            (batch, 1) and distribution is the softmax of the q.k^T scores
        '''
        # For each layer, a bias will also be initialized and add to the output after matrix multiply.
        v = self.in_v(input[:, 0])
        q = self.in_q(input[:, 1])
        k = self.in_k(input[:, 2])

        # Attention weights returned to the caller for inspection; they are
        # computed separately from the Attention layer below.
        scores = tf.matmul(q, k, transpose_b=True)
        distribution = tf.nn.softmax(scores)

        # Keras Attention expects [query, value, key].
        x = self.attention([q, v, k])
        x1 = self.fc1(x)
        x2 = self.fc2(x)
        x = tf.math.reduce_mean(x1, 2) - tf.math.reduce_mean(x2, 2)
        output = tf.math.reduce_mean(x, 1)

        # -1 lets TensorFlow infer the batch size, so this also works when
        # the static batch dimension is unknown.
        return [tf.reshape(output, shape=[-1, 1]), distribution]


def _sample_dataset(count, mu, omega1, L, alpha_nd, M, sigma0, sigma, delta, P_m1, P_m2):
    """Draw ``count`` samples with random +/-1 labels via generate_data.

    :return: ``(data, labels)`` with shapes ``(count, 3, L, ma)`` and
        ``(count, 1)``.
    """
    data = np.zeros((count, 3, L, ma))
    labels = np.zeros((count, 1))
    for i in range(count):
        # Bernoulli(0.5) draw mapped from {0, 1} to {-1, +1}.
        labels[i] = np.random.binomial(1, 0.5, 1) * 2 - 1
        data[i] = generate_data(mu, omega1, labels[i], L, alpha_nd, M,
                                sigma0, sigma, delta, P_m1, P_m2)
    return data, labels


def main():
    """Train the attention model on synthetic data for several random trials.

    Each trial draws fresh pattern matrices and train/test sets, trains a new
    model with Adam on random mini-batches using the hinge loss, prints the
    training and test loss every 10 iterations, and finally prints the
    full-set hinge losses and the recorded attention-mass pairs.
    """
    # Data-generation settings.
    L = 50
    M = 20
    sigma = 0.1
    sigma0 = 0.2
    delta = 0.2
    alpha_nd = 0.45
    mu = 1

    # Experiment settings.
    test_max = 20      # number of independent trials
    N = 200            # training samples per trial
    Test = 100         # test samples per trial
    num_epoch = 100    # number of mini-batch iterations
    batch_size = 4
    eval_every = 10

    # Both outer loops currently run exactly once; they are kept as the
    # place to sweep a data or training parameter.
    for i0 in range(12, 13):
        alpha = 0.5
        omega1 = int(np.round(alpha * L))
        for j in range(2, 3):
            for trial in range(test_max):
                P_m1 = scipy.linalg.orth(np.random.normal(0, 1, (ma, M)))
                P_m2 = scipy.linalg.orth(np.random.normal(0, 1, (ma, M)))

                train_data, train_label = _sample_dataset(
                    N, mu, omega1, L, alpha_nd, M, sigma0, sigma, delta, P_m1, P_m2)
                test_data, test_label = _sample_dataset(
                    Test, mu, omega1, L, alpha_nd, M, sigma0, sigma, delta, P_m1, P_m2)

                optimizer = tf.keras.optimizers.Adam(1e-2)
                model = neural_netowrk()
                soft = []
                test_error = []
                train_index = np.arange(N)

                for step in range(num_epoch):
                    index_batch = random.sample(list(train_index), batch_size)
                    train_batch = train_data[index_batch]
                    y_true = train_label[index_batch]

                    # Only the forward pass and the loss are recorded; the
                    # gradient step and the evaluation run outside the tape.
                    with tf.GradientTape() as tape:
                        y_pred, _ = model(train_batch)
                        loss = hg_loss(y_true, y_pred)
                    variables = tape.watched_variables()
                    grads = tape.gradient(loss, variables)
                    optimizer.apply_gradients(zip(grads, variables))

                    if step % eval_every == 0:
                        # One batched forward pass gives both the test
                        # predictions and the attention distributions.
                        test_out, matrix = model(test_data)

                        # Average attention over test samples, then over
                        # query positions, and record the mass that falls on
                        # the first omega1 key positions versus the rest.
                        sftmx = tf.reduce_mean(matrix, 0)
                        mean_sft = tf.reduce_mean(sftmx[0:L, :], 0)
                        pair = np.array([tf.reduce_sum(mean_sft[0:omega1]),
                                         tf.reduce_sum(mean_sft[omega1:L])])
                        soft.append(pair)

                        test_hinge = hg_loss(test_label, test_out.numpy())
                        test_error.append(test_hinge)
                        print('Iteration', step, '| Training loss:', loss.numpy(), '| Test loss',
                              test_hinge.numpy())

                out_train, _ = model(train_data)
                out_test, _ = model(test_data)
                # Keras losses are called as loss(y_true, y_pred).
                hinge = tf.keras.losses.Hinge()
                train_loss = hinge(train_label, out_train)
                test_loss = hinge(test_label, out_test)
                print('Iteration', num_epoch - 1, '| Training loss:', train_loss.numpy(), '| Test loss', test_loss.numpy())
                print(soft)



# Run the experiment only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()

