import numpy as np
import tensorflow as tf
import random
import scipy

# Number of nodes in the synthetic graph (one data sample per node).
N = 1000
# Number of distinct feature patterns (columns of the pattern matrices).
c = 10
# NOTE(review): `d` is not referenced anywhere in the visible code.
d = 20

# Dimensionality of every token vector and of the in_v / in_q / in_k layers.
ma = 20
# Number of units in each of the two hidden Dense layers (fc1 and fc2).
m_half = 200

def generate_label(N, gamma):
    """Create +/-1 labels for ``N`` nodes.

    The first ``int(N * gamma / 2)`` nodes are labelled +1, the nodes from
    there up to ``int(N * gamma)`` are labelled -1, and every remaining node
    receives an independent fair coin flip in {-1, +1}.

    Args:
        N: total number of nodes.
        gamma: fraction of nodes whose label is fixed (half +1, half -1).

    Returns:
        A float array of shape ``(N, 1)`` whose entries are -1 or +1.
    """
    n_pos = int(N * gamma / 2)
    n_fixed = int(N * gamma)

    label = np.zeros((N, 1))
    # Slice assignment replaces the per-index Python loops over index sets.
    label[:n_pos] = 1
    label[n_pos:n_fixed] = -1

    # One vectorised draw for all remaining nodes; slices clamp at N, so an
    # over-large gamma simply leaves no random nodes.
    n_random = label[n_fixed:].shape[0]
    label[n_fixed:, 0] = 2 * np.random.binomial(1, 0.5, n_random) - 1
    return label


def generateAdj(N, gamma, deg, n_st, n_sr, label):
    """Build a symmetric 0/1 adjacency matrix (with self-loops) for the graph.

    Nodes are split by index into three groups: ``set1`` (the first
    ``int(N*gamma/2)`` nodes), ``set2`` (up to ``int(N*gamma)``) and ``setM``
    (the rest).  Each node in ``setM`` is linked to ``n_st`` random nodes of
    the group matching its label (``set1`` for label 1, ``set2`` otherwise)
    and ``n_sr`` random nodes of the other group, then padded with random
    ``setM`` nodes until its row holds ``deg + 1`` non-zeros (self-loop
    included), if it is not there already.  Afterwards every ``set1`` /
    ``set2`` node is padded with random nodes of its own group up to the same
    row count.

    Args:
        N: number of nodes.
        gamma: fraction of nodes belonging to ``set1`` and ``set2`` together.
        deg: target number of neighbours per node (excluding the self-loop).
        n_st: links from a ``setM`` node to the group matching its label.
        n_sr: links from a ``setM`` node to the opposite group.
        label: array indexable by node id; a node is treated as "label 1"
            when ``label[i] == 1``.

    Returns:
        ``(adj_matrix, 1 - ep)`` where ``adj_matrix`` is an ``(N, N)`` float
        array and ``ep = gamma + (1 - gamma) * n_st / (n_st + n_sr)``.

    Raises:
        ValueError: from ``random.sample`` when a group is too small to
            supply the requested number of distinct neighbours.
        ZeroDivisionError: when ``n_st + n_sr == 0``.
    """
    adj_matrix = np.eye(N)
    set1 = set(range(int(N * gamma / 2)))
    set2 = set(range(int(N * gamma))) - set1
    setM = set(range(N)) - set1 - set2

    def _linked(node):
        # Indices currently connected to `node` (self-loop included).
        return set(np.where(adj_matrix[node, :] > 0)[0])

    def _connect(node, others):
        # Keep the matrix symmetric by writing both directions.
        adj_matrix[node, others] = 1
        adj_matrix[others, node] = 1

    for i in setM:
        if label[i] == 1:
            same_group, other_group = set1, set2
        else:
            same_group, other_group = set2, set1
        l_star = random.sample(list(same_group), n_st)
        l_sharp = random.sample(list(other_group), n_sr)

        # Earlier nodes may already have linked to i, so count what is there
        # and only top up the remainder from setM.
        n0 = int(np.count_nonzero(adj_matrix[i, :]))
        n_extra = int(deg + 1 - n_st - n_sr - n0)
        if n_extra > 0:
            l_nd = random.sample(list(setM - _linked(i)), n_extra)
        else:
            l_nd = []

        _connect(i, l_star + l_sharp + l_nd)

    # Pad the two labelled groups with links inside their own group.
    for group in (set1, set2):
        for i in group:
            num = deg + 1 - int(np.count_nonzero(adj_matrix[i, :]))
            if num > 0:
                l_in = random.sample(list(group - _linked(i)), num)
                _connect(i, l_in)

    ep = gamma + (1 - gamma) * n_st / (n_st + n_sr)
    return adj_matrix, 1 - ep

def narrow(A):
    """Return a copy of ``A`` in which every strictly positive entry is 1.

    Zero and negative entries are passed through unchanged.
    """
    is_positive = A > 0
    clipped = np.where(is_positive, 1, A)
    return clipped

def neighbor_12(A):
    """List the 1-hop and exactly-2-hop neighbours of every node.

    Args:
        A: square adjacency matrix (array-like).  Non-zero entries mark
            edges; self-loops on the diagonal are allowed and ignored.

    Returns:
        ``(nb1, nb2)``: two lists with one entry per node.  ``nb1[i]`` holds
        the column indices of the non-zeros in row ``i`` of ``A`` (excluding
        ``i``).  ``nb2[i]`` holds the indices ``j != i`` that are non-zero in
        row ``i`` of ``max(narrowed(A @ A) - narrowed(A), 0)``, where
        "narrowed" maps every positive entry to 1; for a 0/1 adjacency these
        are the nodes reachable through ``A @ A`` that are not already direct
        neighbours.
    """
    A = np.asarray(A)
    n_nodes = A.shape[0]  # derived from the matrix rather than a global

    nb1 = []
    for i in range(n_nodes):
        direct = set(np.nonzero(A[i, :])[0])
        nb1.append(list(direct - {i}))

    # Two-step reachability; positive entries are clamped to 1 before
    # subtracting so that path counts do not matter, only reachability.
    A2 = np.matmul(A, A)
    two_hop_only = np.maximum(
        np.where(A2 > 0, 1, A2) - np.where(A > 0, 1, A), 0
    )

    nb2 = []
    for i in range(n_nodes):
        second = set(np.nonzero(two_hop_only[i, :])[0])
        nb2.append(list(second - {i}))

    return nb1, nb2


def feature_assign(A, nb1, nb2):
    """Assign a pattern id to every node and score nodes by neighbourhood vote.

    Pattern ids start as random integers in ``[3, c]``.  Neighbours of node 0
    get pattern 1, neighbours of node 1 get pattern 2, nodes adjacent to both
    get 1 or 2 by a fair coin flip, and finally the first ``c`` nodes are
    overwritten with patterns ``1..c``.  For every node the "vote" is the
    share of its neighbours that are also neighbours of node 0, among those
    that are neighbours of node 0 or of node 1 (0.5 when there are none).

    Args:
        A: unused.
        nb1: per-node lists of direct neighbours.
        nb2: unused.

    Returns:
        ``(vote, det_node_ratio, epsilon_S, pattern_short)`` where ``vote``
        has shape ``(N, 1)``, ``det_node_ratio`` stacks ``1 - vote`` for nodes
        with ``vote > 0.5`` on top of ``vote`` for nodes with ``vote < 0.5``,
        ``epsilon_S`` is its mean, and ``pattern_short`` has shape ``(N, 1)``.
    """
    around_node0 = set(nb1[0])
    around_node1 = set(nb1[1])

    pattern_short = np.random.randint(low=3, high=c + 1, size=(N, 1))
    pattern_short[list(nb1[0])] = 1
    pattern_short[list(nb1[1])] = 2
    # Nodes adjacent to both anchors are assigned pattern 1 or 2 at random.
    for shared in around_node0.intersection(around_node1):
        pattern_short[shared] = np.random.binomial(1, 0.5, 1) + 1
    # The first c nodes always carry one pattern each.
    for k in range(c):
        pattern_short[k] = k + 1

    vote = 0.5 * np.ones((N, 1))
    for node in range(N):
        around = set(nb1[node])
        n_first = len(around_node0.intersection(around))
        n_second = len(around_node1.intersection(around))
        if n_first + n_second > 0:
            vote[node] = n_first / (n_first + n_second)
        else:
            vote[node] = 0.5

    rows_above = np.nonzero(vote > 0.5)[0]
    rows_below = np.nonzero(vote < 0.5)[0]
    node1_ratio = 1 - vote[rows_above]
    node2_ratio = vote[rows_below]
    det_node_ratio = np.concatenate((node1_ratio, node2_ratio), 0)
    epsilon_S = np.mean(det_node_ratio)
    return vote, det_node_ratio, epsilon_S, pattern_short



def generate_data(dg_ratio, nb1, nb2, P_m1, P_m2, sigma, delta, deg, gamma):
    """Build the per-node token tensors (V, Q, K) used as model input.

    Every node gets a pattern id: 0 for the first ``int(N*gamma/2)`` nodes,
    1 for the nodes up to ``int(N*gamma)``, and a random id in ``[2, c-1]``
    for the rest.  For node ``i`` a sequence of ``L = int(deg/2) + 1`` tokens
    is assembled: position 0 is the node's own pattern, the next
    ``int(dg_ratio * L)`` positions are patterns of distinct entries sampled
    from ``nb1[i]``, and the remaining positions are patterns sampled from
    ``nb2[i]`` (with replacement when ``nb2[i]`` has no more entries than
    required).  V tokens are columns of ``P_m1``; Q and K tokens are columns
    of ``P_m2``.  Norm-clipped Gaussian noise is added to V and plain
    Gaussian noise to Q and K.

    Args:
        dg_ratio: fraction of the sequence filled from 1-hop neighbours.
        nb1: per-node lists of 1-hop neighbour indices.
        nb2: per-node lists of 2-hop neighbour indices.
        P_m1: ``(ma, c)`` pattern matrix for V.
        P_m2: ``(ma, c)`` pattern matrix for Q and K.
        sigma: upper bound on the norm of each V noise column.
        delta: noise level for Q and K.
        deg: node degree; determines the sequence length.
        gamma: fraction of nodes with a fixed pattern (0 or 1).

    Returns:
        ``(data, pattern_short)``: ``data`` has shape ``(N, 3, L, ma)`` with
        V, Q, K along axis 1, and ``pattern_short`` is the list of pattern
        ids.

    Raises:
        ValueError: from ``random.sample`` if ``nb1[i]`` has fewer than
            ``int(dg_ratio * L)`` entries.
    """
    L = int(deg / 2) + 1
    set1 = set(range(int(N * gamma / 2)))
    set2 = set(range(int(N * gamma))) - set1
    setM = set(range(N)) - set1 - set2

    pattern_short = [0] * len(set1) + [1] * len(set2)
    for _ in range(len(setM)):
        num = np.random.randint(low=3, high=c + 1, size=(1,))
        # Index the element explicitly: int() on a 1-element array is
        # deprecated since NumPy 1.25.
        pattern_short.append(int(num[0]) - 1)

    # Loop-invariant values, computed once instead of once per node.
    pt = np.asarray(pattern_short, dtype=np.int64)
    dg0 = int(dg_ratio * L)
    n_second = L - 1 - dg0

    data = np.zeros((N, 3, L, ma))
    for i in range(N):
        V = np.zeros((ma, L))
        Q = np.zeros((ma, L))
        K = np.zeros((ma, L))

        # Position 0: the node's own pattern.
        V[:, 0] = P_m1[:, pattern_short[i]]
        Q[:, 0] = P_m2[:, pattern_short[i]]
        K[:, 0] = P_m2[:, pattern_short[i]]

        # Positions 1..dg0: patterns of distinct 1-hop neighbours.
        ls = random.sample(list(pt[nb1[i]]), dg0)
        V[:, 1:dg0 + 1] = P_m1[:, ls]
        Q[:, 1:dg0 + 1] = P_m2[:, ls]
        K[:, 1:dg0 + 1] = P_m2[:, ls]

        # Remaining positions: patterns of 2-hop neighbours; sample with
        # replacement when there are not more candidates than slots.
        second_patterns = list(pt[nb2[i]])
        if len(second_patterns) <= n_second:
            ls2 = list(np.random.choice(second_patterns, n_second))
        else:
            ls2 = random.sample(second_patterns, n_second)
        V[:, dg0 + 1:L] = P_m1[:, ls2]
        Q[:, dg0 + 1:L] = P_m2[:, ls2]
        K[:, dg0 + 1:L] = P_m2[:, ls2]

        # NOTE(review): np.random.normal takes a standard deviation as its
        # scale; 0.1*sigma**2 and delta**2 are passed as-is -- confirm intent.
        noise_sigma = np.zeros((ma, L))
        for j in range(L):
            ns = np.random.normal(0, 0.1 * sigma * sigma, (ma,))
            ns_norm = np.linalg.norm(ns)
            if ns_norm < sigma:
                noise_sigma[:, j] = ns
            else:
                # Rescale so the noise column has norm exactly sigma.
                noise_sigma[:, j] = ns * sigma / ns_norm

        V = V + noise_sigma
        Q = Q + np.random.normal(0, delta * delta, (ma, L))
        K = K + np.random.normal(0, delta * delta, (ma, L))
        data[i, 0, :, :] = np.transpose(V)
        data[i, 1, :, :] = np.transpose(Q)
        data[i, 2, :, :] = np.transpose(K)

    return data, pattern_short

def hg_loss(y_true, y_pred):
    """Return the mean hinge loss, i.e. the average of ``relu(1 - y_pred * y_true)``."""
    margin = 1 - y_pred * y_true
    hinge = tf.nn.relu(margin)
    return tf.reduce_mean(hinge)

def dataset_generate(data, label, test_ind):
    """Split ``data`` and ``label`` into train and test parts by row index.

    Args:
        data: array whose first axis indexes samples.
        label: array whose first axis indexes the same samples.
        test_ind: iterable of row indices that form the test set.

    Returns:
        ``(train_data, train_label, test_data, test_label)``.  Test rows
        follow the order of ``test_ind``; train rows are all remaining rows
        in ascending index order.
    """
    # Take the sample count from the data itself instead of a module global,
    # so the split works for any number of samples.
    n_samples = data.shape[0]
    test_rows = list(test_ind)
    held_out = set(test_rows)
    train_rows = [row for row in range(n_samples) if row not in held_out]

    train_data = data[train_rows]
    test_data = data[test_rows]
    train_label = label[train_rows]
    test_label = label[test_rows]
    return train_data, train_label, test_data, test_label


class BiasLayer(tf.keras.layers.Layer):
    """Adds a learnable, position-dependent bias along the last axis.

    The bias vector has ``length`` entries: a fixed constant 3.2 at
    position 0, the trainable ``bias1`` repeated over the next ``spd_num1``
    positions, and the trainable ``bias2`` repeated over the final
    ``spd_num2`` positions.

    Args:
        pe1_dim: shape of ``bias1`` (an int or a tuple); it must broadcast
            against a vector of length ``spd_num1``.
        pe2_dim: shape of ``bias2``; it must broadcast against a vector of
            length ``spd_num2``.
        length: total number of positions (size of the last input axis).
        ratio: fraction of the ``length - 1`` non-zero positions that use
            ``bias1``; the rest use ``bias2``.

    Raises:
        TypeError: if ``length`` or ``ratio`` is not provided.
    """

    def __init__(self, pe1_dim, pe2_dim, length=None, ratio=None):
        super().__init__()
        if length is None or ratio is None:
            raise TypeError("BiasLayer requires both 'length' and 'ratio'")
        self.spd_num1 = int((length - 1) * ratio)
        self.spd_num2 = length - 1 - self.spd_num1
        self.pe1_dim = pe1_dim
        self.pe2_dim = pe2_dim

    @staticmethod
    def _as_shape(dim):
        # Accept a bare int as a 1-D shape so the weight shape is always a tuple.
        return (dim,) if isinstance(dim, int) else tuple(dim)

    def build(self, pe1_dim):
        # Keras passes the input shape here; it is not needed because the
        # weight shapes were fixed in __init__.  Names are passed by keyword
        # and are distinct, so the two weights can be told apart when saved.
        self.bias1 = self.add_weight(name='bias1',
                                     shape=self._as_shape(self.pe1_dim),
                                     initializer='zeros',
                                     trainable=True)
        self.bias2 = self.add_weight(name='bias2',
                                     shape=self._as_shape(self.pe2_dim),
                                     initializer='zeros',
                                     trainable=True)

    def call(self, x):
        # Position 0 gets a fixed, non-trainable offset of 3.2.
        head = 3.2 * tf.ones((1,))
        first_part = tf.math.multiply(tf.ones(shape=(self.spd_num1,)), self.bias1)
        second_part = tf.math.multiply(tf.ones(shape=(self.spd_num2,)), self.bias2)
        t = tf.concat([head, first_part, second_part], 0)
        # Broadcasts over every leading axis of x.
        return x + t

class neural_netowrk(tf.keras.Model):
    """Single-head attention model with a positional score bias.

    The input is indexed as ``input[:, 0]`` (values), ``input[:, 1]``
    (queries) and ``input[:, 2]`` (keys).  Each goes through its own
    identity-initialised linear layer; the query/key scores receive the
    ``BiasLayer`` offset, are soft-maxed and used to mix the values.  The
    result is fed through two ReLU layers whose feature-wise means are
    subtracted, and the value at sequence position 0 is the prediction.

    Args:
        seed: value passed to ``tf.random.set_seed`` (a global side effect).
        agg: the sequence length handed to ``BiasLayer`` is ``agg + 1``.
        h1_ratio: ratio forwarded to ``BiasLayer``.

    Raises:
        TypeError: if ``agg`` or ``h1_ratio`` is not provided.
    """

    def __init__(self, seed=1, agg=None, h1_ratio=None):
        super().__init__()
        if agg is None or h1_ratio is None:
            raise TypeError("neural_netowrk requires both 'agg' and 'h1_ratio'")
        # Fix the global TF seed so weight initialisation is repeatable.
        tf.random.set_seed(seed)
        self.agg = agg + 1
        self.h1_ratio = h1_ratio
        self.fc1 = tf.keras.layers.Dense(
            m_half,
            kernel_initializer=tf.keras.initializers.RandomNormal(0, 1e-2),
            activation='relu')
        self.fc2 = tf.keras.layers.Dense(
            m_half,
            kernel_initializer=tf.keras.initializers.RandomNormal(0, 1e-2),
            activation='relu')
        # Value / query / key projections start out as the identity map.
        self.in_v = tf.keras.layers.Dense(
            units=ma,
            kernel_initializer=tf.keras.initializers.Constant(tf.eye(ma)),
            activation=None, name='in_v')
        self.in_q = tf.keras.layers.Dense(
            units=ma,
            kernel_initializer=tf.keras.initializers.Constant(tf.eye(ma)),
            activation=None, name='in_q')
        self.in_k = tf.keras.layers.Dense(
            units=ma,
            kernel_initializer=tf.keras.initializers.Constant(tf.eye(ma)),
            activation=None, name='in_k')
        # Not used by call(); kept so the attribute remains available.
        self.fc3 = tf.keras.layers.Dense(10, activation='softmax')
        self.pe = BiasLayer(pe1_dim=1, pe2_dim=1, length=self.agg,
                            ratio=self.h1_ratio)

    def call(self, input):
        '''
        Forward pass.
        :param input: tensor indexed as [batch, 3, sequence, feature], with
            values, queries and keys along axis 1
        :return: (prediction of shape [batch, 1], attention distribution)
        '''
        v = self.in_v(input[:, 0, :, :])
        q = self.in_q(input[:, 1, :, :])
        k = self.in_k(input[:, 2, :, :])

        scores = tf.matmul(q, k, transpose_b=True)
        scores = self.pe(scores)
        distribution = tf.nn.softmax(scores)
        x = tf.matmul(distribution, v)

        x1 = self.fc1(x)
        x2 = self.fc2(x)
        x = tf.math.reduce_mean(x1, 2) - tf.math.reduce_mean(x2, 2)
        # Only the output at sequence position 0 is used as the prediction.
        output = x[:, 0]

        # -1 lets the batch size be inferred, so this also works when the
        # static batch dimension is unknown.
        return tf.reshape(output, shape=[-1, 1]), distribution

def _run_trial(gamma, deg, n_st, n_sr, train_N, p, num_epoch=11, batch_size=100):
    """Generate one synthetic dataset, train the model and print losses.

    Returns the list of per-epoch test hinge losses.
    """
    # Import the submodule explicitly: a bare `import scipy` does not
    # guarantee that `scipy.linalg` has been loaded.
    from scipy.linalg import orth

    L = int(deg / 2)

    label = generate_label(N, gamma)
    # n_st / n_sr control the returned epsilon_S; gamma controls group sizes.
    A, epsilon_S = generateAdj(N, gamma, deg, n_st, n_sr, label)
    nb1, nb2 = neighbor_12(A)

    # Average share of 1-hop neighbours among 1-hop + 2-hop neighbours.
    dg_ratio = 0
    for first, second in zip(nb1, nb2):
        dg_ratio = dg_ratio + len(first) / (len(first) + len(second))
    dg_ratio = dg_ratio / N

    test_num = N - train_N
    test_ind = random.sample(list(range(N)), test_num)
    train_index = list(set(np.arange(N)) - set(test_ind))

    # Label noise: flip the sign of a fraction p of the training labels.
    flipped = random.sample(train_index, int(train_N * p))
    label[flipped] = -label[flipped]

    sigma = 0.1
    delta = 0.2
    P_m1 = orth(np.random.normal(0, 1, (ma, c)))
    P_m2 = orth(np.random.normal(0, 1, (ma, c)))
    data, _ = generate_data(dg_ratio, nb1, nb2, P_m1, P_m2, sigma, delta,
                            deg, gamma)
    _, _, test_data, test_label = dataset_generate(data, label, test_ind)
    print(epsilon_S)

    optimizer = tf.keras.optimizers.Adam(1e-2)
    model = neural_netowrk(agg=L, h1_ratio=dg_ratio)
    test_error = []
    for epoch in range(num_epoch):
        train_list = list(train_index)
        random.shuffle(train_list)
        for step in range(int(train_N / batch_size)):
            index_batch = train_list[step * batch_size:(step + 1) * batch_size]
            train_batch = data[index_batch]
            y_true = label[index_batch]

            # Only the forward pass is recorded; gradients are computed and
            # applied after leaving the tape context.
            with tf.GradientTape() as tape:
                y_pred, _ = model(train_batch)
                loss = hg_loss(y_true, y_pred)
            variables = tape.watched_variables()
            grads = tape.gradient(loss, variables)
            optimizer.apply_gradients(zip(grads, variables))

        # One batched forward pass over the whole test set.
        test_pred, _ = model(test_data)
        test_hinge = tf.reduce_mean(
            tf.nn.relu(1 - test_pred.numpy() * test_label))
        test_error.append(test_hinge)
        print('Iteration', epoch, '| Training loss:', loss.numpy(), '| Test loss',
              test_hinge.numpy())
    return test_error


def main():
    """Run the synthetic graph experiment with the hard-coded settings."""
    Tmax = 1        # number of independent trials
    p = 0.4         # fraction of training labels that are flipped
    gamma = 0.4
    deg = 120
    epsilonS = 0
    n_sr = int(deg * epsilonS)
    n_st = deg - n_sr
    train_N = 400

    for _ in range(Tmax):
        _run_trial(gamma, deg, n_st, n_sr, train_N, p)



# Run the experiment only when this file is executed as a script.
if __name__=='__main__':
    main()
