import pickle as pkl

import scipy.io as sio
import networkx as nx
import numpy as np
import scipy.sparse as sp
import torch
from sklearn.metrics import roc_auc_score, average_precision_score


from numpy import genfromtxt
import json
import os


def sample_mask(idx, l):
    """Create a boolean mask of length ``l`` that is True at positions ``idx``.

    Args:
        idx: Anything NumPy accepts as an integer index (list, range, array).
        l: Length of the mask.

    Returns:
        A 1-D ``numpy.ndarray`` of dtype ``bool``.
    """
    # The ``np.bool`` alias was removed in NumPy 1.24; the builtin ``bool``
    # yields the same dtype on every NumPy version.
    mask = np.zeros(l, dtype=bool)
    mask[idx] = True
    return mask


def loadjson(x):
    """Load a JSON file and return the decoded object.

    Args:
        x: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` decodes from the file (a dict for the feature
        files this module reads).
    """
    print(f"loading {x}")
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(x) as fh:
        return json.load(fh)

def encode_sparselist2denselist(idx, l):
    """Expand a list of active positions into a dense 0/1 float vector.

    Args:
        idx: Positions that should be set to one.
        l: Length of the resulting vector.

    Returns:
        A float ``numpy.ndarray`` of length ``l`` with ones at ``idx``.
    """
    dense = np.zeros(l)
    dense[idx] = 1.0
    return dense


def load_data(dataset):
    """Load a dataset stored as ``data/ind.<dataset>.*`` pickle files.

    Seven pickles (``x``, ``y``, ``tx``, ``ty``, ``allx``, ``ally``, ``graph``)
    and the ``data/ind.<dataset>.test.index`` file are read relative to the
    current working directory.

    Args:
        dataset: Dataset name used to build the file names; the value
            ``'citeseer'`` triggers extra padding of the test split.

    Returns:
        A tuple ``(adj, features, y_test, tx, ty, test_mask, label_ids)``:
        ``adj`` is the adjacency matrix built by networkx from ``graph``,
        ``features`` is a dense ``torch.FloatTensor``, ``y_test`` is a copy of
        the label matrix that is zero outside the rows selected by
        ``test_mask``, and ``label_ids`` is ``np.argmax(labels, 1)``
        (presumably the class id of one-hot label rows -- confirm against the
        data files).
    """
    # load the data: x, tx, allx, graph
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        '''
        fix Pickle incompatibility of numpy arrays between Python 2 and 3
        https://stackoverflow.com/questions/11305790/pickle-incompatibility-of-numpy-arrays-between-python-2-and-3
        '''
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open("data/ind.{}.{}".format(dataset, names[i]), 'rb') as rf:
            u = pkl._Unpickler(rf)
            u.encoding = 'latin1'
            cur_data = u.load()
            objects.append(cur_data)
        # objects.append(
        #     pkl.load(open("data/ind.{}.{}".format(dataset, names[i]), 'rb')))
    x, y, tx, ty, allx, ally, graph = tuple(objects)
    # Test-node indices in file order, plus the same indices sorted ascending.
    test_idx_reorder = parse_index_file(
        "data/ind.{}.test.index".format(dataset))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(
            min(test_idx_reorder), max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended

    # Stack the allx rows and the test rows, then copy the rows found at the
    # sorted test positions into the positions listed in the index file.
    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    features = torch.FloatTensor(np.array(features.todense()))
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    # Same stacking and row permutation for the labels.
    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    # Train rows are the first len(y) rows; validation is the next 500 rows
    # (the 500 is hard-coded here).
    idx_test = test_idx_range.tolist()
    idx_train = range(len(y))
    idx_val = range(len(y), len(y) + 500)

    train_mask = sample_mask(idx_train, labels.shape[0])
    val_mask = sample_mask(idx_val, labels.shape[0])
    test_mask = sample_mask(idx_test, labels.shape[0])

    # Per-split label matrices: zero everywhere except the split's own rows.
    # Only y_test is returned; y_train and y_val are computed but unused.
    y_train = np.zeros(labels.shape)
    y_val = np.zeros(labels.shape)
    y_test = np.zeros(labels.shape)
    y_train[train_mask, :] = labels[train_mask, :]
    y_val[val_mask, :] = labels[val_mask, :]
    y_test[test_mask, :] = labels[test_mask, :]
    

    # Debug output describing the loaded objects.
    print('-----------features---------')
    print(type(features))
    print(features.shape)
    print('**********adj***************')
    print(type(adj))
    print(adj.shape)
    print('////////////////labels//////////')
    check = np.argmax(labels,1) 
    print(type(check))
    print(check.shape)
    return adj, features, y_test, tx, ty, test_mask, np.argmax(labels,1)


def load_other_data(dataset):
    """Load a dataset that is not stored in the ``ind.*`` pickle layout.

    Two layouts are supported:

    * ``dataset == 'wiki'``: reads ``wiki.mat`` from the current directory and
      uses its ``fea`` (features), ``W`` (adjacency) and ``gnd`` (labels)
      entries.
    * anything else: treats ``dataset`` as a topic name and reads
      ``<topic>_features.json``, ``<topic>_edges.csv`` and
      ``<topic>_target.csv`` from
      ``$PROJECT_HOME/data/wikipedia/<topic>/raw``.

    Args:
        dataset: ``'wiki'`` or a topic name.

    Returns:
        ``(adj, features, gnd)`` where ``adj`` is a ``scipy.sparse.coo_matrix``,
        ``features`` is a dense torch tensor (float32 for ``'wiki'``, float64
        for the topic branch because it comes from ``torch.from_numpy`` on a
        float64 array) and ``gnd`` is a 1-D NumPy array of labels.

    Raises:
        ValueError: If the feature file of a topic dataset lists no feature
            ids at all.
        AssertionError: If the smallest feature id is not 0 or the node ids
            are not exactly ``0..N-1``.
    """
    if dataset == 'wiki':
        data = sio.loadmat('{}.mat'.format(dataset))
        features = data['fea']
        if sp.issparse(features):
            features = features.toarray()
        features = torch.FloatTensor(features)

        # Transpose, shift the labels down by one and keep the first row
        # (the file presumably stores 1-based class ids in a column vector).
        gnd = (data['gnd'].T - 1)[0, :]
        adj = sp.coo_matrix(data['W'])

    else:
        # NOTE(review): this unconditionally overwrites PROJECT_HOME with a
        # machine-specific path; kept as-is because callers may rely on it,
        # but it should become configurable.
        os.environ['PROJECT_HOME'] = '/research/dept8/kamhoua/RWR-GAE/gae'
        topic = dataset
        raw_dir = f"{os.environ['PROJECT_HOME']}/data/wikipedia/{topic}/raw"

        features = loadjson(f"{raw_dir}/{topic}_features.json")
        N = len(features)
        features = {int(k): v for k, v in features.items()}

        # Determine the feature-id range from all nodes instead of seeding it
        # from one hard-coded node id, which only exists in some datasets.
        noun_ids = [i for v in features.values() for i in v]
        if not noun_ids:
            raise ValueError(
                f"no feature ids found in {raw_dir}/{topic}_features.json")
        min_noun_id, max_noun_id = min(noun_ids), max(noun_ids)

        assert min_noun_id == 0
        num_nouns = max_noun_id + 1

        # Node ids must be exactly 0..N-1 because they index rows of `feats`.
        assert sorted(features) == list(range(len(features)))

        # One multi-hot row per node.
        feats = np.zeros((len(features), num_nouns))
        for node, node_nouns in features.items():
            feats[node, node_nouns] = 1

        features = torch.from_numpy(feats)

        G = nx.DiGraph()
        G.add_nodes_from(range(N))
        edge_list = []
        with open(f"{raw_dir}/{topic}_edges.csv") as f:
            next(f, None)  # skip the header row
            for line in f:
                # strip() instead of line[:-1]: the latter chops a digit off
                # the last row when the file has no trailing newline.
                line = line.strip()
                if not line:
                    continue
                src, dst = line.split(',')
                edge_list.append((int(src), int(dst)))
        G.add_edges_from(edge_list)

        adj = sp.coo_matrix(nx.to_numpy_array(G, nodelist=list(G.nodes)))

        # Labels are taken in file order; the id column is ignored, so this
        # assumes rows are sorted by node id -- TODO confirm.
        values = []
        with open(f"{raw_dir}/{topic}_target.csv") as f:
            next(f, None)  # skip the header row
            for line in f:
                line = line.strip()
                if not line:
                    continue
                _node_id, value = line.split(',')
                values.append(int(value))
        gnd = np.array(values)

    # Debug output describing the loaded objects.
    print('-----------features---------')
    print(type(features))
    print(features.shape)
    print('**********adj***************')
    print(type(adj))
    print(adj.shape)
    print('////////////////labels//////////')
    print(type(gnd))
    print(gnd.shape)

    return adj, features, gnd






def parse_index_file(filename):
    """Read a text file holding one integer per line.

    Blank (whitespace-only) lines are skipped, so a trailing empty line does
    not abort parsing.

    Args:
        filename: Path of the index file.

    Returns:
        The integers as a list, in file order.

    Raises:
        ValueError: If a non-blank line is not a valid integer.
    """
    # Context manager closes the handle; the original left it open.
    with open(filename) as fh:
        return [int(line.strip()) for line in fh if line.strip()]


def sparse_to_tuple(sparse_mx):
    """Break a scipy sparse matrix into ``(coords, values, shape)``.

    Args:
        sparse_mx: A scipy sparse matrix; converted to COO if it is not one.

    Returns:
        ``coords`` as an array with one ``[row, col]`` pair per stored entry,
        ``values`` as the matching data array, and ``shape`` as the matrix
        shape.
    """
    coo = sparse_mx if sp.isspmatrix_coo(sparse_mx) else sparse_mx.tocoo()
    coords = np.vstack((coo.row, coo.col)).T
    return coords, coo.data, coo.shape


def mask_test_edges(adj):
    """Split the edges of ``adj`` into train / validation / test sets.

    The diagonal is removed, then the upper-triangular edges are shuffled
    with ``np.random``; 5% (rounded down) become validation edges, 10%
    (rounded down) become test edges and the rest are training edges. For
    each held-out set an equally sized list of random non-edges is drawn.

    NOTE: Splits are randomized (seed ``np.random`` for reproducibility) and
    results might slightly deviate from reported numbers in the paper.
    Negative sampling is rejection based, so it may not terminate on graphs
    that have too few non-edges.

    Args:
        adj: Square scipy sparse adjacency matrix.

    Returns:
        ``(adj_train, train_edges, val_edges, val_edges_false, test_edges,
        test_edges_false)``. ``adj_train`` is a symmetric CSR matrix built
        from the training edges. The positive edge arrays have one
        ``[row, col]`` pair per row and only contain a single direction of
        each edge; the ``*_false`` values are lists of ``[i, j]`` pairs.
    """
    def _coords(mx):
        # One [row, col] pair per stored entry of the matrix.
        coo = mx.tocoo()
        return np.vstack((coo.row, coo.col)).transpose()

    def _pair_set(pairs):
        # Hashable view of an (n, 2) edge array for O(1) membership tests.
        return set(map(tuple, np.asarray(pairs).reshape(-1, 2).tolist()))

    # Remove diagonal elements
    adj = adj - sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj.eliminate_zeros()
    # Check that diag is zero (without densifying the whole matrix):
    assert adj.diagonal().sum() == 0

    edges = _coords(sp.triu(adj))
    edges_all = _coords(adj)
    num_edges = edges.shape[0]
    num_test = int(np.floor(num_edges / 10.))
    num_val = int(np.floor(num_edges / 20.))

    all_edge_idx = list(range(num_edges))
    np.random.shuffle(all_edge_idx)
    # Explicit integer dtype keeps empty selections valid as indices.
    val_edge_idx = np.asarray(all_edge_idx[:num_val], dtype=np.int64)
    test_edge_idx = np.asarray(
        all_edge_idx[num_val:(num_val + num_test)], dtype=np.int64)
    test_edges = edges[test_edge_idx]
    val_edges = edges[val_edge_idx]
    # Boolean mask keeps the training edges in their original order.
    is_train = np.ones(num_edges, dtype=bool)
    is_train[test_edge_idx] = False
    is_train[val_edge_idx] = False
    train_edges = edges[is_train]

    all_pairs = _pair_set(edges_all)
    train_pairs = _pair_set(train_edges)
    val_pairs = _pair_set(val_edges)
    test_pairs = _pair_set(test_edges)
    num_nodes = adj.shape[0]

    test_edges_false = []
    test_false_pairs = set()
    while len(test_edges_false) < len(test_edges):
        idx_i = np.random.randint(0, num_nodes)
        idx_j = np.random.randint(0, num_nodes)
        if idx_i == idx_j:
            continue
        if (idx_i, idx_j) in all_pairs:
            continue
        if (idx_j, idx_i) in test_false_pairs or (idx_i, idx_j) in test_false_pairs:
            continue
        test_edges_false.append([idx_i, idx_j])
        test_false_pairs.add((idx_i, idx_j))

    val_edges_false = []
    val_false_pairs = set()
    while len(val_edges_false) < len(val_edges):
        idx_i = np.random.randint(0, num_nodes)
        idx_j = np.random.randint(0, num_nodes)
        if idx_i == idx_j:
            continue
        # Reject any real edge, including held-out test edges; previously
        # such a draw was accepted here and only tripped the assert below.
        if (idx_i, idx_j) in all_pairs:
            continue
        if (idx_i, idx_j) in train_pairs or (idx_j, idx_i) in train_pairs:
            continue
        if (idx_i, idx_j) in val_pairs or (idx_j, idx_i) in val_pairs:
            continue
        if (idx_j, idx_i) in val_false_pairs or (idx_i, idx_j) in val_false_pairs:
            continue
        val_edges_false.append([idx_i, idx_j])
        val_false_pairs.add((idx_i, idx_j))

    # Sanity checks: negatives are non-edges and the positive splits are disjoint.
    assert all_pairs.isdisjoint(test_false_pairs)
    assert all_pairs.isdisjoint(val_false_pairs)
    assert train_pairs.isdisjoint(val_pairs)
    assert train_pairs.isdisjoint(test_pairs)
    assert test_pairs.isdisjoint(val_pairs)

    data = np.ones(train_edges.shape[0])

    # Re-build adj matrix
    adj_train = sp.csr_matrix((data, (train_edges[:, 0], train_edges[:, 1])), shape=adj.shape)
    adj_train = adj_train + adj_train.T

    # NOTE: these edge lists only contain single direction of edge!
    return adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false


def preprocess_graph(adj):
    """Add self-loops to ``adj``, scale it by D^-1/2 on both sides, and
    return the result as a torch sparse tensor.

    ``D`` is the diagonal matrix of row sums of ``adj + I``.

    Args:
        adj: Adjacency matrix in any form ``scipy.sparse.coo_matrix`` accepts.

    Returns:
        The normalized matrix converted by
        ``sparse_mx_to_torch_sparse_tensor``.
    """
    coo = sp.coo_matrix(adj)
    with_self_loops = coo + sp.eye(coo.shape[0])
    row_degree = np.array(with_self_loops.sum(1))
    inv_sqrt_degree = sp.diags(np.power(row_degree, -0.5).flatten())
    # (A D^-1/2)^T D^-1/2
    right_scaled = with_self_loops.dot(inv_sqrt_degree)
    normalized = right_scaled.transpose().dot(inv_sqrt_degree).tocoo()
    return sparse_mx_to_torch_sparse_tensor(normalized)


def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse tensor.

    Args:
        sparse_mx: A scipy sparse matrix; it is converted to COO format and
            cast to float32.

    Returns:
        A float32 sparse COO ``torch.Tensor`` with the same shape and entries.
    """
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    # torch.sparse.FloatTensor(indices, values, shape) is a deprecated legacy
    # constructor; sparse_coo_tensor is the supported equivalent.
    return torch.sparse_coo_tensor(indices, values, shape)


def get_roc_score(emb, adj_orig, edges_pos, edges_neg):
    """Score link prediction from node embeddings.

    Each candidate edge ``(i, j)`` is scored with
    ``sigmoid(emb[i] . emb[j])``; positive edges get label 1 and negative
    edges label 0.

    Args:
        emb: 2-D NumPy array of node embeddings, one row per node.
        adj_orig: Original adjacency matrix. Not used by the computation;
            kept so existing callers keep working.
        edges_pos: Iterable of ``(i, j)`` pairs that are true edges.
        edges_neg: Iterable of ``(i, j)`` pairs that are non-edges.

    Returns:
        ``(roc_score, ap_score)`` as computed by scikit-learn's
        ``roc_auc_score`` and ``average_precision_score``.
    """
    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    # Predict on test set of edges
    adj_rec = np.dot(emb, emb.T)
    preds = [sigmoid(adj_rec[e[0], e[1]]) for e in edges_pos]
    preds_neg = [sigmoid(adj_rec[e[0], e[1]]) for e in edges_neg]

    preds_all = np.hstack([preds, preds_neg])
    # One zero per *negative* prediction; sizing this with len(preds) gave a
    # label vector of the wrong length whenever the two counts differed.
    labels_all = np.hstack([np.ones(len(preds)), np.zeros(len(preds_neg))])
    roc_score = roc_auc_score(labels_all, preds_all)
    ap_score = average_precision_score(labels_all, preds_all)

    return roc_score, ap_score
