import os
import sys
import torch
import pickle as pkl
import numpy as np
import networkx as nx
import scipy.sparse as sp

from torch_geometric.datasets import WebKB
from torch_geometric.utils import to_scipy_sparse_matrix


def load_raw_data(data_path):
    graph = pkl.load(open(os.path.join(data_path, 'airport/airport.p'), 'rb'))
    adj = nx.adjacency_matrix(graph)
    features = np.array([graph.nodes[u]['feat'] for u in graph.nodes()])
    label_idx = 4
    labels = features[:, label_idx]
    features = features[:, :label_idx]
    labels = bin_feat(labels, bins=[7.0/7, 8.0/7, 9.0/7])
    return sp.csr_matrix(adj), features, labels


def load_nc_data(args, data_path, split_seed=None):
    raise NotImplementedError


def build_distance(G):
    length = dict(nx.all_pairs_shortest_path_length(G))
    R = np.array([[length.get(m, {}).get(n, 0) for m in G.nodes] for n in G.nodes], dtype=np.int32)
    return R


def load_md_data(args, data_path, split_seed=None):
    dataset_str = 'web-edu'
    data_path = os.path.join(data_path, dataset_str)
    object_to_idx = {}
    idx_counter = 0
    edges = []
    with open(os.path.join(data_path, "{}.edges.csv".format(dataset_str)), 'r') as f:
        all_edges = f.readlines()

    for line in all_edges:
        n1, n2 = line.rstrip().split(',')
        if n1 in object_to_idx:
            i = object_to_idx[n1]
        else:
            i = idx_counter
            object_to_idx[n1] = i
            idx_counter += 1
        if n2 in object_to_idx:
            j = object_to_idx[n2]
        else:
            j = idx_counter
            object_to_idx[n2] = j
            idx_counter += 1
        edges.append((i, j))
    adj = np.zeros((len(object_to_idx), len(object_to_idx)))
    adj_dict = {}
    for i,j in edges:
        adj_dict[i] = set()
    for i, j in edges:
        adj_dict[i].add(j)
        adj[i, j] = 1.  # comment this line for directed adjacency matrix
        adj[j, i] = 1.
    features = sp.eye(adj.shape[0]) 
    rand_feature = np.random.uniform(low=-0.02, high=0.02, size=(adj.shape[0],adj.shape[0]))
    features = features + sp.csr_matrix(rand_feature)
    G = nx.from_numpy_array(adj)
    labels = build_distance(G)
    labels = labels

    data = {
        'adj_train': sp.csr_matrix(adj),
        'features': features,
        'labels': labels, 
        'G': G
    }
    return data

