#!/opt/conda/bin/python3
from typing import Optional, Tuple, List
from torch import Tensor
from torch_geometric.typing import OptTensor


import networkx as nx
from networkx.algorithms import connected_components
import numpy as np
from ogb.nodeproppred import PygNodePropPredDataset
import torch
from torch_geometric.data import Data
from torch_geometric.datasets import Coauthor, Planetoid, WebKB, WikipediaNetwork
import torch_geometric.transforms as T
from torch_geometric.utils import assortativity, to_networkx, contains_isolated_nodes, homophily
from torch_geometric.utils.num_nodes import maybe_num_nodes

#----------------------------------------------------------------------------------------------------------------------------------------------------
# Helper
#----------------------------------------------------------------------------------------------------------------------------------------------------

def get_connected_components(nx_data):
    """Return the number of connected components in ``nx_data``.

    Args:
        nx_data: NetworkX graph accepted by ``connected_components``.

    Returns:
        int: How many connected components the graph has (0 if it has none).
    """
    # Count lazily in a single pass. Copying each component into its own
    # subgraph is unnecessary because only the count is returned.
    return sum(1 for _ in connected_components(nx_data))

#----------------------------------------------------------------------------------------------------------------------------------------------------

def get_maximum_degree(nx_data):
    """Return the largest node degree reported by ``nx_data.degree()``."""
    return max(deg for _node, deg in nx_data.degree())

def get_minimum_degree(nx_data):
    """Return the smallest node degree reported by ``nx_data.degree()``."""
    return min(deg for _node, deg in nx_data.degree())

def get_mean_degree(nx_data):
    """Return the arithmetic mean of all node degrees in ``nx_data``."""
    degrees = [deg for _node, deg in nx_data.degree()]
    return np.mean(degrees)

def get_std_degree(nx_data):
    """Return the standard deviation of all node degrees in ``nx_data``.

    Uses ``np.std`` with its default arguments.
    """
    degrees = [deg for _node, deg in nx_data.degree()]
    return np.std(degrees)

#----------------------------------------------------------------------------------------------------------------------------------------------------

def get_maximum_diameter(nx_data):
    """Return the largest diameter found among the connected components.

    Each connected component is measured on its own subgraph with
    ``nx.diameter``. The result is 0 when the graph has no components.
    """
    component_diameters = (
        nx.diameter(nx_data.subgraph(nodes))
        for nodes in nx.connected_components(nx_data)
    )
    return max(component_diameters, default=0)

def get_max_aspl(nx_data):
    """Return the largest average shortest path length over all components.

    The average shortest path length is computed separately on the subgraph
    of every connected component, and the maximum of those values is returned.

    Args:
        nx_data: NetworkX graph accepted by ``nx.connected_components``.

    Returns:
        The maximum per-component average shortest path length, or 0 when the
        graph has no components (the same fallback ``get_maximum_diameter``
        uses), instead of raising ``ValueError`` from ``max`` on an empty
        sequence.
    """
    per_component = (
        nx.average_shortest_path_length(nx_data.subgraph(nodes))
        for nodes in nx.connected_components(nx_data)
    )
    return max(per_component, default=0)

#----------------------------------------------------------------------------------------------------------------------------------------------------

def get_maximum_ec(nx_data):
    """Return the highest eigenvector centrality of any node in ``nx_data``."""
    centrality = nx.eigenvector_centrality(nx_data)
    return max(centrality.values())

def get_minimum_ec(nx_data):
    """Return the lowest eigenvector centrality of any node in ``nx_data``."""
    centrality = nx.eigenvector_centrality(nx_data)
    return min(centrality.values())

def get_mean_ec(nx_data):
    """Return the mean eigenvector centrality over all nodes in ``nx_data``."""
    scores = list(nx.eigenvector_centrality(nx_data).values())
    return np.mean(scores)

def get_std_ec(nx_data):
    """Return the standard deviation of eigenvector centrality over all nodes.

    Uses ``np.std`` with its default arguments.
    """
    scores = list(nx.eigenvector_centrality(nx_data).values())
    return np.std(scores)

#----------------------------------------------------------------------------------------------------------------------------------------------------
# Statistics
#----------------------------------------------------------------------------------------------------------------------------------------------------

def get_stats(data_name,data,nx_data):
    """Print a block of structural statistics for one dataset to stdout.

    Args:
        data_name: Dataset name; printed upper-cased as the block header.
        data: Graph object providing ``edge_index``, ``y`` and ``num_nodes``.
        nx_data: NetworkX version of the same graph, used for the component,
            degree, path-length and eigenvector-centrality statistics.

    Returns:
        None. Every statistic is written with ``print``.
    """
    print('#'+'-'*49)
    print(f'{data_name.upper()}:')
    print(f'\tASPL: {get_max_aspl(nx_data)}') #O(n^2+m)
    print(f'\tAssortativity: {assortativity(data.edge_index)}')
    print(f'\tCC: {get_connected_components(nx_data)}')
    print(f'\tDegree (Max): {get_maximum_degree(nx_data)}')
    print(f'\tDegree (Min): {get_minimum_degree(nx_data)}')
    print(f'\tDegree (Mean): {get_mean_degree(nx_data)}')
    print(f'\tDegree (Std): {get_std_degree(nx_data)}')
    print(f'\tDiameter: {get_maximum_diameter(nx_data)}') #O(n^3)-dense O(n^2 logn)-sparse
    print(f'\tEdges: {data.edge_index.shape[1]}')
    # Compute eigenvector centrality once and reuse it for every summary line
    # below, rather than re-running the computation for each statistic.
    ec_values = list(nx.eigenvector_centrality(nx_data).values())
    print(f'\tEigenvector Centrality (Max): {max(ec_values)}')
    # The minimum eigenvector centrality is currently not reported.
    print(f'\tEigenvector Centrality (Mean): {np.mean(ec_values)}')
    print(f'\tEigenvector Centrality (Std): {np.std(ec_values)}')
    print(f'\tHomophily: {homophily(data.edge_index,data.y)}')
    # "Is Connected" is not printed: it is redundant with the CC count above.
    print(f'\tIso-Nodes: {contains_isolated_nodes(data.edge_index)}')
    print(f'\tNodes: {data.num_nodes}')

#----------------------------------------------------------------------------------------------------------------------------------------------------
# Datasets
#----------------------------------------------------------------------------------------------------------------------------------------------------

# Transform handed to every dataset loader below.
transform = T.Compose([]) # Raw
# transform = T.Compose([T.ToUndirected(),T.GCNNorm()]) # Augmented Adjacency

# Dataset families in reporting order: (loader class, dataset names).
dataset_families = (
    (Planetoid, ['cora','citeseer','pubmed']),
    (WebKB, ['cornell','texas','wisconsin']),
    (WikipediaNetwork, ['chameleon','squirrel']),
    (Coauthor, ['cs','physics']),
    (PygNodePropPredDataset, ['ogbn-arxiv','ogbn-products']), # OGB
)

# Load each dataset, take its first graph, convert it to an undirected
# NetworkX graph and print its statistics.
for dataset_cls, family_names in dataset_families:
    for data_name in family_names:
        dataset = dataset_cls(
                root='/root/workspace/data/'+data_name,
                name=data_name,
                transform=transform,
        )
        data = dataset[0]
        nx_data = to_networkx(data,to_undirected=True)
        get_stats(data_name,data,nx_data)


"""
#-------------------------------------------------
CORA:
	ASPL: 1.1704835152309518
	Assortativity: -0.06587088108062744
	CC: 78
	Degree (Max): 168
	Degree (Min): 1
	Degree (Mean): 3.8980797636632203
	Degree (Std): 5.227818372799966
	Diameter: 19
	Edges: 10556
	Eigenvector Centrality (Max): 0.6542996956637284
	Eigenvector Centrality (Mean): 0.0047865456098027765
	Eigenvector Centrality (Std): 0.01861088927850572
	Homophily: 0.8099659085273743
	Iso-Nodes: False
	Nodes: 2708
#-------------------------------------------------
CITESEER:
	ASPL: 1.088063268837796
	Assortativity: 0.04837806522846222
	CC: 438
	Degree (Max): 99
	Degree (Min): 0
	Degree (Mean): 2.7363991584009617
	Degree (Std): 3.3808377274990122
	Diameter: 28
	Edges: 9104
	Eigenvector Centrality (Max): 0.46345822106007906
	Eigenvector Centrality (Mean): 0.0033854307488708777
	Eigenvector Centrality (Std): 0.01700323333093497
	Homophily: 0.7351008721351624
	Iso-Nodes: True
	Nodes: 3327
#-------------------------------------------------
PUBMED:
	ASPL: 6.336870027999047
	Assortativity: -0.04364030435681343
	CC: 1
	Degree (Max): 171
	Degree (Min): 1
	Degree (Mean): 4.496018664096972
	Degree (Std): 7.431003104853955
	Diameter: 18
	Edges: 88648
	Eigenvector Centrality (Max): 0.17190391163491642
	Eigenvector Centrality (Mean): 0.001410103610441541
	Eigenvector Centrality (Std): 0.006980634829545299
	Homophily: 0.8023869395256042
	Iso-Nodes: False
	Nodes: 19717
#-------------------------------------------------
CORNELL:
	ASPL: 0.19059352274843197
	Assortativity: -0.3836122453212738
	CC: 49
	Degree (Max): 65
	Degree (Min): 0
	Degree (Mean): 1.825136612021858
	Degree (Std): 4.8992010049769
	Diameter: 9
	Edges: 298
	Eigenvector Centrality (Max): 0.6962206086337285
	Eigenvector Centrality (Mean): 0.03856210056208116
	Eigenvector Centrality (Std): 0.06306699037180138
	Homophily: 0.13087248802185059
	Iso-Nodes: False
	Nodes: 183
#-------------------------------------------------
TEXAS:
	ASPL: 0.25416298028311424
	Assortativity: -0.34627223014831543
	CC: 46
	Degree (Max): 69
	Degree (Min): 0
	Degree (Mean): 2.0
	Degree (Std): 5.1914178864864216
	Diameter: 8
	Edges: 325
	Eigenvector Centrality (Max): 0.6832029605634744
	Eigenvector Centrality (Mean): 0.03900584056751637
	Eigenvector Centrality (Std): 0.06279351300841855
	Homophily: 0.10769230872392654
	Iso-Nodes: False
	Nodes: 183
#-------------------------------------------------
WISCONSIN:
	ASPL: 0.16714223216971144
	Assortativity: -0.27229174971580505
	CC: 73
	Degree (Max): 67
	Degree (Min): 0
	Degree (Mean): 1.9681274900398407
	Degree (Std): 4.4416289588896865
	Diameter: 10
	Edges: 515
	Eigenvector Centrality (Max): 0.6830967853840342
	Eigenvector Centrality (Mean): 0.02982530316681997
	Eigenvector Centrality (Std): 0.0556283653905739
	Homophily: 0.19611650705337524
	Iso-Nodes: False
	Nodes: 251
#-------------------------------------------------
CHAMELEON:
	ASPL: 0.19123614241374048
	Assortativity: -0.1128048449754715
	CC: 20
	Degree (Max): 629
	Degree (Min): 0
	Degree (Mean): 20.57092665788318
	Degree (Std): 38.59768388508901
	Diameter: 13
	Edges: 36101
	Eigenvector Centrality (Max): 0.12529428653976685
	Eigenvector Centrality (Mean): 0.005877583414547263
	Eigenvector Centrality (Std): 0.020115376343058305
	Homophily: 0.23100734567642212
	Iso-Nodes: False
	Nodes: 2277
#-------------------------------------------------
SQUIRREL:
	ASPL: 0.1403361109650705
	Assortativity: 0.37379735708236694
	CC: 47
	Degree (Max): 1821
	Degree (Min): 0
	Degree (Mean): 61.29129013651221
	Degree (Std): 138.50480421732385
	Diameter: 12
	Edges: 217073
	Eigenvector Centrality (Max): 0.07922291499467773
	Eigenvector Centrality (Mean): 0.005678755900554215
	Eigenvector Centrality (Std): 0.012649997968051068
	Homophily: 0.2239430993795395
	Iso-Nodes: False
	Nodes: 5201
#-------------------------------------------------
PHYSICS:
        ASPL: 5.163814898581291
        Assortativity: 0.2010314166545868
        CC: 1
        Degree (Max): 382
        Degree (Min): 1
        Degree (Mean): 14.37752587481518
        Degree (Std): 15.572855848347867
        Diameter: 17
        Edges: 495924
        Eigenvector Centrality (Max): 0.15409482590263263
        Eigenvector Centrality (Mean): 0.001627776777612802
        Eigenvector Centrality (Std): 0.005132419733377802
        Homophily: 0.9314451217651367
        Iso-Nodes: False
        Nodes: 34493
"""
