import numpy as np
import random

# Let numpy print up to 100000 elements before it abbreviates an array.
np.set_printoptions(threshold=100000)

# Fixed-capacity storage for the test graph.
# Edge: row k receives the two endpoints of the k-th distinct edge.
# edge: dict keyed by (u, v) tuples, used purely for membership tests.
Edge = np.zeros((500000, 2))
edge = {}

# Adjacency lists indexed by node id (200000 slots each); every slot gets
# its own empty list so appends never alias between nodes.
vertex = [[] for _ in range(200000)]
train_vertex = [[] for _ in range(200000)]

# Per-node degree counter for the test graph.
deg = np.zeros(200000)

# Input edge lists: one "u<TAB>v" pair per line.
train_graph = "graph1.txt"
test_graph = "graph2.txt"

# Fail fast if the training graph is missing, but do not keep the handle
# around: nothing reads it before the file is opened again for the actual
# training pass, so holding it open would only leak a descriptor.
with open(train_graph) as train_file:
    pass

# Left open on purpose: the first pass over the test graph iterates it.
test_file = open(test_graph)

# Running counters filled in while the test graph is read.
idx = 0        # index of the next distinct edge
num_node = 0   # largest node id seen so far
num_edge = 0   # number of distinct edges

t = 0

# Pass 1 over the test graph: build the deduplicated, undirected edge set,
# the adjacency lists and the degree table.  Self-loops are ignored and
# every edge is stored with its smaller endpoint first.
with test_file:  # make sure the handle is closed once the pass is done
    for line in test_file:
        if not line.strip():
            continue  # tolerate blank / trailing empty lines

        fields = line.split("\t")
        l0 = int(fields[0])
        l1 = int(fields[1])
        if l0 == l1:
            continue
        if l0 > l1:
            l0, l1 = l1, l0

        # Tracked before the duplicate check, so repeated edges still count
        # towards the largest node id.
        num_node = max(num_node, l1)
        if (l0, l1) in edge:
            continue

        Edge[idx] = (l0, l1)

        # Both orientations are stored so lookups need no normalisation.
        edge[(l0, l1)] = 1
        edge[(l1, l0)] = 1
        vertex[l0].append(l1)
        vertex[l1].append(l0)
        deg[l0] += 1
        deg[l1] += 1
        idx += 1

num_edge = idx

# num_node holds the largest id; if id 0 is in use the ids are 0-based and
# the node count is one larger.
if deg[0] >= 1:
    num_node += 1

print("number of nodes", num_node)
print("number of edges", num_edge)



# Per-edge accumulators, one slot per distinct test edge:
#   res  - count measured on the test graph itself
#   pred - count measured on the training graph
res = np.zeros(num_edge)
pred = np.zeros(num_edge)

idx = 0

# Fix a random order of the vertex arrival: a seeded (hence reproducible)
# shuffle of 0 .. num_node-1, stored as float64.
np.random.seed(2021)
order = np.arange(num_node, dtype=np.float64)
np.random.shuffle(order)


# Load the training graph: record each edge in both orientations and build
# the training adjacency lists.  A pair is skipped only when that exact
# (u, v) key was already recorded; self-loops are not filtered here.
trainEdge = {}
idx = 0

with open(train_graph) as train_file:  # closed as soon as the pass ends
    for line in train_file:
        if not line.strip():
            continue  # tolerate blank / trailing empty lines

        fields = line.split("\t")
        l0 = int(fields[0])
        l1 = int(fields[1])

        if (l0, l1) in trainEdge:
            continue

        trainEdge[(l0, l1)] = 1
        trainEdge[(l1, l0)] = 1

        train_vertex[l0].append(l1)
        train_vertex[l1].append(l0)

# Pass 2 over the test graph: for every distinct, non-self-loop edge fill
# one slot of `res` and `pred`.  Edges are visited in file order, so slot
# `idx` lines up with the numbering used when the graph was first read.
idx = 0
newEdge = {}

with open(test_graph) as test_file:  # closed as soon as the pass ends
    for line in test_file:
        if not line.strip():
            continue  # tolerate blank / trailing empty lines

        fields = line.split("\t")
        l0 = int(fields[0])
        l1 = int(fields[1])

        # Skip pairs already handled (in either orientation).
        if (l0, l1) in newEdge:
            continue
        newEdge[(l0, l1)] = 1
        newEdge[(l1, l0)] = 1

        if l0 == l1:
            continue

        if l0 > l1:
            l0, l1 = l1, l0

        # Re-orient so that l0 is the endpoint arriving first.  `order` is
        # indexed with id - 1, so id 0 wraps around to the last slot.
        if order[l0 - 1] >= order[l1 - 1]:
            l0, l1 = l1, l0

        first_rank = order[l0 - 1]
        second_rank = order[l1 - 1]

        # res: neighbours of l0 in the test graph that arrive strictly
        # between l0 and l1 and are also adjacent to l1.
        for k in vertex[l0]:
            if first_rank < order[k - 1] < second_rank and (l1, k) in edge:
                res[idx] += 1

        # pred: entries of l0's training adjacency list that are also
        # adjacent to l1 in the training graph (no arrival-order filter).
        for k in train_vertex[l0]:
            if (l1, k) in trainEdge:
                pred[idx] += 1

        idx += 1

# Show the 100 largest per-edge counts of each kind, in descending order.
pred_desc = -np.sort(-pred)
print(-np.sort(-res)[0:100])
print(pred_desc[0:100])

# Only save 10% of the edges for Oregon and CAIDA Graph: predictions below
# the value found at the 10% position of the descending ranking are zeroed.
# Ties at the threshold are kept, so slightly more than 10% may survive.
# With no edges there is nothing to rank, so the step is skipped instead of
# indexing into an empty array.
if idx > 0:
    thr = pred_desc[int(0.1 * idx)]
    scored = pred[:idx]  # view: the masked assignment writes into pred
    scored[scored < thr] = 0

np.save("edge_pred", pred)
np.save("edge_result", res)
np.save("vertex_order", order)



