import numpy as np
import random
import time
import tqdm
import dgl
import sys
import os

# Number of random walks started from each conference node.
num_walks_per_node = 1000
# Number of times the 4-edge-type metapath is repeated within a single walk.
walk_length = 100
# Dataset directory, taken from the first command-line argument. The input
# files are read from it and output_path.txt is written into it.
# NOTE: evaluated at import time, so importing this module without a
# command-line argument raises IndexError.
path = sys.argv[1]

def _read_id_table(filename, make_name):
    """Read a whitespace-separated ``<id> <name tokens...>`` file from ``path``.

    Args:
        filename: File name relative to the module-level ``path`` directory.
        make_name: Callable that receives the list of whitespace-split fields
            of one line and returns the name to store for that row.

    Returns:
        A ``(ids, names)`` pair of parallel lists, in file order.

    Raises:
        ValueError: If the first field of a line is not an integer.
    """
    ids = []
    names = []
    # ``with`` guarantees the handle is closed even if a line fails to parse.
    with open(os.path.join(path, filename), encoding="ISO-8859-1") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a stray empty line at end of file).
                continue
            ids.append(int(fields[0]))
            names.append(make_name(fields))
    return ids, names


def _read_edge_file(filename, src_index, dst_index):
    """Read a tab-separated ``<src_id>\\t<dst_id>`` file from ``path``.

    Args:
        filename: File name relative to the module-level ``path`` directory.
        src_index: Mapping from raw source ID to node index.
        dst_index: Mapping from raw destination ID to node index.

    Returns:
        A ``(src, dst)`` pair of parallel lists of node indices.

    Raises:
        KeyError: If an edge references an ID missing from the index maps.
        ValueError: If a column is not an integer.
    """
    src = []
    dst = []
    with open(os.path.join(path, filename), "r") as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines instead of failing on int('').
                continue
            cols = line.split('\t')
            # int() ignores surrounding whitespace, including the newline.
            src.append(src_index[int(cols[0])])
            dst.append(dst_index[int(cols[1])])
    return src, dst


def construct_graph():
    """Build the paper/author/conference heterograph from the files in ``path``.

    Reads ``id_author.txt``, ``id_conf.txt`` and ``paper.txt`` (node tables)
    and ``paper_author.txt`` and ``paper_conf.txt`` (edge lists). Raw IDs are
    remapped to consecutive indices following their order of appearance in
    the node tables, so position ``i`` of each returned name list is the name
    of node ``i`` of that type.

    Returns:
        A tuple ``(hg, author_names, conf_names, paper_names)`` where ``hg``
        is a DGL heterograph with edge types ``pa``/``ap`` (paper-author, both
        directions) and ``pc``/``cp`` (paper-conference, both directions).

    Raises:
        IndexError: If an author or conference line has an ID but no name.
        KeyError: If an edge references an ID absent from the node tables.
        ValueError: If an ID field is not an integer.
    """
    # Author and conference names are the first token after the ID.
    author_ids, author_names = _read_id_table(
        "id_author.txt", lambda fields: fields[1])
    conf_ids, conf_names = _read_id_table(
        "id_conf.txt", lambda fields: fields[1])
    # Paper names are all remaining tokens concatenated, prefixed with 'p'.
    paper_ids, paper_names = _read_id_table(
        "paper.txt", lambda fields: 'p' + ''.join(fields[1:]))

    # Raw ID -> node index (position in the corresponding table).
    author_ids_invmap = {x: i for i, x in enumerate(author_ids)}
    conf_ids_invmap = {x: i for i, x in enumerate(conf_ids)}
    paper_ids_invmap = {x: i for i, x in enumerate(paper_ids)}

    paper_author_src, paper_author_dst = _read_edge_file(
        "paper_author.txt", paper_ids_invmap, author_ids_invmap)
    paper_conf_src, paper_conf_dst = _read_edge_file(
        "paper_conf.txt", paper_ids_invmap, conf_ids_invmap)

    # Every relation is stored in both directions so walks can traverse
    # conf -> paper -> author -> paper -> conf.
    hg = dgl.heterograph({
        ('paper', 'pa', 'author'): (paper_author_src, paper_author_dst),
        ('author', 'ap', 'paper'): (paper_author_dst, paper_author_src),
        ('paper', 'pc', 'conf'): (paper_conf_src, paper_conf_dst),
        ('conf', 'cp', 'paper'): (paper_conf_dst, paper_conf_src)})
    return hg, author_names, conf_names, paper_names

# "conference - paper - author - paper - conference" metapath sampling
def generate_metapath():
    """Sample conference-author metapath walks and write them to a file.

    Builds the graph with ``construct_graph()``, then for every conference
    node starts ``num_walks_per_node`` random walks that follow the edge types
    ``cp, pa, ap, pc`` repeated ``walk_length`` times. Each walk is written as
    one space-separated line of conference and author names to
    ``output_path.txt`` inside ``path``; paper nodes are omitted.

    The graph is built before the output file is opened, so a failure while
    loading the inputs does not truncate an existing output file.
    """
    hg, author_names, conf_names, _paper_names = construct_graph()
    # Loop-invariant: the same edge-type sequence is used for every seed.
    metapath = ['cp', 'pa', 'ap', 'pc'] * walk_length

    # ``with`` closes the file even if sampling raises part-way through.
    with open(os.path.join(path, "output_path.txt"), "w") as output_path:
        for conf_idx in tqdm.trange(hg.number_of_nodes('conf')):
            traces, _ = dgl.sampling.random_walk(
                hg, [conf_idx] * num_walks_per_node, metapath=metapath)
            for tr in traces:
                names = []
                # Even positions alternate conf, author, conf, ...; odd
                # positions are papers and are skipped.
                for i in range(0, len(tr), 2):
                    node = int(tr[i])
                    if node < 0:
                        # random_walk pads walks that stopped early with -1;
                        # using it as an index would silently emit the last
                        # name in the list, so cut the walk here instead.
                        break
                    names.append(
                        (conf_names if i % 4 == 0 else author_names)[node])
                print(' '.join(names), file=output_path)


# Run the sampler only when this file is executed as a script.
if __name__ == "__main__":
    generate_metapath()
