# File: process.py
# Description: Generate a stream file from an input graph file.


import sgtl.random

def process_sbm_file(output_file, num_nodes=2400):
    """Sample a three-cluster graph with sgtl and write it as a signed stream file.

    The node set is split into three clusters of ``num_nodes // 3`` nodes,
    with the last cluster absorbing any remainder. The 3x3 matrix handed to
    ``sgtl.random.sbm`` has 0.95 on the diagonal and ``1 - 0.95`` elsewhere
    (presumably the intra- and inter-cluster edge probabilities; confirm
    against the sgtl documentation).

    Args:
        output_file: Path of the file to write. The first line is
            ``num_nodes``; every following line is ``i j +`` when the sampled
            graph contains edge (i, j) and ``i j -`` otherwise, for every
            pair with ``i < j``.
        num_nodes: Total number of nodes in the generated graph.
    """
    p = 0.95
    q = 1 - p
    third = num_nodes // 3
    # Two clusters of equal size; the third takes whatever is left over.
    cluster_sizes = [third, third, num_nodes - third - third]
    prob_mat_q = [[p if row == col else q for col in range(3)] for row in range(3)]

    graph = sgtl.random.sbm(cluster_sizes, prob_mat_q).to_networkx()

    with open(output_file, "w") as out:
        out.write(f"{num_nodes}\n")
        # Emit one record per unordered node pair, edges and non-edges alike.
        for u in range(num_nodes):
            for v in range(u + 1, num_nodes):
                sign = "+" if graph.has_edge(u, v) else "-"
                out.write(f"{u} {v} {sign}\n")

def process_dblp_file(input_file, output_file, num_nodes=10000):
    """Convert a labelled edge list into a stream file with compact vertex IDs.

    Every non-blank input line must hold three whitespace-separated integers
    ``v1 v2 label``. Vertices are renumbered 0, 1, 2, ... in order of first
    appearance. A label of 1 is written as ``-`` and any other label as ``+``.
    NOTE(review): confirm this sign convention against the dataset's label
    definition.

    Args:
        input_file: Path of the edge list to read.
        output_file: Path of the stream file to write. The first line is the
            node count, followed by one ``v1 v2 sign`` line per input edge,
            in input order.
        num_nodes: Node count written as the header line. Pass ``None`` to
            write the number of distinct vertices actually seen in the input.

    Raises:
        ValueError: If a non-blank line is not exactly three integers.
    """
    vertex_mapping = {}
    processed_lines = []

    with open(input_file, "r") as file:
        for line_number, line in enumerate(file, start=1):
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing empty line at end of file)
                # carry no edge; skip them instead of failing to unpack.
                continue

            try:
                v1, v2, label = map(int, fields)
            except ValueError as err:
                raise ValueError(
                    f"{input_file}:{line_number}: expected three integers "
                    f"'v1 v2 label', got {line.strip()!r}"
                ) from err

            # len() is evaluated before the insertion, so an unseen vertex
            # receives the next free ID and a known vertex keeps its own.
            v1_new = vertex_mapping.setdefault(v1, len(vertex_mapping))
            v2_new = vertex_mapping.setdefault(v2, len(vertex_mapping))

            label_new = "-" if label == 1 else "+"

            processed_lines.append(f"{v1_new} {v2_new} {label_new}")

    if num_nodes is None:
        num_nodes = len(vertex_mapping)

    with open(output_file, "w") as file:
        file.write(f"{num_nodes}\n")
        file.writelines(f"{entry}\n" for entry in processed_lines)

    print(f"Processed file saved to {output_file}.")

if __name__ == "__main__":
    # The SBM run is currently disabled. To regenerate that stream, call:
    #   process_sbm_file("../data/sbm/nodes_2400/streaming-SBM-2400.txt", 2400)

    # Paths are relative to the working directory the script is started from.
    dblp_edge_list = "../data/dblp/dblp10000.txt"
    dblp_stream = "../data/dblp/streaming-dblp10000.txt"
    process_dblp_file(dblp_edge_list, dblp_stream)