import json
import collections
import argparse

def add_nature_sequence_edges(data):
    """
    Connect nodes on consecutive code lines with **NATURE_SEQUENCE** edges.

    ``data["nodes"]`` and ``data["code_lines"]`` are read in lockstep, so the
    i-th node is associated with the i-th line value. Nodes whose line value
    is ``-1`` are skipped (presumably nodes without a source line -- confirm
    against the dataset format). The remaining nodes are grouped by line, the
    distinct lines are sorted, and for every pair of neighbouring lines in
    that order an edge ``[src, dst]`` is created from each node on the earlier
    line to each node on the later line.

    The new edges are appended to ``data["edges"]`` and one
    ``"NATURE_SEQUENCE"`` label per new edge is appended to
    ``data["edges_label"]``. ``data`` is modified in place and also returned.
    """
    nodes_by_line = collections.defaultdict(list)
    for node, line_no in zip(data["nodes"], data["code_lines"]):
        if line_no == -1:
            continue
        nodes_by_line[line_no].append(node)

    ordered_lines = sorted(nodes_by_line)

    # Pair each line with its successor; an empty or single-line grouping
    # yields no pairs and therefore no edges.
    sequence_edges = [
        [src, dst]
        for earlier, later in zip(ordered_lines, ordered_lines[1:])
        for src in nodes_by_line[earlier]
        for dst in nodes_by_line[later]
    ]

    data["edges"].extend(sequence_edges)
    data["edges_label"].extend(["NATURE_SEQUENCE"] * len(sequence_edges))
    return data


def process_dataset(input_file, output_file):
    """
    Add NATURE_SEQUENCE edges to every record of a JSON Lines dataset.

    Each non-blank line of ``input_file`` is parsed as one JSON document,
    passed through ``add_nature_sequence_edges`` and written to
    ``output_file`` as a single JSON line.

    Args:
        input_file: Path of the JSON Lines file to read.
        output_file: Path of the file to write. It is opened in ``'w'`` mode,
            so existing content is truncated; it must not be the same file as
            ``input_file``, which is still being read at that point.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # JSON interchange text is UTF-8; naming the encoding keeps the result
    # independent of the platform's locale default.
    with open(input_file, 'r', encoding='utf-8') as infile, \
            open(output_file, 'w', encoding='utf-8') as outfile:
        for line in infile:
            # Blank lines (e.g. a trailing empty line) hold no record and
            # would otherwise make json.loads raise.
            if not line.strip():
                continue
            data = json.loads(line)
            processed = add_nature_sequence_edges(data)
            outfile.write(json.dumps(processed) + '\n')


if __name__ == "__main__":
    # Command-line entry point: takes parallel lists of input and output
    # paths and processes them pairwise.
    cli = argparse.ArgumentParser(
        description="Add NATURE_SEQUENCE edges to graph structures"
    )
    cli.add_argument(
        '--input_files',
        nargs='+',
        required=True,
        help='Input file paths (supports multiple)',
    )
    cli.add_argument(
        '--output_files',
        nargs='+',
        required=True,
        help='Output file paths (corresponding to inputs)',
    )
    options = cli.parse_args()
    sources = options.input_files
    targets = options.output_files

    # The i-th input is written to the i-th output, so the lists must match
    # in length before any file is touched.
    if len(sources) != len(targets):
        raise ValueError("The number of input files must be equal to the number of output files")

    for in_file, out_file in zip(sources, targets):
        print(f"Processing {in_file} -> {out_file}")
        process_dataset(in_file, out_file)

    print("Processing completed! NATURE_SEQUENCE edges have been added.")
