### Simple Python script that loads an adjacency matrix in .npy format (with node labels taken from a CSV header) and outputs a JSON file with the edges
import json
import numpy as np
import argparse
import os
import pandas as pd 

def npy_to_json(input_file: str, input_csv_file: str, output_file: str) -> None:
    """Convert an adjacency matrix stored as ``.npy`` into a JSON edge list.

    Node labels are the column names of the CSV dataset, in column order:
    row/column ``i`` of the matrix corresponds to the ``i``-th CSV column.
    Every entry equal to 1 at position ``(i, j)`` becomes the edge
    ``[label_i, label_j]``; edges are emitted in row-major order. The result
    is written to ``output_file`` as ``{"edges": [[source, target], ...]}``.

    Args:
        input_file: Path to the ``.npy`` file holding the adjacency matrix.
        input_csv_file: Path to the CSV dataset whose header provides the
            node labels.
        output_file: Path of the JSON file to write.

    Raises:
        ValueError: If the matrix is not a square 2-D array, or if its size
            differs from the number of columns in the dataset.
    """
    adj_mat = np.load(input_file)
    dataset = pd.read_csv(input_csv_file)
    labels = list(dataset.columns)
    n_rows, n_vars = dataset.shape

    # Check if the adjacency matrix is square. The ndim guard makes 0-D/1-D
    # input fail with this error instead of an IndexError on shape[1].
    if adj_mat.ndim != 2 or adj_mat.shape[0] != adj_mat.shape[1]:
        raise ValueError("Adjacency matrix must be square.")
    # Check if the number of nodes in the adjacency matrix matches the number of variables
    if adj_mat.shape[0] != n_vars:
        raise ValueError("Number of nodes in adjacency matrix must match number of variables in dataset.")

    print(f"There are {n_vars} variables and the dataset has {n_rows} rows and {n_vars} columns")

    # np.argwhere yields (row, col) index pairs in row-major order, which is
    # the same order the equivalent nested i/j loop would visit them.
    edges = [(labels[i], labels[j]) for i, j in np.argwhere(adj_mat == 1)]

    # Write the edges to a JSON file (tuples are serialized as JSON arrays)
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump({"edges": edges}, f, ensure_ascii=False, indent=4)

if __name__ == "__main__":
    # Command-line entry point: read the adjacency matrix and dataset paths,
    # then write the edge list next to the .npy file with a .json extension.
    parser = argparse.ArgumentParser(
        description="Convert a .npy adjacency matrix to a JSON file containing edges."
    )
    # Both paths are mandatory; without required=True a missing --npy_file
    # would reach os.path.splitext(None) and fail with a TypeError.
    parser.add_argument("--npy_file", required=True, help="Path to the input .npy adjacency matrix file")
    parser.add_argument("--csv_file", required=True, help="Path to the input dataset (CSV file)")
    args = parser.parse_args()

    input_npy_file = args.npy_file
    input_csv_file = args.csv_file

    # Output path: same location and base name as the .npy input.
    output_json_file = os.path.splitext(input_npy_file)[0] + ".json"

    npy_to_json(input_npy_file, input_csv_file, output_json_file)