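### Simple python script that loads an adjacency matrix from a .npy file, labels its nodes with the
### column names of a CSV dataset, and saves the resulting directed graph next to the input as a .gml file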
import json
import numpy as np
import argparse
import os
import pandas as pd 
import networkx as nx

def npy_to_json(input_file, input_csv_file, output_file):
    """Convert an adjacency matrix into a labelled directed graph file.

    Despite its name, this function does not produce JSON: it loads the
    matrix stored in ``input_file``, builds a networkx ``DiGraph`` from it,
    names the nodes after the columns of the CSV dataset and writes the
    graph to ``output_file``.

    Args:
        input_file: Path to a ``.npy`` file holding a square adjacency matrix.
        input_csv_file: Path to the CSV dataset; its column names, in order,
            become the labels of nodes 0..N-1.
        output_file: Destination of the graph. A path ending in ``.gml`` is
            written as GML; any other destination is written as GraphML.

    Raises:
        ValueError: If the matrix is not two-dimensional and square, or if
            its size differs from the number of columns in the dataset.
    """
    adj_mat = np.load(input_file)
    dataset = pd.read_csv(input_csv_file)
    labels = list(dataset.columns)

    # Checking ndim first keeps a 1-D array from failing with an IndexError
    # on shape[1] instead of the intended ValueError.
    if adj_mat.ndim != 2 or adj_mat.shape[0] != adj_mat.shape[1]:
        raise ValueError("Adjacency matrix must be square.")
    if adj_mat.shape[0] != len(labels):
        raise ValueError("Number of nodes in adjacency matrix must match number of variables in dataset.")

    n_rows, n_cols = dataset.shape
    print(f"There are {len(labels)} variables and the dataset has {n_rows} rows and {n_cols} columns")

    # Nodes are created as integer indices; swap them for the variable names.
    graph = nx.DiGraph(adj_mat)
    graph = nx.relabel_nodes(graph, dict(enumerate(labels)))

    # Pick the writer from the extension so the file content matches its
    # name: previously a ".gml" path silently received GraphML XML.
    extension = ""
    if isinstance(output_file, (str, os.PathLike)):
        extension = os.path.splitext(output_file)[1].lower()

    if extension == ".gml":
        nx.write_gml(graph, output_file)
    else:
        nx.write_graphml(graph, output_file)

    return

if __name__ == "__main__":
    # Command-line entry point: both inputs are mandatory, so argparse reports
    # a usage error instead of the script failing later on a None path.
    parser = argparse.ArgumentParser(description="Convert a npy file into a gml file.")
    parser.add_argument("--npy_file", required=True, help="Path to the input .npy L file")
    parser.add_argument("--csv_file", required=True, help="Path to the input dataset")
    args = parser.parse_args()

    # The graph is written next to the input matrix, with the extension
    # replaced by ".gml".
    output_graph_file = os.path.splitext(args.npy_file)[0] + ".gml"

    npy_to_json(args.npy_file, args.csv_file, output_graph_file)