import json
import pandas as pd

# Tab-separated Rhea reaction SMILES table. It is not referenced again in the
# visible part of this script; the load is kept so the name stays available.
smiles_tsv = pd.read_csv(
    "notebook/rhea-reaction-smiles.tsv",
    sep="\t",
)

# JSON Lines input (one JSON object per line). The mapping code below reads
# its 'ec number' and 'rhea id' columns.
ec_smiles = pd.read_json(
    "notebook/ec_smile.jsonl",
    lines=True,
)

# Build a lookup from EC number to the Rhea ids listed alongside it in
# `ec_smiles`. Each id is filed under the EC number itself and under the same
# number with its last dot-separated component removed
# (e.g. "1.2.3.4" -> "1.2.3").
ec_rheaid_mapping = {}
# Iterate the two columns in lockstep instead of looking rows up by label
# (`series[i]`), so the loop does not depend on the frame having a 0..n-1 index.
for ec_numbers, rhea_id in zip(ec_smiles['ec number'], ec_smiles['rhea id']):
    for ecn in ec_numbers:
        rhea_id_int = int(rhea_id)
        ec_rheaid_mapping.setdefault(ecn, set()).add(rhea_id_int)

        # Only derive a parent key when there is a '.' to split on. The
        # previous `ecn[:ecn.rfind('.')]` sliced with -1 for dot-less values
        # and silently registered the value minus its last character.
        ecn3, dot, _ = ecn.rpartition('.')
        if dot:
            ec_rheaid_mapping.setdefault(ecn3, set()).add(rhea_id_int)

# JSON cannot encode sets; convert to lists. Sorting makes each id list
# independent of the order in which rows were encountered.
ec_rheaid_mapping = {
    ecn: sorted(rhea_ids) for ecn, rhea_ids in ec_rheaid_mapping.items()
}

# Write the mapping as pretty-printed JSON (2-space indent). open() in 'w'
# mode does not create missing parent directories, so "datasets/" must exist.
with open("datasets/ecnumber_rheaid_mapping.json", mode='w') as mapping_file:
    json.dump(obj=ec_rheaid_mapping, fp=mapping_file, indent=2)