import json
from collections import defaultdict
from typing import DefaultDict, Optional

import pubchempy as pcp

def get_compound_name_from_smiles(smiles: str) -> Optional[str]:
    """Look up a human-readable name for a SMILES string via PubChem.

    Args:
        smiles: SMILES string passed to ``pcp.get_compounds`` with the
            ``'smiles'`` namespace.

    Returns:
        The ``iupac_name`` of the first compound returned, falling back to
        its ``name`` attribute when the IUPAC name is missing or empty.
        ``None`` when no compound is returned or no name is available; a
        diagnostic line is printed in both cases.
    """
    compounds = pcp.get_compounds(smiles, 'smiles')
    if not compounds:
        # Report the miss but return None: a message string would be truthy
        # and callers testing `if name:` would store it as a compound name.
        print(f"No compound found for SMILES: {smiles}")
        return None
    cmpd = compounds[0]
    # Prefer iupac_name; fall back to name if it is missing.
    # NOTE(review): confirm the compound object actually exposes `name`;
    # getattr keeps the lookup safe either way.
    name = getattr(cmpd, 'iupac_name', None) or getattr(cmpd, 'name', None)
    if not name:
        print(f"No name found for SMILES: {smiles}")
        return None
    return name

# Build a SMILES -> name map for every option listed in the dataset's
# options_smiles.json and write it next to that file as names_maps.json.
# NOTE(review): DATASET is used as a directory name even though it ends in
# ".csv" -- confirm this matches the on-disk layout.
DATASET = "buchwald_Cc1ccc(Nc2ccc(C(F)(F)F)cc2)cc1.csv"
file_name = f"{DATASET}/options_smiles.json"
# Explicit UTF-8 so reading does not depend on the platform's locale encoding.
with open(file_name, "r", encoding="utf-8") as f:
    data = json.load(f)

# new_json: option key -> list of names (or the raw SMILES when no name was found).
new_json = defaultdict(list)
# names_maps: SMILES -> name (or the SMILES itself when no name was found).
names_maps = {}

for key, smiles_list in data.items():
    for smiles in smiles_list:
        # names_maps doubles as a cache so a SMILES that appears under
        # several keys is only looked up (over the network) once.
        if smiles not in names_maps:
            name = get_compound_name_from_smiles(smiles)
            if not name:
                print(f"SMILES: {smiles} -> No name found")
            # Fall back to the SMILES itself so every entry has a label.
            names_maps[smiles] = name or smiles
        new_json[key].append(names_maps[smiles])

# new_json is built but currently not written to disk; to persist it:
# with open("arylation/options.json", "w", encoding="utf-8") as f:
#     json.dump(new_json, f, indent=4, ensure_ascii=False)

# ensure_ascii=False emits non-ASCII characters verbatim, so the file must be
# opened as UTF-8 to avoid UnicodeEncodeError under non-UTF-8 locales.
with open(f"{DATASET}/names_maps.json", "w", encoding="utf-8") as f:
    json.dump(names_maps, f, indent=4, ensure_ascii=False)
