import os, json
from loguru import logger

# Dataset names substituted into the path templates below.
datasets = [
    "buchwald_Cc1ccc(Nc2ccccn2)cc1.csv",
]

# Template for the JSON file under test ("todo"); `{dataset}` is filled in
# per dataset.
dir_path_todo = (
    "/mnt/shared-storage-user/caipengxiang/workspace/ChemBOMAS"
    "/info_encoder/exp_embed_cluster_results/summary"
    "/{dataset}_clusters.json"
)

# Template for the reference JSON file ("ok"); `{dataset}` is filled in per
# dataset.
dir_path_ok = (
    "/mnt/shared-storage-user/caipengxiang/workspace/ChemBOMAS"
    "/train_regression/data4regression"
    "/{dataset}/options.json"
)

# Alternate location for the file under test (disabled; it needs an extra
# `{config}` field):
# dir_path_todo = "/mnt/shared-storage-user/caipengxiang/workspace/ChemBOMAS/Rag-Cluster/sci_files/{dataset}/{config}/partition.json"


def _load_json(path):
    """Parse the JSON file at *path* and close the handle afterwards."""
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)


# For each dataset, cross-check the file under test ("todo") against the
# reference file ("ok") in both directions and log every discrepancy.
for dataset in datasets:
    path_ok = dir_path_ok.format(dataset=dataset)
    path_todo = dir_path_todo.format(dataset=dataset)
    data_ok = _load_json(path_ok)
    data_todo = _load_json(path_todo)

    # Direction 1: every object found in the nested lists of the file under
    # test must be present under the same key in the reference file.
    for key, value in data_todo.items():
        if key not in data_ok:
            # Report the missing key and carry on, instead of dropping into
            # a debugger and then failing with KeyError.
            logger.error(f"{key} not exists in {path_ok}")
            logger.error(dataset)
            print("\n")
            continue

        reference_items = data_ok[key]  # looked up once per key
        for att in value:
            for obj in att:
                if obj not in reference_items:
                    logger.warning(f"{obj} not exists in {path_ok}")
                    logger.warning(dataset)
                    print("\n")

    # Direction 2: confirm that every object in the reference file appears
    # in at least one of the groups stored under the same key in the file
    # under test.
    for key, value in data_ok.items():
        # A key absent from the file under test means none of its objects
        # can be found; treat it as "no groups" so each object is reported.
        todo_groups = data_todo.get(key, [])
        for obj in value:
            if not any(obj in todo_att for todo_att in todo_groups):
                # NOTE(review): reported at SUCCESS level while the opposite
                # direction uses WARNING -- confirm this is intentional.
                logger.success(f"{obj} not exists in {path_todo}")
                logger.success(dataset)
                print("\n")
