import os, json
from loguru import logger

# Datasets to verify. Entries that are commented out are skipped; uncomment
# one to include it in the run.
datasets = [
    # "arylation",
    # "suzuki",
    # "buchwald_Cc1ccc(Nc2ccccn2)cc1.csv",
    "buchwald_COc1ccc(Nc2ccc(C)cc2)cc1.csv",
]

# Experiment configuration names; each one is substituted for `{config}` in
# `dir_path_todo`.
configs = [
    "RAG_0_DB_0_WEB_0",
    "RAG_0_DB_0_WEB_1",
    "RAG_0_DB_1_WEB_1",
    "RAG_1_DB_1_WEB_1",
]

# Template for the reference ("ok") JSON file, one per dataset.
dir_path_ok = (
    "/mnt/shared-storage-user/caipengxiang/workspace/ChemBOMAS"
    "/train_regression/data4regression/{dataset}/options.json"
)

# Template for the JSON file under test ("todo"), one per dataset and config.
dir_path_todo = (
    "/mnt/shared-storage-user/caipengxiang/workspace/ChemBOMAS"
    "/Rag-Cluster/exp_ab_files/{dataset}/{config}/partition.json"
)


# Cross-check each (dataset, config) file against the dataset's reference
# file and log every object that is present on only one side.
#
# Shapes as used below (not validated here -- TODO confirm against the
# producers of these files):
#   * data_todo maps key -> iterable of groups, each group an iterable of objects
#   * data_ok   maps key -> container of objects
for dataset in datasets:
    path_ok = dir_path_ok.format(dataset=dataset)
    # The reference file depends only on the dataset, so it is read once per
    # dataset rather than once per config. `with` guarantees the handle is
    # closed even if parsing fails.
    with open(path_ok, encoding="utf-8") as fh:
        data_ok = json.load(fh)

    for config in configs:
        path_todo = dir_path_todo.format(dataset=dataset, config=config)
        with open(path_todo, encoding="utf-8") as fh:
            data_todo = json.load(fh)

        # Forward check: every object in the file under test must be listed
        # under the same key in the reference file.
        for key, value in data_todo.items():
            # A key missing from the reference file is treated as an empty
            # entry, so each of its objects is reported instead of the whole
            # run aborting with a KeyError.
            known = data_ok.get(key, ())
            for att in value:
                for obj in att:
                    if obj not in known:
                        logger.warning(f"{obj} not exists in {path_ok}")
                        logger.warning(dataset)
                        print("\n")

        # Reverse check: confirm that every object in the reference file
        # appears in at least one group under the same key in the file under
        # test.
        for key, value in data_ok.items():
            # Same missing-key policy as above: no groups means nothing can match.
            groups = data_todo.get(key, ())
            for obj in value:
                if not any(obj in todo_att for todo_att in groups):
                    # Logged at a different level (success) than the forward
                    # check so the two kinds of mismatch are distinguishable.
                    logger.success(f"{obj} not exists in {path_todo}")
                    logger.success(dataset)
                    print("\n")
