from core.settings import get_settings
from core.openai_used_attrs import rounds
from openai import OpenAI
import base64, json
# import pubchempy as pcp
from loguru import logger
import os
import glob

# Load the project settings once at import time; the client below reads its
# API key and base URL from this object.
settings = get_settings()

# Module-level OpenAI client configured from the loaded settings.
client = OpenAI(api_key=settings.api_key, base_url=settings.base_url)

# ============== Read File ==============

# new_datasets = ["crossed_barrel", "dye_lasers", "lnp3", "perovskites", "redoxmers"]

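# Reference only: the bare string below is never assigned or used at runtime.
# It appears to be a follow-up prompt (written in Chinese) asking for the
# clustering results of every substance category to be collected as JSON of
# the form {"key": [[group 1 members], [group 2 members], ...]}.
"""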
现在请你整理上面所有物质类别的聚类结果，格式如下：
 {
  "key" : [
    [group1_a, group1_b, xxx],
    [group2_a, group2_b, xxx],
    [group3_a, group3_b, xxx],
]
}
"""

# Name of the dataset processed by this run; `dataset_name` is a plain alias.
dataset = "redoxmers"
dataset_name = dataset

# Input: the options JSON for the selected dataset.
json_path = f"/mnt/shared-storage-user/caipengxiang/workspace/ChemBOMAS/train_regression/data4regression/{dataset}/options.json"

# Output: per-dataset directory, created up front so later writes cannot fail
# on a missing folder (an already existing directory is not an error).
save_path = f"/mnt/shared-storage-user/caipengxiang/workspace/ChemBOMAS/Rag-Cluster/sci_files/{dataset}"
os.makedirs(save_path, exist_ok=True)

# Dict with a "user" role and an empty content list.
# NOTE(review): presumably a chat-message skeleton; it is not referenced in
# the visible part of this script - confirm whether it is still needed.
templates = {"role": "user", "content": []}

# ============== Read Targets ==============

# Load the prompt template. The path is relative, so the script has to be run
# from the directory that contains `sci_files/`. UTF-8 is pinned explicitly so
# non-ASCII prompt text is decoded the same way regardless of the platform's
# default locale encoding.
with open("sci_files/prompts/cluster.md", "r", encoding="utf-8") as f:
    queries = f.read()

# Load the options JSON; the loop below iterates it as a mapping, so the top
# level must be a JSON object.
with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# Write one prompt file per top-level key: the template is filled in through
# str.format with `target` (the key) and `subs` (the string form of the key's
# values as a list).
# NOTE(review): str.format raises if the template contains literal, unescaped
# braces (e.g. a JSON example) - confirm cluster.md escapes them as {{ }}.
for key, value in data.items():
    with open(os.path.join(save_path, f"cluster_{key}.txt"), "w", encoding="utf-8") as f:
        f.write(queries.format(target=key, subs=str(list(value))))