import os
import json

# Prompt template, rendered with str.format(). It has two replacement fields:
#   {target} - inserted into the opening request for key properties
#   {subs}   - inserted under "For the following compounds:"
# The doubled braces ({{ and }}) in the JSON example are str.format escapes
# and come out as single literal braces in the rendered prompt.
# NOTE(review): the text refers to "the properties identified in Step 1", but
# no step is explicitly labelled in the template - confirm this is intended.
prompts = """
    You are an expert in chemistry.
Please list **1-3 key properties** of [{target}] that critically influence their effectiveness in chemical reactions (e.g., pKa for stronger basicity, steric hindrance, solubility, metal counter‑ion effects, etc.).  
- A concise list (1-3 items) of properties.  
- Justify each property’s relevance (e.g., "pKa determines proton affinity").

    For the following compounds:
{subs}

For each reagent, please collect documented information and literature data on the ALL properties identified in Step 1.  
- If the property has numeric values (e.g. pKa, solubility in specific solvent), provide precise figures with citation.  
- If the property can’t be quantified, describe it in detail (e.g. “strongly soluble in water, insoluble in organic solvent”, or “bulky” vs. “compact”).  
Format: for each reagent name, list all attributes with their value and text description
**Special Cases:**
- "nothing" (use "N/A" for all properties).
- Unavailable data: Mark as "unknown".

Convert data into this JSON structure and remove all the citations:  
```json
[
  {{
    "name": "compound_name",
    "property_1": "value+text",
    "property_2": "value+text",
    ...
  }},
  ...
]
```
"""

# Path template for each dataset's options file; `{dataset}` is substituted
# with every entry of the dataset list below.
json_path = "/mnt/shared-storage-user/caipengxiang/workspace/ChemBOMAS/train_regression/data4regression/{dataset}/options.json"

# Output directory for the rendered prompts (one .md file per dataset/target).
save_path = "prompts"
os.makedirs(save_path, exist_ok=True)

# Output directory for the per-prompt JSON placeholders. It must exist before
# the placeholder files are opened, otherwise open() raises FileNotFoundError.
exp_data_path = "exp_data"
os.makedirs(exp_data_path, exist_ok=True)

for dataset in [
    "suzuki",
    "arylation",
    "buchwald_Cc1ccc(Nc2ccccn2)cc1.csv",
    "buchwald_COc1ccc(Nc2ccc(C)cc2)cc1.csv",
]:
    file_path = json_path.format(dataset=dataset)
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Each top-level key of the options file becomes one prompt; its value is
    # inserted verbatim (via str()) as the compound list.
    # presumably `values` is a list of compound identifiers - verify against
    # the options.json files.
    for target, values in data.items():
        prompt_file = os.path.join(save_path, f"{dataset}_{target}.md")
        # The template contains non-ASCII punctuation, so write UTF-8
        # explicitly instead of relying on the platform default encoding.
        with open(prompt_file, "w", encoding="utf-8") as f:
            f.write(prompts.format(target=target, subs=str(values)))

        # Ensure a matching JSON file exists for this prompt. Append mode
        # creates the file when it is missing but never truncates one that
        # already holds data, so re-running the script is non-destructive.
        result_file = os.path.join(exp_data_path, f"{dataset}_{target}.json")
        with open(result_file, "a", encoding="utf-8"):
            pass