import pandas as pd
import json

# Source CSV describing the search space, and the JSON file that receives
# the per-column option lists derived from it.
all_csv_path = "/mnt/shared-storage-user/caipengxiang/workspace/ChemBOMAS/train_regression/data4regression/arylation/searchspace.csv"
json_save_path = "/mnt/shared-storage-user/caipengxiang/workspace/ChemBOMAS/train_regression/data4regression/arylation/options.json"

# Load the table and discard the 'entry' column in one step.
df = pd.read_csv(all_csv_path).drop(columns=['entry'])

# Map each remaining column except 'yield' to the sorted list of its
# distinct non-null values. `.tolist()` converts NumPy scalars to native
# Python objects so the result is JSON-serializable.
result = {
    col: sorted(df[col].dropna().unique().tolist())
    for col in df.columns
    if col != 'yield'
}

# Serialize with non-ASCII characters kept verbatim and a 2-space indent,
# then write the text to disk as UTF-8.
json_result = json.dumps(result, ensure_ascii=False, indent=2)
with open(json_save_path, 'w', encoding='utf-8') as f:
    f.write(json_result)