import pandas as pd
import json

# Read the full search-space table.
df = pd.read_csv('searchspace.csv')

# The *_smiles columns are exported twice: as columns of the cleaned CSV and
# as per-column lists of unique values in the JSON options file.
smiles_cols = ['r1_smiles', 'r3_smiles', 'r4_smiles', 'r5_smiles']
wanted_cols = {*smiles_cols, 'ered', 'gsol', 'abs_lam'}

# Keep only the wanted columns that actually exist, in the CSV's own order.
cols_to_keep = [col for col in df.columns if col in wanted_cols]

# Build the reduced DataFrame and save it as a new file.
df_filtered = df[cols_to_keep]
df_filtered.to_csv('searchspace_clean.csv', index=False)

# Map each column prefix ('r1_smiles' -> 'r1') to the distinct non-missing
# values of that column, in order of first appearance.
# Columns absent from the CSV are skipped, mirroring the tolerant filter
# above, instead of raising KeyError after the cleaned CSV was already written.
output = {
    col.split("_")[0]: df_filtered[col].dropna().unique().tolist()
    for col in smiles_cols
    if col in df_filtered.columns
}

# Save the options as JSON; the encoding is explicit so the result does not
# depend on the platform default.
with open("options_smiles.json", "w", encoding="utf-8") as f:
    json.dump(output, f, indent=2)