import pandas as pd
from pathlib import Path

# Placeholders the prompt template may reference. Each one is filled from the
# CSV column of the same name, or with "" when that column is absent or NaN.
# NOTE(review): "surfractant_input" looks like a misspelling of "surfactant",
# but it must match the CSV header / template placeholder exactly -- confirm
# against the data files before renaming.
_TEMPLATE_KEYS = (
    "drug_input",
    "solid_lipid",
    "solid_lipid_input",
    "liquid_lipid_input",
    "surfractant_input",
)

# Name of the input column that is copied into the "output" field.
_LABEL_COLUMN = "encap_efficiency"

# Column layout of the generated file. It is passed explicitly so that an
# input with zero data rows still produces a CSV with a proper header row.
_OUTPUT_COLUMNS = ("instruction", "input", "output", "history")


def build_train_csv(
    from_csv: str = "train_split.csv",
    prompt_file: str = "prompt.txt",
    save_csv: str = "train.csv",
) -> None:
    """Render every row of a CSV through a prompt template and save the result.

    Each input row becomes one output row with four columns:
    ``instruction`` (the formatted prompt), ``input`` (always empty),
    ``output`` (the row's ``encap_efficiency`` value as text, or empty when
    the value or the whole column is missing) and ``history`` (the literal
    string ``"[]"``).

    Args:
        from_csv: Path of the CSV file to read.
        prompt_file: Path of a UTF-8 text file holding a ``str.format``
            template; surrounding whitespace is stripped before use.
        save_csv: Path of the CSV file to write (UTF-8, no index column).

    Raises:
        KeyError: If the template references a placeholder that is not one of
            the supported template keys.
    """
    # 1. Read the source table.
    df = pd.read_csv(from_csv)

    # 2. Read the prompt template.
    prompt_template = Path(prompt_file).read_text(encoding="utf-8").strip()

    # 3. Build one record per row.
    records = []
    for _, row in df.iterrows():
        # Turn NaN into an empty string so the template can always be filled.
        row_dict = {k: (str(v) if pd.notnull(v) else "") for k, v in row.items()}

        # Keep only the keys the template may use; columns missing from the
        # CSV fall back to "" instead of raising KeyError.
        safe_dict = {k: row_dict.get(k, "") for k in _TEMPLATE_KEYS}

        records.append(
            {
                "instruction": prompt_template.format(**safe_dict),
                "input": "",  # some frameworks require this field; may be empty
                # Same fallback as the template keys: an unlabeled input file
                # (no label column at all) yields an empty output, not a crash.
                "output": row_dict.get(_LABEL_COLUMN, ""),
                "history": "[]",  # empty conversation history, stored as text
            }
        )

    # 4. Write the result.
    new_df = pd.DataFrame(records, columns=list(_OUTPUT_COLUMNS))
    new_df.to_csv(save_csv, index=False, encoding="utf-8")
    print(f"Saved {len(new_df)} rows to {save_csv}")

if __name__ == "__main__":
    # Script entry point: convert "name_searchspace.csv" into "all.csv",
    # leaving the prompt template path at its default.
    build_train_csv(save_csv="all.csv", from_csv="name_searchspace.csv")