data:
  # Pick ONE training-data source:
  #   * dataset_name + split  -> a dataset on the HF Hub
  #   * train_file            -> a local file (commented out below)
  # dataset_name is an empty string here.
  # NOTE(review): confirm how the loader treats an empty name.
  dataset_name: ''
  split: 'train'
  # train_file: data/processed/qa_dataset.jsonl  # alternative to dataset_name

model:
  # Identifier of the base checkpoint.
  base: 'OpenDFM/ChemDFM-v1.5-8B'
  load_in_8bit: true
  # Open/close marker pairs for SMILES, QED, LogP and MW.
  # NOTE(review): presumably registered with the tokenizer - confirm in the
  # training script.
  special_tokens:
    - '<SMILES>'
    - '</SMILES>'
    - '<QED>'
    - '</QED>'
    - '<LogP>'
    - '</LogP>'
    - '<MW>'
    - '</MW>'
  # Kept double-quoted so the trailing \n escape is parsed as a real newline.
  system_prompt: "You love and excel at editing SMILES strings to make original SMILES meet the required numeric properties.\n"

lora:
  # alpha is set equal to r in this configuration.
  r: 32
  alpha: 32
  dropout: 0.05
  # Names of the modules that receive adapters (one entry per line).
  target_modules:
    - q_proj
    - k_proj
    - v_proj
    - o_proj
    - gate_proj
    - up_proj
    - down_proj

# Settings for the supervised fine-tuning (SFT) stage.
sft:
  output_dir: "experiments/sft"
  # Fractional value: presumably about 40% of one pass over the data.
  # NOTE(review): confirm the trainer accepts a non-integer epoch count.
  epochs: 0.4
  batch_size: 4
  grad_accum: 4
  # Written with an explicit decimal point on purpose: YAML 1.1 loaders such
  # as PyYAML resolve a bare `5e-5` as the string "5e-5", not a float.
  lr: 5.0e-5
  max_seq_length: 1024
  save_steps: 500
  save_total_limit: 2
  logging_steps: 50

# Settings for the GRPO stage.
grpo:
  output_dir: "experiments/grpo"
  # To start GRPO from the SFT LoRA checkpoint, uncomment the two keys below
  # (init_ckpt may also point at a specific checkpoint directory):
  # init_ckpt: "experiments/sft"
  # tokenizer_path: "experiments/sft"
  per_device_train_batch_size: 4
  gradient_accumulation_steps: 8
  groups_per_step: 8
  group_size: 8
  kl_coef: 0.02
  # Written with an explicit decimal point on purpose: YAML 1.1 loaders such
  # as PyYAML resolve a bare `3e-5` as the string "3e-5", not a float.
  lr: 3.0e-5
  max_steps: 1000
  logging_steps: 50
  save_steps: 500
  save_total_limit: 10
  # Sampling settings grouped under `rollout`.
  # NOTE(review): presumably forwarded to generation - confirm in the trainer.
  rollout:
    max_new_tokens: 128
    temperature: 1.0
    top_p: 0.9
    top_k: 50

logging:
  # Reporting backend; "none" is the documented alternative to "wandb".
  report_to: 'wandb'
  dir: './logs'
