# SFT Thinking Trainer Configuration
# This config extends the base sft_trainer.yaml with thinking generation capabilities

# Hydra defaults list: the base SFT trainer config is composed first, then
# this file. `_self_` is listed last so that the overrides in this file take
# precedence over the inherited values; spelling it out also avoids the
# "Defaults list is missing `_self_`" warning Hydra emits for primary configs.
defaults:
  - sft_trainer
  - _self_

# Data overrides. The dataset format is unchanged (prompt + response); no
# thinking_key is required because thinking is generated online.
data:
  # Batch size reduced because of the 2x forward passes.
  train_batch_size: 128
  # Smaller micro-batches for memory efficiency.
  micro_batch_size_per_gpu: 2
  # Longer sequences to leave room for the generated thinking.
  max_length: 2048

# Settings that control how the thinking text is generated.
thinking_generation:
  # Upper bound on the length of the generated thinking.
  max_new_tokens: 200
  # Sampling temperature (1.0 = normal diversity).
  temperature: 1.0
  # Nucleus sampling parameter.
  top_p: 0.9
  # Sample rather than decode greedily.
  do_sample: true

# Model overrides (extend as needed).
model:
  # Recommended: gradient checkpointing for memory efficiency.
  enable_gradient_checkpointing: true

# Trainer overrides.
trainer:
  # Names identifying this run.
  project_name: sft-thinking
  experiment_name: test
  # Save / test intervals.
  # NOTE(review): presumably counted in training steps -- confirm against the
  # base trainer config.
  save_freq: 1000
  test_freq: 500

  # Thinking generation makes training roughly 2x slower; tune total_epochs
  # or total_training_steps to compensate.
  total_epochs: 1

# Optimizer overrides (otherwise the same as the base config; adjust the
# learning rate if needed).
optim:
  # Slightly lower LR for stability with thinking generation.
  # Written with an explicit decimal point in the mantissa so that YAML 1.1
  # loaders (e.g. plain PyYAML) also resolve it as a float, not a string.
  lr: 5.0e-6
