# @package _global_

# Overrides for the `data` config group.
data:
  # Same value as actor_rollout_ref.actor.ppo_mini_batch_size in this file (8).
  # NOTE(review): presumably kept small on purpose for a quick run - confirm.
  train_batch_size: 8

# Overrides grouped under one key: the model plus the actor, rollout and ref
# sub-sections.
actor_rollout_ref:
  model:
    # NOTE(review): looks like a Hugging Face Hub model id rather than a local
    # filesystem path - confirm how the consumer resolves it.
    path: Qwen/Qwen2.5-0.5B-Instruct
  actor:
    # Same value as data.train_batch_size in this file (8).
    ppo_mini_batch_size: 8
    ppo_micro_batch_size_per_gpu: 2
  # rollout and ref both use a per-GPU log-prob micro batch of 2, matching the
  # actor's per-GPU micro batch size above.
  rollout:
    log_prob_micro_batch_size_per_gpu: 2
  ref:
    log_prob_micro_batch_size_per_gpu: 2

# Overrides for the `trainer` config group.
trainer:
  # NOTE(review): presumably "run the test/validation pass every 1 unit";
  # confirm whether the unit is steps or epochs.
  test_freq: 1
  total_epochs: 1
  # Booleans are written in canonical lowercase (true/false) so every YAML
  # loader and linter reads them the same way.
  val_before_train: false
