# @package _global_

# dataset config
# Input parquet files and prompt/response length limits for this run.
data:
  # Both paths are relative; the base directory they resolve against is not
  # visible in this file.
  train_files: selfplay_data/selfplay_prompts_arithmetic_v1/train.parquet
  # NOTE(review): validation comes from a different dataset directory
  # (multiply-3_digit) than training (selfplay_prompts_arithmetic_v1) --
  # confirm this pairing is intentional.
  val_files: selfplay_data/multiply-3_digit/test.parquet

  # Length limits; presumably counted in tokens -- confirm against the consumer.
  max_prompt_length: 512
  max_response_length: 1024
  # Presumably selects which side is cut when a prompt exceeds
  # max_prompt_length -- verify the accepted values in the consumer.
  truncation: left

# Trainer-level switches for this run.
trainer:
  # Batch balancing is turned off; what exactly gets balanced is defined by the
  # consumer of this key -- TODO confirm.
  balance_batch: false
  # Enables the self-play flag; presumably goes together with the selfplay_*
  # dataset paths under `data` -- verify.
  self_play: true
  # Plain string tag; presumably picks which proposer-output parser is used --
  # confirm the set of valid versions in the consumer.
  proposer_parser_version: v1

# Rollout and model settings grouped under `actor_rollout_ref`.
actor_rollout_ref:
  rollout:
    # Presumably the number of responses sampled per prompt -- TODO confirm.
    n: 4
  model:
    # Looks like a Hugging Face Hub model ID (org/name) rather than a local
    # filesystem path -- confirm how the loader resolves it.
    path: Qwen/Qwen2.5-3B-Instruct

