# Hydra-style defaults list: the configs named below are composed in order.
# `_self_` is listed last, so the keys set in this file are merged after them.
defaults:
  - ppo_trainer
  - verl_config_dr_grpo
  - _self_

# Task data: training and validation both use the `spiral_matrix` dataset
# with identical min_n/max_n bounds.
reasoning_gym:
  developer_prompt: DeepSeekZero
  datasets:
    - name: spiral_matrix
      size: 20000
      seed: 42
      # NOTE(review): min_n/max_n are presumably bounds on the generated
      # matrix dimension -- confirm against the dataset's own config.
      min_n: 2
      max_n: 4

  # Validation split: same bounds as training, but smaller (128 vs 20000)
  # and generated with a different seed (41 vs 42).
  validation_dataset:
    - name: spiral_matrix
      size: 128
      seed: 41
      min_n: 2
      max_n: 4
  # NOTE(review): relative path; which directory it resolves against is not
  # visible from this file -- confirm with the consumer.
  val_path: trainers/val_spiral_matrix

# NOTE(review): presumably overrides a value from the composed base configs,
# with the length measured in tokens -- confirm.
data:
  max_response_length: 2048

