# Defaults list: two shared base configs from ../base are listed first and
# `_self_` (this file) is listed last.
# NOTE(review): under Hydra's defaults-list rules later entries override
# earlier ones, so the keys below would take precedence over both base
# configs - confirm this file is composed by Hydra.
defaults:
  - ../base/ppo_trainer
  - ../base/verl_config_grpo
  - _self_

# reasoning_gym section: one training dataset list, one validation dataset
# list, and a validation path, all built around the zebra_puzzles dataset.
reasoning_gym:
  # NOTE(review): presumably selects a named prompt template - confirm against
  # the code that reads this key.
  developer_prompt: DeepSeekZero
  # Training data: a single zebra_puzzles entry with 4 people and
  # 4 characteristics.
  # NOTE(review): the trailing `# 5` notes are presumably an alternative or
  # previously used value for that setting - confirm or remove.
  datasets:
    - name: zebra_puzzles
      size: 25000
      seed: 42
      num_people: 4 # 5
      num_characteristics: 4 # 5

  # Validation data: same dataset name and puzzle dimensions as the training
  # entry, but a much smaller size (128) and a different seed (41 vs. 42).
  validation_dataset:
    - name: zebra_puzzles
      size: 128
      seed: 41
      num_people: 4 # 5
      num_characteristics: 4 # 5


  # NOTE(review): relative path; presumably where the validation set for this
  # run is stored or read from - verify against the consumer.
  val_path: trainers/val_zebra_puzzles

