# Hydra defaults list: the configs named here are composed in the order
# given. `_self_` is listed last so that the values set in this file take
# precedence over those coming from ppo_trainer and verl_config_grpo.
defaults:
  - ppo_trainer
  - verl_config_grpo
  - _self_

# Settings for the reasoning_gym data source: one futoshiki training set and
# two futoshiki validation sets.
reasoning_gym:
  # NOTE(review): presumably selects the prompt template named "direct"
  # (the same word appears in val_path below) — confirm against the loader.
  developer_prompt: direct
  # Training data: futoshiki with seed 42, board sizes 4-9, difficulties 0-3.
  datasets:
    - name: futoshiki
      seed: 42
      size: 20000
      min_board_size: 4
      max_board_size: 9
      min_difficulty: 0
      max_difficulty: 3

  # Validation data. Both entries use seed 41, which differs from the
  # training seed (42).
  # NOTE(review): the two entries share the same name and seed; confirm the
  # consumer keeps them as separate sets rather than keying them by name.
  validation_dataset:
    # Same board-size and difficulty ranges as the training entry.
    - name: futoshiki
      seed: 41
      size: 128
      min_board_size: 4
      max_board_size: 9
      min_difficulty: 0
      max_difficulty: 3
    # Narrower slice: board sizes 6-7 and difficulties 1-2 only.
    - name: futoshiki
      seed: 41
      size: 128
      min_board_size: 6
      max_board_size: 7
      min_difficulty: 1
      max_difficulty: 2

  # Path associated with the validation data; what it is resolved relative
  # to is not visible in this file.
  val_path: trainers/direct/val_futoshiki

# Applied on top of whatever the configs in the defaults list provide for
# the `data` group.
# NOTE(review): presumably the cap, in tokens, on each generated response —
# confirm against the trainer's base config.
data:
  max_response_length: 2048
