# Base configs composed into this one, in list order.
# NOTE(review): `_self_` is last, which under Hydra defaults-list semantics
# would let the values in this file override the two entries above it —
# confirm this file is loaded by Hydra.
defaults:
  - ppo_trainer
  - verl_config_grpo
  - _self_

# Dataset settings: one training spec, two validation specs, and a
# validation path. All three specs use the `simple_equations` dataset.
reasoning_gym:
  developer_prompt: direct

  # Training set. Each "alt:" note records an alternative value for its key;
  # the alternatives are exactly the values used by the second validation
  # entry below.
  datasets:
    - name: simple_equations
      size: 20000
      seed: 42
      min_terms: 2  # alt: 3
      max_terms: 4  # alt: 10
      min_value: 1  # alt: 10
      max_value: 100  # alt: 10000
      # Three weights summing to 1.0; presumably one per operator — confirm
      # against the simple_equations generator.
      operators_weights:
        - 0.4
        - 0.4
        - 0.2

  # Validation sets. Both use seed 41, which differs from the training seed.
  # NOTE(review): both entries share the same name and seed — confirm the
  # consumer can tell them apart (e.g. in metric names or output files).
  validation_dataset:
    # Entry 1: same term/value ranges and weights as the training set.
    - name: simple_equations
      size: 128
      seed: 41
      min_terms: 2  # alt: 3
      max_terms: 4  # alt: 10
      min_value: 1  # alt: 10
      max_value: 100  # alt: 10000
      operators_weights:
        - 0.4
        - 0.4
        - 0.2
    # Entry 2: wider term/value ranges and a larger third operator weight.
    - name: simple_equations
      size: 128
      seed: 41
      min_terms: 3
      max_terms: 10
      min_value: 10
      max_value: 10000
      operators_weights:
        - 0.35
        - 0.35
        - 0.3

  # Presumably the location used for validation data or outputs — confirm
  # how the trainer resolves this relative path.
  val_path: trainers/direct/val_simple_equations

# Data-related overrides.
data:
  # Presumably the cap on generated response length (units not shown here;
  # likely tokens) — confirm against the base trainer config.
  max_response_length: 2048
  # Literal string value, quoted so the angle brackets read unambiguously as
  # text. Presumably inserted ahead of the model's response — confirm with
  # the consumer of this key.
  preappend_token: '<answer>'