# Adds one extra location to Hydra's config search path.
# NOTE(review): the path is relative — presumably resolved against the
# directory the job is launched from, and presumably where the `ppo_trainer`
# base config lives; confirm both.
hydra:
  searchpath:
    - "file://verl/trainer/config"

# Hydra defaults list: `ppo_trainer` is composed first, then `_self_` (this
# file), so the keys set in this file take precedence over the base values.
# NOTE(review): `ppo_trainer` is presumably found via the `hydra.searchpath`
# entry in this file — confirm.
defaults:
  - ppo_trainer
  - _self_

# Overrides for the `data` section of the base config.
data:
  # Length limits for prompt and response (presumably in tokens — confirm).
  max_prompt_length: 2048
  max_response_length: 2048
  train_batch_size: 256
  # Booleans use the canonical lowercase spelling (yamllint `truthy`).
  return_raw_chat: true
  # Dataset override: `CustomRLHFDataset` from recipe/fileagent/rl_dataset.py.
  # NOTE(review): presumably loaded by file path and class name — confirm.
  custom_cls:
    path: "recipe/fileagent/rl_dataset.py"
    name: CustomRLHFDataset

# Overrides for the `actor_rollout_ref` section of the base config.
# Booleans use the canonical lowercase spelling (yamllint `truthy`).
actor_rollout_ref:
  hybrid_engine: true
  rollout:
    # Rollout backend, selected by name.
    name: sglang
    # Multi-turn rollout settings.
    multi_turn:
      enable: true
      # Cap on assistant turns (presumably per conversation — confirm).
      max_assistant_turns: 5
      # YAML file describing the tools (path as written; presumably relative
      # to the launch directory — confirm).
      tool_config_path: "recipe/fileagent/config/tool/llm_tool_v1.yaml"

# Reward function override: `compute_score` in
# recipe/fileagent/reward_score.py.
# NOTE(review): presumably the trainer imports `name` from the file at
# `path` — confirm against the consumer.
custom_reward_function:
  path: "recipe/fileagent/reward_score.py"
  name: compute_score