# Dotted path naming the agent class used for this configuration (ReActAgent).
agent_cls: skyrl_agent.agents.react.ReActAgent

# Dotted path naming the task class used for this configuration (WebResearchTask).
task: skyrl_agent.tasks.web_research_task.WebResearchTask

# Tool names for this configuration (presumably the tools the agent may call;
# confirm against the agent implementation).
tools:
  - search_engine
  - web_browser
  - finish

# Key names used to pull fields out of each dataset record.
data:
  # Presumably the record field that holds the data-source name; verify against the loader.
  data_source_key: data_source
  # Dotted path; presumably resolves to `index` nested under `extra_info` — confirm.
  instance_id_key: extra_info.index
  # Intentionally omit instance_key to pass full record (dict) to the task

# Trajectory generation settings: backend, rollout counts, length limits,
# sampling parameters (training and validation), and feature flags.
generator:
  infer_backend: skyrl-train
  # No extra backend configuration is supplied.
  backend_config: null
  # Keep this in sync with +skyrl_agent.num_trajectories at launch
  num_trajectories: 8
  max_iterations: 20
  # Prompt/response length limits; tune per model/context.
  # NOTE(review): 31232 for both prompt and response is not "modest" — lower
  # these for initial smoke tests, and confirm that prompt + response fits the
  # model's context window.
  max_prompt_length: 31232
  sampling_params:
    temperature: 1.0
    top_p: 1.0
    max_tokens: 31232
    stop: []
  # Validation settings: a single trajectory, with its own sampling parameters.
  val_config:
    num_trajectories: 1
    sampling_params:
      # Written as a float so the type matches the training temperature above.
      temperature: 1.0
      top_p: 0.95
      max_tokens: 31232
      stop: []
  remove_think_tokens: false
  vision_is_active: false
  # NOTE(review): Qwen3-specific thinking flags; exact semantics (especially
  # qwen3_acc_thinking) are defined by the consumer — confirm before changing.
  qwen3_enable_thinking: true
  qwen3_acc_thinking: true
  profile_tools: true
  debug_log: false

# Dispatcher settings: dispatch type, scheduler choice, and parallelism caps.
dispatcher:
  type: async_batch
  scheduler: naive
  # Caps on concurrently running agents — presumably the first applies to
  # training and the second to evaluation; confirm against the dispatcher code.
  max_parallel_agents: 200
  max_eval_parallel_agents: 200