# Agent implementation to run, given as a dotted path.
# NOTE(review): presumably resolved as a Python import path by the loader — confirm.
agent_cls: skyrl_agent.agents.react.ReActAgent

# Task definition, given as a dotted path (same resolution caveat as agent_cls).
task: skyrl_agent.tasks.general_react.utils.GeneralReactTask

# Names of the tools listed for this configuration.
tools:
  - em_finish
  - local_search

# Dataset-related settings.
data:
  # NOTE(review): presumably the name of the dataset field that identifies
  # each example's source — confirm against the data loader.
  data_source_key: data_source

# Trajectory generation settings.
generator:
  # Identifier of the inference backend.
  infer_backend: tinker
  # Explicit null: no backend-specific configuration is supplied in this file.
  backend_config: null
  num_trajectories: 8  # Number of trajectories per prompt for GRPO
  # NOTE(review): presumably the cap on agent steps per trajectory — confirm
  # against the agent loop.
  max_iterations: 5
  # NOTE(review): units (tokens vs. characters) are not stated here — confirm.
  max_prompt_length: 8000
  # Default sampling parameters; val_config below carries its own set.
  sampling_params:
    temperature: 1.0
    top_p: 1
    max_tokens: 8000
  # NOTE(review): presumably applied during validation/evaluation — confirm.
  # Uses a single trajectory and temperature 0 instead of the values above.
  val_config:
    num_trajectories: 1
    sampling_params:
      temperature: 0
      top_p: 0.95
      max_tokens: 8000
  # Feature flags; all are disabled in this configuration.
  remove_think_tokens: false
  vision_is_active: false
  qwen3_enable_thinking: false
  qwen3_acc_thinking: false

# Dispatcher selection and its parallelism limits.
dispatcher:
  type: async_batch
  max_parallel_agents: 32  # Adjust based on your resources
  # NOTE(review): presumably the parallelism limit used for evaluation runs —
  # confirm against the dispatcher implementation.
  max_eval_parallel_agents: 32

