# Trainer resource settings.
# NOTE(review): per-key notes are inferred from the key names — confirm
# against the script that consumes this config.
trainer:
  # Node count, going by the key name.
  nnodes: 1
  # GPUs per node, going by the key name.
  n_gpus_per_node: 8
  # Device type string.
  device: cuda

# Input dataset, sampling count, and output location.
# NOTE(review): per-key notes are inferred from the key names — confirm
# against the script that consumes this config.
data:
  # Input parquet file. Quoted so the leading `~` is unambiguously part of a
  # string scalar.
  path: '~/data/rlhf/math/test.parquet'
  # Presumably the column that holds the prompt.
  prompt_key: prompt
  n_samples: 1
  # Output parquet file.
  output_path: '/opt/tiger/math_Qwen2-7B-Instruct.parquet'
  batch_size: 32
  return_raw_chat: true
  # Length limits. `rollout.prompt_length` and `rollout.response_length`
  # reference these two keys via `${data.*}`.
  max_prompt_length: 5120
  max_response_length: 512
  filter_overlong_prompts: false

# Model to load.
model:
  # Model location. Quoted so the leading `~` is unambiguously part of a
  # string scalar.
  path: '~/models/Qwen2-7B-Instruct'
  external_lib: null
  # NOTE(review): if this flag is forwarded to the model loader, `true` would
  # allow code shipped with the checkpoint to run — use only with trusted
  # models; confirm how the consumer uses it.
  trust_remote_code: true
# Rollout settings.
rollout:
  name: vllm  # alternative noted by the original author: sglang
  mode: sync  # sync: LLM, async: AsyncLLM
  # Sampling parameters.
  temperature: 0.0
  top_k: -1  # 0 for hf rollout, -1 for vllm rollout
  top_p: 1.0
  # Both lengths reference the `data` section through `${...}` interpolation
  # (resolved by the config loader — presumably OmegaConf/Hydra; confirm).
  prompt_length: '${data.max_prompt_length}'
  response_length: '${data.max_response_length}'
  # for vllm rollout
  dtype: bfloat16  # should align with FSDP
  gpu_memory_utilization: 0.5
  ignore_eos: false
  enforce_eager: false
  free_cache_engine: false
  load_format: dummy_dtensor
  tensor_model_parallel_size: 1
  max_num_batched_tokens: 16384
  max_model_len: null
  max_num_seqs: 1024
  multi_stage_wake_up: false
  # Will be deprecated; use log_prob_micro_batch_size_per_gpu instead.
  log_prob_micro_batch_size: null
  log_prob_micro_batch_size_per_gpu: 32
  # for hf rollout
  do_sample: false
  disable_log_stats: true
  enable_chunked_prefill: true
  n: 1
  # support logging rollout prob for debugging purpose
  calculate_log_probs: false
  multi_turn:
    enable: false
    max_user_turns: 5
    max_assistant_turns: 5
    # Example value left by the original author:
    #   ./examples/sglang_multiturn/config/tool_config/gui_grounding_tool_config.yaml
    tool_config_path: null
    interaction_config_path: null
    use_inference_chat_template: false
    tokenization_sanity_check_mode: strict
    completion_callback: null
  val_kwargs:
    # for sglang rollout
    # Each value references the matching top-level rollout key; the trailing
    # note is the value that key currently holds in this file.
    top_k: '${rollout.top_k}'  # -1
    top_p: '${rollout.top_p}'  # 1.0
    temperature: '${rollout.temperature}'  # 0.0
    do_sample: '${rollout.do_sample}'  # false
# Actor settings.
actor:
  strategy: fsdp  # This is for backward-compatibility
  ulysses_sequence_parallel_size: 1  # sp (sequence parallel) size
  # Calculate entropy with chunking to reduce the memory peak.
  entropy_from_logits_with_chunking: false
  entropy_checkpointing: false  # recompute entropy
  fsdp_config:
    # NOTE(review): -1 looks like a sentinel value; its meaning is not
    # visible in this file — confirm against the consumer.
    fsdp_size: -1
    forward_prefetch: false  # FSDP1 forward_prefetch configuration

# Ray initialization settings (per the section name).
ray_init:
  # `null` (`None`) means using all CPUs, which might cause a hang on systems
  # where CPUs are limited, such as SLURM. Set this to an allowed number there.
  num_cpus: null
  timeline_json_file: null
