# Trainer settings: node/GPU counts, run naming, and output handling.
trainer:
  nnodes: 1
  n_gpus_per_node: 8
  project_name: verl_examples
  # NOTE(review): named gsm8k while data.path points at math/test.parquet — confirm naming.
  experiment_name: gsm8k
  save_output: false
  # NOTE(review): machine-specific absolute path; override per environment.
  output_dir: /nas/shared/sys2/chefengdi/dafny_full_log/

# Dataset and prompt-loading settings.
data:
  path: ~/data/rlhf/math/test.parquet
  prompt_key: prompt
  n_samples: 5
  batch_size: 32
  # Set to true when the policy and the reward model (rm) use different tokenizers.
  return_raw_input_ids: false
  return_raw_chat: false
  shuffle: true
  # Filtering overlong prompts can be time-consuming on large-scale datasets.
  # You can raise filter_overlong_prompts_workers to use multiprocessing and speed it up.
  filter_overlong_prompts: false
  filter_overlong_prompts_workers: 1
  truncation: error
  image_key: images
  prompt_type: sft

# Model location and sharding settings.
model:
  path: ~/models/Qwen2-7B-Instruct
  external_lib: null
  # NOTE(review): rollout.tensor_model_parallel_size is 1 while this is 8 —
  # confirm which key the consumer actually reads.
  tensor_parallel_size: 8  # Number of GPUs to use for tensor parallelism (model sharding)

# Multi-GPU and parallel processing settings
parallel_processing:
  use_parallel_scoring: true  # Enable parallel scoring computation
  max_workers: null  # Auto-calculate based on available GPUs if null
  # NOTE(review): separate from rollout.gpu_memory_utilization (0.6) — confirm both
  # values are intended.
  gpu_memory_utilization: 0.8  # Memory utilization per GPU

# Reward settings.
reward_model:
  # NOTE(review): the set of accepted reward_type values is not visible in this
  # file — confirm against the consumer.
  reward_type: one_score

# Rollout engine and sampling settings.
rollout:
  name: vllm
  shot_number: 1
  temperature: 1.0
  # top_k: 0 for hf rollout, -1 for vllm rollout.
  top_k: 50
  top_p: 0.7
  max_prompt_length: 2048
  max_response_length: 2048
  # NOTE(review): the three "for ... rollout" markers below previously trailed the
  # end of the preceding line; they read as headings for the keys under them — confirm.
  # for vllm rollout
  dtype: bfloat16  # should align with FSDP
  gpu_memory_utilization: 0.6
  ignore_eos: false
  enforce_eager: true
  free_cache_engine: true
  load_format: dummy_dtensor
  tensor_model_parallel_size: 1
  max_num_batched_tokens: 8192
  max_model_len: null
  max_num_seqs: 1024
  # Will be deprecated; use log_prob_micro_batch_size_per_gpu instead.
  log_prob_micro_batch_size: null
  log_prob_micro_batch_size_per_gpu: 8
  # for fire vllm rollout
  use_fire_sampling: false  # enable FIRE https://arxiv.org/abs/2410.21236
  # for hf rollout
  do_sample: true
  disable_log_stats: true
  enable_chunked_prefill: true

# Actor settings: strategy, Ulysses sequence-parallel size, and FSDP options.
actor:
  strategy: fsdp  # This is for backward-compatibility
  ulysses_sequence_parallel_size: 1 # sequence-parallel (sp) size
  fsdp_config:
    # NOTE(review): -1 looks like a sentinel value — confirm its meaning against
    # the consumer.
    fsdp_size: -1