# Hardware layout for the job: 1 node with 8 GPUs per node, on CUDA devices.
# NOTE(review): how the consumer maps these values onto workers is not visible
# in this file — confirm against the launcher before changing them.
trainer:
  nnodes: 1
  n_gpus_per_node: 8
  device: cuda

# Input dataset and output location (both parquet files).
# NOTE(review): both paths are machine-specific. A leading `~` is a plain string
# as far as YAML is concerned; it is only expanded to the home directory if the
# consuming code does so — confirm.
data:
  path: ~/data/rlhf/math/test.parquet
  prompt_key: prompt  # presumably the column holding the prompt — verify
  n_samples: 5  # presumably samples generated per prompt — verify
  output_path: /opt/tiger/math_Qwen2-7B-Instruct.parquet
  batch_size: 128

# Model checkpoint location.
# NOTE(review): `~` is not expanded by YAML itself; the consumer must expand it.
model:
  path: ~/models/Qwen2-7B-Instruct
  # Explicit null: no external library is configured here.
  external_lib: null
# Rollout (generation) engine settings.
# NOTE(review): the group headings below are inferred from key names only; the
# exact semantics of each key live in the consuming code — confirm there.
# Booleans are written in canonical lowercase form (`true`/`false`) so every
# YAML 1.1 / 1.2 loader and linter reads them the same way.
rollout:
  name: vllm
  mode: sync

  # Sampling parameters.
  temperature: 1.0
  top_k: 50
  do_sample: true
  n: 1
  ignore_eos: false

  # Sequence length budget: 1536 prompt + 512 response = 2048 tokens.
  prompt_length: 1536
  response_length: 512

  # Engine / memory settings.
  dtype: bfloat16
  gpu_memory_utilization: 0.5
  enforce_eager: true
  free_cache_engine: true
  load_format: dummy_dtensor
  tensor_model_parallel_size: 1
  max_num_batched_tokens: 8192
  # Explicit null: no value is set in this file.
  max_model_len: null
  max_num_seqs: 1024
  disable_log_stats: true
  enable_chunked_prefill: true

  # Log-prob computation. Only the per-GPU micro batch size is set here; the
  # non-per-GPU variant is left null.
  log_prob_micro_batch_size: null
  log_prob_micro_batch_size_per_gpu: 8
  calculate_log_probs: false
# Actor-side settings.
# NOTE(review): semantics of these keys are defined by the consuming code and
# are not visible in this file — confirm before changing.
# Booleans use canonical lowercase `false`; trailing whitespace removed.
actor:
  strategy: fsdp
  entropy_from_logits_with_chunking: false
  fsdp_config:
    # NOTE(review): -1 looks like a sentinel (e.g. "auto") — confirm.
    fsdp_size: -1
    forward_prefetch: false

# Ray initialization options. Both values are explicitly null in this file.
# NOTE(review): how the consumer treats a null `num_cpus` (e.g. falling back to
# all available CPUs) is not visible here — confirm, especially on schedulers
# that restrict the CPUs available to a job.
ray_init:
  num_cpus: null
  timeline_json_file: null
