# Hydra defaults list: entries are composed in the order listed.
# `dp_reward_model` is composed first; `_self_` comes last, so values defined
# in this file take precedence over those from `dp_reward_model`.
defaults:
  - dp_reward_model
  - _self_

# Top-level switches for this config.
# NOTE(review): exact semantics live in the consuming code, not in this file.
# Presumably selects the reward-loop code path - confirm against the consumer.
use_reward_loop: true
# Name of the reward manager implementation to use.
reward_manager: naive
# Master switch for this config; off by default.
enable: false

# Whether to deploy the model to a separate resource pool.
enable_resource_pool: false
# Sizing values for the resource pool.
# NOTE(review): presumably only consulted when enable_resource_pool is true -
# confirm against the consumer.
n_gpus_per_node: 8
num_workers: 1
# NOTE(review): nnodes is 0 here; presumably it must be overridden with a
# positive node count when a separate pool is enabled - confirm.
nnodes: 0

# Model settings.
model:
  # Plain string (only a bare `~` is YAML null). YAML does not expand `~`;
  # any home-directory expansion is up to the consumer.
  path: ~/models/FsfairX-LLaMA3-RM-v0.1
  # Interpolation: resolved from `actor_rollout_ref.model.external_lib` in the
  # composed config, so that key must exist wherever this file is composed.
  external_lib: ${actor_rollout_ref.model.external_lib}
  # Off by default.
  # NOTE(review): presumably gates execution of model-repo code - confirm.
  trust_remote_code: false

# Rollout engine settings.
# NOTE(review): the grouping below is editorial and based on key names only;
# the keys and values themselves are unchanged.
rollout:
  # Class this section is instantiated as.
  _target_: verl.workers.config.RolloutConfig
  # `???` marks a mandatory value: it must be supplied by an override or by
  # the config that composes this file.
  name: ???

  # Sequence lengths.
  prompt_length: 2048
  response_length: 2048
  max_model_len: null

  # Parallelism layout.
  tensor_model_parallel_size: 2
  data_parallel_size: 1
  expert_parallel_size: 1

  # Memory and batching.
  gpu_memory_utilization: 0.5
  max_num_batched_tokens: 8192
  max_num_seqs: 1024
  free_cache_engine: true

  # Model loading.
  dtype: bfloat16
  load_format: auto
  skip_tokenizer_init: false
  limit_images: null

  # Engine execution options.
  enforce_eager: true
  cudagraph_capture_sizes: null
  enable_chunked_prefill: true
  enable_prefix_caching: true
  disable_log_stats: true
  # Extra engine arguments; empty mapping by default.
  engine_kwargs: {}