
# Hydra defaults list: the configs named here are composed in order, with
# later entries overriding earlier ones.
defaults:

  # Base config to inherit from (a config named `reward_model`).
  - reward_model

  # This file itself; listed last so the values below take precedence over
  # the inherited `reward_model` values.
  - _self_

# Selects the Megatron strategy for this reward model config.
# NOTE(review): presumably this picks the worker/engine backend — confirm against the consumer.
strategy: megatron



# Timeout applied to NCCL operations.
# NOTE(review): unit is presumably seconds (600 s = 10 min) — confirm against the
# code that reads this value; long-running collectives may need a larger value.
nccl_timeout: 600


# Megatron engine settings (parallelism layout, checkpointing, seed, overrides).
megatron:

  # Hydra `_target_`: dotted path of the config class this section maps to.
  _target_: verl.workers.config.MegatronEngineConfig

  # Parameter offload switch (disabled here).
  # NOTE(review): presumably offloads model parameters to CPU — confirm.
  param_offload: False

  # Tensor model parallel size (1 here).
  tensor_model_parallel_size: 1

  # Expert model parallel size (1 here).
  expert_model_parallel_size: 1

  # Expert tensor parallel size; null leaves it unset.
  # NOTE(review): the fallback used when null is decided by the consumer — confirm.
  expert_tensor_parallel_size: null

  # Pipeline model parallel size (1 here).
  pipeline_model_parallel_size: 1

  # Virtual pipeline model parallel size; null leaves it unset.
  virtual_pipeline_model_parallel_size: null

  # Context parallel size (1 here).
  context_parallel_size: 1

  # Sequence parallelism switch (enabled here).
  # NOTE(review): may have no effect while tensor_model_parallel_size is 1 — confirm.
  sequence_parallel: True

  # Distributed optimizer switch (disabled here).
  use_distributed_optimizer: False

  # Distributed checkpointing switch (disabled here).
  use_dist_checkpointing: False

  # Path used for distributed checkpoints; null means no path is configured.
  dist_checkpointing_path: null

  # Seed: taken from actor_rollout_ref.actor.megatron.seed when that key
  # exists in the composed config, otherwise falls back to 42.
  seed: ${oc.select:actor_rollout_ref.actor.megatron.seed,42}

  # Transformer config overrides: taken from the actor's
  # override_transformer_config when present, otherwise an empty mapping.
  override_transformer_config: ${oc.select:actor_rollout_ref.actor.megatron.override_transformer_config,{}}

  # mbridge switch: follows the actor's use_mbridge when present, otherwise False.
  # NOTE(review): what enabling mbridge changes is not visible here — confirm.
  use_mbridge: ${oc.select:actor_rollout_ref.actor.megatron.use_mbridge,False}


# Weight loading switch (enabled here).
# NOTE(review): presumably controls whether reward model weights are loaded at startup — confirm.
load_weight: True