
# Dotted path of the config class this node maps onto.
# NOTE(review): presumably consumed by Hydra-style instantiation; confirm against the loader.
_target_: verl.workers.config.CriticConfig

# Mirrors actor_rollout_ref.rollout.n; resolves to 1 when that key is absent.
rollout_n: ${oc.select:actor_rollout_ref.rollout.n,1}

# '???' is OmegaConf's mandatory-value marker: no default is provided here,
# so a concrete value has to be supplied by whatever composes or overrides this file.
strategy: ???


# No explicit value is set here (null).
# NOTE(review): how a null `enable` is interpreted is decided by the consumer; not visible here.
enable: null


# Optimizer settings for the critic.
optim:

  # Learning rate. Written with an explicit mantissa dot so that YAML 1.1
  # loaders (e.g. plain PyYAML) also read it as a float rather than a string.
  lr: 1.0e-5

  # Warmup length expressed as a ratio; 0.0 by default.
  # NOTE(review): presumably a fraction of total training steps; confirm.
  lr_warmup_steps_ratio: 0.0

  # -1 looks like a "not set" sentinel.
  # NOTE(review): presumably filled in by the trainer at runtime; confirm.
  total_training_steps: -1

  weight_decay: 0.01

  # -1 looks like a "not set" sentinel.
  # NOTE(review): presumably defers to lr_warmup_steps_ratio when negative; confirm.
  lr_warmup_steps: -1



# Model-related settings for the critic.
model:

  # Plain string value ('~' followed by a path is not YAML null).
  # NOTE(review): '~' expansion, if any, is left to the consumer; confirm.
  path: ~/models/deepseek-llm-7b-chat

  # Uses actor_rollout_ref.model.path when present, otherwise the quoted literal.
  # NOTE(review): the fallback is a hard-coded copy of the default `path` above,
  # not a reference to `.path` in this node; confirm that is intended.
  tokenizer_path: ${oc.select:actor_rollout_ref.model.path,"~/models/deepseek-llm-7b-chat"}

  # Empty mapping by default.
  override_config: {}

  # Mirrors actor_rollout_ref.model.external_lib; null when that key is absent.
  external_lib: ${oc.select:actor_rollout_ref.model.external_lib,null}

  # Mirrors actor_rollout_ref.model.trust_remote_code; false when that key is absent.
  trust_remote_code: ${oc.select:actor_rollout_ref.model.trust_remote_code,false}


# Mirrors actor_rollout_ref.actor.ppo_mini_batch_size; 256 when that key is absent.
ppo_mini_batch_size: ${oc.select:actor_rollout_ref.actor.ppo_mini_batch_size,256}

# Unset (null) by default; the per-GPU key below reads its value from here.
ppo_micro_batch_size: null

# Relative lookup (leading dot) of the sibling key ppo_micro_batch_size,
# so it is null unless that sibling is given a value.
ppo_micro_batch_size_per_gpu: ${oc.select:.ppo_micro_batch_size,null}

# Mirrors actor_rollout_ref.actor.use_dynamic_bsz; false when that key is absent.
use_dynamic_bsz: ${oc.select:actor_rollout_ref.actor.use_dynamic_bsz,false}

# NOTE(review): presumably only relevant when use_dynamic_bsz is true; confirm.
ppo_max_token_len_per_gpu: 32768

# Relative interpolation: always tracks the sibling ppo_max_token_len_per_gpu.
forward_max_token_len_per_gpu: ${.ppo_max_token_len_per_gpu}


# Mirrors actor_rollout_ref.actor.ppo_epochs; 1 when that key is absent.
ppo_epochs: ${oc.select:actor_rollout_ref.actor.ppo_epochs,1}

# Mirrors actor_rollout_ref.actor.shuffle; false when that key is absent.
shuffle: ${oc.select:actor_rollout_ref.actor.shuffle,false}

# NOTE(review): presumably the clip range applied to value predictions in the
# critic loss; confirm against the consumer.
cliprange_value: 0.5

# Mirrors actor_rollout_ref.actor.loss_agg_mode; "token-mean" when that key is absent.
loss_agg_mode: ${oc.select:actor_rollout_ref.actor.loss_agg_mode,token-mean}


# Checkpoint settings for the critic.
checkpoint:

  # Dotted path of the config class this node maps onto.
  _target_: verl.trainer.config.CheckpointConfig

  # Names of the components included when saving.
  save_contents: ['model', 'optimizer', 'extra']

  # Relative interpolation: loads exactly what save_contents lists unless overridden.
  load_contents: ${.save_contents}

  # Canonical lowercase boolean, matching the `false` defaults used elsewhere in this file.
  async_save: false



# Profiler settings for the critic.
profiler:

  # Dotted path of the config class this node maps onto.
  _target_: verl.utils.profiler.ProfilerConfig

  # Canonical lowercase booleans, matching the `false` defaults used elsewhere in this file.
  discrete: false

  all_ranks: false

  # Empty list by default.
  # NOTE(review): presumably the ranks to profile when all_ranks is false; confirm.
  ranks: []
