






# Hydra-style `_target_`: dotted path of the config class this mapping maps to.
_target_: verl.workers.config.ActorConfig

# Training strategy for the actor. `???` is OmegaConf's mandatory-missing
# marker, so a value must be supplied by a composing config or an override.
strategy: ???

# Mini-batch size used for PPO updates.
# NOTE(review): presumably a global (not per-GPU) sample count - confirm.
ppo_mini_batch_size: 256

# Micro-batch size; unset (null) by default.
# NOTE(review): how this relates to the `_per_gpu` variant is not visible here.
ppo_micro_batch_size: null

# Per-GPU micro-batch size; unset (null) by default.
ppo_micro_batch_size_per_gpu: null

# Dynamic batch-size toggle; disabled by default.
# NOTE(review): presumably pairs with ppo_max_token_len_per_gpu - confirm.
use_dynamic_bsz: false

# Per-GPU token limit (going by the key name).
# NOTE(review): likely only consulted when use_dynamic_bsz is true - confirm.
ppo_max_token_len_per_gpu: 16384

# PPO clip ratio.
clip_ratio: 0.2

# Lower clip ratio; the default equals clip_ratio.
clip_ratio_low: 0.2

# Upper clip ratio; the default equals clip_ratio.
clip_ratio_high: 0.2


# Policy-loss settings, grouped in their own nested mapping.
policy_loss:

  # Hydra-style `_target_`: dotted path of the config class for this mapping.
  _target_: verl.workers.config.PolicyLossConfig

  # Name of the policy-loss variant to use.
  # NOTE(review): the set of accepted names is not visible here - confirm.
  loss_mode: vanilla

  # Ratio parameter of the clip-cov settings.
  # NOTE(review): presumably only read by a clip-cov style loss_mode - confirm.
  clip_cov_ratio: 0.0002

  # Lower bound paired with clip_cov_ub.
  clip_cov_lb: 1.0

  # Upper bound paired with clip_cov_lb.
  clip_cov_ub: 5.0

  # Ratio parameter of the kl-cov settings.
  # NOTE(review): presumably only read by a kl-cov style loss_mode - confirm.
  kl_cov_ratio: 0.0002

  # KL coefficient used by the policy loss.
  # NOTE(review): which loss modes consume it is not visible here - confirm.
  ppo_kl_coef: 0.1


# Clipping constant `c`.
# NOTE(review): presumably the dual-clip PPO bound - confirm against the loss code.
clip_ratio_c: 3.0

# How the loss is aggregated; the default is token-mean.
# NOTE(review): the other accepted modes are not visible here.
loss_agg_mode: token-mean

# Entropy coefficient; 0 by default.
entropy_coeff: 0

# KL-loss toggle; disabled by default.
use_kl_loss: false

# torch.compile toggle (going by the key name); enabled by default.
use_torch_compile: true

# KL-loss coefficient.
# NOTE(review): presumably only applied when use_kl_loss is true - confirm.
kl_loss_coef: 0.001

# KL-loss variant; the default is low_var_kl.
kl_loss_type: low_var_kl

# Number of PPO epochs; 1 by default.
ppo_epochs: 1

# Shuffle toggle; disabled by default.
# NOTE(review): what is shuffled, and when, is not visible here - confirm.
shuffle: false


# Checkpoint settings, grouped in their own nested mapping.
checkpoint:

  # Hydra-style `_target_`: dotted path of the config class for this mapping.
  _target_: verl.trainer.config.CheckpointConfig

  # Names of the components written to a checkpoint.
  # NOTE(review): other accepted component names are not visible here.
  save_contents: ['model', 'optimizer', 'extra']

  # Names of the components restored from a checkpoint. `${.save_contents}` is
  # an OmegaConf relative interpolation to the sibling key above, so by default
  # the load list mirrors the save list.
  load_contents: ${.save_contents}

  # Asynchronous-save toggle; disabled by default.
  # NOTE(review): which strategies honour it is not visible here - confirm.
  async_save: false


# Optimizer settings, grouped in their own nested mapping.
optim:

  # Learning rate. Written with an explicit decimal point (`1.0e-6`, not
  # `1e-6`) so that YAML 1.1 loaders such as PyYAML resolve it as a float
  # instead of a string.
  lr: 1.0e-6

  # Warmup length expressed as a ratio.
  # NOTE(review): presumably a fraction of total_training_steps, used when
  # lr_warmup_steps is negative - confirm.
  lr_warmup_steps_ratio: 0.0

  # Total number of training steps; -1 looks like a "not set" placeholder.
  # NOTE(review): presumably overridden at runtime - confirm.
  total_training_steps: -1

  # Weight decay.
  weight_decay: 0.01

  # Warmup length in steps; -1 looks like a "not set" sentinel.
  # NOTE(review): presumably defers to lr_warmup_steps_ratio - confirm.
  lr_warmup_steps: -1



# Fused-kernel toggle. Resolved through OmegaConf's `oc.select` resolver: takes
# the value of actor_rollout_ref.model.use_fused_kernels and falls back to
# false when that key is not available.
use_fused_kernels: ${oc.select:actor_rollout_ref.model.use_fused_kernels,false}
