# Format checks enforced on CI:
# 1. Comments must appear above each field.
# 2. There must be a blank line between each field.
# 3. Inline comments (after a field on the same line) are not allowed.
# 4. Indentation level is respected for nested fields.

# Hydra defaults list: the component configs composed into this file, in the order listed
defaults:

  # fsdp optimizer config, placed under the `optim` key
  - ../optim@optim: fsdp

  # fsdp engine config, placed under the `model.fsdp_config` key
  - ../engine@model.fsdp_config: fsdp

  # dp critic config, inheriting from trainer/config/critic/critic.yaml
  - critic

  # load the default configs listed above first, then apply the fields in the current yaml
  - _self_

# Target dataclass for this config.
# Required when using verl.utils.omega_conf_to_dataclass to instantiate dataclass configs
_target_: verl.workers.config.FSDPCriticConfig

# Distribution strategy. Options: fsdp (being deprecated), fsdp2
strategy: fsdp

# Model config for the critic
model:

  # Target dataclass for the model section.
  # Required when using verl.utils.omega_conf_to_dataclass to instantiate dataclass configs
  _target_: verl.workers.config.FSDPCriticModelCfg

  # Whether to use shared memory for loading the model
  use_shm: False

  # Enable gradient checkpointing to save memory
  enable_gradient_checkpointing: True

  # Offload activations to CPU to reduce GPU memory usage
  enable_activation_offload: False

  # Enable the remove-padding optimization (saves compute)
  use_remove_padding: False

  # LoRA rank. Set to a positive value (e.g., 32) to enable LoRA; the default 0 leaves it off
  lora_rank: 0

  # LoRA scaling factor
  lora_alpha: 16

  # LoRA target modules: "all-linear" or a list of linear projection layers
  target_modules: all-linear

# Forward-only batch size during inference (global).
# oc.select: takes the value of the sibling field ppo_micro_batch_size, or null if it is not set
forward_micro_batch_size: ${oc.select:.ppo_micro_batch_size,null}

# Forward-only batch size during inference (per GPU).
# oc.select: takes the value of the sibling field ppo_micro_batch_size_per_gpu, or null if it is not set
forward_micro_batch_size_per_gpu: ${oc.select:.ppo_micro_batch_size_per_gpu,null}

# Sequence parallelism size for Ulysses-style model parallelism
ulysses_sequence_parallel_size: 1

# Gradient clipping for critic updates
grad_clip: 1.0
