# Hydra defaults list: the component configs this file is composed from.
defaults:

  # dp ref config, inheriting from trainer/config/ref/ref.yaml
  - ref

  # fsdp engine config, taken from ../engine and placed under `fsdp_config`
  - ../engine@fsdp_config: fsdp

  # `_self_` comes last: the defaults above are loaded first, then the fields
  # defined in this file are applied on top of them
  - _self_

# The ref model is assumed to be identical to the actor model, so `model` is null by default.
# Specify model.path to use a different ref model. A potential use case is on-policy
# distillation, where the KL divergence is computed between the student (actor) and the
# teacher (ref).
model: null

# sequence parallel size
# same as actor_rollout_ref.actor.ulysses_sequence_parallel_size if it exists, otherwise 1
ulysses_sequence_parallel_size: ${oc.select:actor_rollout_ref.actor.ulysses_sequence_parallel_size,1}

# calculate entropy with chunking to reduce the memory peak
entropy_from_logits_with_chunking: false

# recompute entropy
# NOTE(review): presumably trades extra compute for lower memory use by recomputing
# the entropy instead of storing intermediates - confirm against the consuming code
entropy_checkpointing: false
