# Hydra defaults list: the base configs this file is composed from, in order.
defaults:

  # Base reward model config (the original comment cites
  # trainer/config/reward_model/reward_model.yaml as the inherited file).
  - reward_model

  # _self_ comes last, so the fields in this file are applied on top of the
  # defaults loaded above.
  - _self_

# Backend strategy for this config; the Megatron-specific settings live under
# the `megatron` key below.
strategy: megatron

# NCCL timeout, in seconds. 600 s is the 10-minute torch default; raise it for
# long-running operations, e.g. 32B or 72B models under Megatron.
nccl_timeout: 600

# Megatron parallelism and checkpointing settings for the reward model.
megatron:
  # Whether to offload model parameters to CPU
  param_offload: False

  # Number of GPUs in each tensor model parallel group
  tensor_model_parallel_size: 1

  # Number of GPUs in each expert model parallel group
  expert_model_parallel_size: 1

  # Expert tensor parallel size; null leaves it unset here
  expert_tensor_parallel_size: null

  # Number of pipeline model parallel stages
  pipeline_model_parallel_size: 1

  # Virtual pipeline model parallel (VPP) size; null leaves it unset here.
  # The original comment notes this interface was changed for parallelism tests.
  virtual_pipeline_model_parallel_size: null

  # Context parallel size
  context_parallel_size: 1

  # Whether to use sequence parallelism
  # NOTE(review): sequence parallelism is normally tied to tensor parallelism;
  # confirm how the consumer treats True when tensor_model_parallel_size is 1.
  sequence_parallel: True

  # Whether to use the distributed optimizer
  use_distributed_optimizer: False

  # Whether to enable distributed checkpointing
  use_dist_checkpointing: False

  # Path for distributed checkpoints; null means no path is configured
  dist_checkpointing_path: null

  # RNG seed for Megatron. Interpolated from the actor's Megatron seed, so this
  # file must be composed into a config that defines actor_rollout_ref.
  seed: ${actor_rollout_ref.actor.megatron.seed}

  # Overrides applied to the transformer config; empty by default
  override_transformer_config: {}

  # Whether to use mbridge; mirrors the actor's setting via interpolation.
  # NOTE(review): the original comment said "for faster comms" - confirm what
  # mbridge actually provides.
  use_mbridge: ${actor_rollout_ref.actor.megatron.use_mbridge}

# Whether to load model weights; enabled by default
load_weight: True