# Megatron actor config; inherits from trainer/config/actor/actor.yaml
# Hydra defaults list: entries are composed in the order they are listed.
defaults:
  # base actor config that this file inherits from
  - actor
  # `_self_` is this file; listing it after `actor` means the base config is
  # loaded first and the fields in the current yaml are then applied on top
  - _self_

# strategy identifier for this actor; this file is the megatron variant
strategy: megatron

# seed for the data loader; null leaves it unset in this file
# NOTE(review): what the consumer falls back to when this is null is not
# visible here — confirm
data_loader_seed: null

# presumably controls whether model weights are loaded at initialization
# — TODO confirm against the worker that reads this flag
load_weight: True

# checkpoint settings set by the megatron actor config
# NOTE(review): any other checkpoint fields presumably come from the inherited
# actor config — confirm
checkpoint:

  # presumably toggles asynchronous checkpoint saving; off by default — confirm
  async_save: False

# optimizer and learning-rate / weight-decay schedule settings
optim:

  # name of the optimizer to use
  optimizer: adam

  # gradient clipping threshold
  # NOTE(review): presumably the max global gradient norm — confirm
  clip_grad: 1.0

  # initial learning rate for warmup, default to 0.0
  lr_warmup_init: 0.0

  # number of warmup steps. Takes priority over lr_warmup_steps_ratio; null, 0
  # or a negative value means delegating to lr_warmup_steps_ratio
  # (lr_warmup_steps_ratio is not defined in this file — presumably inherited
  # from the base actor config; confirm)
  lr_warmup_steps: null

  # number of steps over which the learning rate decays; null leaves it unset
  # NOTE(review): how the value is derived when null is not visible here
  lr_decay_steps: null

  # select from constant/linear/cosine/inverse_square_root
  # NOTE(review): the lr_wsd_* keys below suggest a WSD style may also be
  # accepted, and Megatron-LM spells the last option `inverse-square-root`
  # — confirm the accepted values against the scheduler that reads this
  lr_decay_style: constant

  # minimum learning rate, default to 0.0
  min_lr: 0.0

  # weight-decay increment style; select from constant/linear/cosine
  weight_decay_incr_style: constant

  # select from constant/exponential/cosine
  # NOTE(review): Megatron-LM documents exponential/linear/cosine for its WSD
  # decay style — confirm that `constant` is really accepted here
  lr_wsd_decay_style: exponential

  # number of steps for the WSD decay phase; null leaves it unset
  # NOTE(review): presumably only relevant when a WSD decay style is in use
  lr_wsd_decay_steps: null

  # use checkpoint optimizer parameter scheduler
  # NOTE(review): presumably restores the scheduler from the checkpoint instead
  # of using the values above — confirm
  use_checkpoint_opt_param_scheduler: False

# megatron-specific settings: offloading, parallelism layout, distributed
# optimizer / checkpointing, and seeding
megatron:

  # offload toggles for parameters, gradients and optimizer state; all three
  # are off by default
  # NOTE(review): presumably offload to host memory — confirm
  param_offload: False

  grad_offload: False

  optimizer_offload: False

  # parallelism sizes; 1 means that dimension is not split
  tensor_model_parallel_size: 1

  expert_model_parallel_size: 1

  # null leaves the expert tensor-parallel size unset in this file
  # NOTE(review): fallback when null is decided by the consumer — confirm
  expert_tensor_parallel_size: null

  pipeline_model_parallel_size: 1

  # null leaves the virtual pipeline size unset in this file
  # NOTE(review): presumably disables virtual/interleaved pipelining — confirm
  virtual_pipeline_model_parallel_size: null

  context_parallel_size: 1

  # sequence parallelism is enabled by default
  sequence_parallel: True

  # the distributed optimizer is enabled by default
  use_distributed_optimizer: True

  # distributed checkpointing is off by default, with no path configured
  # NOTE(review): presumably dist_checkpointing_path is only read when
  # use_dist_checkpointing is True — confirm
  use_dist_checkpointing: False

  dist_checkpointing_path: null

  # random seed
  seed: 42

  # additional transformer config like: num_layers_in_first(/last)_pipeline_stage
  # empty mapping by default, i.e. no overrides
  override_transformer_config: {}

  # NOTE(review): presumably toggles the mbridge integration — confirm
  use_mbridge: False

# profile the actor model in `update_policy`
profile:
  # turn it on when you want to profile the actor model; off by default
  use_profile: False

  # list, you can specify the ranks to profile; null means no list is given
  # NOTE(review): which ranks are profiled when this is null is not visible
  # here — confirm
  profile_ranks: null

  # start step in update_policy
  # NOTE(review): -1 presumably means "not set" — confirm
  step_start: -1

  # end step
  # NOTE(review): -1 presumably means "not set", as for step_start — confirm
  step_end: -1

  # the path to save the profile result; null means no path is configured
  save_path: null
