# defaults specify the default config from each component
defaults:

  # megatron optimizer config
  - ../optim@optim: megatron

  # megatron engine config
  - ../engine@megatron: megatron

  # critic config, inheriting from trainer/config/critic/critic.yaml
  - critic

  # load the reference default config, then apply the fields in the current yaml
  - _self_
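
# Illustrative usage (not part of this config): assuming this file is composed under the top-level
# `critic` key of the trainer config, fields defined here can be overridden from the Hydra
# command line, e.g. a hypothetical invocation:
#   python3 -m verl.trainer.main_ppo critic.nccl_timeout=1200 critic.load_weight=False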

# Required when using verl.utils.omega_conf_to_dataclass to instantiate dataclass configs
_target_: verl.workers.config.McoreCriticConfig

strategy: megatron

# NCCL timeout in seconds (torch's default is 10 minutes). Set it to a larger value if you have long-running operations, e.g. a 32B or 72B model with Megatron
nccl_timeout: 600

# model config for the critic
model:

  # Required when using verl.utils.omega_conf_to_dataclass to instantiate dataclass configs
  _target_: verl.trainer.config.BaseModelConfig

  # override the default empty mapping with model- and MoE-specific overrides
  override_config:

    model_config: {}

    moe_config:

      freeze_moe_router: False
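
  # Illustrative only: entries under override_config are intended to override fields of the
  # underlying model configuration. A hypothetical non-empty override (attention_dropout is just
  # an example field name) could look like:
  #   override_config:
  #     model_config:
  #       attention_dropout: 0.0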

  # LoRA (Low-Rank Adaptation) configuration for parameter-efficient fine-tuning
  lora:
      # LoRA type: "lora", "vlm_lora", "canonical_lora", or "dora"
      type: lora

      # LoRA rank (dimension of the low-rank projection space). Set to 0 to disable LoRA
      rank: 0  # typical values: 8, 16, 32, 64
      
      #  Weighting factor for the low-rank projection. Defaults to 32
      alpha: 32
      
      # Dropout rate for the low-rank projection. Defaults to 0.0
      dropout: 0.0
      
      # A list of module names to apply LoRA to.
      # For fused LoRA, defaults to all linear layers: ['linear_qkv', 'linear_proj', 'linear_fc1', 'linear_fc2'].
      # For canonical LoRA: ["linear_q", "linear_k", "linear_v", "linear_proj", "linear_fc1_up", "linear_fc1_gate", "linear_fc2"]
      # (a commented canonical example follows this list).
      # - 'linear_qkv': Apply LoRA to the fused linear layer used for query, key, and value projections in self-attention
      # - 'linear_proj': Apply LoRA to the linear layer used for projecting the output of self-attention
      # - 'linear_fc1': Apply LoRA to the first fully-connected layer in MLP
      # - 'linear_fc2': Apply LoRA to the second fully-connected layer in MLP
      # Target modules can also contain wildcards. For example, you can specify
      # target_modules=['*.layers.0.*.linear_qkv', '*.layers.1.*.linear_qkv'] to add LoRA to only linear_qkv on the first two layers
      target_modules:
        - linear_qkv
        - linear_proj
        - linear_fc1
        - linear_fc2
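      # Illustrative only: a hypothetical canonical-LoRA selection targeting the unfused
      # projections (the module names are documented above; this particular choice is just an example):
      #   type: canonical_lora
      #   target_modules: ["linear_q", "linear_k", "linear_v", "linear_proj"]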
      
      # A list of module names not to apply LoRA to. LoRA is applied to every nn.Linear and
      # nn.Linear-adjacent module whose name does not match any string in exclude_modules.
      # If used, target_modules must be an empty list or null
      exclude_modules: []

      # Position for applying dropout, can be 'pre' (before the low-rank projection) or 'post' (after). Defaults to 'pre'
      dropout_position: pre

      # Initialization method for the low-rank matrix A. Defaults to "xavier".
      lora_A_init_method: xavier

      # Initialization method for the low-rank matrix B. Defaults to "zero".
      lora_B_init_method: zero

      # Enables the experimental All-to-All (A2A) communication strategy. Defaults to False
      a2a_experimental: False

      # Parameter data type for LoRA weights. Defaults to null, which uses the model's dtype.
      dtype: null

      # Path to pre-trained LoRA adapter weights (null to train from scratch)
      adapter_path: null

      # VLMLoRA additionally allows the user to specify whether the language or vision model should be frozen.
      # For example, a common fine-tuning workload for multimodal models is to apply adapters to the language
      # model and fully fine-tune the vision model.
      freeze_vision_model: True
      freeze_vision_projection: True
      freeze_language_model: True
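
      # Illustrative only (not a default): a hypothetical setup that enables LoRA by giving it a
      # positive rank while keeping the remaining defaults above:
      #   lora:
      #     type: lora
      #     rank: 16
      #     alpha: 32
      #     target_modules: ["linear_qkv", "linear_proj", "linear_fc1", "linear_fc2"]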

# Whether to load initial weights
load_weight: True

# seed for the data loader; reuses actor_rollout_ref.actor.data_loader_seed when it is set, otherwise null
data_loader_seed: ${oc.select:actor_rollout_ref.actor.data_loader_seed,null}
