# Identifier of the pre-trained model to load, for example
#   gpt2-xl, gpt-neo-2.7B, or huggyllama/llama-7b
# NOTE(review): the key name suggests a local path is accepted as well as a
# model name -- confirm against the loading code.
name_or_path: RLHFlow/LLaMA3-SFT-v2

# Name of the tokenizer to load. When null, the tokenizer that comes with the
# model is used instead.
tokenizer_name_or_path: null

# Optional: weights that override the pre-trained ones (e.g., the output of an
# SFT run).
# NOTE(review): null presumably means "no override"; the expected value format
# (checkpoint path?) is not visible in this file -- confirm.
archive: null

# Class name of the module that gets wrapped with FSDP, for example
#   GPT2Block, GPTNeoXLayer, LlamaDecoderLayer
# NOTE(review): presumably this has to match the layer class of the model
# selected above -- confirm.
block_name: LlamaDecoderLayer

# dtype used for the policy's parameters and for its optimizer state
policy_dtype: float32

# Mixed-precision dtype for the policy when FSDP is in use. Defaults to the
# policy's own dtype.
# NOTE(review): null presumably selects that default (policy_dtype) -- confirm.
fsdp_policy_mp: null

# dtype of the reference model; that model is only used for inference
reference_dtype: float16

# Baseline head settings.
# NOTE(review): this file does not document these keys. The notes below are
# inferred from the key names only and must be confirmed against the code:
#   use_baseline_head        presumably switches the baseline head on or off
#   baseline_mlp_dim         presumably the hidden width of the head's MLP
#   baseline_dropout         presumably a dropout probability inside the head
#   baseline_l / baseline_u  presumably lower / upper bounds on the head's output
use_baseline_head: false
baseline_mlp_dim: 0
baseline_dropout: 0.0
baseline_l: -10.0
baseline_u: 10.0

# LoRA settings; use_lora is the switch for the whole group.
use_lora: true

# NOTE(review): lora_r / lora_alpha / lora_dropout look like the usual LoRA
# rank, scaling factor and dropout probability -- confirm how the consuming
# code interprets them.
lora_r: 32
lora_alpha: 64
lora_dropout: 0.05
lora_bias: "none"          # "none" | "all" | "lora_only"
lora_target_modules: null  # if null, pick defaults in code
