# the name of the model to use; should be something like
name_or_path: ???

# the name of the tokenizer to use; if null, will use the tokenizer from the model
tokenizer_name_or_path: null

# override pre-trained weights (e.g., from SFT); optional
archive: null

# the name of the module class to wrap with FSDP; should be something like
#   e.g. GPT2Block, GPTNeoXLayer, LlamaDecoderLayer, etc.
block_name: null

# the dtype for the policy parameters/optimizer state
policy_dtype: float32

# the mixed precision dtype if using FSDP; defaults to the same as the policy
fsdp_policy_mp: null

# the dtype for the reference model (which is used for inference only)
reference_dtype: float16
