name_or_path: prajjwal1/bert-tiny
tokenizer_name_or_path: prajjwal1/bert-tiny
archive: null

# the name of the module class to wrap with FSDP; should be something like
#   e.g. GPT2Block, GPTNeoXLayer, LlamaDecoderLayer, etc.
block_name:BertLayer

# the dtype for the policy parameters/optimizer state
policy_dtype: float32

# the mixed precision dtype if using FSDP; defaults to the same as the policy
fsdp_policy_mp: null

# the dtype for the reference model (which is used for inference only)
reference_dtype: float16
