# Model configuration: a `defaults` list naming `base_model`, followed by
# three model-specific settings.
# NOTE(review): `defaults` looks like a Hydra defaults list, in which case
# `base_model` is another config composed with this one - confirm.
defaults:
  - base_model

# No model name or path is set in this file (explicit null).
# NOTE(review): presumably supplied by a command-line or experiment override
# - verify against the caller.
name_or_path: null
# NOTE(review): presumably the class name of the transformer block that the
# training code looks up (e.g. for wrapping or checkpointing) - confirm
# against the consumer.
block_name: GPTNeoXLayer
# Activation checkpointing is turned off because it doesn't work with Pythia.
activation_checkpointing: false