---
# Run configuration: a flat mapping of scalar settings.
# Key semantics are defined by whatever code loads this file; notes marked
# "confirm" are reviewer assumptions, not facts established here.

# Model identifier string.
base_model: pythia-160m

# Explicit null (no variant set). How a null variant is interpreted is up to
# the consumer.
variant: null

# Absolute, machine-specific filesystem path.
# NOTE(review): presumably the directory where model files are cached — confirm.
cache: /mnt/hdd-0/circuits-over-time/model_cache

# Named schedule; the meaning of `all_early` is defined by the consumer.
checkpoint_schedule: all_early

# Integer. NOTE(review): whether this is 0- or 1-indexed is not stated here —
# confirm against the consumer.
start_layer: 2

# Quoted because the value contains a colon; it is still the string `cuda:1`.
# NOTE(review): looks like a PyTorch-style device string — confirm.
device: 'cuda:1'

# Boolean. NOTE(review): presumably controls whether existing results are
# replaced — confirm.
overwrite: false