# Launch pretrain.py for the run named "pretrain-baseline".
# Every setting is passed as a key=value override on the command line,
# one override per line so individual values are easy to diff and edit.
# The wandb=default selection refers to the wandb config in
# ./cramming/config/wandb/default.yaml
python pretrain.py \
    arch=bert-c5 \
    train=bert-o3 \
    train.batch_size=4096 \
    impl.microbatch_size=64 \
    name=pretrain-baseline \
    budget=24 \
    data=c4-subset-processed \
    wandb=default