# Launch train.py for the CIFAR10 run with only CUDA device 0 visible.
#
# The argument list is accumulated in the positional parameters so that each
# group of flags can carry its own comment. Any arguments passed to this
# script are discarded by the first `set --` (they were never forwarded to
# train.py in the first place).
#
# Flag semantics are defined by train.py, which is not shown here; the group
# headings below are inferred from the flag names only.

# Dataset location/name and output directory.
set -- \
  --data-path ./data \
  --data-set CIFAR10 \
  --save-dir /root/weights/conservation/CIFAR10

# Input geometry and class count.
set -- "$@" \
  --input-size 32 \
  --patch-size 4 \
  --num-classes 10

# Model dimensions.
set -- "$@" \
  --hidden-size 256 \
  --num-hidden-layer 6 \
  --num-attention-heads 4 \
  --intermediate-size 1024 \
  --mlp-type mlp

# Optimizer.
set -- "$@" \
  --opt sgd \
  --momentum 0.0

# Run length and batching.
set -- "$@" \
  --epochs 100 \
  --batch-size 5000 \
  --grad-accumulation-steps 10

# Learning-rate settings.
# NOTE(review): --weight_decay is spelled with an underscore, unlike every
# other flag here; kept exactly as-is -- confirm against train.py's parser.
set -- "$@" \
  --lr 7e-4 \
  --warmup-lr 7e-4 \
  --weight_decay 0 \
  --lr-scheduler linear \
  --warmup-epochs 0

# Logging.
set -- "$@" \
  --logs-frequency 1 \
  --wandb-project ConservationLaws \
  --wandb-group CIFAR10

# Seed.
set -- "$@" \
  --seed 0

# The assignment prefix scopes CUDA_VISIBLE_DEVICES to this one command only.
CUDA_VISIBLE_DEVICES=0 python train.py "$@"