#!/usr/bin/env bash
#
# Runs train_scratch.py once per dataset, sequentially, with the `cswr`
# lr scheduler and seed 42. train_scratch.py is resolved relative to the
# current working directory, so invoke this script from the directory that
# contains it.
#
# Runs are best-effort: a failing run is reported on stderr and the remaining
# runs are still executed (which is why `set -e` is intentionally NOT enabled).
# The script's exit status is non-zero if ANY run failed, not just the last.

#######################################
# Launch a single train_scratch.py run.
# Arguments:
#   $1  - value for --dataset
#   $2  - value for --batch_size
#   $3  - value for -lr
#   $4  - value for --weight_decay
#   $5+ - optional extra flags, appended verbatim after --seed 42
# Outputs:  a one-line diagnostic on stderr when the run fails
# Returns:  the exit status of the python invocation
#######################################
train_one() {
  local dataset=$1 batch_size=$2 lr=$3 weight_decay=$4
  shift 4

  local status=0
  python train_scratch.py \
    --dataset "$dataset" \
    --lr_scheduler cswr \
    --batch_size "$batch_size" \
    -lr "$lr" \
    --weight_decay "$weight_decay" \
    --seed 42 \
    "$@" || status=$?

  if (( status != 0 )); then
    printf 'train_scratch.py failed for dataset %s (exit %d)\n' \
      "$dataset" "$status" >&2
  fi
  return "$status"
}

#######################################
# Launch every configured run in order, continuing past failures.
# Outputs:  a summary line on stderr when at least one run failed
# Returns:  0 if every run succeeded, 1 otherwise
#######################################
run_experiments() {
  local failures=0

  #         dataset         batch  lr    weight_decay  extra flags
  train_one mnist           1024   1e-3  1e-5                       || failures=$((failures + 1))
  train_one cifar10         256    1e-3  1e-5                       || failures=$((failures + 1))
  train_one cifar100        256    1e-3  1e-2          --patience 10 || failures=$((failures + 1))
  train_one cifar100_resnet 256    1e-3  1e-2          --patience 10 || failures=$((failures + 1))
  train_one ag_news         64     2e-5  1e-2                       || failures=$((failures + 1))
  train_one dbpedia         64     2e-5  1e-2                       || failures=$((failures + 1))

  if (( failures > 0 )); then
    printf '%d training run(s) failed\n' "$failures" >&2
    return 1
  fi
  return 0
}

# Last command of the script: its return value becomes the exit status.
run_experiments