# Sweep job on GPU 0 (background): --wd=1e-5, resuming from the shared checkpoint.
# The --name value is single-quoted because "[5]" is a glob bracket expression:
# left unquoted, the shell replaces the word with any matching path, drops it
# under nullglob, or aborts the command under failglob.
# NOTE(review): --layers 40 is passed while the checkpoint directory is named
# WRN-28-10 — confirm the depth matches the checkpoint being resumed.
CUDA_VISIBLE_DEVICES=0 python train.py --dataset cifar100 --layers 40 --widen-factor 10 --tensorboard \
    --loss cross_entropy \
    --model_seed 1 --data_seed 100 \
    --split_size=45000 --batch-size=128 --use_split train_val \
    --use_temp --temp_model nn_on_reps --temp_model_depth 2 --temp_model_width 512 --min_temp 0.2 \
    --lr 0.1 --scheduler onecycle --epochs 50 --wd=1e-5 --optimizer sgd \
    --resume ./runs/WRN-28-10-modelseed-1-dataseed-100-split-45000/checkpoint.pth.tar \
    --name 'temp_model/nn_on_reps_[5]_2layer_sgd_lr_0.1_wd_1e-5_onecycle_50epochs' &

# Sweep job on GPU 1 (background): --wd=3e-5, resuming from the shared checkpoint.
# The --name value is single-quoted because "[5]" is a glob bracket expression:
# left unquoted, the shell replaces the word with any matching path, drops it
# under nullglob, or aborts the command under failglob.
CUDA_VISIBLE_DEVICES=1 python train.py --dataset cifar100 --layers 40 --widen-factor 10 --tensorboard \
    --loss cross_entropy \
    --model_seed 1 --data_seed 100 \
    --split_size=45000 --batch-size=128 --use_split train_val \
    --use_temp --temp_model nn_on_reps --temp_model_depth 2 --temp_model_width 512 --min_temp 0.2 \
    --lr 0.1 --scheduler onecycle --epochs 50 --wd=3e-5 --optimizer sgd \
    --resume ./runs/WRN-28-10-modelseed-1-dataseed-100-split-45000/checkpoint.pth.tar \
    --name 'temp_model/nn_on_reps_[5]_2layer_sgd_lr_0.1_wd_3e-5_onecycle_50epochs' &

# Sweep job on GPU 2 (background): --wd=1e-4, resuming from the shared checkpoint.
# The --name value is single-quoted because "[5]" is a glob bracket expression:
# left unquoted, the shell replaces the word with any matching path, drops it
# under nullglob, or aborts the command under failglob.
CUDA_VISIBLE_DEVICES=2 python train.py --dataset cifar100 --layers 40 --widen-factor 10 --tensorboard \
    --loss cross_entropy \
    --model_seed 1 --data_seed 100 \
    --split_size=45000 --batch-size=128 --use_split train_val \
    --use_temp --temp_model nn_on_reps --temp_model_depth 2 --temp_model_width 512 --min_temp 0.2 \
    --lr 0.1 --scheduler onecycle --epochs 50 --wd=1e-4 --optimizer sgd \
    --resume ./runs/WRN-28-10-modelseed-1-dataseed-100-split-45000/checkpoint.pth.tar \
    --name 'temp_model/nn_on_reps_[5]_2layer_sgd_lr_0.1_wd_1e-4_onecycle_50epochs' &

# Sweep job on GPU 3 (background): --wd=3e-3, resuming from the shared checkpoint.
# The --name value is single-quoted because "[5]" is a glob bracket expression:
# left unquoted, the shell replaces the word with any matching path, drops it
# under nullglob, or aborts the command under failglob.
# NOTE(review): the other jobs in this sweep use 1e-5, 3e-5, 1e-4 and 1e-3, so
# 3e-4 may have been intended here instead of 3e-3 — confirm. The value is kept
# as written because the flag and the run name agree.
CUDA_VISIBLE_DEVICES=3 python train.py --dataset cifar100 --layers 40 --widen-factor 10 --tensorboard \
    --loss cross_entropy \
    --model_seed 1 --data_seed 100 \
    --split_size=45000 --batch-size=128 --use_split train_val \
    --use_temp --temp_model nn_on_reps --temp_model_depth 2 --temp_model_width 512 --min_temp 0.2 \
    --lr 0.1 --scheduler onecycle --epochs 50 --wd=3e-3 --optimizer sgd \
    --resume ./runs/WRN-28-10-modelseed-1-dataseed-100-split-45000/checkpoint.pth.tar \
    --name 'temp_model/nn_on_reps_[5]_2layer_sgd_lr_0.1_wd_3e-3_onecycle_50epochs' &

# Sweep job on GPU 4 (foreground): --wd=1e-3, resuming from the shared checkpoint.
# The --name value is single-quoted because "[5]" is a glob bracket expression:
# left unquoted, the shell replaces the word with any matching path, drops it
# under nullglob, or aborts the command under failglob.
CUDA_VISIBLE_DEVICES=4 python train.py --dataset cifar100 --layers 40 --widen-factor 10 --tensorboard \
    --loss cross_entropy \
    --model_seed 1 --data_seed 100 \
    --split_size=45000 --batch-size=128 --use_split train_val \
    --use_temp --temp_model nn_on_reps --temp_model_depth 2 --temp_model_width 512 --min_temp 0.2 \
    --lr 0.1 --scheduler onecycle --epochs 50 --wd=1e-3 --optimizer sgd \
    --resume ./runs/WRN-28-10-modelseed-1-dataseed-100-split-45000/checkpoint.pth.tar \
    --name 'temp_model/nn_on_reps_[5]_2layer_sgd_lr_0.1_wd_1e-3_onecycle_50epochs'
foreground_status=$?

# Barrier: the foreground job finishing does not mean the jobs backgrounded
# earlier in this script are done. Block until they all exit so the script does
# not return (or move on to later commands) while their GPUs are still busy.
wait

# `wait` with no operands returns 0; put the foreground job's status back in $?
# without terminating the shell, so the result seen by the caller is unchanged.
( exit "$foreground_status" )