#!/usr/bin/env bash
#
# Launches finetune_train.py on the SST-2 data directory with the SMoE-dropout
# configuration listed below, passing an enwik8 checkpoint path as
# --pretrained_weight.
#
# All paths are relative, so the script must be started from a directory where
# they resolve (presumably the repository root -- confirm against the README).
#
# Usage: bash <this-script>

# Abort on the first failing command or unset variable, so that e.g. a failed
# mkdir stops the run instead of letting training start without a writable
# checkpoint directory.
set -euo pipefail

# Single source of truth for the output directory: it is used both by mkdir
# and by --checkpoint, so the two can never drift apart.
readonly ckpt_dir="checkpoints/sst2/transformers-s/smoe_dropout"

mkdir -p -- "$ckpt_dir"

# The argument list is kept in an array (not a whitespace-split string) so
# every flag and value reaches Python as exactly one argv entry, with no
# word-splitting or glob expansion. The meaning of each flag is defined by
# finetune_train.py.
args=(
  --data data/finetuning/sst2
  --data_name sst2
  --base_arch transformer
  --architecture sgsgsgsgsgsg
  --gate_name smoe
  --nlayers 6
  --hid-sz 264
  --inner-hid-sz 264
  --nheads 8
  --block-sz 512
  --attn-span 2048
  --dropout 0.1
  --load_balance 0.00
  --optim adam
  --lr 0.0001
  --lr-warmup 0
  --niter 5
  --batch-sz 16
  --batch-split 2
  --nbatches 1000
  --smoe_dropout
  --checkpoint "${ckpt_dir}/smoe-dropout.pt"
  --pretrained_weight checkpoints/enwik8/transformers-s/smoe_dropout/smoe-dropout.pt
)

echo "Training ..."
python finetune_train.py "${args[@]}"

