# Launches three back-to-back runs of run_train_sparse_first.sh, each with
# CUDA_VISIBLE_DEVICES=2 set in the child process environment.
#
# The runs share positional arguments 1, 2, 4, 5, 7 and 8
# (roberta-base, 1, 32, 8910, 0, 0.1) and differ only in arguments 3, 6 and 9.
# NOTE(review): the meaning of each position is defined inside
# run_train_sparse_first.sh, which is not visible here. Argument 3 looks like
# a learning rate and argument 9 like a dataset/task name -- confirm there.
#
# There is no `set -e`: a failing run does not stop the following ones, and
# the script's exit status is that of the last run.

# launch_run ARG3 ARG6 ARG9
#   Invokes run_train_sparse_first.sh with the shared arguments plus the three
#   per-run values, placed at positions 3, 6 and 9 respectively.
launch_run() {
  CUDA_VISIBLE_DEVICES=2 bash run_train_sparse_first.sh \
    roberta-base 1 "$1" 32 8910 "$2" 0 0.1 "$3"
}

launch_run 0.00002 70 qnli
launch_run 0.00002 60 sst2
launch_run 0.00004 80 rte