
# Example commands for running our experiments on the `text` task.
#
# All four runs below share one set of hyperparameters; they differ only in
# the --model value and in whether `--beta 1.` is appended. The shared flags
# are kept in run_text_experiment so the runs cannot drift apart when a
# hyperparameter is edited.
#
# Set PYTHON to choose the interpreter (e.g. PYTHON=python3); it defaults to
# `python`. Runs are independent: a failing run does not stop the later ones.

#######################################
# Launch run_tasks.py once with the shared hyperparameters.
# Globals:   PYTHON (read, optional) - interpreter to invoke
# Arguments: $1  - value passed to --model
#            $2+ - optional extra flags, appended after the shared ones
# Returns:   exit status of the interpreter, or 2 if $1 is missing
#######################################
run_text_experiment() {
  if [ "$#" -lt 1 ]; then
    printf 'usage: run_text_experiment MODEL [extra run_tasks.py flags...]\n' >&2
    return 2
  fi
  # Plain (prefixed) variable instead of `local` so the script stays POSIX sh.
  _rte_model=$1
  shift
  "${PYTHON:-python}" run_tasks.py \
    --model "$_rte_model" \
    --task text \
    --learning_rate 0.0001 \
    --weight_decay 0.0 \
    --num_layers 2 \
    --dropout_prob 0.1 \
    --max_seq_len 2600 \
    --attention_dropout 0.1 \
    --num_train_steps 10000 \
    "$@"
}

#### SOFTMAX ATTENTION
run_text_experiment softmax

#### ATTENTION-BN
run_text_experiment softmax --beta 1.

#### ATTENTION-SH
run_text_experiment sh

#### ATTENTION-BN+SH
run_text_experiment sh --beta 1.
