# Example commands for running our experiments (one command per attention variant).
#### SOFTMAX ATTENTION
# Runs run_tasks.py on the retrieval task with --model softmax and no --beta
# flag. The flags are split across continuation lines for readability:
# model/task, optimizer, depth, dataset sizes, sequence length, step schedule,
# and head count.
python run_tasks.py \
  --model softmax --task retrieval \
  --learning_rate 0.0001 --weight_decay 0.0 \
  --num_layers 2 \
  --n_train_samples 147086 --n_dev_samples 18090 --n_test_samples 17437 \
  --max_seq_len 4096 \
  --num_train_steps 30000 --num_eval_steps 565 --eval_frequency 300 \
  --num_head 2

#### ATTENTION-BN
# Runs run_tasks.py on the retrieval task with --model softmax and --beta 1.
# NOTE(review): --beta is the only flag separating this run from a plain
# softmax run; presumably it switches on the BN variant -- confirm in
# run_tasks.py.
python run_tasks.py \
  --model softmax --task retrieval \
  --learning_rate 0.0001 --weight_decay 0.0 \
  --num_layers 2 \
  --n_train_samples 147086 --n_dev_samples 18090 --n_test_samples 17437 \
  --max_seq_len 4096 \
  --num_train_steps 30000 --num_eval_steps 565 --eval_frequency 300 \
  --num_head 2 \
  --beta 1.

#### ATTENTION-SH
# Runs run_tasks.py on the retrieval task with --model sh and no --beta flag.
# The flags are split across continuation lines for readability: model/task,
# optimizer, depth, dataset sizes, sequence length, step schedule, and head
# count.
python run_tasks.py \
  --model sh --task retrieval \
  --learning_rate 0.0001 --weight_decay 0.0 \
  --num_layers 2 \
  --n_train_samples 147086 --n_dev_samples 18090 --n_test_samples 17437 \
  --max_seq_len 4096 \
  --num_train_steps 30000 --num_eval_steps 565 --eval_frequency 300 \
  --num_head 2

#### ATTENTION-BN+SH
# Runs run_tasks.py on the retrieval task with --model sh and --beta 1.
# NOTE(review): presumably --model sh selects the SH variant and --beta adds
# the BN part on top -- confirm both in run_tasks.py.
python run_tasks.py \
  --model sh --task retrieval \
  --learning_rate 0.0001 --weight_decay 0.0 \
  --num_layers 2 \
  --n_train_samples 147086 --n_dev_samples 18090 --n_test_samples 17437 \
  --max_seq_len 4096 \
  --num_train_steps 30000 --num_eval_steps 565 --eval_frequency 300 \
  --num_head 2 \
  --beta 1.





