#!/bin/bash

#python run_transformer.py --seeds 6312 --lr 0.0001 --device 'cuda' --layers 4 --wdecay 0.0 --dropout 0.0 --attnheads 1 --n_epochs 3000 --pe 2dpe --pe_init 1.0
# Submits one jbsub job per (attention heads, weight decay, learning rate,
# mask) combination; each job runs run_nmar_transformer.py over all seeds.

# Settings shared by every job in the sweep.
optim="AdamW"
pestring="rope2"
pe_init="1.0"
# NOTE(review): curriculum is assigned but never referenced below — confirm
# whether it is still needed.
curriculum="All"
# Single source of truth for the layer count: used both for --layers and for
# the job name (the name previously referenced an undefined ${layer}, which
# expanded to an empty string).
layers=4
# Space-separated mask values to sweep over (0.9 was also tried previously).
masks="0.15 0.5 0.75"
# Seeds forwarded to every job via --seeds.
seeds=(7785 195 6914 29 6312 2235 6068 9742 8880 2197 669 6256 3309 2541 8643)

for attnhead in 1; do            # other values tried: 4 8
    for decay in 0.0; do         # other values tried: 0.01 0.05 0.1
        for lr in 0.0001; do
            # $masks is intentionally unquoted: word splitting yields one
            # iteration per mask value.
            for mask in $masks; do
                job_name="pe-${pestring}-${pe_init}-h${attnhead}l${layers}-m${mask}"
                jbsub -mail -cores 1+1 -queue x86_24h -mem 32g -proj lstnn \
                    -name "$job_name" \
                    python run_nmar_transformer.py \
                    --model_label nmar-Transformer \
                    --seeds "${seeds[@]}" \
                    --lr "$lr" --device "cuda" \
                    --n_iterations 100000 \
                    --embedding_dim 64 \
                    --layers "$layers" \
                    --attnheads "$attnhead" \
                    --wdecay "$decay" \
                    --dropout 0.0 \
                    --pe "$pestring" --pe_init "$pe_init" --optimizer "$optim" \
                    --mask "$mask"
            done
        done
    done
done
