#!/usr/bin/env bash
#
# Launcher for private_test_scripts/mmoe_transformer/four_tasks_iclr24.py.
#
# This section only sets configuration variables; the loop further down
# starts one Python run per entry in $seeds.
#
# Toggle `debug` below to switch between the two presets:
#   debug=1  -> 3 epochs, is_train=1, single seed (123), task_name=debug
#   debug=0  -> 100 epochs, is_train=0, three seeds, task_name=iclr24

# Directory the script was started from.
# NOTE(review): not referenced anywhere else in the visible script — kept in
# case something outside this view relies on it.
path_prefix=$(pwd)

# Value exported as CUDA_VISIBLE_DEVICES for every run.
gpu_id=3
py_file=private_test_scripts/mmoe_transformer/four_tasks_iclr24.py

debug=1
if [ "$debug" -eq 1 ]; then
    max_epochs=3
    is_train=1
    seeds=123
    task_name=debug
else
    max_epochs=100
    # NOTE(review): is_train=0 presumably selects evaluation of an existing
    # checkpoint rather than training — confirm against the Python script.
    is_train=0
    # Whitespace-separated list of seeds; one run is launched per seed.
    # seeds='123 321 132'
    seeds='321 123 132'
    # Alternative experiment names (used in the checkpoint and image paths):
    # task_name=purely_modality_moe
    # task_name=purely_task_moe
    task_name=iclr24
fi

# Run the Python script once per seed, sequentially, on the GPU selected by
# $gpu_id. $seeds is a whitespace-separated list, so it is deliberately left
# unquoted here to let the shell split it into individual seeds.
for seed in $seeds; do
    # Arguments are passed directly (one per continuation line) instead of
    # being packed into a string and re-split by the shell, so values are
    # never subject to accidental word splitting or glob expansion.
    # The checkpoint path embeds both the task name and the seed; the image
    # path embeds only the task name.
    # NOTE(review): "--seperate-qkv" is spelled exactly as in the original
    # invocation; presumably it matches the flag defined by the Python script.
    CUDA_VISIBLE_DEVICES="$gpu_id" python -u "$py_file" \
        --model-path "private_test_scripts/model/four_tasks_${task_name}${seed}.pth" \
        --seed "$seed" \
        --epochs "$max_epochs" \
        --is-train "$is_train" \
        --lr 0.0008 \
        --img-path "log/img/four_tasks_${task_name}" \
        --unlimited-capacity-on-mlp 0 \
        --seperate-qkv 1 \
        --co-input 1 \
        --gate-type NoisyVMoEGate \
        --debug "$debug" \
        --modality-gating-merge 1 \
        --training-weight 0.9 1. 0.2 1.2 \
        --cross-modality-attn 1 \
        --cross-depth 1 \
        --drop-rate 0. \
        --grad-clip 1 \
        --moe-gate-weight 0.1 \
        --tune-gate-weight 1 \
        --mlp-top-k 2 \
        --attn-top-k 2 \
        --weight-decay 0.001 \
        --attn-modality-specific 0 \
        --mlp-modality-specific 0 \
        --task-gating-merge 1 \
        --modality-joint 1
done
