#!/bin/bash
#
# Launches one background run of Language/search_glue_no_trainer.py for every
# (gate type, top-k, number of experts) combination configured below.
#
# Each run is started with nohup and '&', and the loop never waits, so all
# combinations are started back to back and run concurrently. Every run
# inherits the exported CUDA_VISIBLE_DEVICES value.
#
# Output of each run (stdout + stderr) goes to:
#   logs/<task>/<model>/<gate>/moe_<E>_experts_top<K>_layers<NAME>_repeat<R>/<timestamp>/train_nohup.out

# Trace every command so the exact launch lines are visible in the terminal.
set -x
# NOTE(review): nothing below references a name that is visibly defined by
# consts.sh; presumably it provides shared environment settings -- confirm.
source consts.sh

export CUDA_VISIBLE_DEVICES=3

# Multi-valued options are kept in arrays so that each value is passed as its
# own argument without relying on unquoted word splitting.
MOE_LAYERS=(10)
# Label used for MOE_LAYERS in the output directory name.
MOE_LAYERS_NAME="10"
TASK_NAME=mnli
MODEL_NAME=bert-large-cased
NUM_EXPERTS_LIST=(8)
REPEAT=8
SEEDS=(0 1 2)
LR="2e-5"
TOP_K_LIST=(1 2 4)
GATE_LIST=(star)

for GATE in "${GATE_LIST[@]}"; do
    for TOP_K in "${TOP_K_LIST[@]}"; do
        for NUM_EXPERTS in "${NUM_EXPERTS_LIST[@]}"; do
            echo "GATE: $GATE, MOE_LAYERS: ${MOE_LAYERS[*]}, MOE_LAYERS_NAME: $MOE_LAYERS_NAME"
            # A fresh timestamp per launch keeps repeated sweeps from
            # overwriting the logs of an earlier one.
            TIME=$(date "+%Y%m%d-%H%M%S")
            output_dir="logs/${TASK_NAME}/${MODEL_NAME}/${GATE}/moe_${NUM_EXPERTS}_experts_top${TOP_K}_layers${MOE_LAYERS_NAME}_repeat${REPEAT}/${TIME}"

            # Without the log directory the redirection below cannot succeed,
            # so report the problem clearly and move on to the next setting.
            if ! mkdir -p -- "$output_dir"; then
                echo "cannot create output directory: $output_dir" >&2
                continue
            fi

            nohup python Language/search_glue_no_trainer.py \
                --model_name_or_path "$MODEL_NAME" \
                --to_MoE \
                --gate_type "$GATE" \
                --task_name "$TASK_NAME" \
                --learning_rates "$LR" \
                --seeds "${SEEDS[@]}" \
                --num_experts "$NUM_EXPERTS" \
                --top_k "$TOP_K" \
                --moe_layers "${MOE_LAYERS[@]}" \
                --expert_repeat "$REPEAT" \
                --random_cluster \
                --save_model > "$output_dir/train_nohup.out" 2>&1 &
        done
    done
done

