#!/usr/bin/env bash
#
# Launches mose/main.py with a fixed set of hyperparameters.
#
# Usage:
#   Run from the directory that contains the `mose/` folder, because the
#   entry point below is given as a relative path.
#
# Every setting is declared once as a variable and then forwarded to the
# Python entry point as the matching `--flag value` pair. What each flag
# means is defined by mose/main.py, not by this script; the groupings in
# the comments below are inferred from the names only — verify against the
# argument parser before relying on them.

# -e: stop on the first failing command.
# -u: treat an unset variable as an error, so a typo in a variable name
#     cannot silently drop a value and shift the remaining flags.
# `pipefail` is deliberately not enabled: the script has no pipelines, and
# leaving it out keeps the file runnable with a plain POSIX `sh`.
set -eu

# Expose only the GPU with index 5 to the launched process.
export CUDA_VISIBLE_DEVICES=5

# Model identifier passed to --model_name.
MODEL_NAME="Qwen/Qwen3-8B"

# Adapter / expert settings (names suggest a LoRA-style mixture of experts
# — TODO confirm in mose/main.py).
NUM_EXPERTS=8
RANK=8
TOP_K=2
ALPHA=16
USE_Q=1             # presumably a 0/1 switch
USE_LORA_DROPOUT=0  # presumably a 0/1 switch

# Optimisation settings.
LEARNING_RATE=2e-5
NUM_EPOCHS=1
BETA=0.002
LAMBDA=0.01         # forwarded as --lambda_1 (flag name differs from variable)
WEIGHT_DECAY=0.01
WARMUP_STEPS=1000
USE_AUX_LOSS=1      # presumably a 0/1 switch
USE_ORTH_LOSS=1     # presumably a 0/1 switch

# Data, batching and checkpoint settings.
MAX_LENGTH=128
LABEL_LENGTH=4
BATCH_SIZE=8
VALID_BATCH_SIZE=8
SAVE_STEPS=2000
SEED=2025

# Comma-separated task list; handed to the program as one single argument.
SELECTED_TASKS="boolq,obqa,piqa,arc_e,arc_c,siqa,winogrande,hellaswag"

# All expansions are quoted so that each value reaches Python as exactly one
# argument, even if it is later edited to be empty or to contain whitespace.
python mose/main.py \
    --model_name "$MODEL_NAME" \
    --num_experts "$NUM_EXPERTS" \
    --rank "$RANK" \
    --top_k "$TOP_K" \
    --alpha "$ALPHA" \
    --use_q "$USE_Q" \
    --use_lora_dropout "$USE_LORA_DROPOUT" \
    --learning_rate "$LEARNING_RATE" \
    --num_epochs "$NUM_EPOCHS" \
    --beta "$BETA" \
    --lambda_1 "$LAMBDA" \
    --weight_decay "$WEIGHT_DECAY" \
    --warmup_steps "$WARMUP_STEPS" \
    --use_aux_loss "$USE_AUX_LOSS" \
    --use_orth_loss "$USE_ORTH_LOSS" \
    --max_length "$MAX_LENGTH" \
    --label_length "$LABEL_LENGTH" \
    --batch_size "$BATCH_SIZE" \
    --valid_batch_size "$VALID_BATCH_SIZE" \
    --save_steps "$SAVE_STEPS" \
    --seed "$SEED" \
    --selected_tasks "$SELECTED_TASKS"