# CUDA_VISIBLE_DEVICES=0 python mmlu_expert_bak.py \
# --mode "train" \
# --need-lora \
# --main-model Qwen/Qwen-14B

# Run glue_expert.py in "test" mode via `accelerate launch`, with GPUs 0-7
# exposed to the launched process through CUDA_VISIBLE_DEVICES.
# NOTE(review): --max-len is presumably a length cap applied inside
# glue_expert.py; confirm its exact meaning there.
gpu_ids=0,1,2,3,4,5,6,7
CUDA_VISIBLE_DEVICES="$gpu_ids" accelerate launch glue_expert.py --mode test --max-len 1000

# CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 accelerate launch glue_expert.py \
# --mode "train" \
# --max-len 5500
# Training-set sizes per task: cola 8.5k, mnli 393k, mrpc 3.67k, qnli 105k, qqp 364k, rte 2.49k, sst2 67.3k, stsb 5.75k, wnli 635
# CUDA_VISIBLE_DEVICES=0 python train_router.py

# CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch test_multi.py