#!/usr/bin/env bash
#
# Merge the experts of a Qwen1.5-MoE checkpoint with mergemoe/merge-moe.py,
# evaluate the merged model on WinoGrande with eval_dclm/eval_openlm_ckpt.py,
# and delete the merged checkpoint afterwards.
#
# Run from the repository root: every script/config/result path is relative.

# Abort on the first failing command or unset variable, so that a failed merge
# is never followed by an evaluation of a missing or stale checkpoint and the
# script's exit status reflects the failure. (No pipelines are used, so the
# non-POSIX `pipefail` option is intentionally not required.)
set -eu

task="winogrande"
strategy="ours"
checkpoint="/root/model/Qwen1.5-MoE-A2.7B"
merged_dir="results/${task}/merged-qwen/"
results_file="results/ours/gateupqwen/qwen_${task}_results.json"

# Always remove the merged checkpoint on exit, whether the run succeeded or
# not. `:?` refuses to run `rm` with an empty path.
cleanup() {
  rm -rf -- "${merged_dir:?}"
}
trap cleanup EXIT

# Step 1: merge. CUDA_VISIBLE_DEVICES applies to this command only.
CUDA_VISIBLE_DEVICES=0 \
  python mergemoe/merge-moe.py \
  --task="${task}" \
  --num_samples_for_merging=64 \
  --num_groups=30 \
  --merging_layers="1,2,3,4,5,6,7,8,9,10,11,12,13,14" \
  --merging_strategy="${strategy}" \
  --model_type="qwen" \
  --output_dir="${merged_dir}" \
  --checkpoint="${checkpoint}"

# Make sure the destination of the results file exists, in case the evaluator
# does not create it itself.
mkdir -p -- "${results_file%/*}"

# Step 2: evaluate with two processes. The model is read from
# <merged_dir><strategy>; presumably merge-moe.py writes its output into a
# sub-directory named after the merging strategy -- confirm against that script.
torchrun --nproc_per_node 2 \
  eval_dclm/eval_openlm_ckpt.py \
  --hf-model "${merged_dir}${strategy}" \
  --tokenizer "${checkpoint}/" \
  --eval-yaml "static/${task}.yaml" \
  --output-file "${results_file}" \
  --donot-compute-perplexity
