#!/usr/bin/env bash
#
# Merge-and-evaluate pipeline for the "winogrande" task:
#   1. run mergemoe/merge-moe.py on the Qwen3-30B-A3B checkpoint,
#   2. evaluate the resulting model with eval_dclm/eval_openlm_ckpt.py,
#   3. delete the merged checkpoint directory.
#
# All script and result paths are relative, so invoke this from the directory
# that contains mergemoe/, eval_dclm/, static/ and results/.

# Abort on the first failing command (and on unset variables) so that the
# evaluation never runs against a missing or half-written merged checkpoint,
# and so that a failure is reflected in this script's exit status.
set -eu

readonly checkpoint="/root/model/Qwen3-30B-A3B"
readonly strategy="ours"
readonly merged_dir="results/winogrande/merged-qwen3"
readonly results_file="results/ours/qwen3/qwen3_winogrande_results.json"

# The merged checkpoint is removed on every exit path (success or failure),
# matching the original unconditional cleanup at the end of the script.
cleanup() {
  rm -rf -- "${merged_dir:?}/"
}
trap cleanup EXIT

# Step 1: merge. CUDA_VISIBLE_DEVICES is set for this command only.
CUDA_VISIBLE_DEVICES=0,1,2,3 \
  python mergemoe/merge-moe.py \
  --task="winogrande" \
  --num_samples_for_merging=40 \
  --num_groups=64 \
  --merging_layers="28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47" \
  --merging_strategy="${strategy}" \
  --model_type="qwen" \
  --output_dir="${merged_dir}/" \
  --checkpoint="${checkpoint}"

# Make sure the directory for the results JSON exists before evaluating.
mkdir -p -- "${results_file%/*}"

# Step 2: evaluate on 4 processes.
# NOTE(review): the model is read from "<output_dir>/<strategy>"; presumably
# merge-moe.py writes into a sub-directory named after --merging_strategy —
# confirm against that script.
torchrun --nproc_per_node 4 \
  eval_dclm/eval_openlm_ckpt.py \
  --hf-model "${merged_dir}/${strategy}" \
  --tokenizer "${checkpoint}" \
  --eval-yaml "static/winogrande.yaml" \
  --output-file "${results_file}" \
  --donot-compute-perplexity

# Step 3 (removal of ${merged_dir}) is performed by the EXIT trap above.