#!/usr/bin/env bash
#
# Runs mergemoe/merge-moe.py on an OLMoE checkpoint for the "winogrande" task,
# evaluates the resulting model with eval_dclm/eval_openlm_ckpt.py, and then
# removes the merged model directory.
#
# Usage: run from the repository root (all script/result paths are relative).
#
# Exit status: non-zero if either the merge or the evaluation step fails.

set -euo pipefail

readonly CHECKPOINT="/root/model/OLMoE-1B-7B-0125"
readonly TASK="winogrande"
readonly MERGING_STRATEGY="ours"
# The trailing slash is kept deliberately: this exact string is handed to the
# merge script, and the evaluated model path is formed by appending the
# strategy name directly to it.
readonly MERGED_DIR="results/${TASK}/merged-olmoe/"
readonly RESULTS_FILE="results/ourssample/our_olmoe_${TASK}_results.json"

#######################################
# Deletes the merged model directory.
# Globals:   MERGED_DIR (read)
# Arguments: none
# Returns:   exit status of rm
#######################################
cleanup() {
  # ':?' aborts instead of expanding to an empty path; -f tolerates the
  # directory not existing (e.g. when the merge step failed early).
  rm -rf -- "${MERGED_DIR:?}"
}
# Remove the merged model on every exit path, success or failure, so the
# directory never outlives the run.
trap cleanup EXIT

# NOTE(review): assumes the eval script does not create the parent directory
# of --output-file itself; creating it up front is harmless either way.
mkdir -p -- "$(dirname -- "${RESULTS_FILE}")"

# Step 1: produce the merged model (restricted to GPU 0).
CUDA_VISIBLE_DEVICES=0 \
  python mergemoe/merge-moe.py \
  --task="${TASK}" \
  --num_samples_for_merging=64 \
  --num_groups=28 \
  --merging_layers="1,2,3,4,5,6,7,8,9,10" \
  --merging_strategy="${MERGING_STRATEGY}" \
  --model_type="olmoe" \
  --output_dir="${MERGED_DIR}" \
  --checkpoint="${CHECKPOINT}"

# Step 2: evaluate the merged model with two worker processes. Under
# 'set -e' this is reached only when the merge step succeeded.
torchrun --nproc_per_node 2 \
  eval_dclm/eval_openlm_ckpt.py \
  --hf-model "${MERGED_DIR}${MERGING_STRATEGY}" \
  --tokenizer "${CHECKPOINT}" \
  --eval-yaml "static/${TASK}.yaml" \
  --output-file "${RESULTS_FILE}" \
  --donot-compute-perplexity