# Run eval/benchmark_models.py on the "summary" dataset type: 1000 examples,
# "test" split, using the Qwen/Qwen2.5-32B-Instruct model (labelled
# "qwen2.5-32b") with the Hugging Face cache under /share/models.
#
# NOTE(review): "your_responses_path" / "another_responses_path" look like
# placeholders, and --cache_dir looks site-specific — adjust before running.
#
# NOTE(review): --response1_path is passed twice. If the script parses its
# arguments with argparse's default "store" action, only the last value
# ("another_responses_path") takes effect and the first is silently dropped.
# The second occurrence was presumably meant to be a different flag
# (e.g. --response2_path) — confirm against eval/benchmark_models.py.
#
# The final argument line intentionally has no trailing backslash, so anything
# added below this command is not swallowed as extra arguments.
python eval/benchmark_models.py \
  --ds_type "summary" \
  --num_examples 1000 \
  --model_path "Qwen/Qwen2.5-32B-Instruct" \
  --model_name "qwen2.5-32b" \
  --cache_dir "/share/models/huggingface/hub" \
  --response1_path "your_responses_path" \
  --response1_path "another_responses_path" \
  --split "test"