# First, start the vLLM server in another terminal by running vllm_serve_launch.sh.

# Standard evaluation.
# For every model path, run eval/new_generate_response.py on the "summary"
# test split (250 examples, up to 64 new tokens), then run
# eval/benchmark_models.py with the resulting responses as --response1_path
# and a second response file as --response2_path.

# Whitespace-separated list of generation model paths to evaluate.
model_paths="your_model_path"

# Word splitting is intentional here: model_paths is a whitespace-separated list.
# shellcheck disable=SC2086
for model_path in $model_paths; do

    printf '%s\n' "$model_path"

    # Step 1: produce responses with the model under evaluation.
    # Abort on failure so the benchmark never runs on missing or stale output.
    if ! CUDA_VISIBLE_DEVICES=0 python eval/new_generate_response.py \
        --ds_type "summary" \
        --num_examples 250 \
        --gen_model_path "$model_path" \
        --hf_org "your_hf_name" \
        --split "test" \
        --max_new_tokens 64; then
        printf 'error: response generation failed for %s\n' "$model_path" >&2
        exit 1
    fi

    # Placeholder: set this to the response file for the current model.
    response_path=""
    # An empty value used to vanish during word splitting, leaving
    # --response1_path without an argument; fail with a clear message instead.
    if [ -z "$response_path" ]; then
        printf 'error: response_path is empty; set it before benchmarking %s\n' \
            "$model_path" >&2
        exit 1
    fi

    # Step 2: benchmark the two response files.
    # NOTE(review): Qwen/Qwen2.5-32B-Instruct is presumably the judge model —
    # confirm against eval/benchmark_models.py.
    CUDA_VISIBLE_DEVICES=0 python eval/benchmark_models.py \
        --ds_type "summary" \
        --num_examples 250 \
        --model_path "Qwen/Qwen2.5-32B-Instruct" \
        --model_name "qwen2.5-32b" \
        --cache_dir "/share/models/huggingface/hub" \
        --response1_path "$response_path" \
        --response2_path "your_responses_path" \
        --split "test" \
        --max_new_tokens 64

done



## Best-of-N (BoN) evaluation


# For every model path, run eval/new_generate_response.py with the BoN
# settings (--gen_type "bon", --best_of 64, reward model given by
# --reward_model_path), then run eval/benchmark_models.py with the resulting
# responses as --response1_path and a second response file as --response2_path.
#
# Word splitting is intentional here: model_paths (defined earlier in this
# file) is a whitespace-separated list.
# shellcheck disable=SC2086
for model_path in $model_paths; do

    printf '%s\n' "$model_path"

    # Step 1: produce BoN responses with the model under evaluation.
    # Abort on failure so the benchmark never runs on missing or stale output.
    if ! CUDA_VISIBLE_DEVICES=0 python eval/new_generate_response.py \
        --ds_type "summary" \
        --num_examples 250 \
        --gen_model_path "$model_path" \
        --hf_org "your_hf_name" \
        --split "test" \
        --max_new_tokens 64 \
        --reward_model_path "your_reward_model_path" \
        --gen_type "bon" \
        --num_bon_workers_per_gpu 1 \
        --bon_batch_size 1 \
        --bon_chunk_size 1 \
        --best_of 64; then
        printf 'error: BoN response generation failed for %s\n' "$model_path" >&2
        exit 1
    fi

    # Placeholder: set this to the BoN response file for the current model.
    response_path=""
    # An empty value used to vanish during word splitting, leaving
    # --response1_path without an argument; fail with a clear message instead.
    if [ -z "$response_path" ]; then
        printf 'error: response_path is empty; set it before benchmarking %s\n' \
            "$model_path" >&2
        exit 1
    fi

    # Step 2: benchmark the two response files.
    # NOTE(review): Qwen/Qwen2.5-32B-Instruct is presumably the judge model —
    # confirm against eval/benchmark_models.py.
    CUDA_VISIBLE_DEVICES=0 python eval/benchmark_models.py \
        --ds_type "summary" \
        --num_examples 250 \
        --model_path "Qwen/Qwen2.5-32B-Instruct" \
        --model_name "qwen2.5-32b" \
        --cache_dir "/share/models/huggingface/hub" \
        --response1_path "$response_path" \
        --response2_path "your_responses_path" \
        --split "test" \
        --max_new_tokens 64

done