# First, start the vLLM server in another terminal by running vllm_serve_launch.sh.

model_paths="your_model_path"

for model_path in $model_paths; do

    echo "$model_path"

    CUDA_VISIBLE_DEVICES=0 python eval/new_generate_response.py \
        --ds_type "imdb" \
        --num_examples 250 \
        --gen_model_path $model_path \
        --hf_org "your_hf_name" \
        --split "test" \
        --max_new_tokens 504 \
        --max_seq_len 512 \
        --num_gen_workers 8
    

    

done