#!/usr/bin/env bash
# Launch eval.alpaca_eval.run_eval for one model on the selected GPUs.
#
# Usage: <script> DEVICE MODEL_PATH RPATH
#   DEVICE      comma-separated GPU ids, exported as CUDA_VISIBLE_DEVICES
#               (e.g. "0" or "0,1,2,3")
#   MODEL_PATH  value passed to --model; also inspected (case-sensitively)
#               to pick the attention backend and GPU memory fraction
#   RPATH       value passed to --reference_path
set -euo pipefail

if (( $# < 3 )); then
    printf 'Usage: %s DEVICE MODEL_PATH RPATH\n' "${0##*/}" >&2
    exit 2
fi

DEVICE=$1
MODEL_PATH=$2
RPATH=$3

if [[ -z "$DEVICE" ]]; then
    printf 'error: DEVICE must be a non-empty comma-separated list of GPU ids\n' >&2
    exit 2
fi

# Count the GPU ids by splitting on commas with the read builtin; this avoids
# an echo | tr | wc pipeline and does not count a trailing comma as a GPU.
IFS=',' read -r -a gpu_ids <<< "$DEVICE"
NUM_GPUS=${#gpu_ids[@]}

# Pick per-model settings from substrings of the model path:
#   *gemma*   -> export VLLM_ATTENTION_BACKEND=FLASHINFER, memory fraction 0.65
#   *Mistral* -> memory fraction 0.5
#   otherwise -> memory fraction 0.9 (attention backend left untouched)
if [[ "$MODEL_PATH" == *"gemma"* ]]; then
    export VLLM_ATTENTION_BACKEND="FLASHINFER"
    GPU_UTIL=0.65
elif [[ "$MODEL_PATH" == *"Mistral"* ]]; then
    GPU_UTIL=0.5
else
    GPU_UTIL=0.9
fi

# Run the evaluation module; every expansion is quoted so paths containing
# spaces or glob characters reach the program as single arguments.
CUDA_VISIBLE_DEVICES="$DEVICE" python -m eval.alpaca_eval.run_eval \
    --model "$MODEL_PATH" \
    --model_num_gpus "$NUM_GPUS" \
    --use_chat_format \
    --gpu_memory_utilization "$GPU_UTIL" \
    --reference_path "$RPATH"