# ---- Serving configuration (consumed by the `vllm serve` command below) ----
# Filesystem location of the model handed to `vllm serve`.
MODEL_PATH='/data/model/models/GUI-Owl-32B'
# Value given to --served-model-name.
MODEL_NAME='GUI-Owl-32B'
# Value given to --port.
PORT='8501'
# File that receives a copy of the server output via tee (relative path).
LOG_FILE='log_files/qwen_vl.log'

# Start `vllm serve` for the model at MODEL_PATH, restricted to the four
# devices listed in CUDA_VISIBLE_DEVICES (matching --tensor-parallel-size 4),
# and mirror its output into LOG_FILE while still printing it to the terminal.
# Reads: MODEL_PATH, MODEL_NAME, PORT, LOG_FILE (assigned at the top of this
# script).

# Without pipefail the pipeline's exit status is tee's, which hides a failed
# vllm launch from whatever invoked this script. The subshell probe keeps the
# script usable under shells that do not know the option.
if (set -o pipefail) 2>/dev/null; then
  set -o pipefail
fi

# tee does not create missing parent directories; make sure the log directory
# exists so the log is not silently lost.
mkdir -p -- "$(dirname -- "$LOG_FILE")"

# Optional flag, currently disabled: --api-key xxxxx
# 2>&1 merges stderr into the pipe so error output is logged as well, not
# only stdout.
CUDA_VISIBLE_DEVICES=0,1,2,3 vllm serve "${MODEL_PATH}" \
  --tensor-parallel-size 4 \
  --port "$PORT" \
  --served-model-name "$MODEL_NAME" \
  --cpu-offload-gb 0 \
  --swap-space 50 \
  --gpu-memory-utilization 0.96 \
  --max-model-len 32768 \
  --max-num-seqs 32 \
  --use-v2-block-manager \
  --limit-mm-per-prompt "image=20" \
  2>&1 | tee -- "$LOG_FILE"
