#!/usr/bin/env bash
#
# Starts `vllm serve` for the InternVL3_5-38B-HF checkpoint on GPUs 0-3 and
# mirrors the server output to a log file.
#
# Usage: run from the directory that should contain log_files/ (LOG_FILE is a
# relative path). The body is kept POSIX-compatible so `sh <script>` also works.

set -eu
# Make the pipeline report a vllm failure instead of tee's status. pipefail is
# not available in every /bin/sh, so only enable it where the shell accepts it.
(set -o pipefail) 2>/dev/null && set -o pipefail

MODEL_PATH="/data/model/models/InternVL3_5-38B-HF"
MODEL_NAME="InternVL3_5-38B-HF"
PORT=8501
LOG_FILE="log_files/internvl3.5_38b_hf.log"

# tee cannot create missing parent directories; without this the server would
# still start but nothing would be written to LOG_FILE.
mkdir -p -- "$(dirname -- "$LOG_FILE")"

# Optional flag, add it to the command below if needed:
#   --api-key xxxxx \
#
# --tensor-parallel-size matches the four devices in CUDA_VISIBLE_DEVICES.
# stderr is merged into stdout (2>&1) so tracebacks and other diagnostics are
# captured in the log as well. tee runs without -a, so each start overwrites
# the previous log.
CUDA_VISIBLE_DEVICES=0,1,2,3 vllm serve "${MODEL_PATH}" \
  --tensor-parallel-size 4 \
  --port "${PORT}" \
  --served-model-name "${MODEL_NAME}" \
  --cpu-offload-gb 0 \
  --swap-space 50 \
  --gpu-memory-utilization 0.96 \
  --max-model-len 40960 \
  --max-num-seqs 32 \
  --use-v2-block-manager \
  --limit-mm-per-prompt "image=20" \
  2>&1 | tee -- "${LOG_FILE}"
