#!/usr/bin/env bash
#
# Launch a vLLM OpenAI-compatible server for the SEAgent-1.0-7B checkpoint,
# sharded across four GPUs, and mirror the server output into a log file.
#
# Usage: bash <this-script>   (run from the directory that should hold log_files/)

# Abort on errors and unset variables; with pipefail the pipeline below
# reports a vllm failure instead of the exit status of tee.
set -euo pipefail

MODEL_PATH=/data/model/models/SEAgent-1.0-7B
MODEL_NAME=SEAgent-1.0-7B
PORT=8501
LOG_FILE=log_files/seagent7b.log

# tee cannot create missing parent directories, so make sure the log dir exists.
mkdir -p -- "$(dirname -- "$LOG_FILE")"

# Arguments are kept in an array so each one stays a single word even if a
# value ever contains whitespace, and so flags can be commented individually.
serve_args=(
  "${MODEL_PATH}"
  # --api-key xxxxx   # disabled; uncomment and supply a real key to require auth
  # Tensor-parallel degree matches the four devices in CUDA_VISIBLE_DEVICES below.
  --tensor-parallel-size 4
  --port "$PORT"
  --served-model-name "$MODEL_NAME"
  --cpu-offload-gb 0
  --swap-space 50
  --gpu-memory-utilization 0.96
  --max-model-len 32768
  --max-num-seqs 32
  # NOTE(review): newer vLLM releases may have deprecated or removed this
  # flag -- confirm against the installed vLLM version.
  --use-v2-block-manager
  --limit-mm-per-prompt "image=20"
)

# Merge stderr into stdout so error output (e.g. tracebacks) also lands in the
# log; a bare pipe forwards only stdout. The log file is overwritten each run.
CUDA_VISIBLE_DEVICES=0,1,2,3 vllm serve "${serve_args[@]}" 2>&1 \
  | tee "$LOG_FILE"