#!/usr/bin/env bash
#
# Launch the vLLM OpenAI-compatible API server for Qwen2.5-VL-72B-Instruct.
#
# Usage:
#   MODEL_PATH=/path/to/Qwen2.5-VL-72B-Instruct bash <this-script> [extra server args...]
#
# Environment:
#   MODEL_PATH  Value passed to --model. Defaults to the placeholder
#               "YOUR_PATH/Qwen2.5-VL-72B-Instruct", which must be edited or
#               overridden before the server can actually load the model.
#
# Any arguments given to this script are appended verbatim to the server
# command line.
set -euo pipefail

readonly MODEL_PATH="${MODEL_PATH:-YOUR_PATH/Qwen2.5-VL-72B-Instruct}"

# Arguments are kept in an array so that each flag sits on its own line
# without backslash continuations; a stray trailing "\" after the last flag
# would otherwise swallow whatever line follows it.
server_args=(
  --served-model-name Qwen2.5-VL-72B-Instruct
  --model "${MODEL_PATH}"
  --tensor-parallel-size 4
  --gpu-memory-utilization 0.9
  --max-model-len 12288
  # NOTE(review): 0.0.0.0 binds on every network interface; restrict the
  # host if the server should not be reachable from other machines.
  --host 0.0.0.0
  --port 8080
  --trust-remote-code
)

# exec replaces this shell with the server process, so signals (e.g. SIGTERM
# from a supervisor) reach the server directly and its exit status is the
# script's exit status.
exec python3 -m vllm.entrypoints.openai.api_server "${server_args[@]}" "$@"
