#!/usr/bin/env bash
#
# Launch `vllm serve` for a model with image inputs and tool calling enabled.
#
# Usage: <script> MODEL CUDA_DEVICES NUM_GPU
#   MODEL         model argument handed to `vllm serve`
#   CUDA_DEVICES  value exported as CUDA_VISIBLE_DEVICES (e.g. "0,1")
#   NUM_GPU       value for --tensor-parallel-size (positive integer)
#
# The chat template path below is relative, so it is resolved against the
# current working directory of the caller.
#
# NOTE: `set -eu` is intentionally not enabled; conda activation hooks may
# reference unset variables or return non-zero, so critical steps are
# checked explicitly instead.

# Print an error message to stderr and abort.
die() {
    printf 'error: %s\n' "$*" >&2
    exit 1
}

# Print the expected invocation to stderr and abort with a usage status.
usage() {
    printf 'Usage: %s MODEL CUDA_DEVICES NUM_GPU\n' "${0##*/}" >&2
    exit 2
}

(( $# >= 3 )) || usage

model=$1
cuda_devices=$2
num_gpu=$3

[[ -n "$model" ]] || die "MODEL must not be empty"
# An empty CUDA_VISIBLE_DEVICES would hide every GPU from the server.
[[ -n "$cuda_devices" ]] || die "CUDA_DEVICES must not be empty"
[[ "$num_gpu" =~ ^[1-9][0-9]*$ ]] \
    || die "NUM_GPU must be a positive integer, got '$num_gpu'"

export CUDA_VISIBLE_DEVICES="$cuda_devices"

# Load conda's shell integration, then switch to the serving environment.
# Fail loudly rather than silently serving from whatever is on PATH.
command -v conda >/dev/null || die "conda not found on PATH"
conda_hook=$(conda shell.bash hook) || die "failed to obtain the conda shell hook"
eval "$conda_hook"
conda activate vllm-090825-nightly \
    || die "failed to activate conda environment 'vllm-090825-nightly'"

readonly port=8007
readonly max_image=70
# max_video=0
readonly chat_template=./src/chat_template_internvl3.jinja

[[ -f "$chat_template" ]] \
    || die "chat template not found: $chat_template (cwd: $PWD)"

serve_args=(
    --port "$port"
    --host 0.0.0.0
    --trust_remote_code
    --tool-call-parser hermes
    --enable-auto-tool-choice
    --limit-mm-per-prompt.image "$max_image"
    # NOTE(review): "/" allows requests to reference any local path, and the
    # server listens on all interfaces (0.0.0.0). Confirm this is only run on
    # a trusted network, or narrow the path to the media directory.
    --allowed-local-media-path /
    --tensor-parallel-size "$num_gpu"
    --chat-template "$chat_template"
)

# exec so signals sent to this script's PID reach the server directly.
exec vllm serve "$model" "${serve_args[@]}"
