#!/usr/bin/bash

# Start `vllm serve` for one model inside the `vllm-090825-nightly` conda env.
#
# Usage: <script> MODEL CUDA_DEVICES PORT
#   MODEL         model name or path handed to `vllm serve`
#   CUDA_DEVICES  value exported as CUDA_VISIBLE_DEVICES (e.g. "0" or "0,1")
#   PORT          numeric TCP port handed to --port
#
# NOTE: `set -u` is intentionally not enabled, as a precaution in case the
# conda activation scripts sourced below read unset variables; arguments are
# validated explicitly instead.

if (( $# < 3 )); then
    printf 'Usage: %s MODEL CUDA_DEVICES PORT\n' "${0##*/}" >&2
    exit 2
fi

model=$1
port=$3
# num_gpu=$3
readonly max_image=70
# max_video=0

if [[ ! "$port" =~ ^[0-9]+$ ]]; then
    printf 'error: PORT must be a non-negative integer, got: %s\n' "$port" >&2
    exit 2
fi

export CUDA_VISIBLE_DEVICES="$2"

if ! command -v conda >/dev/null 2>&1; then
    printf 'error: conda not found on PATH\n' >&2
    exit 1
fi

# The hook defines the `conda` shell function that `conda activate` needs in
# a non-interactive shell.
eval "$(conda shell.bash hook)"
if ! conda activate vllm-090825-nightly; then
    # Abort rather than silently running whatever `vllm` happens to be on PATH.
    printf 'error: failed to activate conda env vllm-090825-nightly\n' >&2
    exit 1
fi

# Arguments are collected in an array so each flag can carry its own comment
# and every value stays a single word.
vllm_args=(
    --port "$port"
    --host 0.0.0.0                # listen on all interfaces
    --tool-call-parser hermes
    --enable-auto-tool-choice
    --trust-remote-code
    # NOTE(review): "/" lets requests reference local media anywhere on the
    # server's filesystem; narrow this path if the port is reachable by
    # untrusted clients.
    --allowed-local-media-path /
    --limit-mm-per-prompt.image "$max_image"
    --max-model-len=65536
    # --tensor-parallel-size "$num_gpu"
)

# Last command: the script's exit status is that of `vllm serve`.
vllm serve "$model" "${vllm_args[@]}"
