#!/usr/bin/env bash
# Serve Qwen3-VL-32B-Instruct through `vllm serve`, mirroring its stdout to a
# log file via `tee`.
#
# Usage (run from the repository root so the relative log path resolves):
#   tmux new-session -s deploy_vl3_32b
#   bash src_my/deploy/deploy_qwenvl3_32b.sh [log_tag]
#
# Arguments:
#   $1 - optional tag inserted into the log file name:
#        src_my/deploy/deploy_qwenvl3_32b_<log_tag>.log
#
# Exit status: non-zero if `vllm serve` (or `tee`) fails, or if `vllm` cannot
# be found on PATH.

# Without pipefail the pipeline below would always report tee's status and a
# crashed server would look like a clean exit.
set -o pipefail

conda_env=/root/user/code_agent/env/qwen3vl
model_path=/root/user/models/Qwen/Qwen3-VL-32B-Instruct
log_file="src_my/deploy/deploy_qwenvl3_32b_${1:-}.log"

# `conda activate` is a shell function that only exists after conda's hook has
# been evaluated; a script started with `bash script.sh` does not read the
# interactive rc files, so load the hook explicitly before activating.
if command -v conda >/dev/null 2>&1; then
  eval "$(conda shell.bash hook)"
  conda activate "${conda_env}" \
    || printf 'warning: could not activate conda env %s; using the current environment\n' \
      "${conda_env}" >&2
else
  printf 'warning: conda not found on PATH; using the current environment\n' >&2
fi

# Fail early with a clear message instead of a bare "command not found".
if ! command -v vllm >/dev/null 2>&1; then
  printf 'error: vllm not found on PATH (expected from conda env %s)\n' \
    "${conda_env}" >&2
  exit 1
fi

# Arguments are kept in an array so each flag can be commented and nothing is
# re-split by the shell. Flag meanings follow the vLLM CLI documentation.
vllm_args=(
  "${model_path}"
  --served-model-name Qwen3-VL-32B-Instruct  # model name exposed by the API
  --port 8000
  --host 0.0.0.0                             # listen on all interfaces
  --tensor-parallel-size 4
  --limit-mm-per-prompt.video 0              # per-prompt video limit set to 0
  --disable-cascade-attn
  --mm-processor-cache-gb 0                  # multimodal processor cache size set to 0
  # NOTE(review): video kwargs are passed although the video limit above is 0;
  # kept as in the original invocation.
  --media-io-kwargs '{"video": {"num_frames": -1}}'
  --async-scheduling
)

vllm serve "${vllm_args[@]}" | tee "${log_file}"
