# Load conda's shell integration before activating: `conda activate` relies on
# a shell function that a non-interactive `bash script.sh` run does not define.
eval "$(conda shell.bash hook)"
conda activate env/deploy

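# Usage (manual steps, kept for reference):
# tmux kill-session -t deploy_coder30btrain
# tmux new-session -s deploy_coder30b
# bash src_my/deploy/deploy_qwen3_coder.sh [log-suffix]
#   The optional first argument becomes <suffix> in
#   src_my/deploy/deploy_qwen3_coder_<suffix>.log.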

# Serve the local Qwen3-Coder-30B-A3B-Instruct checkpoint with vLLM on
# 0.0.0.0:8000 (tensor parallel size 4, tool calling enabled) and mirror the
# server output to a log file.
#
# Arguments:
#   $1 - optional suffix for the log file name
#        (src_my/deploy/deploy_qwen3_coder_<suffix>.log, relative to the
#        current working directory).
#
# NOTE(review): --max-model-len 131702 looks like it may be a transposition of
# 131072 (128 * 1024) — confirm the intended context length.

# Report the server's exit status instead of tee's, so a failed start is not
# masked by the logging stage.
set -o pipefail

# 2>&1: send stderr (e.g. tracebacks) through tee as well, so the log file
# contains everything shown on the terminal.
# The log path is quoted so a suffix with spaces or glob characters stays one
# file name.
vllm serve /root/user/models/Qwen/Qwen3-Coder-30B-A3B-Instruct \
    --served-model-name Qwen3-Coder-30B-A3B-Instruct \
    --dtype auto \
    --host 0.0.0.0 \
    --port 8000 \
    --pipeline-parallel-size 1 \
    --tensor-parallel-size 4 \
    --cpu-offload-gb 0 \
    --enable-auto-tool-choice \
    --max-model-len 131702 \
    --tool-call-parser qwen3_coder 2>&1 \
    | tee "src_my/deploy/deploy_qwen3_coder_${1}.log"
