#!/usr/bin/env bash
#
# Launch a vLLM OpenAI-compatible server for Qwen3-Coder-480B-A35B-Instruct-FP8
# and mirror its output to a log file.
#
# Usage (paths are relative to the current working directory):
#   tmux new-session -s deploy_coder480b
#   bash src_my/deploy/deploy_qwen3_coder_480B.sh [LOG_SUFFIX]
#
# Arguments:
#   $1 - optional suffix appended to the log file name
#        (src_my/deploy/deploy_qwen3_coder_480B_<LOG_SUFFIX>.log).
#
# Exit status: that of `vllm serve` (via pipefail), or non-zero if the conda
# environment cannot be activated.

# Abort on the first failing command and make the pipeline report a vllm
# failure instead of tee's (always successful) status.
set -eo pipefail

die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# `conda activate` is a shell function that is not inherited by a
# non-interactive `bash script.sh`; load the hook explicitly first.
command -v conda >/dev/null 2>&1 || die "conda not found on PATH"
conda_hook=$(conda shell.bash hook) || die "failed to load conda shell hook"
eval "${conda_hook}"
conda activate env/deploy || die "cannot activate conda env 'env/deploy'"

# Enabled only after activation: activation scripts may reference unset
# variables and would otherwise abort the script.
set -u

readonly MODEL_NAME="Qwen3-Coder-480B-A35B-Instruct-FP8"
# Quoted so a suffix containing spaces or glob characters stays one path;
# an omitted suffix yields the same file name the unquoted form produced.
readonly LOG_FILE="src_my/deploy/deploy_qwen3_coder_480B_${1:-}.log"

vllm_args=(
  --served-model-name "${MODEL_NAME}"
  --dtype auto
  --host 0.0.0.0
  --port 8000
  --pipeline-parallel-size 1
  --tensor-parallel-size 8
  --cpu-offload-gb 0
  # NOTE(review): 131702 is not 131072 (128 * 1024) - confirm this value is
  # intentional and not a transposed digit.
  --max-model-len 131702
  --enable-auto-tool-choice
  --enable-expert-parallel
  --disable-cascade-attn
  --tool-call-parser qwen3_coder
)

# 2>&1 so output written to stderr is captured in the log as well as shown
# on the terminal.
vllm serve "${MODEL_NAME}" "${vllm_args[@]}" 2>&1 \
  | tee "${LOG_FILE}"
