#!/usr/bin/env bash
# Deployment launcher: activates the conda environment at env/deploy and then
# starts the model server defined further down in this file.
# All paths in this script are relative, so run it from the repository root.

# `conda activate` is a shell function that only exists once conda's shell hook
# has been evaluated. A non-interactive `bash script.sh` does not read the
# interactive startup files that normally install it, so load the hook here.
if ! command -v conda >/dev/null 2>&1; then
  printf 'error: conda was not found on PATH; cannot activate env/deploy\n' >&2
  exit 1
fi
conda_hook="$(conda shell.bash hook)" || {
  printf 'error: failed to obtain the conda shell hook\n' >&2
  exit 1
}
eval "${conda_hook}"

# Stop here rather than launching the server from the wrong environment.
conda activate env/deploy || {
  printf 'error: could not activate conda environment env/deploy\n' >&2
  exit 1
}

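# Typical manual workflow (run from the repository root; the paths in this
# script are relative). The optional argument is appended to the log file name.
#   tmux kill-session -t deploy_coder30b
#   tmux new-session -s deploy_coder30btrained
#   bash src_my/deploy/deploy_qwen3_coder_trained.sh [LOG_SUFFIX]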

# Start `vllm serve` for the fine-tuned checkpoint and mirror its output into a
# log file.
#
# Arguments:
#   $1 - optional suffix appended to the log file name (empty if omitted).
# Exit status: that of `vllm serve` (or of `tee` if only the logging fails).

# Without pipefail the pipeline's status would be tee's, hiding a vLLM failure.
set -o pipefail

# Checkpoint directory, relative to the current working directory.
model_path=outs/mca/qwen3_coder_30b_full_fullstack-agent_webgen-instruct_direct-gen_Qwen3-Coder-30B-A3B-Instruct_llama-factory-openai_2032/qwen3_coder_30b_full_fullstack-agent_webgen-instruct_direct-gen_Qwen3-Coder-30B-A3B-Instruct_llama-factory-openai_2032_checkpoint-126

# Quoted where used, so a suffix containing spaces or glob characters still
# yields exactly one log file.
log_file="src_my/deploy/deploy_qwen3_coder_trained_${1:-}.log"

vllm_args=(
  --dtype auto
  --host 0.0.0.0                 # listen on all network interfaces
  --port 8000
  --pipeline-parallel-size 1
  --tensor-parallel-size 4
  --cpu-offload-gb 0
  --enable-auto-tool-choice
  # NOTE(review): 131702 looks like a transposition of 131072 (128 * 1024);
  # value kept as-is, confirm which one is intended.
  --max-model-len 131702
  --tool-call-parser qwen3_coder
)

# stderr is merged into stdout so that tracebacks and warnings also reach the
# log file, not just the terminal.
vllm serve "${model_path}" "${vllm_args[@]}" 2>&1 | tee "${log_file}"
 