#!/usr/bin/env bash
#
# Launch the merged DeepSeek 32B SFT model behind vLLM's OpenAI-compatible
# API server as a detached background process.
#
# All paths are relative, so run this script from the directory it lives in.
# Server stdout/stderr are written to $log_path/deepseek-32B_vllm_app.log.

set -eu

# GPUs exposed to the server process (comma-separated device indices).
# This was previously "0" while the server was asked for tensor parallelism
# across 2 GPUs; tensor parallelism needs one visible device per shard, so
# the device list and the parallel size are now derived from the same value.
gpu_ids="0,1"
export CUDA_VISIBLE_DEVICES="$gpu_ids"

# Number of entries in gpu_ids = number of commas + 1.
tensor_parallel_size=$(( $(printf '%s' "$gpu_ids" | tr -cd ',' | wc -c) + 1 ))

log_path="../log"
port=6072

# NOTE: pt_model_path and adapter_path are not referenced by the launch
# command below; they are kept in case other tooling sources this file.
pt_model_path="../resource/model/pt_model/DeepSeek-R1-Distill-Qwen-32B"
adapter_path="../resource/model/ft_model/"

ft_model_path="../resource/model/ft_model/deepseek-32B-sft-merge-v1"

# Name passed to --served-model-name.
model_name="DeepSeek"

# The output redirection below fails (and the server never starts) when the
# log directory is missing, so create it up front and stop loudly on error.
if ! mkdir -p -- "$log_path"; then
    printf 'error: cannot create log directory: %s\n' "$log_path" >&2
    exit 1
fi

nohup python3 -m vllm.entrypoints.openai.api_server \
    --model "$ft_model_path" \
    --trust-remote-code \
    --port "$port" \
    --max-model-len 8192 \
    --tensor-parallel-size "$tensor_parallel_size" \
    --disable-log-stats \
    --served-model-name "$model_name" \
    --gpu-memory-utilization 0.9 > "$log_path/deepseek-32B_vllm_app.log" 2>&1 &


