# Start a local vLLM server in the background and export the settings that
# clients need to reach it.
#
# Usage:
#   <this-script> MODEL_PATH
#
# Arguments:
#   $1  MODEL_PATH - model to serve; forwarded to `vllm serve`.
#
# Environment:
#   GPU_NUMS - tensor-parallel size handed to vLLM (default: 8).
#
# The file stays POSIX-sh compatible and never calls `exit`, so it behaves
# the same whether it is executed or sourced.

# API key and endpoint for the local server (port must match --port below).
export VLLM_API_KEY='local_vllm'
export LOCAL_VLLM_API_KEY="$VLLM_API_KEY"
export VLLM_HOST="http://localhost:8000/v1"
export VLLM_CONFIGURE_LOGGING=0

MODEL_PATH=$1

# Fall back to 8 GPUs when GPU_NUMS is unset or empty. The previous check
# tested the literal string "GPU_NUMS", so the default was never applied.
: "${GPU_NUMS:=8}"

if [ -z "$MODEL_PATH" ]; then
  # Non-fatal: keep the historical behaviour of launching without a model
  # argument, but make the omission visible.
  echo "Warning: no MODEL_PATH given; vllm serve will run without a model argument" >&2
fi

echo "Starting vLLM server with model path: $MODEL_PATH on $GPU_NUMS GPUs"

# ${MODEL_PATH:+"$MODEL_PATH"} passes the path as exactly one word (safe for
# spaces/globs) and expands to nothing at all when the path is empty.
# The tensor-parallel size now follows GPU_NUMS instead of a hard-coded 8,
# so the banner above matches what is actually launched.
vllm serve ${MODEL_PATH:+"$MODEL_PATH"} \
  --enable-reasoning \
  --port 8000 \
  --seed 18833 \
  --reasoning_parser deepseek_r1 \
  --tensor_parallel_size "$GPU_NUMS" &