# Start the vLLM server for the model named by $your_model_path.
#
# Required variable:
#   your_model_path - model to serve; passed as the single positional argument
#                     to `vllm serve`.
#
# NOTE(review): the --api-key value is a hard-coded placeholder and is passed on
# the command line; swap in a real secret before exposing this server.

# Fail early with a clear message rather than invoking `vllm serve` without a
# model argument when the variable is unset or empty.
: "${your_model_path:?your_model_path must be set to the model to serve}"

# The expansion is quoted so a path containing spaces or glob characters is
# passed to vllm as exactly one argument.
vllm serve "$your_model_path" \
  --dtype auto \
  --api-key token-abc123 \
  --gpu-memory-utilization 0.5