#!/usr/bin/env bash
# Launch a vLLM OpenAI-compatible server in the background with nohup.
#
# Configuration:
#   MODEL_PATH       model directory or model id to serve; set it below or
#                    provide it through the environment (required).
#   parameter        optional tag used in the log file name; defaults to the
#                    last path component of MODEL_PATH.
#
# Server stdout/stderr go to logs/nohup_<parameter>_<port>.out.

MODEL_PATH="${MODEL_PATH:-}" # replace with your model path (or export MODEL_PATH)
port=20008
NUM_GPU=1
GPU_MEMORY_UTIL=0.9
# Only the listed GPU id(s) are visible to the server process; keep the number
# of ids listed here in step with NUM_GPU.
export CUDA_VISIBLE_DEVICES="2"

# Abort with a clear message instead of starting the server without a model.
: "${MODEL_PATH:?MODEL_PATH is empty - set it to the model you want to serve}"

# The log name previously expanded an unset variable, yielding
# "nohup__<port>.out"; fall back to the model's base name when no tag is given.
parameter="${parameter:-$(basename -- "${MODEL_PATH}")}"

# The redirect below fails (and the server never starts) if logs/ is missing.
log_dir="logs"
mkdir -p -- "${log_dir}" || exit 1
log_file="${log_dir}/nohup_${parameter}_${port}.out"

nohup vllm serve "${MODEL_PATH}" \
  --port "${port}" \
  --tensor-parallel-size "${NUM_GPU}" \
  --gpu-memory-utilization "${GPU_MEMORY_UTIL}" \
  >"${log_file}" 2>&1 &