# ray start --head --node-ip-address 0.0.0.0 --num-gpus 4
# ray stop --force

# Start Ray first, then submit the job with sh on the same node.

# vllm > 0.8.5 may cause errors with qwen2.5-7b-instruct-1m