# Launch two RLHFlow Llama3.1-8B PRM checkpoints with `vllm serve`, side by
# side on one host:
#
#   model                          GPUs   port
#   Llama3.1-8B-PRM-Mistral-Data   0,1    8000
#   Llama3.1-8B-PRM-Deepseek-Data  2,3    8001
#
# Required environment:
#   VLLM_API_KEY  API key passed to both servers via --api-key. It is read
#                 from the environment so the secret is never committed to
#                 this file.

# Fail fast with a clear message instead of letting `--api-key` swallow the
# next flag (or make argument parsing fail) when no key is supplied.
: "${VLLM_API_KEY:?set VLLM_API_KEY to the API key the servers should use}"

# Each server is started in the background (`&`): `vllm serve` does not
# return while the server is running, so a foreground launch of the first
# server would prevent the second one from ever starting.
CUDA_VISIBLE_DEVICES=0,1 vllm serve \
    RLHFlow/Llama3.1-8B-PRM-Mistral-Data \
    --served-model-name Llama3.1-8B-PRM-Mistral-Data \
    --port 8000 \
    --tensor-parallel-size 2 \
    --dtype auto \
    --api-key "$VLLM_API_KEY" \
    --enable-prefix-caching --max-logprobs 20 &

CUDA_VISIBLE_DEVICES=2,3 vllm serve \
    RLHFlow/Llama3.1-8B-PRM-Deepseek-Data \
    --served-model-name Llama3.1-8B-PRM-Deepseek-Data \
    --port 8001 \
    --tensor-parallel-size 2 \
    --dtype auto \
    --api-key "$VLLM_API_KEY" \
    --enable-prefix-caching --max-logprobs 20 &

# Keep the script attached to both servers until they exit.
wait