#!/bin/bash
#
# Launch an SGLang inference server on this node and register it as a worker
# with a master node.
#
# Steps:
#   1. Derive the data-parallel size from the number of GPUs reported by
#      nvidia-smi and the fixed tensor-parallel size.
#   2. Start `sglang.launch_server` in the background.
#   3. Wait until the server answers its health endpoint (or fail if it exits
#      or the startup timeout elapses).
#   4. POST this worker's URL to the master's /add_worker endpoint.
#   5. Stay in the foreground until the server exits, returning its status.
#
# Optional environment overrides (defaults match the values hard-coded below):
#   MASTER_IP, MASTER_PORT  - address of the master node to register with
#   WORKER_PORT             - port this worker's server listens on
#   STARTUP_TIMEOUT         - max seconds to wait for the server to be healthy
#
# NOTE: nvidia-smi -L lists every GPU on the machine; if you restrict GPUs via
# CUDA_VISIBLE_DEVICES below, double-check that the derived DP is still right.
# export CUDA_VISIBLE_DEVICES="4,5,6,7"
set -euo pipefail

export VLLM_USE_MODELSCOPE="False"

MASTER_IP="${MASTER_IP:-IP_OF_MASTER_NODE}"
MASTER_PORT="${MASTER_PORT:-10080}"

python=/root/miniconda3/envs/sg/bin/python
model_path=path_to/Qwen2.5-72B-Instruct
TP=4
GPU_PER_NODE=$(nvidia-smi -L | wc -l)
DP=$((GPU_PER_NODE / TP))

# Integer division yields 0 when there are fewer GPUs than TP; refuse to
# launch with a meaningless data-parallel size.
if ((DP < 1)); then
  printf 'error: found %d GPU(s), need at least %d for TP=%d\n' \
    "${GPU_PER_NODE}" "${TP}" "${TP}" >&2
  exit 1
fi

# First address reported by `hostname -I`; this is what the master will use
# to reach the worker.
CURRENT_IP=$(hostname -I | awk '{print $1}')
if [[ -z "${CURRENT_IP}" ]]; then
  printf 'error: could not determine this node'\''s IP address\n' >&2
  exit 1
fi

WORKER_PORT="${WORKER_PORT:-30000}"
STARTUP_TIMEOUT="${STARTUP_TIMEOUT:-1800}"

"${python}" -m sglang.launch_server \
  --model-path "${model_path}" \
  --tp "${TP}" \
  --dp "${DP}" \
  --mem-fraction-static 0.9 \
  --host 0.0.0.0 \
  --port "${WORKER_PORT}" &
server_pid=$!

# Poll the health endpoint instead of sleeping for a fixed time: model loading
# time varies widely, and registering a worker that is not yet serving would
# have the master route requests to a dead endpoint.
health_url="http://127.0.0.1:${WORKER_PORT}/health"
deadline=$((SECONDS + STARTUP_TIMEOUT))
until curl --silent --fail --output /dev/null --max-time 5 "${health_url}"; do
  # Stop waiting as soon as the server process is gone.
  if ! kill -0 "${server_pid}" 2>/dev/null; then
    rc=0
    wait "${server_pid}" || rc=$?
    printf 'error: sglang server exited during startup (status %d)\n' "${rc}" >&2
    exit "$((rc == 0 ? 1 : rc))"
  fi
  if ((SECONDS >= deadline)); then
    printf 'error: sglang server not healthy after %d seconds\n' \
      "${STARTUP_TIMEOUT}" >&2
    kill "${server_pid}" 2>/dev/null || true
    exit 1
  fi
  sleep 5
done

# The URL is quoted so that '?' is not treated as a glob character.
register_url="http://${MASTER_IP}:${MASTER_PORT}/add_worker?url=http://${CURRENT_IP}:${WORKER_PORT}"
if ! curl --fail --silent --show-error -X POST "${register_url}"; then
  # Keep the (healthy) server running so it can still be registered manually.
  printf 'warning: failed to register worker with master at %s:%s\n' \
    "${MASTER_IP}" "${MASTER_PORT}" >&2
fi

# Block until the server exits and propagate its exit status.
wait "${server_pid}"