#!/bin/bash

# Load the Anaconda activation script, then switch to the "vllm" environment.
. /data/home/the/anaconda3/bin/activate
conda activate vllm

# Remove every proxy variable (both lower- and upper-case spellings) from the
# environment inherited by the commands below.
unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY all_proxy ALL_PROXY

# Model path and the arguments shared by every vLLM instance
MODEL_PATH='/ssd6/the/models/downloads/Qwen3-32B'
COMMON_ARGS='--tensor-parallel-size 2'
COMMON_ARGS+=' --max_num_seqs 512'

#######################################
# Block until a service answers its HTTP health probe, or abort the script.
#
# Polls http://localhost:<port><health_path> every 5 seconds. Gives up when
# the retry budget is exhausted or when the watched process is gone.
#
# Globals:
#   PID_1..PID_4 (read; only used to build the clean-up hint in error messages)
# Arguments:
#   $1 - TCP port the service listens on
#   $2 - human-readable service name used in messages
#   $3 - PID of the background process expected to serve that port
#   $4 - optional URL path to probe (default: /health; pass "" for the root)
# Outputs:
#   progress messages on stdout, error messages on stderr
# Returns:
#   0 once the probe succeeds; calls `exit 1` on timeout or if the process died
#######################################
wait_for_service() {
    local port=$1
    local service_name=$2
    local pid_to_watch=$3
    local health_path=${4-/health}
    local poll_interval=5
    local max_retries=120 # 120 attempts * 5 seconds = 600 seconds (10 minutes) timeout
    local attempt_num=0
    local url="http://localhost:${port}${health_path}"

    echo "Waiting for $service_name (PID: $pid_to_watch) on port $port to be ready..."
    # Plain GET on the health route: vLLM documents GET /health as its liveness
    # endpoint, while a HEAD on the server root is not a documented route and,
    # combined with --fail, is not expected to ever report success.
    # --max-time keeps one stalled connection from blocking the whole loop.
    while ! curl --output /dev/null --silent --fail --max-time 5 "${url}"; do
        if [[ "${attempt_num}" -ge "${max_retries}" ]]; then
            {
                echo ""
                echo "Error: $service_name on port $port did not start within the timeout period (${max_retries} attempts)."
                echo "You may need to check the log file for this service (e.g., vllm_groupX.log)."
                echo "To clean up, you might need to manually kill PIDs: $PID_1, $PID_2, $PID_3, $PID_4 (if started)."
            } >&2
            exit 1
        fi

        # Stop early if the background process itself has died; signal 0 only
        # tests for existence and delivers nothing to the process.
        if ! kill -0 "${pid_to_watch}" 2> /dev/null; then
            {
                echo ""
                echo "Error: $service_name (PID: $pid_to_watch) process died before service became ready on port $port."
                echo "Check the log file for this service (e.g., vllm_groupX.log)."
                echo "To clean up, you might need to manually kill other PIDs: $PID_1, $PID_2, $PID_3, $PID_4 (if started)."
            } >&2
            exit 1
        fi

        printf '.'
        sleep "${poll_interval}"
        attempt_num=$((attempt_num + 1))
    done
    echo "" # Newline after dots
    echo "$service_name on port $port is ready."
}


#######################################
# Start one vLLM server in the background for a single GPU group.
#
# Globals:
#   MODEL_PATH, COMMON_ARGS (read)
#   PORT_<n>, GPUS_<n>, PID_<n> (written, <n> being the group number)
# Arguments:
#   $1 - group number (also used in the log file name)
#   $2 - comma-separated GPU ids exported as CUDA_VISIBLE_DEVICES
#   $3 - TCP port passed to `vllm serve --port`
# Outputs:
#   progress messages on stdout; the server's stdout and stderr are written
#   to vllm_group<n>.log in the current directory
#######################################
launch_group() {
    local group=$1
    local gpus=$2
    local port=$3
    local -a extra_args=()
    local pid

    # COMMON_ARGS is a space-separated string. Split it explicitly so the
    # words become separate arguments without any glob expansion.
    read -r -a extra_args <<< "${COMMON_ARGS}"

    echo "Launching Group ${group}: GPUs ${gpus}, Port ${port}"
    # Trailing '&' runs the server in the background; each group gets its
    # own log file.
    CUDA_VISIBLE_DEVICES="${gpus}" nohup vllm serve "${MODEL_PATH}" "${extra_args[@]}" \
        --port "${port}" > "vllm_group${group}.log" 2>&1 &
    pid=$!

    # Publish the per-group settings under the names the rest of the script
    # uses (PORT_1, GPUS_1, PID_1, ...).
    printf -v "PORT_${group}" '%s' "${port}"
    printf -v "GPUS_${group}" '%s' "${gpus}"
    printf -v "PID_${group}" '%s' "${pid}"
    echo "Group ${group} launched with PID: ${pid}"
}

echo "Starting vLLM service groups..."
echo "Model: $MODEL_PATH"
echo "Common Args: $COMMON_ARGS"

# One "<gpu ids>:<port>" entry per group, listed in group order:
# group 1 -> GPUs 0,1 on port 10000 ... group 4 -> GPUs 6,7 on port 10003.
GROUP_SPECS=("0,1:10000" "2,3:10001" "4,5:10002" "6,7:10003")
group_index=0
for group_spec in "${GROUP_SPECS[@]}"; do
    group_index=$((group_index + 1))
    echo "-----------------------------------------------------"
    launch_group "${group_index}" "${group_spec%%:*}" "${group_spec##*:}"
done

# sleep 240

# echo "-----------------------------------------------------"
# # Wait for all services to be ready
# echo "Waiting for all vLLM services to start and become responsive..."

# wait_for_service ${PORT_1} "Group 1" ${PID_1}
# wait_for_service ${PORT_2} "Group 2" ${PID_2}
# wait_for_service ${PORT_3} "Group 3" ${PID_3}
# wait_for_service ${PORT_4} "Group 4" ${PID_4}

# echo "-----------------------------------------------------"
# echo "All vLLM instances have been launched and are ready."
# echo "Log files: vllm_group1.log, vllm_group2.log, vllm_group3.log, vllm_group4.log"