#!/bin/bash
# Deploy vLLM OpenAI-compatible server in background
#
# This script mirrors deploy_sglang_background.sh but uses vLLM instead.
# It starts an OpenAI-compatible API server exposing /v1/completions and /v1/chat/completions.

# Abort as soon as a command fails outside of a conditional context.
set -o errexit

# Work from the directory containing this script so that the relative paths
# used further down (logs/, deploy_rewriter.sh) do not depend on the caller's cwd.
SCRIPT_DIR="$(
    cd "$(dirname "${BASH_SOURCE[0]}")" && pwd
)"
cd "$SCRIPT_DIR"

# Option defaults. An empty string means "not supplied on the command line";
# empty model / LoRA / served-name / port values are not forwarded, which
# leaves the choice to deploy_rewriter.sh.
MODEL_PATH="" LORA_PATH="" SERVED_MODEL_NAME=""
SERVER_NODE="" SERVER_PORT=""
WAIT_FOR_READY=true

# Parse command line arguments

# Print the option summary to stdout.
usage() {
    echo "Usage: $0 [OPTIONS]"
    echo "  -m, --model MODEL_PATH          Model path (default: from deploy_rewriter.sh)"
    echo "  -l, --lora-path PATH            (optional) LoRA path"
    echo "  --served-model-name NAME        Served model name (default: from deploy_rewriter.sh)"
    echo "  --server-node NODE              Server node for URL/checks (default: localhost)"
    echo "  --server-port PORT              Server port for URL/checks (default: 8000)"
    echo "  --no-wait                       Don't wait for server to be ready"
    echo "  -h, --help                      Show this help and exit"
}

# Abort with a message when a value-taking option is the last word.
# Arguments: $1 - option name as typed
#            $2 - number of words left, including the option itself
# Without this check `shift 2` fails and `set -e` ends the script silently.
require_value() {
    if (( $2 < 2 )); then
        echo "Error: option $1 requires a value" >&2
        usage >&2
        exit 1
    fi
}

# Parse the given words into MODEL_PATH, LORA_PATH, SERVED_MODEL_NAME,
# SERVER_NODE, SERVER_PORT and WAIT_FOR_READY (all globals).
# Exits 1 on an unknown option, a missing value or an invalid port;
# exits 0 after printing help for -h/--help.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -m|--model)
                require_value "$1" "$#"
                MODEL_PATH="$2"
                shift 2
                ;;
            -l|--lora-path|--lora)
                require_value "$1" "$#"
                LORA_PATH="$2"
                shift 2
                ;;
            --served-model-name)
                require_value "$1" "$#"
                SERVED_MODEL_NAME="$2"
                shift 2
                ;;
            --server-node)
                require_value "$1" "$#"
                SERVER_NODE="$2"
                shift 2
                ;;
            --server-port)
                require_value "$1" "$#"
                SERVER_PORT="$2"
                shift 2
                ;;
            --no-wait)
                WAIT_FOR_READY=false
                shift
                ;;
            -h|--help)
                usage
                exit 0
                ;;
            *)
                echo "Unknown option: $1" >&2
                usage >&2
                exit 1
                ;;
        esac
    done

    # The port is later interpolated into process-matching patterns and the
    # server URL, so reject anything that is not a plain TCP port number.
    if [[ -n "$SERVER_PORT" ]]; then
        if [[ ! "$SERVER_PORT" =~ ^[0-9]{1,5}$ ]] \
            || (( 10#$SERVER_PORT < 1 || 10#$SERVER_PORT > 65535 )); then
            echo "Error: --server-port must be an integer between 1 and 65535 (got: $SERVER_PORT)" >&2
            exit 1
        fi
    fi
}

parse_args "$@"

# Resolve the concrete host and port used for health checks and printed URLs.
# These need real values even when the options were omitted.
CHECK_NODE="$SERVER_NODE"
if [ -z "$CHECK_NODE" ]; then
    CHECK_NODE="localhost"
fi

CHECK_PORT="$SERVER_PORT"
if [ -z "$CHECK_PORT" ]; then
    CHECK_PORT="8000"  # should match deploy_rewriter.sh default
fi

# Base URL of the server
printf -v SERVER_URL 'http://%s:%s' "$CHECK_NODE" "$CHECK_PORT"

# Make sure the log directory exists
mkdir -p logs

# Check if server is already running; if so, report it and exit successfully.
# -f makes curl fail on HTTP error statuses, so an unrelated or unhealthy
# service answering on the port is not mistaken for a ready server. The
# timeouts keep the probe from hanging on an unresponsive host.
if curl -sf --connect-timeout 5 --max-time 10 "$SERVER_URL/health" > /dev/null 2>&1; then
    echo "✓ vLLM server is already running on $CHECK_NODE:$CHECK_PORT"
    echo "  Server is ready and accessible"
    # Get PID if on localhost
    if [[ "$CHECK_NODE" == "localhost" || "$CHECK_NODE" == "127.0.0.1" ]]; then
        # The port is anchored on the right so that e.g. 80 does not also
        # match a server started with --port 8000 or 8080.
        SERVER_PID=$(pgrep -f "vllm serve.*--port[ =]${CHECK_PORT}( |\$)" | head -1 || true)
        if [[ -n "$SERVER_PID" ]]; then
            echo "  PID: $SERVER_PID"
        fi
    fi
    echo ""
    echo "To check server status:"
    echo "  curl $SERVER_URL/health"
    echo "  curl $SERVER_URL/v1/models"
    echo ""
    echo "To view logs:"
    echo "  tail -f logs/vllm_server.log"
    exit 0
fi

# Only start a server when the target is this machine. For a remote node the
# script can only probe it, so this branch always exits. It runs before the
# vllm PATH check so that a remote-only check does not fail with a misleading
# "vllm is not available" error on a machine without vllm.
if [[ "$CHECK_NODE" != "localhost" && "$CHECK_NODE" != "127.0.0.1" ]]; then
    echo "⚠️  Remote server node specified ($CHECK_NODE)"
    echo "  Skipping local server start. Will only check if server is running."
    # -f: treat HTTP error statuses as "not accessible"; timeouts bound the probe.
    if curl -sf --connect-timeout 5 --max-time 10 "$SERVER_URL/health" > /dev/null 2>&1; then
        echo "✓ Remote server is running and accessible"
        exit 0
    else
        echo "❌ Remote server is not accessible at $SERVER_URL" >&2
        exit 1
    fi
fi

# From here on a local launch is intended, so the vllm executable must exist.
if ! command -v vllm &> /dev/null; then
    echo "Error: vllm is not available in PATH" >&2
    exit 1
fi

# Summarize the launch parameters; optional settings are listed only when set.
printf '%s\n' "Starting vLLM server in background"
printf '  Model: %s\n' "${MODEL_PATH:-(default from deploy_rewriter.sh)}"
[ -z "$SERVED_MODEL_NAME" ] || printf '  Served model name: %s\n' "$SERVED_MODEL_NAME"
[ -z "$LORA_PATH" ] || printf '  LoRA: %s\n' "$LORA_PATH"
printf '  Server node: %s\n' "$CHECK_NODE"
printf '  Server URL: %s\n' "$SERVER_URL"
printf '%s\n' "  Logs will be written to: logs/vllm_server.log" ""

# Stop any existing server processes on this port (best-effort; a failed or
# empty match is deliberately ignored).
# The port is anchored on the right so that e.g. port 80 does not also kill
# servers running on 8000 or 8080; "--port=N" is matched as well as "--port N".
pkill -f "vllm serve .*--port[ =]${CHECK_PORT}( |\$)" 2>/dev/null || true
pkill -f "uvicorn.*:${CHECK_PORT}( |\$)" 2>/dev/null || true
# Give the terminated processes a moment to exit before relaunching.
sleep 2

# Assemble the argument vector for deploy_rewriter.sh, which is launched in
# the background with nohup. Only options that were actually supplied are
# forwarded; an array keeps values containing spaces intact.
DEPLOY_ARGS=()
for _opt_spec in \
    "--model=MODEL_PATH" \
    "--lora-path=LORA_PATH" \
    "--served-model-name=SERVED_MODEL_NAME" \
    "--port=SERVER_PORT"
do
    _opt_flag="${_opt_spec%%=*}"   # text before the first '=' is the flag
    _opt_var="${_opt_spec#*=}"     # text after it names the variable holding the value
    _opt_val="${!_opt_var}"
    if [[ -n "$_opt_val" ]]; then
        DEPLOY_ARGS+=("$_opt_flag" "$_opt_val")
    fi
done
unset _opt_spec _opt_flag _opt_var _opt_val

# Fail early if the launcher script is missing. Otherwise the background job
# dies immediately and, with --no-wait, the script would still report success.
if [[ ! -f deploy_rewriter.sh ]]; then
    echo "Error: deploy_rewriter.sh not found in $SCRIPT_DIR" >&2
    exit 1
fi

# Launch detached from the terminal; stdout and stderr go to the log file.
nohup bash deploy_rewriter.sh "${DEPLOY_ARGS[@]}" > logs/vllm_server.log 2>&1 &
# PID of the backgrounded `bash deploy_rewriter.sh` process.
SERVER_PID=$!

echo "✓ Server process started with PID: $SERVER_PID"
echo ""

# Succeed when the server answers /health or, failing that, /v1/models.
# -f counts HTTP error statuses as "not ready"; the timeouts stop a single
# probe from blocking the wait loop indefinitely.
server_is_up() {
    curl -sf --connect-timeout 5 --max-time 10 "$SERVER_URL/health" > /dev/null 2>&1 \
        || curl -sf --connect-timeout 5 --max-time 10 "$SERVER_URL/v1/models" > /dev/null 2>&1
}

# Wait for server to be ready if requested
if [[ "$WAIT_FOR_READY" == true ]]; then
    echo "Waiting for server to initialize (this may take several minutes)..."
    echo "Press Ctrl+C to cancel and check logs manually"
    echo ""

    # Poll every 2 seconds until the server answers. There is intentionally no
    # timeout; the loop ends only on readiness or when the launched process exits.
    WAITED=0

    while true; do
        # Check if process is still running
        if ! ps -p "$SERVER_PID" > /dev/null 2>&1; then
            echo ""
            echo "❌ Server process died! Check logs:"
            echo "  tail -40 logs/vllm_server.log"
            exit 1
        fi

        # A single probe decides readiness. The probe runs inside `if`, so a
        # failing curl cannot trip `set -e` and end the script silently.
        if server_is_up; then
            echo ""
            echo "✓ Server is ready and responding! (waited ${WAITED}s)"
            break
        fi

        # Show progress every 10 seconds
        if (( WAITED > 0 && WAITED % 10 == 0 )); then
            echo -n " (${WAITED}s)"
        else
            echo -n "."
        fi

        sleep 2
        WAITED=$((WAITED + 2))
    done
    echo ""

    # Final verification - probe once more after a short pause
    echo "Performing final server verification..."
    sleep 2
    if server_is_up; then
        echo "✓ Server verification successful"
    else
        echo "⚠️  Server verification failed, but continuing..."
    fi
    echo ""
else
    echo "Skipping wait (--no-wait flag set)"
    echo "  Server is starting in background. Check status with:"
    echo "    curl $SERVER_URL/health"
    echo "    curl $SERVER_URL/v1/models"
    echo ""
fi

# Closing summary: where the server lives and how to inspect or stop it.
cat <<EOF
Server information:
  PID: $SERVER_PID
  URL: $SERVER_URL

To check server status:
  curl $SERVER_URL/health
  curl $SERVER_URL/v1/models

To view logs:
  tail -f logs/vllm_server.log

To stop server:
  kill $SERVER_PID
  # or: pkill -f "vllm serve"
EOF


