#!/bin/bash
# Orchestrates VLLM evaluations: one VLLM server per model, one evaluator run
# per (dataset, seed) pair.
#
# Usage:
#   Run in the background, logging to ./logs/vllm_run.log:
#     nohup bash argus_vllm_evals.sh > ./logs/vllm_run.log 2>&1 &
#   Stop the orchestrator, evaluator and server processes:
#     bash argus_vllm_evals.sh --kill_all

if [ "${1:-}" = "--kill_all" ]; then
    echo "Stopping all VLLM evaluation processes..."
    # `pkill -f argus_vllm_evals.sh` would also match this very invocation
    # (its command line contains the script name) and terminate it before the
    # evaluator and server are stopped. Signal every match except ourselves.
    # Word-splitting of the pgrep output (one numeric PID per line) is intended.
    for pid in $(pgrep -f "argus_vllm_evals.sh"); do
        if [ "$pid" != "$$" ]; then
            # A PID may already be gone by the time we signal it; ignore that.
            kill "$pid" 2>/dev/null || true
        fi
    done
    pkill -f "argus_vllm_evaluator.py" || true
    pkill -f "argus_vllm_server.py" || true
    echo "All VLLM evaluation processes stopped"
    exit 0
fi

# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------

# Python interpreter used to launch the server, the evaluator and the GPU
# cleanup snippet.
PYTHON_PATH="/home/name/rag-reasoning/argus/.venv/bin/python"

# Evaluation matrix: every model is run on every dataset with every seed.
MODELS=(
    "LFM2-1.2B-fixed-sft"
)
SEEDS=(
    "seed_001"
)
DATASETS=(
    "bright-fast"
    "fever"
    "scifact"
    "hotpotqa"
    "nfcorpus"
    "msmarco"
)

# Options forwarded to the evaluator.
SPLIT="test"
BATCH_SIZE=16

# Service endpoints forwarded to the evaluator.
VLLM_SERVER_URL="http://localhost:5001"
IR_SERVER_URL="http://localhost:5000"
SQL_SERVER_URL="http://localhost:8000"

# ---------------------------------------------------------------------------
# Startup banner
# ---------------------------------------------------------------------------
model_count=${#MODELS[@]}
seed_count=${#SEEDS[@]}
dataset_count=${#DATASETS[@]}
run_total=$((model_count * seed_count * dataset_count))

printf 'Starting VLLM evaluations for %s models with %s seeds across %s datasets\n' \
    "$model_count" "$seed_count" "$dataset_count"
printf 'Total runs: %s\n' "$run_total"
printf 'Batch size: %s\n' "$BATCH_SIZE"
printf '%s\n' "Execution order: model -> dataset -> seed (minimal VLLM cleanup)"

#######################################
# Stop any running VLLM server process and run a best-effort CUDA cache
# clear on the GPUs listed in the embedded Python snippet (3,4,5,6,7).
# Globals:   PYTHON_PATH (read) - interpreter used for the torch snippet
# Arguments: none
# Outputs:   one progress line on stdout; the snippet's stderr is discarded
# Returns:   status of the final sleep (every other step is best-effort)
#######################################
function cleanup_gpu_memory() {
    local grace_seconds=10
    local waited=0

    echo "Targeted VLLM cleanup (preserving IR server on GPUs 0,1,2)..."

    # Ask the server to exit first so it gets a chance to shut down cleanly;
    # SIGKILL is only used if it is still alive after the grace period.
    pkill -TERM -f "argus_vllm_server.py" 2>/dev/null || true
    while [ "$waited" -lt "$grace_seconds" ] \
        && pgrep -f "argus_vllm_server.py" >/dev/null 2>&1; do
        sleep 1
        waited=$((waited + 1))
    done
    if pgrep -f "argus_vllm_server.py" >/dev/null 2>&1; then
        pkill -9 -f "argus_vllm_server.py" 2>/dev/null || true
    fi
    sleep 3
    
    # Only clear GPU memory for VLLM GPUs (3,4,5,6,7), not IR GPUs (0,1,2)
    # NOTE(review): torch.cuda.empty_cache() releases cached blocks held by the
    # calling process only; in this freshly started interpreter it presumably
    # frees nothing that belonged to the killed server - confirm this step is
    # still needed.
    # Errors are deliberately suppressed: this step must never abort the run.
    "$PYTHON_PATH" -c "
import torch
import gc
try:
    if torch.cuda.is_available():
        # Only clear cache for VLLM GPUs (3,4,5,6,7)
        vllm_gpus = [3, 4, 5, 6, 7]
        for gpu_id in vllm_gpus:
            if gpu_id < torch.cuda.device_count():
                with torch.cuda.device(gpu_id):
                    torch.cuda.empty_cache()
        torch.cuda.synchronize()
        print('VLLM GPU memory cleared (GPUs 3,4,5,6,7)')
except Exception as e:
    print(f'GPU cleanup error: {e}')
gc.collect()
" 2>/dev/null || true
    
    sleep 2
}

# Number of evaluator invocations that exited non-zero, across all models.
FAILED_RUNS=0

# For each model: start a VLLM server, wait until it reports ready, run the
# evaluator for every (dataset, seed) pair against it, then stop the server.
for model in "${MODELS[@]}"; do
    echo "========================================"
    echo "STARTING MODEL: $model"
    echo "========================================"
    
    # Clean up any existing processes before loading new model
    cleanup_gpu_memory
    
    echo "Starting VLLM server for $model..."
    # The server runs in the background; all of its output is discarded.
    "$PYTHON_PATH" argus_vllm_server.py --model "$model" > /dev/null 2>&1 &
    SERVER_PID=$!
    
    echo "Waiting for server to be ready..."
    READY=false
    MAX_WAIT=600   # seconds to wait for the health endpoint before giving up
    WAIT_TIME=0
    
    while [ "$WAIT_TIME" -lt "$MAX_WAIT" ]; do
        # Poll the same server URL that is handed to the evaluator below, so
        # changing VLLM_SERVER_URL keeps the health check in sync.
        if curl -s "${VLLM_SERVER_URL}/health" 2>/dev/null | grep -q "ready"; then
            echo "Server ready for $model"
            READY=true
            break
        fi
        
        # Check if server process died
        if ! kill -0 "$SERVER_PID" 2>/dev/null; then
            echo "Server process died for $model"
            READY=false
            break
        fi
        
        sleep 5
        WAIT_TIME=$((WAIT_TIME + 5))
    done
    
    # READY is still false both when the server died and when MAX_WAIT elapsed.
    if [ "$READY" = false ]; then
        echo "Server failed to start for $model, cleaning up and moving to next model"
        kill "$SERVER_PID" 2>/dev/null || true
        cleanup_gpu_memory
        continue
    fi
    
    # Run all datasets and seeds for this model (model stays loaded)
    for dataset in "${DATASETS[@]}"; do
        echo "--- Processing dataset: $dataset for model $model ---"
        
        for seed in "${SEEDS[@]}"; do
            echo "Running $model on $dataset with $seed"
            
            # Only report completion when the evaluator actually succeeded;
            # a failed run is logged and counted, and the sweep continues.
            if "$PYTHON_PATH" argus_vllm_evaluator.py \
                --dataset-name "$dataset" \
                --split "$SPLIT" \
                --model-name "$model" \
                --experiment-id "$seed" \
                --batch-size "$BATCH_SIZE" \
                --vllm-server-url "$VLLM_SERVER_URL" \
                --ir-server-url "$IR_SERVER_URL" \
                --sql-server-url "$SQL_SERVER_URL"; then
                echo "Completed $model on $dataset ($seed)"
            else
                eval_status=$?
                echo "FAILED $model on $dataset ($seed): evaluator exited with status $eval_status" >&2
                FAILED_RUNS=$((FAILED_RUNS + 1))
            fi
        done
        
        echo "--- Completed all seeds for $dataset on model $model ---"
    done
    
    echo "Stopping server for $model..."
    kill "$SERVER_PID" 2>/dev/null || true
    
    echo "========================================"
    echo "COMPLETED MODEL: $model"
    echo "========================================"
done

if [ "$FAILED_RUNS" -gt 0 ]; then
    echo "WARNING: $FAILED_RUNS evaluation run(s) failed; see FAILED messages above" >&2
fi

# All models processed: run the cleanup routine one last time.
cleanup_gpu_memory

# Closing summary of what was evaluated (arrays are joined with spaces).
printf '%s\n' \
    "All VLLM evaluations completed!" \
    "Results are stored in the database with:"
printf '  - Experiment IDs: %s\n' "${SEEDS[*]}"
printf '  - Datasets: %s\n' "${DATASETS[*]}"
printf '  - Models: %s\n' "${MODELS[*]}"