#!/bin/bash

# Run inference tasks in a loop over every configured model and shot-num value.
# Each model in the model list is run with shot-num from 0 to 5.

# ===================== Configuration Section =====================
# Model configuration list
MODEL_NAMES=("phi-4-reasoning-plus" "Qwen3-32B")

# GPU device configuration for each model: entry i belongs to MODEL_NAMES[i]
# and is passed to that model's tasks as CUDA_VISIBLE_DEVICES.
CUDA_DEVICES_LIST=(
    "0,1"
    "0,1,2,3"
)

# Path configuration
# Use ${HOME} rather than "~": tilde is not expanded inside double quotes, so
# "~/models" would be handed to the inference script as a literal "~" directory.
MODEL_BASE_PATH="${HOME}/models"
OUTPUT_BASE_PATH="./results"
LOG_BASE_PATH="logs"

# ===================== Smart Tensor Parallel Configuration Function =====================
#######################################
# Compute the tensor parallel size for a GPU device list.
# The size equals the number of non-empty entries in the list, so an empty
# list yields 0 and stray commas (e.g. "0,1,") do not inflate the count.
# Arguments: $1 - comma-separated GPU device ids, e.g. "0,1,2,3"
# Outputs:   writes the device count to stdout
#######################################
get_tensor_parallel_size() {
    local gpu_devices=${1-}
    local -a device_ids=()
    local device_id
    local gpu_count=0

    # Split on commas in the current shell; no external processes needed.
    IFS=',' read -r -a device_ids <<< "$gpu_devices"

    for device_id in "${device_ids[@]}"; do
        # Ignore surrounding whitespace so "0, 1" still counts two devices.
        device_id=${device_id//[[:space:]]/}
        if [[ -n "$device_id" ]]; then
            gpu_count=$((gpu_count + 1))
        fi
    done

    echo "$gpu_count"
}

# ===================== Configuration Validation =====================
# Each model is paired with the GPU device entry at the same index, so the two
# lists must have the same length before "validation passed" can be reported.
if [ "${#MODEL_NAMES[@]}" -ne "${#CUDA_DEVICES_LIST[@]}" ]; then
    echo "Configuration error: ${#MODEL_NAMES[@]} models but ${#CUDA_DEVICES_LIST[@]} GPU device entries" >&2
    exit 1
fi

# Display configuration information
# $$ is this script's own PID; $! would be empty here because no background
# job has been started yet.
echo "Task started, PID: $$"
echo "Configuration validation passed ✓"
echo "Configured ${#MODEL_NAMES[@]} models in total"
for i in "${!MODEL_NAMES[@]}"; do
    CUDA_DEVICES="${CUDA_DEVICES_LIST[$i]}"
    TENSOR_PARALLEL_SIZE=$(get_tensor_parallel_size "$CUDA_DEVICES")
    echo "  ${MODEL_NAMES[$i]}: ${TENSOR_PARALLEL_SIZE} cards parallel, GPU=${CUDA_DEVICES}"
done
echo ""

# ===================== Execute Tasks =====================
# Runs every (model, shot-num) combination strictly one at a time: each
# inference job is started in the background so its PID can be reported, and
# the script then waits on that PID before launching the next job.
echo "Starting multi-model multi-round inference tasks..."
echo "Model list: ${MODEL_NAMES[*]}"
echo "Shot number range: 0-5"
echo "=================================="

# Number of tasks that failed; determines the script's exit status.
FAILED_TASKS=0

# Iterate through all models
for i in "${!MODEL_NAMES[@]}"
do
    MODEL_NAME="${MODEL_NAMES[$i]}"
    CUDA_DEVICES="${CUDA_DEVICES_LIST[$i]}"
    TENSOR_PARALLEL_SIZE=$(get_tensor_parallel_size "$CUDA_DEVICES")

    MODEL_PATH="${MODEL_BASE_PATH}/${MODEL_NAME}"
    echo "Starting to process model: ${MODEL_NAME}"
    echo "Model path: ${MODEL_PATH}"
    echo "Using GPU: ${CUDA_DEVICES} (${TENSOR_PARALLEL_SIZE} cards)"
    echo "--------------------------------"

    # Iterate through all shot numbers
    for shot_num in {0..5}
    do
        echo "  Executing inference task for model ${MODEL_NAME}, shot-num=${shot_num}..."
        echo "  Using GPU: ${CUDA_DEVICES}, tensor parallel: ${TENSOR_PARALLEL_SIZE}"

        OUTPUT_FILE="${OUTPUT_BASE_PATH}/${MODEL_NAME}-shot${shot_num}.json"
        LOG_FILE="${LOG_BASE_PATH}/${MODEL_NAME}-shot${shot_num}.log"

        # Make sure the target directories exist: the log redirection below
        # fails (and the task with it) when the log directory is missing.
        if ! mkdir -p -- "$(dirname -- "$OUTPUT_FILE")" "$(dirname -- "$LOG_FILE")"; then
            echo "  ✗ ${MODEL_NAME} shot-num=${shot_num} could not create output/log directories" >&2
            FAILED_TASKS=$((FAILED_TASKS + 1))
            echo "  ................................."
            continue
        fi

        # Check if output file already exists
        if [ -f "$OUTPUT_FILE" ]; then
            echo "  Warning: Output file $OUTPUT_FILE already exists, will be overwritten"
        fi

        # Execute inference task; every expansion is quoted so paths with
        # spaces or glob characters reach the script as single arguments.
        CUDA_VISIBLE_DEVICES="${CUDA_DEVICES}" nohup python run_llm_vllm_inference.py \
            --model-path "${MODEL_PATH}" \
            --shot-num "${shot_num}" \
            --output-path "${OUTPUT_FILE}" \
            --tensor-parallel-size "${TENSOR_PARALLEL_SIZE}" \
            > "${LOG_FILE}" 2>&1 &

        # Get background process PID
        PID=$!
        echo "  Task started, PID: $PID"
        echo "  Log file: $LOG_FILE"
        echo "  Output file: $OUTPUT_FILE"

        # Wait for current task to complete before executing next one;
        # wait returns the task's own exit status.
        echo "  Waiting for shot-num=${shot_num} task to complete..."
        if wait "$PID"; then
            echo "  ✓ ${MODEL_NAME} shot-num=${shot_num} task completed successfully"
        else
            echo "  ✗ ${MODEL_NAME} shot-num=${shot_num} task execution failed, please check log file: $LOG_FILE"
            FAILED_TASKS=$((FAILED_TASKS + 1))
        fi

        echo "  ................................."
    done

    echo "All tasks for model ${MODEL_NAME} completed!"
    echo "=================================="
done

echo "All inference tasks for all models completed!"
echo "Result files location: ${OUTPUT_BASE_PATH}/*-shot*.json"
echo "Log files location: ${LOG_BASE_PATH}/*-shot*.log"

# Surface failures to the caller instead of always exiting 0.
if [ "$FAILED_TASKS" -gt 0 ]; then
    echo "${FAILED_TASKS} task(s) failed, please check the log files" >&2
    exit 1
fi
