#!/bin/bash

# Exported so child processes can read it; nothing in this script reads it.
export NUM_GPUS=1

# Timestamp of this run; used to name the output directory and the log files.
START_TIME=$(date +%Y%m%d-%H:%M:%S)

# Purely informational: report the partition when the caller has set one.
if [[ -v PARTITION ]]; then
    echo "Submit to $PARTITION"
fi

# Resolve the directory containing this script and work from there, because
# every relative path used later in the script is relative to it.
# CDPATH is cleared for the lookups so `cd` cannot print a directory name into
# the captured output.
SCRIPT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)"
if ! cd -- "$SCRIPT_DIR"; then
    echo "Cannot change to script directory: $SCRIPT_DIR" >&2
    exit 1
fi

# Set PYTHONPATH to project root (EMBGuard/) to enable importing src modules
if ! PROJECT_ROOT="$(CDPATH= cd -- "$SCRIPT_DIR/../.." && pwd)"; then
    echo "Cannot resolve project root from: $SCRIPT_DIR/../.." >&2
    exit 1
fi
# Only append the previous PYTHONPATH when it is non-empty; otherwise a
# trailing ':' would leave an empty path entry (which Python is expected to
# treat as the current directory).
export PYTHONPATH="$PROJECT_ROOT${PYTHONPATH:+:$PYTHONPATH}"

# Base URL of the vLLM server for each guardrail model, keyed by model id.
# A guardrail whose provider is "vllm" must have an entry here.
declare -A VLLM_BASE_URL_MAP=(
    ["Qwen/Qwen3-VL-4B-Instruct"]="http://127.0.0.1:8000/v1"
    ["Qwen/Qwen3-VL-2B-Instruct"]="http://127.0.0.1:8001/v1"
    ["OpenGVLab/InternVL3-1B-hf"]="http://127.0.0.1:8002/v1"
    ["OpenGVLab/InternVL3-2B-hf"]="http://127.0.0.1:8003/v1"
    ["EMBGuard/EMBGuard-2B"]="http://127.0.0.1:8004/v1"
    ["EMBGuard/EMBGuard-4B"]="http://127.0.0.1:8005/v1"
)

# Guardrails to evaluate. Each entry has the form "<model>:<provider>".
GUARDRAIL_LIST=()
# Open-source - InternVL
GUARDRAIL_LIST+=(
    "OpenGVLab/InternVL3-1B-hf:vllm"
    "OpenGVLab/InternVL3-2B-hf:vllm"
)
# Open-source - Qwen
GUARDRAIL_LIST+=(
    "Qwen/Qwen3-VL-2B-Instruct:vllm"
    "Qwen/Qwen3-VL-4B-Instruct:vllm"
)
# EMBGUARD
GUARDRAIL_LIST+=(
    "EMBGuard/EMBGuard-2B:vllm"
    "EMBGuard/EMBGuard-4B:vllm"
)

# Policy models to pair with every guardrail, also as "<model>:<provider>".
POLICY_LIST=(
    "gpt-4o:openai"
    "gpt-4o-mini:openai"
    "qwen/qwen3-vl-32b-instruct:openrouter"
)

# Judge model and provider handed to the evaluator.
JUDGE_MODEL="gpt-4o"
JUDGE_PROVIDER="openai"

# Input and output locations, relative to the script directory.
BENCHMARK_DIR="../data/images"
TASK_LIST="../entrypoints/task_list.txt"
TASKS_DIR="../data/tasks"
BASE_OUTPUT_DIR="../results/embodied_planning/$START_TIME"

# Knobs edited directly in this script.
NUM_WORKERS=5   # forwarded to the evaluator as --num_workers
AGGREGATE=1     # 1 = run the aggregation step after all evaluations finish

# Launch one background evaluation per (guardrail, policy) pair.
# PIDS[i] holds the PID of the evaluator process for the pair tagged TAGS[i];
# both arrays are consumed by the wait loop that follows.
PIDS=()
TAGS=()

# Check every vLLM guardrail for a base URL before launching anything, so a
# missing mapping cannot abort the script while earlier evaluations are
# already running in the background.
for GUARDRAIL_PAIR in "${GUARDRAIL_LIST[@]}"; do
    GUARDRAIL_MODEL=${GUARDRAIL_PAIR%%:*}
    GUARDRAIL_PROVIDER=${GUARDRAIL_PAIR##*:}
    if [[ "$GUARDRAIL_PROVIDER" == "vllm" && -z "${VLLM_BASE_URL_MAP[$GUARDRAIL_MODEL]:-}" ]]; then
        echo "Missing VLLM base URL mapping for guardrail model: $GUARDRAIL_MODEL" >&2
        exit 1
    fi
done

for GUARDRAIL_PAIR in "${GUARDRAIL_LIST[@]}"; do
    # Entries are "<model>:<provider>"; the tag replaces '/' with '_' so it
    # can be used as a single path component.
    GUARDRAIL_MODEL=${GUARDRAIL_PAIR%%:*}
    GUARDRAIL_PROVIDER=${GUARDRAIL_PAIR##*:}
    GUARDRAIL_TAG=${GUARDRAIL_MODEL//\//_}-${GUARDRAIL_PROVIDER}

    # Set VLLM_BASE_URL if provider is vllm. It is exported before the jobs of
    # this guardrail are started, so each job inherits the matching URL.
    if [[ "$GUARDRAIL_PROVIDER" == "vllm" ]]; then
        VLLM_BASE_URL="${VLLM_BASE_URL_MAP[$GUARDRAIL_MODEL]}"
        export VLLM_BASE_URL
        echo "VLLM base URL for guardrail: $VLLM_BASE_URL"
    fi

    for POLICY_PAIR in "${POLICY_LIST[@]}"; do
        POLICY_MODEL=${POLICY_PAIR%%:*}
        POLICY_PROVIDER=${POLICY_PAIR##*:}
        POLICY_TAG=${POLICY_MODEL//\//_}-${POLICY_PROVIDER}
        OUTPUT_DIR="$BASE_OUTPUT_DIR/$GUARDRAIL_TAG/$POLICY_TAG"
        LOG_FILE="$OUTPUT_DIR/logs/exec_${START_TIME}_${GUARDRAIL_TAG}_${POLICY_TAG}.log"

        # Create output directory (after cd to script dir)
        mkdir -p "$OUTPUT_DIR/logs"

        echo "Starting embodied planning evaluation..."
        echo "Guardrail: $GUARDRAIL_MODEL ($GUARDRAIL_PROVIDER)"
        echo "Policy: $POLICY_MODEL ($POLICY_PROVIDER)"
        echo "Judge: $JUDGE_MODEL ($JUDGE_PROVIDER)"
        echo "Benchmark dir: $BENCHMARK_DIR"
        echo "Task list: $TASK_LIST"
        echo "Tasks dir: $TASKS_DIR"
        echo "Output dir: $OUTPUT_DIR"
        echo "Num workers: $NUM_WORKERS"
        echo "Aggregate: $AGGREGATE"
        echo "Log file: $LOG_FILE"

        # Run evaluation in background, appending stdout and stderr to the
        # log file. The output is redirected directly rather than piped
        # through `tee`, so that $! is the PID of the evaluator itself and a
        # later `wait` returns the evaluator's exit status.
        python ../src/evaluator/embodied_planning_evaluator.py \
            --task_list "$TASK_LIST" \
            --tasks_dir "$TASKS_DIR" \
            --output_dir "$OUTPUT_DIR" \
            --benchmark_dir "$BENCHMARK_DIR" \
            --guardrail_model "$GUARDRAIL_MODEL" \
            --guardrail_provider "$GUARDRAIL_PROVIDER" \
            --policy_model "$POLICY_MODEL" \
            --policy_provider "$POLICY_PROVIDER" \
            --judge_model "$JUDGE_MODEL" \
            --judge_provider "$JUDGE_PROVIDER" \
            --num_workers "$NUM_WORKERS" \
            >> "$LOG_FILE" 2>&1 &

        PYTHON_PID=$!
        TAG="${GUARDRAIL_TAG}_${POLICY_TAG}"
        echo "Started evaluation for $TAG (PID: $PYTHON_PID)"
        PIDS+=("$PYTHON_PID")
        TAGS+=("$TAG")
    done
done

# Block until every launched evaluation has finished, reporting each one's
# exit status. EXIT_CODE ends up as the status of the last job that failed,
# or 0 when all of them succeeded.
EXIT_CODE=0
for idx in "${!PIDS[@]}"; do
    wait "${PIDS[$idx]}"
    job_status=$?
    echo "Evaluation complete for ${TAGS[$idx]}. Exit code: $job_status"
    if (( job_status != 0 )); then
        EXIT_CODE=$job_status
    fi
done

# Aggregate results if enabled.
# Runs the aggregation script once for every (guardrail, policy) output
# directory that exists under BASE_OUTPUT_DIR. A failing aggregation is
# reported on stderr and does not stop the remaining aggregations; it makes
# the script exit non-zero unless an evaluation failure code is already set.
if [ "$AGGREGATE" -eq 1 ]; then
    echo "Aggregating results for all completed evaluations..."
    for GUARDRAIL_PAIR in "${GUARDRAIL_LIST[@]}"; do
        GUARDRAIL_MODEL=${GUARDRAIL_PAIR%%:*}
        GUARDRAIL_PROVIDER=${GUARDRAIL_PAIR##*:}
        GUARDRAIL_TAG=${GUARDRAIL_MODEL//\//_}-${GUARDRAIL_PROVIDER}
        for POLICY_PAIR in "${POLICY_LIST[@]}"; do
            POLICY_MODEL=${POLICY_PAIR%%:*}
            POLICY_PROVIDER=${POLICY_PAIR##*:}
            POLICY_TAG=${POLICY_MODEL//\//_}-${POLICY_PROVIDER}
            OUTPUT_DIR="$BASE_OUTPUT_DIR/$GUARDRAIL_TAG/$POLICY_TAG"
            if [ -d "$OUTPUT_DIR" ]; then
                echo "Aggregating results for $GUARDRAIL_TAG / $POLICY_TAG..."
                python ../src/evaluator/aggregate_embodied_planning.py --output_dir "$OUTPUT_DIR"
                AGGREGATE_CODE=$?
                if [ "$AGGREGATE_CODE" -ne 0 ]; then
                    echo "Aggregation failed for $GUARDRAIL_TAG / $POLICY_TAG. Exit code: $AGGREGATE_CODE" >&2
                    # Do not overwrite a failure code already recorded for
                    # an evaluation.
                    if [ "${EXIT_CODE:-0}" -eq 0 ]; then
                        EXIT_CODE=$AGGREGATE_CODE
                    fi
                fi
            fi
        done
    done
fi

exit "${EXIT_CODE:-0}"