#!/bin/bash

# Exported so that child processes started by this script can read it.
export NUM_GPUS=1

# Timestamp of this invocation; used to build the output directory and log
# file names so that separate runs do not overwrite each other.
START_TIME=$(date +%Y%m%d-%H:%M:%S)

# PARTITION is optional; it is only reported here when the caller set it.
if [[ -v PARTITION ]]; then
    echo "Submit to $PARTITION"
fi

# Resolve the directory containing this script and work from there, because
# every path below is written relative to it. Abort if it cannot be entered:
# continuing would resolve those relative paths against the wrong directory.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" || {
    echo "Error: cannot resolve script directory" >&2
    exit 1
}
cd "$SCRIPT_DIR" || {
    echo "Error: cannot cd to $SCRIPT_DIR" >&2
    exit 1
}

# Set PYTHONPATH to project root (EMBGuard/) to enable importing src modules
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" || {
    echo "Error: cannot resolve project root from $SCRIPT_DIR" >&2
    exit 1
}
# Append the previous PYTHONPATH only when it is non-empty. A bare trailing
# ':' would leave an empty entry, which Python treats as the current directory.
export PYTHONPATH="$PROJECT_ROOT${PYTHONPATH:+:$PYTHONPATH}"

# Configuration
# Every array element is a "model:provider" pair. Entries are toggled by
# commenting them in or out; elements are separated by newlines, so no line
# continuations are needed inside the parentheses.
GUARDRAIL_LIST=(
    # Closed-source
    # "gpt-4o-mini:openai"
    # "gpt-4o:openai"
    # "gpt-5.1:openai"
    # "google/gemini-2.5-flash:openrouter"
    # "google/gemini-2.5-pro:openrouter"

    # Open-source - InternVL
    # "OpenGVLab/InternVL3_5-1B:vllm"
    # "OpenGVLab/InternVL3_5-2B-Instruct:vllm"

    # Open-source - Qwen
    # "Qwen/Qwen3-VL-2B-Instruct:vllm"
    # "Qwen/Qwen3-VL-4B-Instruct:vllm"
    # "qwen/qwen3-vl-8b-instruct:openrouter"
    "qwen/qwen3-vl-30b-a3b-instruct:openrouter"
    "qwen/qwen3-vl-32b-instruct:openrouter"
    "qwen/qwen3-vl-235b-a22b-instruct:openrouter"

    # Open-source - Gemma
    "google/gemma-3-4b-it:openrouter"
    "google/gemma-3-12b-it:openrouter"
    "google/gemma-3-27b-it:openrouter"

    # EMBGUARD
    # "EMBGuard/EMBGuard-4B:vllm"
    # "EMBGuard/EMBGuard-8B:vllm"
)

# Policy models, in the same "model:provider" format.
POLICY_LIST=(
    "gpt-4o:openai"
    "gpt-4o-mini:openai"
    "qwen/qwen3-vl-32b-instruct:openrouter"
    # "gemini-2.5-pro:openrouter"
    # "gemini-2.5-flash:openrouter"
)

# Judge model and provider, passed to the evaluator as
# --judge_model / --judge_provider.
JUDGE_MODEL="gpt-4o"
JUDGE_PROVIDER="openai"

# Input and output locations, written relative to the working directory the
# evaluator is launched from.
BENCHMARK_DIR="../data/images"
TASK_LIST="../entrypoints/task_list.txt"
TASKS_DIR="../data/tasks"
BASE_OUTPUT_DIR="../results/embodied_planning/${START_TIME}"

# Tunables edited directly in this script:
#   NUM_WORKERS - value passed to the evaluator as --num_workers
#   AGGREGATE   - 1 runs the aggregation script after each evaluation
NUM_WORKERS="20"
AGGREGATE="1"

#######################################
# Run a command with stdout and stderr merged, shown on the terminal and
# appended to a log file.
# Arguments: $1   - path of the log file to append to
#            $2.. - the command to run and its arguments
# Outputs:   the command's combined output, on stdout
# Returns:   the exit status of the command itself. PIPESTATUS[0] is used
#            because the pipeline's own status (and "$!" of a backgrounded
#            pipeline) belongs to tee, which would hide a failing command.
#######################################
run_and_log() {
    local log_file=$1
    shift
    "$@" 2>&1 | tee -a "$log_file"
    return "${PIPESTATUS[0]}"
}

# Evaluate every guardrail/policy combination. Each combination gets its own
# output directory and log file. The script stops at the first combination
# whose evaluator exits non-zero.
for GUARDRAIL_PAIR in "${GUARDRAIL_LIST[@]}"; do
    # "model:provider" -> model is everything before the first ':',
    # provider is everything after the last ':'.
    GUARDRAIL_MODEL=${GUARDRAIL_PAIR%%:*}
    GUARDRAIL_PROVIDER=${GUARDRAIL_PAIR##*:}
    # Replace '/' so the tag can be used as a single path component.
    GUARDRAIL_TAG=${GUARDRAIL_MODEL//\//_}-${GUARDRAIL_PROVIDER}
    for POLICY_PAIR in "${POLICY_LIST[@]}"; do
        POLICY_MODEL=${POLICY_PAIR%%:*}
        POLICY_PROVIDER=${POLICY_PAIR##*:}
        POLICY_TAG=${POLICY_MODEL//\//_}-${POLICY_PROVIDER}
        OUTPUT_DIR="$BASE_OUTPUT_DIR/$GUARDRAIL_TAG/$POLICY_TAG"
        LOG_FILE="$OUTPUT_DIR/logs/exec_${START_TIME}_${GUARDRAIL_TAG}_${POLICY_TAG}.log"

        # Create output directory (after cd to script dir)
        mkdir -p "$OUTPUT_DIR/logs"

        echo "Starting embodied planning evaluation..."
        echo "Guardrail: $GUARDRAIL_MODEL ($GUARDRAIL_PROVIDER)"
        echo "Policy: $POLICY_MODEL ($POLICY_PROVIDER)"
        echo "Judge: $JUDGE_MODEL ($JUDGE_PROVIDER)"
        echo "Benchmark dir: $BENCHMARK_DIR"
        echo "Task list: $TASK_LIST"
        echo "Tasks dir: $TASKS_DIR"
        echo "Output dir: $OUTPUT_DIR"
        echo "Num workers: $NUM_WORKERS"
        echo "Aggregate: $AGGREGATE"
        echo "Log file: $LOG_FILE"

        # Build the command once as an array so the line printed below and
        # the command actually executed cannot drift apart.
        EVAL_CMD=(
            python ../src/evaluator/embodied_planning_evaluator.py
            --task_list "$TASK_LIST"
            --tasks_dir "$TASKS_DIR"
            --output_dir "$OUTPUT_DIR"
            --benchmark_dir "$BENCHMARK_DIR"
            --guardrail_model "$GUARDRAIL_MODEL"
            --guardrail_provider "$GUARDRAIL_PROVIDER"
            --policy_model "$POLICY_MODEL"
            --policy_provider "$POLICY_PROVIDER"
            --judge_model "$JUDGE_MODEL"
            --judge_provider "$JUDGE_PROVIDER"
            --num_workers "$NUM_WORKERS"
        )
        echo "Starting Python process..."
        echo "Command: ${EVAL_CMD[*]}"

        # Run evaluation; output is shown live and appended to the log file.
        run_and_log "$LOG_FILE" "${EVAL_CMD[@]}"
        PYTHON_EXIT_CODE=$?
        echo "Python finished. Exit code: $PYTHON_EXIT_CODE"

        echo "Evaluation complete. Exit code: $PYTHON_EXIT_CODE"
        echo "Results saved to: $OUTPUT_DIR"

        # Aggregation runs before the failure check below, so it is still
        # attempted for a run whose evaluator failed.
        if [[ "$AGGREGATE" -eq 1 ]]; then
            echo "Aggregating results..."
            python ../src/evaluator/aggregate_embodied_planning.py --output_dir "$OUTPUT_DIR"
        fi

        # Stop the whole sweep on the first failing evaluator run.
        if (( PYTHON_EXIT_CODE != 0 )); then
            exit "$PYTHON_EXIT_CODE"
        fi
    done
done

# Reached only when no loop iteration above exited early with a non-zero status.
exit 0
