#!/bin/bash

# Benchmark splits to evaluate. Each name is used to locate
# benchmark/annotations/<split>.csv and the matching results folder.
data_splits=(
    "test200"
    "test400"
    "test600"
    "test800"
    "test1000"
    "test1100"
    "test1200"
)

# Model categories.
api_models=(
    "gemini-2.5-flash"
    "claude-sonnet-4-20250514"
    "gpt-5"
    "o3"
    "o4-mini"
    "gpt-4.1"
    "gpt-4.1-mini"
    "gpt-4o"
    "Llama-4-Maverick-17B-128E-Instruct-FP8"
)
open_models=(
    "deepseek-vl2"
    "Qwen2.5-VL-72B-Instruct"
    "Qwen2.5-VL-7B-Instruct"
    "Llama-3.2-90B-Vision-Instruct"
    "Llama-3.2-11B-Vision-Instruct"
)

# Models to evaluate: pick one whole group (api_models or open_models).
models=("${api_models[@]}")

# Analysis to run. One of:
#   "basic"                   - default; accuracy and disclosure metrics
#   "get_coord"               - optional; get geolocated coordinates or load
#                               them from cache
#   "error"                   - distance error and other geolocation utility
#                               metrics (calls get_coord if not already
#                               geolocated)
#   "MCQ_free-form_alignment" - MCQ free-form alignment metrics
#                               (flag should be "heuristics")
analysis_type="basic"

# Evaluation setting. Valid flags: "heuristics" (MCQ) | "free-form".
flag="heuristics"
#flag="free-form"

# Prompting methods for the free-form settings: "zs" "iter-cot" "malicious".
prompting_methods=("zs")

# true  -> Q7-only evaluation
# false -> full evaluation (default)
q7_only=false

# If analysis_type is "MCQ_free-form_alignment", flag should be "heuristics",
# so override whatever flag was configured.
# Globals: analysis_type (read), flag (written only for that analysis type)
enforce_alignment_flag() {
    # The test bracket must be followed by whitespace: written as
    # `["$analysis_type"`, bash looks up a command named "[<value>", reports
    # "command not found", and the override never fires.
    if [[ "${analysis_type:-}" == "MCQ_free-form_alignment" ]]; then
        flag="heuristics"
    fi
}
enforce_alignment_flag


# Report whether a model name is listed in the api_models array.
# Globals:   api_models (read)
# Arguments: $1 - model name to look up
# Returns:   0 if $1 exactly equals an entry of api_models, 1 otherwise
is_api_model() {
    local model=${1-}
    # Declared local so the loop variable does not leak into (or clobber)
    # a variable in the caller's scope.
    local candidate
    for candidate in "${api_models[@]}"; do
        # Quoted right-hand side: literal comparison, not a glob match.
        if [[ "$model" == "$candidate" ]]; then
            return 0
        fi
    done
    return 1
}

# Build the task name that is embedded in prediction file names.
# Arguments: $1 - generation method ("api_gen" or "generate")
#            $2 - prompting method
#            $3 - flag (evaluation setting)
#            $4 - "true" for Q7-only mode, anything else for full evaluation
# Outputs:   the task name on stdout
build_task_name() {
    local gen_method=$1 prompting_method=$2 flag=$3 q7_only=$4
    local task="${gen_method}_${prompting_method}"

    if [[ "$flag" == "heuristics" ]]; then
        task+="_heuristics"
    elif [[ "$q7_only" != true ]]; then
        # Non-heuristics flags appear in the name only for full runs;
        # Q7-only names omit them.
        task+="_${flag}"
    fi
    if [[ "$q7_only" == true ]]; then
        task+="_q7-only"
    fi
    printf '%s\n' "$task"
}

# Resolve the prediction file for one split, routed to a subfolder by flag.
# Arguments: $1 - data split, $2 - task name, $3 - model name,
#            $4 - flag, $5 - prompting method
# Outputs:   the prediction file path on stdout
prediction_path() {
    local data=$1 task=$2 model=$3 flag=$4 prompting_method=$5
    local subdir

    case "$flag" in
        "heuristics")
            subdir="mcq"
            ;;
        "free-form_explicit-granularity")
            subdir="explicit_granularity"
            ;;
        *)
            # Free-form settings are grouped by prompting method.
            subdir="${prompting_method}/responses"
            ;;
    esac
    printf '%s\n' "benchmark/results/${data}/${subdir}/${task}_${model}_${data}.json"
}

# Run src/eval.py once per (prompting method, model) pair, over every data
# split for which both the gold annotation file and the prediction file exist.
# Globals: prompting_methods, models, data_splits, flag, q7_only,
#          analysis_type (all read)
# Outputs: progress on stdout; skipped files and failures on stderr
run_evaluations() {
    local prompting_method model gen_method task data gold pred
    local -a gold_paths pred_paths

    for prompting_method in "${prompting_methods[@]}"; do
        for model in "${models[@]}"; do
            # Automatically set gen_method based on model type.
            if is_api_model "$model"; then
                gen_method="api_gen"   # For api models
            else
                gen_method="generate"  # For local open models
            fi
            task=$(build_task_name "$gen_method" "$prompting_method" "$flag" "$q7_only")

            gold_paths=()
            pred_paths=()
            for data in "${data_splits[@]}"; do
                gold="benchmark/annotations/${data}.csv"
                pred=$(prediction_path "$data" "$task" "$model" "$flag" "$prompting_method")

                echo "Evaluating: $pred against $gold"
                if [[ -f "$gold" && -f "$pred" ]]; then
                    gold_paths+=("$gold")
                    pred_paths+=("$pred")
                else
                    echo "Skipping: $gold or $pred does not exist." >&2
                fi
            done

            # Without any file pair the evaluator would be invoked with empty
            # --gold_path/--pred_path lists; skip this model and move on to
            # the remaining ones instead.
            if (( ${#gold_paths[@]} == 0 )); then
                echo "No valid file pairs found for $model. Skipping." >&2
                continue
            fi

            # Arguments are quoted so names containing whitespace or glob
            # characters reach the evaluator as single words.
            python src/eval.py \
                --gold_path "${gold_paths[@]}" \
                --pred_path "${pred_paths[@]}" \
                --analysis_type "$analysis_type" \
                --model_name "$model" \
                || echo "Warning: src/eval.py failed for $model." >&2
            echo "### End $model"
            echo " "
        done
    done
}

run_evaluations