#!/bin/bash


# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Benchmark split names; each is used below to build the path
# benchmark/annotations/<split>.csv.
data_splits=(
  'test200'
  'test400'
  'test600'
  'test800'
  'test1000'
  'test1100'
  'test1200'
)

# Reference lists of known model names. They are not read anywhere else in
# this script; copy entries from here into MODELS as needed.
# shellcheck disable=SC2034
api_models=(
  'gemini-2.5-flash'
  'claude-sonnet-4-20250514'
  'gpt-5'
  'o3'
  'o4-mini'
  'gpt-4.1'
  'gpt-4.1-mini'
  'gpt-4o'
  'Llama-4-Maverick-17B-128E-Instruct-FP8'
)
# shellcheck disable=SC2034
open_models=(
  'deepseek-vl2'
  'Qwen2.5-VL-72B-Instruct'
  'Qwen2.5-VL-7B-Instruct'
  'Llama-3.2-90B-Vision-Instruct'
  'Llama-3.2-11B-Vision-Instruct'
)

# Models that will actually be evaluated by the loop further down.
MODELS=(
  'gpt-5'
  'o3'
  'gpt-4.1'
  'gpt-4.1-mini'
  'Llama-4-Maverick-17B-128E-Instruct-FP8'
)

# Toggles (compared against the literal string "true"):
#   RUN_FEWSHOT      - evaluate the aggregated few-shot predictions
#   RUN_MCQ_CTX_AID  - evaluate the aggregated MCQ contextual-aid predictions
RUN_FEWSHOT=true
RUN_MCQ_CTX_AID=false

# Gather gold CSVs across all splits.
# A split whose annotation file is absent is skipped, but the skip is reported
# on stderr so that an evaluation over a subset of the splits is never silent.
GOLD_CSVS=()
for split in "${data_splits[@]}"; do
  csv="benchmark/annotations/${split}.csv"
  if [[ -f "$csv" ]]; then
    GOLD_CSVS+=("$csv")
  else
    echo "Warning: gold CSV not found, skipping split '${split}': $csv" >&2
  fi
done

# Nothing to evaluate against: abort before any model is processed.
if [[ ${#GOLD_CSVS[@]} -eq 0 ]]; then
  echo "No gold CSVs found. Exiting." >&2
  exit 1
fi

# Directory holding the aggregated prediction JSON files.
OUT_DIR="benchmark/results/contextual_aid"

#######################################
# Evaluate one aggregated prediction file against every gold CSV.
#
# The prediction path is ${OUT_DIR}/context_<mode>_<model>_all.json. The same
# file is passed to --pred_path once per gold CSV, so src/eval.py receives
# equally long --gold_path and --pred_path lists.
#
# Globals:
#   OUT_DIR   (read) directory containing the prediction files
#   GOLD_CSVS (read) array of gold annotation CSV paths
# Arguments:
#   $1 - mode slug used in the file name (e.g. "fewshot", "mcq_ctx_aid")
#   $2 - label as it appears mid-sentence in the "Evaluating ..." message
#   $3 - label as it appears at the start of the "... not found" message
#   $4 - model name (used in the file name and forwarded as --model_name)
# Outputs:
#   Progress line on stdout; missing-file and failure notices on stderr.
# Returns:
#   0 when the evaluation succeeded or the prediction file is missing
#   (the mode is skipped); otherwise the exit status of src/eval.py.
#######################################
evaluate_aggregated_predictions() {
  local mode=$1
  local label=$2
  local label_sentence_start=$3
  local model_name=$4
  local pred_file="${OUT_DIR}/context_${mode}_${model_name}_all.json"

  if [[ ! -f "$pred_file" ]]; then
    echo "  - ${label_sentence_start} predictions not found: $pred_file" >&2
    return 0
  fi

  echo "  - Evaluating ${label} aggregated file: $pred_file"

  # Repeat the aggregated file so that it pairs up with each gold CSV.
  local -a pred_paths=()
  local _gold
  for _gold in "${GOLD_CSVS[@]}"; do
    pred_paths+=("$pred_file")
  done

  local status=0
  python src/eval.py \
    --gold_path "${GOLD_CSVS[@]}" \
    --pred_path "${pred_paths[@]}" \
    --analysis_type basic \
    --model_name "$model_name" || status=$?

  if ((status != 0)); then
    echo "  - Evaluation failed (exit ${status}): $pred_file" >&2
  fi
  return "$status"
}

# Count failed evaluation runs so the script's exit status reflects them,
# while still attempting every remaining model.
eval_failures=0

for model in "${MODELS[@]}"; do
  echo "[Contextual Aid Eval] Model: $model"

  if [[ "$RUN_FEWSHOT" == true ]]; then
    evaluate_aggregated_predictions \
      "fewshot" "few-shot" "Few-shot" "$model" \
      || eval_failures=$((eval_failures + 1))
  fi

  if [[ "$RUN_MCQ_CTX_AID" == true ]]; then
    evaluate_aggregated_predictions \
      "mcq_ctx_aid" "MCQ contextual aid" "MCQ contextual aid" "$model" \
      || eval_failures=$((eval_failures + 1))
  fi

  echo "### End $model"
  echo ""
done

if ((eval_failures > 0)); then
  echo "${eval_failures} evaluation run(s) failed." >&2
  exit 1
fi


