#!/usr/bin/env bash
set -euo pipefail

# --- Find Project Root ---
# Enter this script's directory inside a command substitution to obtain its
# absolute path, take the parent of that directory as the project root, and
# make the root the working directory for everything that follows.
SCRIPT_DIR=$(
  cd -- "$(dirname -- "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd
)
PROJECT_ROOT=$(dirname "$SCRIPT_DIR")
cd "$PROJECT_ROOT"

# --- Default values ---
MODEL_NAME="gemini-1.5-flash"
NUM_VARIATIONS=1
START_FROM_TIER="1"
# NOTE(review): FOLLOW_UP is set by --follow_up, but nothing in the visible
# script reads it afterwards — confirm whether a tier should receive it.
FOLLOW_UP=false
TIER1_ITEM_COUNTS=(10) # Default values for Tier 1 item counts

# Print the command-line synopsis to stderr.
# The tier list names the tiers this script actually has sections for.
usage() {
  echo "Usage: $0 --model_name MODEL --num_variations N --start_from_tier {1|2|3a|4} [--follow_up] [--tier1_item_counts N1 N2 ...]" >&2
}

# Print the message in $1 followed by the usage synopsis (both on stderr),
# then terminate the script with status 1.
die_usage() {
  echo "$1" >&2
  usage
  exit 1
}

# --- Parse Args ---
# Parse the given command-line words into the configuration globals.
# Globals:   MODEL_NAME, NUM_VARIATIONS, START_FROM_TIER, FOLLOW_UP,
#            TIER1_ITEM_COUNTS (all written)
# Arguments: the script's command-line arguments
# Exits:     status 1 on an unknown option or an option missing its value
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --model_name)
        # Check for the value explicitly so a trailing option yields a
        # readable error instead of an "unbound variable" abort.
        [[ $# -ge 2 ]] || die_usage "Error: $1 requires a value"
        MODEL_NAME="$2"
        shift 2
        ;;
      --num_variations)
        [[ $# -ge 2 ]] || die_usage "Error: $1 requires a value"
        # The assignment must be its own command: joined onto the same line
        # as `shift` it would only be a temporary environment entry for that
        # builtin and the global would keep its default.
        NUM_VARIATIONS="$2"
        shift 2
        ;;
      --start_from_tier)
        [[ $# -ge 2 ]] || die_usage "Error: $1 requires a value"
        START_FROM_TIER="$2"
        shift 2
        ;;
      --follow_up)
        FOLLOW_UP=true
        shift
        ;;
      --tier1_item_counts)
        # Read all subsequent arguments until another flag or end of arguments
        TIER1_ITEM_COUNTS=()
        shift
        while [[ $# -gt 0 && ! "$1" =~ ^-- ]]; do
          TIER1_ITEM_COUNTS+=("$1")
          shift
        done
        # An empty list would make the Tier 1 loop a silent no-op.
        if [[ ${#TIER1_ITEM_COUNTS[@]} -eq 0 ]]; then
          die_usage "Error: --tier1_item_counts requires at least one value"
        fi
        ;;
      *)
        die_usage "Unknown argument: $1"
        ;;
    esac
  done
}

# Check the parsed configuration before any tier is started.
# Globals: MODEL_NAME, NUM_VARIATIONS, START_FROM_TIER (read)
# Exits:   status 1 with a message on stderr when a value is unusable
validate_config() {
  if [[ -z "$MODEL_NAME" ]]; then
    echo "Error: --model_name is required" >&2
    exit 1
  fi

  if [[ ! "$NUM_VARIATIONS" =~ ^[0-9]+$ ]]; then
    echo "Error: --num_variations must be a non-negative integer (got '$NUM_VARIATIONS')" >&2
    exit 1
  fi

  # Each tier section compares START_FROM_TIER against one literal; any other
  # value would skip every tier and go straight to the summary step.
  case "$START_FROM_TIER" in
    1 | 2 | 3a | 4) ;;
    *)
      echo "Error: --start_from_tier must be one of 1, 2, 3a, 4 (got '$START_FROM_TIER')" >&2
      exit 1
      ;;
  esac
}

parse_args "$@"
validate_config

# Show the effective configuration before any tier is launched.
printf '\n'
printf '%s\n' \
  "=== Running Full Evaluation ===" \
  "Model           : $MODEL_NAME" \
  "Variations      : $NUM_VARIATIONS" \
  "Starting from   : Tier $START_FROM_TIER" \
  "Tier 1 Item Cnts: ${TIER1_ITEM_COUNTS[*]}" \
  "--------------------------------"

# Tier 1 (no modes)
# Invokes tier1.py once for every entry of TIER1_ITEM_COUNTS, then advances
# START_FROM_TIER so the Tier 2 section runs next.
# NOTE(review): --num_variations is fixed at 5 here, whereas the other tier
# sections pass "$NUM_VARIATIONS" — confirm the hard-coded value is intended.
case "$START_FROM_TIER" in
  1)
    printf '%s\n' "-> Tier 1"
    for item_count in "${TIER1_ITEM_COUNTS[@]}"; do
      printf '%s\n' "  -> Running with ${item_count} items"
      python3 tier1.py --model_name "$MODEL_NAME" --num_variations 5 \
        --num_other_items_in_sensitive_container "${item_count}" --get_reasoning
    done
    START_FROM_TIER="2"
    ;;
esac

# Tier 2 has two modes: rating & selection
# tier2.py is invoked once per mode; afterwards START_FROM_TIER is advanced so
# the Tier 3a section runs next.
case "$START_FROM_TIER" in
  2)
    TIERS2_MODES=(rating selection)
    for eval_mode in "${TIERS2_MODES[@]}"; do
      printf '%s\n' "-> Tier 2 (mode: $eval_mode)"
      python3 tier2.py --model_name "$MODEL_NAME" --num_variations "$NUM_VARIATIONS" \
        --evaluation_mode "$eval_mode" --get_reasoning
    done
    START_FROM_TIER="3a"
    ;;
esac

# Tier 3a has three modes: open-ended, rating & selection
# tier3a.py is invoked once per mode; afterwards START_FROM_TIER is advanced so
# the Tier 4 section runs next.
case "$START_FROM_TIER" in
  3a)
    TIERS3A_MODES=(open-ended rating selection)
    for eval_mode in "${TIERS3A_MODES[@]}"; do
      printf '%s\n' "-> Tier 3a (mode: $eval_mode)"
      python3 tier3a.py --model_name "$MODEL_NAME" --num_variations "$NUM_VARIATIONS" \
        --evaluation_mode "$eval_mode" --get_reasoning
    done
    START_FROM_TIER="4"
    ;;
esac


# Tier 4 has two modes: rating & selection
# tier4.py is invoked once per mode. This is the last tier section, so
# START_FROM_TIER is left untouched afterwards.
case "$START_FROM_TIER" in
  4)
    TIERS4_MODES=(rating selection)
    for eval_mode in "${TIERS4_MODES[@]}"; do
      printf '%s\n' "-> Tier 4 (mode: $eval_mode)"
      python3 tier4.py --model_name "$MODEL_NAME" --num_variations "$NUM_VARIATIONS" \
        --evaluation_mode "$eval_mode" --get_reasoning
    done
    ;;
esac

printf '%s\n' "--------------------------------"
printf '%s\n' "All tiers done. Generating summary…"

# Summarize everything
# The summary script receives the same model name and variation count that
# were passed to the tier sections that forward "$NUM_VARIATIONS".
python3 scripts/summarize_results.py --model_name "$MODEL_NAME" --num_variations "$NUM_VARIATIONS"

printf '%s\n' "=== Completed ==="