#!/usr/bin/env bash
# Evaluate one or more "accelerated" result directories against a baseline
# (GT) directory: evaluation/eval.py is run once per candidate and its output
# is copied into a per-candidate log file.
#
# Environment overrides read in this section: PROJECT_ROOT, VENV_PATH,
# LOG_ROOT_DEFAULT, MODEL_CACHE.

# Abort on command failure, on use of unset variables, and when any stage of
# a pipeline fails.
set -euo pipefail

# Absolute directory of this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Project root: the parent of the script directory unless overridden.
PROJECT_ROOT="${PROJECT_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd)}"
# Virtual environment that is activated before the evaluation runs.
VENV_PATH="${VENV_PATH:-${PROJECT_ROOT}/.venv}"
# Python entry point invoked once per candidate directory.
EVAL_SCRIPT="${PROJECT_ROOT}/evaluation/eval.py"
# Default log directory (can be replaced per run with --log-root).
LOG_ROOT_DEFAULT="${LOG_ROOT_DEFAULT:-${PROJECT_ROOT}/evaluation/logs}"
# Root directory for model files and the various model caches.
MODEL_CACHE="${MODEL_CACHE:-${PROJECT_ROOT}/models}"

# Directories used when --gt / --acc are not given on the command line.
DEFAULT_GT="${PROJECT_ROOT}/results/original/flux_dev_50"
DEFAULT_ACCELERATED="${PROJECT_ROOT}/results/hicache/default"

#######################################
# Print the command-line help text.
# Globals:   DEFAULT_GT, LOG_ROOT_DEFAULT (read)
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  local self="$0"
  printf '%s\n' \
    "Usage: ${self} [options]" \
    '' \
    'Options:' \
    "  --gt PATH                 Specify baseline GT directory (default: ${DEFAULT_GT})" \
    '  --acc LABEL=PATH          Add "accelerated" result directory, repeatable; if only PATH provided, will use directory name as label' \
    '  --batch-size N            DataLoader batch size (default: 32 or BATCH_SIZE env var)' \
    '  --num-workers N           DataLoader worker count (default: 8 or NUM_WORKERS env var)' \
    "  --log-root DIR            Log output directory (default: ${LOG_ROOT_DEFAULT})" \
    '  --save-to-test-folder     Save evaluation results to evaluation_results.json file in tested directory (enabled by default)' \
    '  --help                    Show help' \
    '' \
    'Examples:' \
    "  ${self}                                  # Only evaluate default HiCache directory" \
    "  ${self} --acc accelerated=/path/to/run   # Specify single candidate" \
    "  ${self} --acc accelerated=/path1 --acc other=/path2 --gt /path/to/gt"
}

# Runtime settings; the option parser further down may overwrite them.
SAVE_TO_TEST_FOLDER=true
NUM_WORKERS="${NUM_WORKERS:-8}"
BATCH_SIZE="${BATCH_SIZE:-32}"
LOG_ROOT="${LOG_ROOT_DEFAULT}"
GT_DIR="${DEFAULT_GT}"

# Parallel arrays filled by add_acc: ACC_LABELS[i] names ACC_PATHS[i].
declare -a ACC_LABELS=()
declare -a ACC_PATHS=()
# Flipped to true when the first --acc option is parsed.
custom_acc=false

#######################################
# Sanitize a candidate label (labels end up in log file names).
# Every character other than ASCII letters, digits, "_", ".", "-" and the
# CJK range U+4E00..U+9FFF is replaced by "_".
# Arguments: $1 - raw label (may be empty)
# Outputs:   sanitized label on stdout, followed by a newline
# Returns:   0 on success, otherwise the interpreter's exit status
#######################################
sanitize_label() {
  local label="${1-}"

  # This helper runs while options are parsed, i.e. before the virtual
  # environment is activated, so a bare `python` may be missing from PATH.
  # Prefer python3 (its re module understands the \uXXXX escapes used in the
  # pattern) and only then try `python`.
  local py=""
  if command -v python3 >/dev/null 2>&1; then
    py=python3
  elif command -v python >/dev/null 2>&1; then
    py=python
  fi

  if [[ -z "$py" ]]; then
    # Last resort without any interpreter: ASCII-only sanitization. Unlike
    # the Python path this also replaces CJK characters, byte by byte.
    local LC_ALL=C
    printf '%s\n' "${label//[^0-9A-Za-z_.-]/_}"
    return 0
  fi

  "$py" - "$label" <<'PY'
import sys
import re
label = sys.argv[1]
if not label:
    print("")
else:
    safe = re.sub(r"[^0-9A-Za-z_.\u4e00-\u9fff-]", "_", label)
    print(safe)
PY
}

#######################################
# Register one candidate ("accelerated") directory.
# Globals:   ACC_LABELS, ACC_PATHS (appended to, kept index-aligned)
# Arguments: $1 - label; empty means "generate accelerated_<index>"
#            $2 - directory path, stored unchanged
# Returns:   0 on success; non-zero if sanitize_label fails
#######################################
add_acc() {
  local label="$1"
  local path="$2"
  local candidate taken
  local -i i suffix=1

  if [[ -z "$label" ]]; then
    label="accelerated_${#ACC_LABELS[@]}"
  fi
  label=$(sanitize_label "$label")

  # Labels become log file names, so two candidates sharing a label (same
  # directory basename, or names that collide after sanitizing) would write
  # to the same log. Append _1, _2, ... until the label is unused.
  candidate="$label"
  while :; do
    taken=false
    # Index loop instead of "${ACC_LABELS[@]}": expanding an empty array
    # trips `set -u` on Bash older than 4.4.
    for ((i = 0; i < ${#ACC_LABELS[@]}; i++)); do
      if [[ "${ACC_LABELS[i]}" == "$candidate" ]]; then
        taken=true
        break
      fi
    done
    [[ "$taken" == true ]] || break
    candidate="${label}_${suffix}"
    suffix=$((suffix + 1))
  done

  ACC_LABELS+=("$candidate")
  ACC_PATHS+=("$path")
}

# Parse command-line options. Every arm shifts away what it consumed, so the
# loop stops once the argument list is empty.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --gt)
      [[ $# -ge 2 ]] || { echo "[ERROR] --gt requires path" >&2; exit 1; }
      GT_DIR="$2"
      shift 2
      ;;
    --acc)
      [[ $# -ge 2 ]] || { echo "[ERROR] --acc requires parameter" >&2; exit 1; }
      if [[ "$custom_acc" == false ]]; then
        # First explicit candidate: start from an empty candidate list.
        ACC_LABELS=()
        ACC_PATHS=()
        custom_acc=true
      fi
      arg="$2"
      shift 2
      if [[ "$arg" == *"="* ]]; then
        # LABEL=PATH form: split at the first "=" so PATH may contain "=".
        label="${arg%%=*}"
        path="${arg#*=}"
      else
        label=""
        path="$arg"
      fi
      if [[ -z "$path" ]]; then
        echo "[ERROR] --acc requires a non-empty path" >&2
        exit 1
      fi
      if [[ "$arg" != *"="* ]]; then
        # Bare PATH form: label defaults to the directory name. `--` keeps a
        # path that starts with "-" from being parsed as a basename option.
        label=$(basename -- "${path}")
      fi
      add_acc "$label" "$path"
      ;;
    --batch-size)
      [[ $# -ge 2 ]] || { echo "[ERROR] --batch-size requires number" >&2; exit 1; }
      BATCH_SIZE="$2"
      shift 2
      ;;
    --num-workers)
      [[ $# -ge 2 ]] || { echo "[ERROR] --num-workers requires number" >&2; exit 1; }
      NUM_WORKERS="$2"
      shift 2
      ;;
    --log-root)
      [[ $# -ge 2 ]] || { echo "[ERROR] --log-root requires path" >&2; exit 1; }
      LOG_ROOT="$2"
      shift 2
      ;;
    --save-to-test-folder)
      SAVE_TO_TEST_FOLDER=true
      shift
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    -*)
      # On errors the help text goes to stderr with the message, keeping
      # stdout clean for callers that capture it.
      echo "[ERROR] Unknown parameter: $1" >&2
      usage >&2
      exit 1
      ;;
    *)
      echo "[ERROR] Unknown positional argument: $1" >&2
      usage >&2
      exit 1
      ;;
  esac
done

# No --acc given: evaluate the default accelerated directory.
if (( ${#ACC_LABELS[@]} == 0 )); then
  add_acc "accelerated" "${DEFAULT_ACCELERATED}"
fi

# The baseline directory must exist before anything is evaluated.
[[ -d "${GT_DIR}" ]] || {
  echo "[ERROR] GT directory does not exist: ${GT_DIR}" >&2
  exit 1
}

# Every candidate must exist; comparing the baseline with itself is allowed
# but reported as a warning.
for candidate_dir in "${ACC_PATHS[@]}"; do
  [[ -d "$candidate_dir" ]] || {
    echo "[ERROR] Accelerated directory does not exist: $candidate_dir" >&2
    exit 1
  }
  if [[ "$candidate_dir" == "$GT_DIR" ]]; then
    echo "[WARNING] Accelerated directory same as GT: $candidate_dir" >&2
  fi
done

# The evaluation runs inside the project's virtual environment.
if [[ ! -d "${VENV_PATH}" ]]; then
  echo "[ERROR] Virtual environment does not exist: ${VENV_PATH}" >&2
  exit 1
fi
if [[ ! -f "${VENV_PATH}/bin/activate" ]]; then
  echo "[ERROR] Virtual environment has no activate script: ${VENV_PATH}/bin/activate" >&2
  exit 1
fi

# Some activate scripts (e.g. those generated by older virtualenv releases)
# read variables that may be unset, which aborts under `set -u`. Relax
# nounset only while sourcing, then restore it.
set +u
# shellcheck disable=SC1091
source "${VENV_PATH}/bin/activate"
set -u

# Keep model caches under MODEL_CACHE unless the caller already exported
# their own locations.
mkdir -p "${MODEL_CACHE}" "${LOG_ROOT}"
export TOKENIZERS_PARALLELISM="false"
export TRANSFORMERS_CACHE="${TRANSFORMERS_CACHE:-${MODEL_CACHE}}"
export HF_HOME="${HF_HOME:-${MODEL_CACHE}/.hf_home}"
export HUGGINGFACE_HUB_CACHE="${HUGGINGFACE_HUB_CACHE:-${MODEL_CACHE}/.huggingface}"
export BERT_BASE_UNCASED_DIR="${BERT_BASE_UNCASED_DIR:-${MODEL_CACHE}/bert-base-uncased}"
export TORCH_HOME="${TORCH_HOME:-${MODEL_CACHE}/.torch}"

#######################################
# Locate a local copy of the CLIP model (clip-vit-large-patch14) so the
# evaluation can stay offline.
# Search order:
#   1. well-known directories below MODEL_CACHE,
#   2. snapshots in the Hugging Face hub cache below HF_HOME,
#   3. any config.json below MODEL_CACHE whose path mentions the model name.
# Globals:   MODEL_CACHE, HF_HOME (read)
# Arguments: none
# Outputs:   the model directory on stdout when one is found
# Returns:   0 if a directory was found, 1 otherwise
#######################################
resolve_clip_local() {
  local candidate
  local -a base_candidates=(
    "${MODEL_CACHE}/clip-vit-large-patch14"
    "${MODEL_CACHE}/clip-vit-large-patch14/clip-vit-large-patch14"
    "${MODEL_CACHE}/openai/clip-vit-large-patch14"
  )
  for candidate in "${base_candidates[@]}"; do
    # -f on the file implies that the containing directory exists.
    if [[ -f "$candidate/config.json" ]]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done

  # Look into HF cache snapshots: pick the most recently modified snapshot
  # that actually contains a config.json, so a newer but incomplete snapshot
  # does not hide an older usable one.
  local hub_root="${HF_HOME:-${MODEL_CACHE}/.hf_home}/hub/models--openai--clip-vit-large-patch14/snapshots"
  if [[ -d "$hub_root" ]]; then
    local snapshot newest=""
    for snapshot in "$hub_root"/*/; do
      snapshot="${snapshot%/}"
      # Also skips the literal pattern left behind when the glob matches nothing.
      [[ -f "$snapshot/config.json" ]] || continue
      if [[ -z "$newest" || "$snapshot" -nt "$newest" ]]; then
        newest="$snapshot"
      fi
    done
    if [[ -n "$newest" ]]; then
      printf '%s\n' "$newest"
      return 0
    fi
  fi

  # Fallback: first config.json (in find's traversal order) whose path
  # contains the model name. `|| true` absorbs the non-zero pipeline status
  # (pipefail) when head closes the pipe early or find hits unreadable paths.
  local found
  found=$(find "${MODEL_CACHE}" -type f -name config.json -path "*clip-vit-large-patch14*" 2>/dev/null | head -n1 || true)
  if [[ -n "$found" ]]; then
    dirname "$found"
    return 0
  fi
  return 1
}

# Pick offline or online mode depending on whether a local CLIP copy exists.
# `|| true` keeps a "not found" result from aborting the script under set -e.
CLIP_LOCAL_DIR="$(resolve_clip_local || true)"
if [[ -z "${CLIP_LOCAL_DIR}" ]]; then
  # No local copy found; allow online lookup (requires network/proxy)
  export HF_HUB_OFFLINE="0" HF_DATASETS_OFFLINE="0" TRANSFORMERS_OFFLINE="0"
  CLIP_ARG=()
  echo "[WARN] Local CLIP model cache not found, will try to pull openai/clip-vit-large-patch14 online."
  echo "[HINT] If current machine cannot connect to internet, you can first run hf_download.py in a networked environment to pre-download models, then sync to ${MODEL_CACHE}."
else
  # A local copy exists: switch the offline flags on and hand the directory
  # to the evaluation script explicitly.
  export HF_HUB_OFFLINE="1" HF_DATASETS_OFFLINE="1" TRANSFORMERS_OFFLINE="1"
  CLIP_ARG=(--clip_model "${CLIP_LOCAL_DIR}")
  echo "[INFO] Using local CLIP model: ${CLIP_LOCAL_DIR} (offline mode)"
fi

# One timestamp shared by every log file of this invocation.
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"

#######################################
# Run the evaluation script for one candidate directory and copy its
# combined stdout/stderr into a log file.
# Globals:   LOG_ROOT, TIMESTAMP, GT_DIR, BATCH_SIZE, NUM_WORKERS, CLIP_ARG,
#            SAVE_TO_TEST_FOLDER, EVAL_SCRIPT (all read)
# Arguments: $1 - label used in the log file name
#            $2 - candidate directory to evaluate
# Outputs:   progress line and the evaluation output on stdout;
#            ${LOG_ROOT}/<label>_<TIMESTAMP>.log is written
# Returns:   status of the python | tee pipeline
#######################################
run_eval() {
  local label="$1"
  local acc_dir="$2"
  local log_file="${LOG_ROOT}/${label}_${TIMESTAMP}.log"

  echo "[INFO] Starting evaluation ${label}: ${acc_dir} -> ${log_file}"

  # Build parameter array. CLIP_ARG is empty in online mode, and expanding an
  # empty array under `set -u` is an "unbound variable" error on Bash older
  # than 4.4, hence the ${arr[@]+...} guard.
  local -a eval_args=(
    --test_folder "${acc_dir}"
    --original_folder "${GT_DIR}"
    --batch_size "${BATCH_SIZE}"
    --num_workers "${NUM_WORKERS}"
    ${CLIP_ARG[@]+"${CLIP_ARG[@]}"}
  )

  # If save to test directory is enabled, add parameter
  if [[ "${SAVE_TO_TEST_FOLDER}" == "true" ]]; then
    eval_args+=(--save-to-test-folder)
  fi

  python "${EVAL_SCRIPT}" "${eval_args[@]}" 2>&1 | tee "${log_file}"
}

# Evaluate the candidates in registration order; ACC_LABELS and ACC_PATHS
# are index-aligned.
for ((i = 0; i < ${#ACC_LABELS[@]}; i++)); do
  run_eval "${ACC_LABELS[i]}" "${ACC_PATHS[i]}"
done

# Final status line (reports where the logs were written).
printf '%s\n' "[INFO] 评测完成，日志位于 ${LOG_ROOT}"
