#!/usr/bin/env bash
#
# Optional environment tunables read by this script:
#   ACE_RAG_RELEASE_CHECK_LIMIT         value forwarded to main.py --limit
#                                       (default: 20)
#   ACE_RAG_RELEASE_CHECK_CORPUS_LIMIT  value forwarded to main.py --corpus-limit
#                                       (default: 500)
#   ACE_RAG_KEEP_RELEASE_CHECK_OUTPUTS  set to 1 to keep the output directory
#                                       when the script exits (default: 0)
set -euo pipefail

# Parent of the directory that contains this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
LIMIT="${ACE_RAG_RELEASE_CHECK_LIMIT:-20}"
CORPUS_LIMIT="${ACE_RAG_RELEASE_CHECK_CORPUS_LIMIT:-500}"
# Second-resolution stamp; also used as the prefix of each run's --timestamp.
RUN_ID="$(date +%Y%m%d_%H%M%S)"
# RUN_ID alone is not unique: two invocations started within the same second
# would share one directory, and the first to exit would delete the other's
# outputs. A fixed name under /tmp can also be pre-created by another user.
# mktemp -d creates a fresh, private directory atomically while keeping the
# familiar prefix and location.
OUTPUT_ROOT="$(mktemp -d "/tmp/ace_rag_release_repro_check_${RUN_ID}.XXXXXX")"
KEEP_OUTPUTS="${ACE_RAG_KEEP_RELEASE_CHECK_OUTPUTS:-0}"

#######################################
# EXIT handler: remove the run's output directory unless the caller asked
# to keep it, and report which of the two happened.
# Globals:   KEEP_OUTPUTS (read) - "1" keeps the directory, anything else
#                                  removes it
#            OUTPUT_ROOT  (read) - directory to remove or keep
# Outputs:   one "removed_output_root=..." or "kept_output_root=..." line
#            on stdout
#######################################
cleanup() {
  if [[ "${KEEP_OUTPUTS}" != "1" ]]; then
    # ':?' aborts instead of running rm on an empty path and then claiming a
    # removal; '--' keeps a root that starts with '-' from being parsed as
    # rm options.
    rm -rf -- "${OUTPUT_ROOT:?}"
    echo "removed_output_root=${OUTPUT_ROOT}"
  else
    echo "kept_output_root=${OUTPUT_ROOT}"
  fi
}
# Registered on EXIT so it runs on both normal completion and early failure.
trap cleanup EXIT

# Work from ROOT_DIR so the relative paths used later in this script
# (scripts/..., configs/..., main.py) resolve regardless of the caller's
# working directory.
cd "${ROOT_DIR}"

# Required environment. Each ':?' expansion makes the shell print the given
# hint to stderr and exit when the variable is unset or empty; ':' is a no-op
# command that exists only to trigger the expansion.
: "${ACE_RAG_DATA_DIR:?Set ACE_RAG_DATA_DIR=/path/to/data}"
: "${NVEMBED_MODEL_PATH:?Set NVEMBED_MODEL_PATH=/path/to/nvidia/NV-Embed-v2}"
: "${ACE_RAG_LLM_BASE_URL:?Set ACE_RAG_LLM_BASE_URL=http://localhost:8013/v1}"

# Dataset files (question set plus corpus for each benchmark) that must be
# present under ACE_RAG_DATA_DIR before any run is attempted.
expected_files=()
for rel_path in \
  hotpotqa/hotpotqa.json \
  hotpotqa/hotpotqa_corpus.json \
  2wiki/2wikimultihopqa.json \
  2wiki/2wikimultihopqa_corpus.json; do
  expected_files+=("${ACE_RAG_DATA_DIR}/${rel_path}")
done

# Collect every absent file first so a single run reports all of them.
absent_files=()
for candidate in "${expected_files[@]}"; do
  if [[ ! -f "${candidate}" ]]; then
    absent_files+=("${candidate}")
  fi
done

if (( ${#absent_files[@]} > 0 )); then
  {
    echo "Missing dataset files:"
    printf '  - %s\n' "${absent_files[@]}"
  } >&2
  exit 2
fi

# Preflight helper scripts; under 'set -e' a non-zero exit from either one
# stops the script here.
# NOTE(review): presumably these verify the LLM endpoint and the embedding
# model configuration - confirm against the scripts themselves.
python scripts/check_vllm.py
python scripts/check_nvembed.py --skip-load

#######################################
# Print a one-line metrics summary for an eval JSON file.
# Arguments: $1 - path to the eval JSON file
# Outputs:   "<path>: n=... Recall@5=... EM=... F1=... ctx_tokens=...
#            latency_ms=..." on stdout; a key missing from the JSON object
#            is printed as None
#######################################
summarize_eval() {
  local eval_path="$1"
  # The "Recall@5" label is filled from the "support_title_recall" key.
  local report_py='
import json
import sys

path = sys.argv[1]
with open(path) as handle:
    metrics = json.load(handle)
fields = [path] + [
    metrics.get(key)
    for key in ("n", "support_title_recall", "em", "f1", "context_tokens", "latency_ms")
]
print("{}: n={} Recall@5={} EM={} F1={} ctx_tokens={} latency_ms={}".format(*fields))
'
  python -c "${report_py}" "${eval_path}"
}

#######################################
# Run main.py once for a protocol/dataset pair, then print its eval summary.
# Globals:   RUN_ID, LIMIT, CORPUS_LIMIT, OUTPUT_ROOT (all read)
# Arguments: $1 - protocol label, used only in the run's timestamp tag
#            $2 - config file passed to --config
#            $3 - dataset name passed to --datasets
# Outputs:   whatever main.py prints, followed by the summarize_eval line
#######################################
run_one() {
  local protocol="$1" config="$2" dataset="$3"
  # The tag names this run's eval directory, so it must match the path
  # handed to summarize_eval below.
  local run_tag="${RUN_ID}_${protocol}_${dataset}"
  local -a main_args=(
    --config "${config}"
    --datasets "${dataset}"
    --limit "${LIMIT}"
    --corpus-limit "${CORPUS_LIMIT}"
    --output-root "${OUTPUT_ROOT}"
    --timestamp "${run_tag}"
    --reindex
    --rebuild-embeddings
  )
  python main.py "${main_args[@]}"
  summarize_eval "${OUTPUT_ROOT}/${dataset}/eval/${run_tag}/eval.json"
}

# Echo the effective settings so a log of this run is self-describing.
printf 'output_root=%s\n' "${OUTPUT_ROOT}"
printf 'limit=%s\n' "${LIMIT}"
printf 'corpus_limit=%s\n' "${CORPUS_LIMIT}"
printf 'llm_endpoint=%s\n' "${ACE_RAG_LLM_BASE_URL}"

# Run matrix: each prompt protocol against each dataset, protocol-major
# (common on both datasets first, then native).
for protocol in common native; do
  for dataset in hotpotqa 2wiki; do
    run_one "${protocol}" "configs/protocol_${protocol}_prompt.yaml" "${dataset}"
  done
done
