#!/usr/bin/env bash
# Smoke-test launcher.
# Usage: <this script> [LIMIT] [extra arguments...]
#   LIMIT            value passed to --limit (3 when absent or empty)
#   extra arguments  kept in "$@" after LIMIT has been consumed
# Environment overrides (each falls back to its default when unset or empty):
#   ACE_RAG_DATA_DIR, ACE_RAG_OUTPUT_DIR, ACE_RAG_SMOKE_CORPUS_LIMIT,
#   ACE_RAG_SMOKE_WITH_MODELS
set -o errexit -o nounset -o pipefail

# Take the first positional argument as the limit, defaulting to 3.
if [[ -n "${1:-}" ]]; then
  LIMIT="$1"
else
  LIMIT=3
fi

# Drop the consumed argument, if there was one, so "$@" holds only the rest.
if (( $# > 0 )); then
  shift
fi

WITH_MODELS="${ACE_RAG_SMOKE_WITH_MODELS:-0}"
CORPUS_LIMIT="${ACE_RAG_SMOKE_CORPUS_LIMIT:-200}"
OUTPUT_ROOT="${ACE_RAG_OUTPUT_DIR:-outputs/smoke}"
DATA_DIR="${ACE_RAG_DATA_DIR:-data}"

# Preflight check: confirm every required dataset file exists under DATA_DIR.
# All absent files are reported together on stderr, then the script exits
# with status 2.
stems=(
  popqa/popqa
  hotpotqa/hotpotqa
  2wiki/2wikimultihopqa
  musique/musique
)

# Each stem expands to <stem>.json followed by <stem>_corpus.json.
required=()
for stem in "${stems[@]}"; do
  required+=("${DATA_DIR}/${stem}.json" "${DATA_DIR}/${stem}_corpus.json")
done

missing=()
for candidate in "${required[@]}"; do
  if [[ ! -f "${candidate}" ]]; then
    missing+=("${candidate}")
  fi
done

if (( ${#missing[@]} > 0 )); then
  {
    echo "Missing dataset files under ACE_RAG_DATA_DIR=${DATA_DIR}:"
    printf '  - %s\n' "${missing[@]}"
    echo "Create small smoke files with the documented schema, or set ACE_RAG_DATA_DIR=/path/to/data."
  } >&2
  exit 2
fi

# Choose the model-related flags for main.py.
#   WITH_MODELS == 1 : require NVEMBED_MODEL_PATH, run scripts/check_vllm.py,
#                      and pass no extra flags.
#   anything else    : pass --no-llm --no-embedding.
case "${WITH_MODELS}" in
  1)
    # Abort with the hint below when the variable is unset or empty.
    : "${NVEMBED_MODEL_PATH:?Set NVEMBED_MODEL_PATH=/path/to/nvidia/NV-Embed-v2}"
    python scripts/check_vllm.py
    model_flags=()
    ;;
  *)
    model_flags=(--no-llm --no-embedding)
    ;;
esac

# Run main.py with the smoke-test config over all four datasets, forcing a
# reindex. Any remaining command-line arguments are appended last via "$@".
#
# model_flags is an empty array when ACE_RAG_SMOKE_WITH_MODELS=1. Under
# `set -u`, bash releases before 4.4 (including the 3.2 shipped with macOS)
# treat "${model_flags[@]}" on an empty array as an unbound variable and abort.
# The ${arr[@]+"${arr[@]}"} form expands to nothing in that case and to the
# individually quoted elements otherwise.
python main.py \
  --config configs/smoke_test.yaml \
  --datasets popqa hotpotqa 2wiki musique \
  --limit "${LIMIT}" \
  --corpus-limit "${CORPUS_LIMIT}" \
  --output-root "${OUTPUT_ROOT}" \
  --reindex \
  ${model_flags[@]+"${model_flags[@]}"} \
  "$@"
