from __future__ import annotations

import json
from dataclasses import dataclass
from pathlib import Path
from typing import Any

from .config import LEAN_BENCH_ROOT, ROOT


@dataclass(frozen=True, slots=True)
class NLAnswerResult:
    text: str
    source: str
    meta: dict[str, Any]


def _load_json(path: Path) -> Any:
    """
    Read a UTF-8 JSON file and return the decoded payload.

    The file is first parsed as a single JSON document. If that fails and the
    file has a `.jsonl` suffix, it is parsed as JSON Lines instead: every
    non-blank line is decoded on its own and the results are returned as a
    list (an empty file yields an empty list).

    A `.jsonl` file that happens to parse as one document (for example a
    single line holding a list) is returned as that document.

    Raises:
        OSError: if the file cannot be read.
        json.JSONDecodeError: if the content is not valid JSON (and, for
            `.jsonl` files, some line is not valid JSON either).
    """
    text = path.read_text(encoding="utf-8")
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        if path.suffix.lower() != ".jsonl":
            raise

    # JSON Lines fallback. Split on "\n" only: `str.splitlines()` would also
    # break on separators such as U+2028, which may legally appear unescaped
    # inside a JSON string. A trailing "\r" is ignored by the JSON parser.
    return [json.loads(line) for line in text.split("\n") if line.strip()]


def _as_nonempty_str(value: Any) -> str | None:
    if not isinstance(value, str):
        return None
    text = value.strip()
    return text if text else None


def _stem_as_int(path: Path) -> int | None:
    """
    Parse a benchmark task id from filename stem.

    Supports:
    - `42.lean` -> 42
    - `42_new.lean` -> 42
    - `042.lean` -> 42
    """
    stem = path.stem
    try:
        return int(stem)
    except ValueError:
        pass

    # best-effort: take leading digits (e.g. "93_new" -> 93)
    digits = ""
    for ch in stem:
        if ch.isdigit():
            digits += ch
        else:
            break
    try:
        return int(digits) if digits else None
    except ValueError:
        return None


def _entry_id_as_int(entry: dict[str, Any]) -> int | None:
    value = entry.get("id")
    if isinstance(value, int):
        return value
    if isinstance(value, str):
        v = value.strip()
        if v.isdigit():
            try:
                return int(v)
            except ValueError:
                return None
    return None


def _lookup_from_json(raw: Any, *, task_id: int, fields: list[str]) -> str | None:
    """
    Extract `fields` in priority order from a JSON payload.

    Supported payload formats:
    - list[dict] where each dict has an `id` field
    - dict keyed by id (string/int), value is either a string or a dict with candidate fields
    """
    if isinstance(raw, list):
        for entry in raw:
            if not isinstance(entry, dict):
                continue
            if _entry_id_as_int(entry) != task_id:
                continue
            for field in fields:
                text = _as_nonempty_str(entry.get(field))
                if text:
                    return text
        return None

    if isinstance(raw, dict):
        candidates = [raw.get(task_id), raw.get(str(task_id))]
        for obj in candidates:
            if isinstance(obj, str):
                text = _as_nonempty_str(obj)
                if text:
                    return text
            if isinstance(obj, dict):
                for field in fields:
                    text = _as_nonempty_str(obj.get(field))
                    if text:
                        return text
        return None

    return None


def _first_existing(paths: list[Path]) -> Path | None:
    return next((p for p in paths if p.exists()), None)


def lookup_reference_nl_answer_for_bench(lean_file_rel: Path) -> NLAnswerResult | None:
    """
    Best-effort lookup of a reference natural-language proof/answer for a benchmark file.

    The path must contain a `Question_bench` component followed by a suite
    component (e.g. `M2F/Question_bench/FateX/42.lean`), and the filename stem
    must yield a task id. Otherwise `None` is returned.

    Sources are consulted in tiers. Within a tier only the first candidate
    file that exists is read:

    1. Generic per-suite files, for any suite: `answers.json`, `answers.jsonl`,
       `proof.json`, `proofs.json` under `<LeanRoot>/Question_bench/<suite>/`,
       then under `<RepoRoot>/data/Question_bench/<suite>/`. A miss here falls
       through to the suite-specific tiers.
    2. Suite `FateH` / `FATE-H` (case-insensitive, `_` treated as `-`):
       a `FATE-H-proof.json` style file (proof fields); if that gives nothing,
       a `FATE-H.json` style bank file (statement fields).
    3. Suite `FateX` / `FATE-X`: a `FATE-X.json` style bank file (proof fields).

    Returns an `NLAnswerResult` whose `source` is the file that was read and
    whose `meta` holds `suite`, `id` and a fixed per-tier `field` label, or
    `None` when nothing was found.
    """
    segments = lean_file_rel.parts
    if "Question_bench" not in segments:
        return None

    task_id = _stem_as_int(lean_file_rel)
    if task_id is None:
        return None

    # Example: M2F/Question_bench/FateX/42.lean -> suite "FateX"
    suite_pos = segments.index("Question_bench") + 1
    if suite_pos >= len(segments):
        return None
    suite = segments[suite_pos]

    suite_key = suite.upper().replace("_", "-")
    suite_dir = LEAN_BENCH_ROOT / suite
    data_dir = ROOT / "data"
    data_qb_dir = data_dir / "Question_bench"
    # Per-suite data directory (often local, not committed).
    data_suite_dir = data_qb_dir / suite

    def answer_from(candidates: list[Path], fields: list[str], label: str) -> NLAnswerResult | None:
        # Read the first existing candidate and wrap whatever text it holds.
        chosen = _first_existing(candidates)
        if chosen is None:
            return None
        found = _lookup_from_json(_load_json(chosen), task_id=task_id, fields=fields)
        if not found:
            return None
        return NLAnswerResult(
            text=found,
            source=str(chosen),
            meta={"suite": suite, "id": task_id, "field": label},
        )

    def bank_candidates(names: tuple[str, ...]) -> list[Path]:
        # Suite-level bank files: Lean root first, then the suite directory,
        # then the repository data directory.
        return [
            *(LEAN_BENCH_ROOT / name for name in names),
            suite_dir / "bank.json",
            suite_dir / "tasks.json",
            *(data_qb_dir / name for name in names),
        ]

    # Tier 1: generic locations that work for any bank.
    generic_names = ("answers.json", "answers.jsonl", "proof.json", "proofs.json")
    generic_hit = answer_from(
        [base / name for base in (suite_dir, data_suite_dir) for name in generic_names],
        ["informal_proof", "nl_answer", "natural_language_answer", "answer", "informal_statement"],
        "auto",
    )
    if generic_hit is not None:
        return generic_hit

    if suite_key in {"FATEH", "FATE-H"}:
        proof_names = ("FATE-H-proof.json", "FateH-proof.json", "fateh-proof.json")
        # data_suite_dir is the legacy per-suite location.
        proof_bases = (data_dir, data_qb_dir, data_suite_dir, LEAN_BENCH_ROOT)
        proof_hit = answer_from(
            [base / name for base in proof_bases for name in proof_names],
            ["informal_proof", "nl_answer", "answer"],
            "informal_proof",
        )
        if proof_hit is not None:
            return proof_hit

        # No proof text available: fall back to the informal statement.
        return answer_from(
            bank_candidates(("FATE-H.json", "FateH.json", "fateh.json")),
            ["informal_statement", "nl_answer", "answer"],
            "informal_statement",
        )

    if suite_key in {"FATEX", "FATE-X"}:
        return answer_from(
            bank_candidates(("FATE-X.json", "FateX.json", "fatex.json")),
            ["informal_proof", "nl_answer", "natural_language_answer", "answer"],
            "informal_proof",
        )

    return None
