"""Runs the audit pipeline against the synthetic corpus and compares each
item's verdicts to the manifest's ground truth. Used both as a unit test and
as a reproducibility artifact (the table in the paper is generated by this).
"""
from __future__ import annotations

import json
import re
from pathlib import Path

from .checks.axioms import check_axioms
from .checks.tactics import check_tactics
from .kernel.lean_runner import LeanRunner


# Captures the declared name after the `theorem` keyword. The name must END in
# a word character or a prime rather than at a `\b` word boundary: `\b` cannot
# match after a trailing "'", so a name such as "foo'" used to be truncated to
# "foo". Ending on [\w'] still drops a trailing "." (e.g. "foo.{u}" -> "foo").
_THEOREM_NAME_RE = re.compile(r"\btheorem\s+([\w'.]*[\w'])")


# Manifest/report keys of the two checks this module compares.
_CHECK_KEYS = ("check1_axioms", "check2_tactics")

# Verdicts that are treated as interchangeable when comparing to the manifest.
_PASSING_VERDICTS = ("pass", "pass-with-flag")


def _verdict_matches(actual_v: str, expected_v: str) -> bool:
    """Return True when *actual_v* satisfies the manifest's *expected_v*.

    'pass' and 'pass-with-flag' match each other in both directions; any other
    expected verdict must be matched exactly.
    """
    if expected_v in _PASSING_VERDICTS:
        return actual_v in _PASSING_VERDICTS
    return actual_v == expected_v


def _expected_subset(expected: dict) -> dict:
    """Keep only the entries of *expected* that belong to the compared checks."""
    return {k: v for k, v in expected.items() if k in _CHECK_KEYS}


def run_corpus(root: Path, out_path: Path, timeout: int = 60) -> int:
    """Run the checks over every manifest item and report verdict agreement.

    Reads ``manifest.json`` under *root*, runs the axiom check (only for
    sources containing ``#print axioms``) and the tactic check on each listed
    file, and compares the verdicts with the item's ``expected`` entry. A JSON
    report is written to *out_path* and a per-file summary is printed.

    Args:
        root: Directory holding ``manifest.json`` and the files it lists.
        out_path: Destination of the JSON report (``total``, ``matched``,
            ``rows``).
        timeout: Passed to ``LeanRunner`` as ``timeout_s``.

    Returns:
        0 when every counted item matched its expected verdicts, otherwise 1.
        An item whose file is missing is counted as a mismatch, so a corpus
        with absent files can never be reported as fully matched.
    """
    root = Path(root)
    manifest = json.loads((root / "manifest.json").read_text(encoding="utf-8"))
    runner = LeanRunner(timeout_s=timeout)

    rows = []
    matches = 0
    total = 0

    for item in manifest["items"]:
        src_path = root / item["file"]
        if not src_path.exists():
            # Keep the row shape compatible with the summary loop below
            # (it reads 'expected' and 'match') and count the item as failed.
            rows.append({
                "file": item["file"],
                "error": "missing",
                "expected": _expected_subset(item.get("expected", {})),
                "match": False,
            })
            total += 1
            continue

        src = src_path.read_text(encoding="utf-8")
        found = _THEOREM_NAME_RE.search(src)
        # Prefer the name declared in the source; fall back to the manifest.
        thm = found.group(1) if found else item["theorem"]

        # Faithfulness/vacuity files already include `#print axioms <thm>`;
        # alignment files don't (they may not type-check without mathlib).
        if "#print axioms" in src:
            axiom_reps = check_axioms(src_path, [thm], runner=runner)
            ax = axiom_reps[0] if axiom_reps else None
            ax_verdict = ax.verdict if ax else "error"
        else:
            ax = None
            ax_verdict = "n/a"

        # Mirror the axioms path: an empty report list is an 'error' verdict
        # instead of an IndexError that aborts the whole run.
        tac_reps = check_tactics(src, [thm])
        tac = tac_reps[0] if tac_reps else None
        tac_verdict = tac.verdict if tac else "error"

        expected = item["expected"]
        actual = {"check1_axioms": ax_verdict, "check2_tactics": tac_verdict}

        # Checks the manifest leaves out or marks 'n/a' are not compared.
        row_match = all(
            _verdict_matches(actual[k], expected[k])
            for k in _CHECK_KEYS
            if expected.get(k) not in (None, "n/a")
        )

        rows.append({
            "file": item["file"],
            "theorem": thm,
            "expected": _expected_subset(expected),
            "actual": actual,
            "match": row_match,
            "axioms_seen": ax.axioms if ax else None,
            "tactic_findings": [
                {"tactic": f.tactic, "line": f.line}
                for f in (tac.findings if tac else [])
            ],
            "note": item.get("note", ""),
        })
        matches += int(row_match)
        total += 1

    report = {
        "total": total,
        "matched": matches,
        "rows": rows,
    }
    # Explicit encoding, consistent with the UTF-8 reads above.
    Path(out_path).write_text(json.dumps(report, indent=2), encoding="utf-8")

    print(f"Synthetic corpus: matched {matches}/{total} expected verdicts")
    for r in rows:
        flag = "OK " if r.get("match", False) else "BAD"
        if "error" in r:
            outcome = f"error={r['error']}"
        else:
            outcome = f"actual={r.get('actual')}"
        print(f"  {flag}  {r['file']}: expected={r['expected']}  {outcome}")
    return 0 if matches == total else 1
