from __future__ import annotations

import argparse
import json
import os
import re
import time
from pathlib import Path
from typing import Any

from .codex_client import (
    run_final_agent_a,
    run_final_agent_b,
    run_final_agent_b_book,
    run_final_agent_c,
    run_final_agent_d,
)
from .axiom_guard import (
    build_axiom_cleanup_instructions,
    find_axiom_decls,
    format_axiom_report,
)
from .config import (
    FINAL_FAILURE_LOG,
    FINAL_LOGS_DIR,
    METRICS_DIR,
    ROOT,
    LEAN_ROOT,
    LEAN_BENCH_ROOT,
    LEAN_SRC_CHAPTERS,
    chapter_dir,
    get_enable_nl_hints_default,
    section_file,
    section_part_files,
)
from .final_agent_settings import resolve_final_agents_settings
from .history_store import append_history, load_recent_history
from .book_maintainer import (
    compile_entry_for,
    ensure_book_exists,
    ensure_book_imports,
    ensure_chapter_imports,
    ensure_section_aggregate_exists,
)
from .lean_runner import get_declaration_snippet, get_line_snippet, lake_env_lean
from .nl_hint import fetch_nl_hint
from .nl_answer import lookup_reference_nl_answer_for_bench
from .metrics import finish_run, log_event, start_run
from .protocol import (
    AGENT_A_FEEDBACK_END,
    AGENT_A_FEEDBACK_START,
    AGENT_C_PLAN_END,
    AGENT_C_PLAN_START,
    extract_marked_json,
)
from .infra_pipeline import run_infra_pipeline
from .state import FINAL_PROGRESS_FILE, load_state, save_state
from Util.sorry_locator import find_sorry_locations

# Default tuning knobs. Their consumers are outside this excerpt —
# presumably CLI/argparse defaults for the final-agent loop (TODO confirm).
INFRA_PLAN_CHECK_ROUNDS_DEFAULT = 50
AUTO_INFRA_MIN_LOCAL_ROUNDS_DEFAULT = 2
AUTO_INFRA_CLASSIC_MIN_SCORE_DEFAULT = 2
BAD_STATEMENT_AUTO_RECOVER_ROUNDS_DEFAULT = 2
HELPER_BAD_STATEMENT_AUTO_REPAIR_ROUNDS_DEFAULT = 2

# Lowercase substrings counted by `_assess_missing_theory_classicity` in the
# lower-cased missing-theory feedback text. Each marker found adds one to the
# "classic" score. Matching is plain substring containment, so overlapping
# markers (e.g. "compact" / "compactness" / "iscompact") each count.
_CLASSIC_MISSING_THEORY_MARKERS = (
    "compact",
    "compactness",
    "noetherian",
    "jacobson",
    "nullstellensatz",
    "krull",
    "nakayama",
    "zorn",
    "spectral space",
    "constructible topology",
    "maximal spectrum",
    "prime spectrum",
    "chinese remainder",
    "going up",
    "going down",
    "integral closure",
    "primary decomposition",
    "nilradical",
    "pid",
    "ufd",
    "dedekind",
    "finite generation",
    "finitely generated",
    "iscompact",
    "isopen",
)

# Lowercase substrings whose presence counts *against* the classic score in
# `_assess_missing_theory_classicity` (gate score = classic hits - these hits).
_NONCLASSIC_MISSING_THEORY_MARKERS = (
    "helperforinfra",
    "chosenexponent",
    "hopenu",
    "huitrivial",
    "huiuniv",
    "fromperiodiconly",
    "noncircular",
    "local bridge",
    "currentitem",
)

# Lowercase substrings that `_name_looks_helper_like` searches for inside a
# lower-cased declaration name.
_HELPER_NAME_MARKERS = (
    "helper",
    "aux",
    "bridge",
    "local",
    "intermediate",
)


def _non_sorry_warning_blocks(lean_output: str) -> list[str]:
    """
    Extract multi-line warning blocks that do not mention 'sorry'.
    """
    if not lean_output:
        return []

    lines = lean_output.splitlines()
    blocks: list[str] = []
    i = 0
    while i < len(lines):
        line = lines[i]
        lowered = line.lower()
        if "warning" in lowered:
            block_lines = [line]
            block_lines_lower = [lowered]
            i += 1
            while i < len(lines):
                next_line = lines[i]
                next_lower = next_line.lower()
                if "warning" in next_lower:
                    break
                block_lines.append(next_line)
                block_lines_lower.append(next_lower)
                i += 1
            block_text_lower = "\n".join(block_lines_lower)
            if "sorry" not in block_text_lower:
                blocks.append("\n".join(block_lines).strip())
            continue
        i += 1

    return blocks


def _combine_extra_instructions(*parts: str | None) -> str | None:
    """
    Join multiple extra instruction strings with spacing, skipping blanks/None.
    """
    cleaned = [p.strip() for p in parts if p and p.strip()]
    if not cleaned:
        return None
    return "\n\n".join(cleaned)


def _format_replan_history(history: list[str]) -> str | None:
    """
    Render prior re-plan feedback lines into a single instruction block.
    """
    if not history:
        return None
    return "REPLAN HISTORY (past Agent A feedback):\n" + "\n".join(history)


def _format_agent_b_history(history: list[str]) -> str | None:
    """
    Render prior Agent B edits as an instruction block.
    """
    if not history:
        return None
    return "AGENT B HISTORY (prior attempts):\n" + "\n".join(history)


def _summarize_agent_b_output(stdout: str, stderr: str, *, max_lines: int = 6, max_chars: int = 800) -> str:
    """
    Create a compact summary from Agent B output; fall back to stderr if needed.
    """
    source = stdout.strip() or stderr.strip()
    if not source:
        return "no summary provided"
    lines = [line.strip() for line in source.splitlines() if line.strip()]
    clipped = lines[:max_lines]
    text = " | ".join(clipped)
    if len(text) > max_chars:
        return text[:max_chars] + "... [truncated]"
    return text


def _summarize_plan(plan: dict[str, Any] | None) -> str | None:
    if not plan:
        return None
    status = plan.get("status")
    main = None
    md = plan.get("main_declaration")
    if isinstance(md, dict):
        main = md.get("name")
    lemmas: list[str] = []
    lp = plan.get("lemma_plan")
    if isinstance(lp, list):
        for entry in lp[:8]:
            if isinstance(entry, dict) and isinstance(entry.get("name"), str):
                lemmas.append(entry["name"])
    parts: list[str] = []
    if isinstance(status, str) and status:
        parts.append(f"status={status}")
    if isinstance(main, str) and main:
        parts.append(f"main={main}")
    if lemmas:
        parts.append("lemmas=" + ", ".join(lemmas))
    failure = plan.get("failure_reason")
    if isinstance(failure, str) and failure.strip():
        parts.append("failure_reason=" + failure.strip()[:300])
    return " | ".join(parts) if parts else None


def _summarize_feedback(feedback: dict[str, Any] | None) -> str | None:
    if not feedback:
        return None
    status = feedback.get("status")
    reason = feedback.get("reason")
    pending = feedback.get("pending_goals")
    parts: list[str] = []
    if isinstance(status, str) and status:
        parts.append(f"status={status}")
    if isinstance(reason, str) and reason.strip():
        parts.append("reason=" + reason.strip()[:300])
    if isinstance(pending, list) and pending:
        parts.append(f"pending_goals={len(pending)}")
    return " | ".join(parts) if parts else None


def _validate_bad_statement_report(feedback: dict[str, Any]) -> tuple[bool, str | None]:
    """
    Validate the required fields for a `failed_bad_statement` report from Agent A.

    Required keys (strings; non-empty after strip):
    - counterexample_or_contradiction
    - lean_checkable_conflict
    - missing_assumptions
    """
    required = [
        "counterexample_or_contradiction",
        "lean_checkable_conflict",
        "missing_assumptions",
    ]
    missing: list[str] = []
    for k in required:
        v = feedback.get(k)
        if not isinstance(v, str) or not v.strip():
            missing.append(k)
    if missing:
        return False, f"missing required field(s): {', '.join(missing)}"
    return True, None


def _validate_missing_theory_report(
    feedback: dict[str, Any],
    *,
    require_infra_requests: bool,
) -> tuple[bool, str | None]:
    """
    Validate the required fields for a `failed_missing_theory` report from Agent A.

    Always required:
    - reason (string; non-empty after strip)

    When `require_infra_requests` is True, also require a structured missing-theory signal:
    - missing_theory_signal_version == 1
    - blocker.{kind, lean_error_excerpt}
    - infra_requests: non-empty list of objects with {name_suggestion, env, content, priority, intended_use}
    """
    reason = feedback.get("reason")
    if not isinstance(reason, str) or not reason.strip():
        return False, "missing required field: reason"

    if not require_infra_requests:
        return True, None

    if feedback.get("missing_theory_signal_version") != 1:
        return False, "missing or invalid missing_theory_signal_version (expected 1)"

    blocker = feedback.get("blocker")
    if not isinstance(blocker, dict):
        return False, "missing blocker object"
    if not isinstance(blocker.get("kind"), str) or not blocker.get("kind", "").strip():
        return False, "blocker.kind must be a non-empty string"
    if not isinstance(blocker.get("lean_error_excerpt"), str) or not blocker.get("lean_error_excerpt", "").strip():
        return False, "blocker.lean_error_excerpt must be a non-empty string"

    infra_requests = feedback.get("infra_requests")
    if not isinstance(infra_requests, list) or not infra_requests:
        return False, "infra_requests must be a non-empty list"
    required_fields = ("name_suggestion", "env", "content", "priority", "intended_use")
    for i, req in enumerate(infra_requests):
        if not isinstance(req, dict):
            return False, f"infra_requests[{i}] must be an object"
        for k in required_fields:
            v = req.get(k)
            if not isinstance(v, str) or not v.strip():
                return False, f"infra_requests[{i}].{k} must be a non-empty string"

    return True, None


def _collect_missing_theory_text(feedback: dict[str, Any]) -> str:
    parts: list[str] = []
    reason = feedback.get("reason")
    if isinstance(reason, str) and reason.strip():
        parts.append(reason.strip())

    blocker = feedback.get("blocker")
    if isinstance(blocker, dict):
        for k in ("kind", "lean_error_excerpt", "goal_excerpt"):
            v = blocker.get(k)
            if isinstance(v, str) and v.strip():
                parts.append(v.strip())

    pending_goals = feedback.get("pending_goals")
    if isinstance(pending_goals, list):
        for g in pending_goals:
            if isinstance(g, str) and g.strip():
                parts.append(g.strip())

    requested_lemmas = feedback.get("requested_lemmas")
    if isinstance(requested_lemmas, list):
        for req in requested_lemmas:
            if not isinstance(req, dict):
                continue
            for k in ("name", "statement_hint", "reason"):
                v = req.get(k)
                if isinstance(v, str) and v.strip():
                    parts.append(v.strip())

    infra_requests = feedback.get("infra_requests")
    if isinstance(infra_requests, list):
        for req in infra_requests:
            if not isinstance(req, dict):
                continue
            for k in ("name_suggestion", "content", "intended_use", "target_file_suggestion"):
                v = req.get(k)
                if isinstance(v, str) and v.strip():
                    parts.append(v.strip())

    return "\n".join(parts)


def _assess_missing_theory_classicity(
    feedback: dict[str, Any],
    *,
    min_score: int,
) -> dict[str, Any]:
    """
    Score how "classic" a missing-theory report looks, using marker substrings.

    The feedback's collected text is lower-cased and searched for every entry
    of `_CLASSIC_MISSING_THEORY_MARKERS` and `_NONCLASSIC_MISSING_THEORY_MARKERS`.
    The report is classic when the classic hit count reaches
    `max(1, int(min_score))` and strictly exceeds the non-classic hit count.

    Returns the verdict, both scores, their difference, and up to 8 sorted
    hits of each kind.
    """
    haystack = _collect_missing_theory_text(feedback).lower()

    def _hits(markers: tuple[str, ...]) -> list[str]:
        return sorted({marker for marker in markers if marker in haystack})

    classic = _hits(_CLASSIC_MISSING_THEORY_MARKERS)
    nonclassic = _hits(_NONCLASSIC_MISSING_THEORY_MARKERS)
    n_classic = len(classic)
    n_nonclassic = len(nonclassic)
    margin = n_classic - n_nonclassic
    threshold = max(1, int(min_score))
    return {
        "is_classic": bool(n_classic >= threshold and margin > 0),
        "classic_score": n_classic,
        "nonclassic_score": n_nonclassic,
        "gate_score": margin,
        "classic_hits": classic[:8],
        "nonclassic_hits": nonclassic[:8],
    }


def _parse_core_blocker_from_reason(reason: str) -> dict[str, str]:
    """
    Parse a structured `needs_replan` reason that contains
    `core_blocker_signature=decl:...;goal:...;missing:...;cause:...;same_core_blocker=...`.
    """
    out: dict[str, str] = {}
    if not isinstance(reason, str) or not reason.strip():
        return out

    text = reason.strip()
    m_same = re.search(r"(?:^|;)same_core_blocker\s*=\s*(yes|no)\b", text, flags=re.IGNORECASE)
    if m_same:
        out["same_core_blocker"] = (m_same.group(1) or "").strip().lower()

    m_pivot = re.search(r"(?:^|;)pivot_action\s*=\s*(.+)$", text, flags=re.IGNORECASE)
    if m_pivot:
        out["pivot_action"] = (m_pivot.group(1) or "").strip()

    m_core = re.search(r"core_blocker_signature\s*=\s*(.+?)(?:;same_core_blocker\s*=|$)", text)
    core = (m_core.group(1) or "").strip() if m_core else ""
    if not core:
        return out

    out["core_blocker_signature"] = core
    for key in ("decl", "goal", "missing", "cause"):
        m = re.search(
            rf"{key}:(.*?)(?=;(?:decl|goal|missing|cause):|$)",
            core,
            flags=re.IGNORECASE | re.DOTALL,
        )
        if m:
            out[key] = (m.group(1) or "").strip()
    return out


def _sanitize_name_for_infra(name: str) -> str:
    cleaned = re.sub(r"[^A-Za-z0-9_]", "_", name).strip("_")
    if not cleaned:
        return "MissingBridgeLemma"
    if not re.match(r"^[A-Za-z_]", cleaned):
        cleaned = "M_" + cleaned
    return cleaned


def _promote_same_core_needs_replan_to_missing_theory(
    *,
    feedback: dict[str, Any],
    lean_file_rel: Path,
    target_decl_name: str | None,
) -> dict[str, Any] | None:
    """
    Upgrade `needs_replan + same_core_blocker=yes` into a structured
    `failed_missing_theory` signal so infra can build the missing bridge lemma(s).

    Parameters:
    - feedback: Agent A feedback dict; only promoted when its `status` is
      `needs_replan` and its `reason` parses to `same_core_blocker=yes`.
    - lean_file_rel: path recorded in the generated `notes` (not read here).
    - target_decl_name: fallback declaration name when the parsed reason
      carries no `decl:` field.

    Returns a shallow copy of `feedback` with status, reason, blocker,
    infra_requests and notes rewritten, or None when the feedback does not
    qualify. The input dict itself is not modified.
    """
    if not isinstance(feedback, dict):
        return None
    if str(feedback.get("status", "")).strip().lower() != "needs_replan":
        return None

    reason = str(feedback.get("reason") or "").strip()
    parsed = _parse_core_blocker_from_reason(reason)
    if parsed.get("same_core_blocker") != "yes":
        return None

    # Parsed signature fields, each defaulting to "" when absent.
    decl = parsed.get("decl") or (target_decl_name or "")
    goal = parsed.get("goal") or ""
    missing = parsed.get("missing") or ""
    cause = parsed.get("cause") or ""
    core_sig = parsed.get("core_blocker_signature") or ""

    # Convert each requested lemma (dict entries only) into an infra request,
    # filling blank fields from the parsed signature or fixed fallback text.
    requested = feedback.get("requested_lemmas")
    requested_list = requested if isinstance(requested, list) else []
    infra_requests: list[dict[str, str]] = []
    for idx, req in enumerate(requested_list, start=1):
        if not isinstance(req, dict):
            continue
        raw_name = str(req.get("name") or "").strip()
        statement_hint = str(req.get("statement_hint") or "").strip()
        intended_use = str(req.get("reason") or "").strip()
        if not statement_hint:
            statement_hint = missing or goal or "Missing prerequisite bridge lemma."
        if not intended_use:
            intended_use = "Needed to close repeated same-core blocker without cyclic dependencies."
        if not raw_name:
            # Derive a name from the last dotted component of the declaration.
            base = decl.split(".")[-1] if decl else "coreBlocker"
            raw_name = f"{base}_bridge_{idx}"
        infra_requests.append(
            {
                "name_suggestion": _sanitize_name_for_infra(raw_name),
                "env": "lemma",
                "content": statement_hint,
                "priority": "must",
                "intended_use": intended_use,
            }
        )

    # No usable requested lemma: synthesize a single fallback bridge request.
    if not infra_requests:
        base = decl.split(".")[-1] if decl else "coreBlocker"
        fallback_name = _sanitize_name_for_infra(f"{base}_bridge")
        fallback_content = missing or goal or (
            "Introduce a pre-target, non-circular bridge lemma that resolves the repeated core blocker."
        )
        fallback_use = (
            "Use this bridge lemma to discharge the unresolved frontier in the target declaration."
        )
        infra_requests = [
            {
                "name_suggestion": fallback_name,
                "env": "lemma",
                "content": fallback_content,
                "priority": "must",
                "intended_use": fallback_use,
            }
        ]

    # Synthetic "error excerpt" built from the parsed fields; the first part is
    # always non-empty, so the fallback below is only a defensive guard.
    lean_excerpt_parts = [
        f"same_core_blocker persisted in {decl or 'target declaration'}",
        f"missing={missing}" if missing else "",
        f"cause={cause}" if cause else "",
    ]
    lean_excerpt = " | ".join(p for p in lean_excerpt_parts if p).strip()
    if not lean_excerpt:
        lean_excerpt = reason or "same_core_blocker persisted"

    # Shallow copy so the caller's feedback dict keeps its original status.
    promoted = dict(feedback)
    promoted["status"] = "failed_missing_theory"
    promoted["missing_theory_signal_version"] = 1
    promoted["reason"] = (
        "Promoted from needs_replan: repeated same_core_blocker=yes indicates a persistent "
        "pre-target bridge-theory gap that local replans are not resolving."
    )
    promoted["blocker"] = {
        "kind": "missing_lemma",
        "lean_error_excerpt": lean_excerpt[:1200],
        "goal_excerpt": (goal or reason or "")[:1200],
    }
    promoted["infra_requests"] = infra_requests
    promoted["promoted_from_same_core_needs_replan"] = True
    promoted["same_core_blocker_signature"] = core_sig

    # Provenance notes: auto-upgrade details first, then any pre-existing notes.
    note_parts = [
        "Auto-upgrade: `needs_replan + same_core_blocker=yes` -> `failed_missing_theory`.",
        f"lean_file={lean_file_rel}",
        f"decl={decl or (target_decl_name or '<unknown>')}",
        f"core_blocker_signature={core_sig}" if core_sig else "",
        f"pivot_action={parsed.get('pivot_action', '')}" if parsed.get("pivot_action") else "",
    ]
    existing_notes = feedback.get("notes")
    if isinstance(existing_notes, str) and existing_notes.strip():
        note_parts.append(existing_notes.strip())
    promoted["notes"] = "\n".join(part for part in note_parts if part)
    return promoted


_DECL_NAME_RE = re.compile(
    r"^\s*(theorem|lemma|def|abbrev|example|instance)\s+([A-Za-z0-9_'.]+)\b"
)
_SORRY_TOKEN_RE = re.compile(r"\bsorry\b")


def _extract_decl_kind_and_name_from_snippet(snippet: str) -> tuple[str | None, str | None]:
    """
    Best-effort: extract declaration kind and name from the first non-empty line of a snippet.
    Note: anonymous `instance : ...` has no name; returns (kind, None) in that case.
    """
    if not snippet:
        return None, None
    for line in snippet.splitlines():
        if not line.strip():
            continue
        m = _DECL_NAME_RE.match(line)
        if not m:
            return None, None
        kind = (m.group(1) or "").strip() or None
        name = (m.group(2) or "").strip() or None
        return kind, name
    return None, None


def _name_looks_helper_like(name: str | None) -> bool:
    if not name:
        return False
    low = name.lower()
    if low.startswith("helperfor") or low.startswith("helper_"):
        return True
    return any(marker in low for marker in _HELPER_NAME_MARKERS)


def _classify_bad_statement_target(
    *,
    file_text: str,
    target_line: int,
    target_decl_name: str | None,
    target_decl_kind: str | None,
) -> dict[str, Any]:
    """
    Heuristic classifier: if a bad-statement target is likely an internal helper declaration,
    we should try statement repair + downstream call-site updates before giving up.

    Parameters:
    - file_text: full text of the Lean file containing the target.
    - target_line: 1-based fallback line, used when the declaration cannot be
      located by name.
    - target_decl_name / target_decl_kind: name and keyword of the target.

    Returns a dict with `is_helper_like`, a `reason` tag, the echoed name/kind,
    the resolved `decl_line`, `downstream_ref_count`, `later_decl_count` and
    `name_hint`. With no file text or no name the defaults are returned
    (`reason="insufficient_context"`).
    """
    out: dict[str, Any] = {
        "is_helper_like": False,
        "reason": "insufficient_context",
        "decl_name": target_decl_name,
        "decl_kind": target_decl_kind,
        "decl_line": None,
        "downstream_ref_count": 0,
        "later_decl_count": 0,
        "name_hint": _name_looks_helper_like(target_decl_name),
    }
    if not file_text or not target_decl_name:
        return out

    lines = file_text.splitlines()
    # Prefer the line found by name; fall back to the caller-supplied line.
    decl_line = _find_decl_line_by_name(lines, name=target_decl_name) or (
        target_line if target_line > 0 else None
    )
    if not decl_line:
        out["reason"] = "decl_line_not_found"
        return out
    out["decl_line"] = decl_line

    # `decl_line` is 1-based, so this slice starts on the line *after* the
    # declaration's first line.
    tail_lines = lines[decl_line:]
    later_decl_count = 0
    for ln in tail_lines:
        if _DECL_NAME_RE.match(ln):
            later_decl_count += 1
    out["later_decl_count"] = later_decl_count

    # Whole-word occurrences of the name in the tail; this also counts any
    # occurrence inside the target's own body below its first line.
    tail_text = "\n".join(tail_lines)
    ref_pat = re.compile(rf"\b{re.escape(target_decl_name)}\b")
    downstream_ref_count = len(ref_pat.findall(tail_text))
    out["downstream_ref_count"] = downstream_ref_count

    # Helper-like if the name says so, or if it is a lemma/def/abbrev that is
    # referenced later in a file that has further declarations.
    kind_helper = (target_decl_kind or "").strip().lower() in {"lemma", "def", "abbrev"}
    name_hint = bool(out["name_hint"])
    has_downstream_usage = downstream_ref_count > 0 and later_decl_count > 0
    helper_like = bool(name_hint or (kind_helper and has_downstream_usage))
    out["is_helper_like"] = helper_like
    out["reason"] = (
        "name_hint"
        if name_hint
        else ("kind_and_downstream_usage" if helper_like else "not_helper_like")
    )
    return out


def _has_sorry_token(text: str) -> bool:
    """Return True when `text` contains `sorry` as a standalone token (falsy text counts as empty)."""
    return _SORRY_TOKEN_RE.search(text or "") is not None

def _count_sorry_tokens(text: str) -> int:
    if not text:
        return 0
    return len(_SORRY_TOKEN_RE.findall(text))


def _named_decl_names(text: str) -> set[str]:
    """
    Best-effort: collect names of *named* declarations in a file.
    Note: anonymous `instance : ...` has no name and is ignored.
    """
    names: set[str] = set()
    if not text:
        return names
    for line in text.splitlines():
        m = _DECL_NAME_RE.match(line)
        if not m:
            continue
        name = (m.group(2) or "").strip()
        if name:
            names.add(name)
    return names


def _extract_decl_name_from_snippet(snippet: str) -> str | None:
    """
    Best-effort: return only the declaration name found on the first non-blank
    line of `snippet`, or None when there is none (e.g. anonymous `instance : ...`).
    """
    return _extract_decl_kind_and_name_from_snippet(snippet)[1]


def _plan_decl_names(plan: dict[str, Any] | None) -> list[str]:
    """
    Extract the main declaration name + lemma plan names from an Agent C plan (if present).
    """
    if not isinstance(plan, dict):
        return []
    names: list[str] = []
    main = plan.get("main_declaration")
    if isinstance(main, dict):
        n = main.get("name")
        if isinstance(n, str) and n.strip():
            names.append(n.strip())
    lemma_plan = plan.get("lemma_plan")
    if isinstance(lemma_plan, list):
        for entry in lemma_plan:
            if not isinstance(entry, dict):
                continue
            n = entry.get("name")
            if isinstance(n, str) and n.strip():
                names.append(n.strip())
    # de-dup while preserving order
    seen: set[str] = set()
    out: list[str] = []
    for n in names:
        if n in seen:
            continue
        seen.add(n)
        out.append(n)
    return out


def _new_sorry_bearing_decls(*, before_text: str, after_text: str, abs_file: Path) -> list[str]:
    """
    Name the declarations that exist only in `after_text` and whose snippet
    (fetched via `get_declaration_snippet` from `abs_file`) contains `sorry`.

    Names are returned in sorted order; new names whose line cannot be located
    in `after_text` are skipped.
    """
    added = sorted(_named_decl_names(after_text) - _named_decl_names(before_text))
    if not added:
        return []

    after_lines = after_text.splitlines()
    offenders: list[str] = []
    for decl_name in added:
        line_no = _find_decl_line_by_name(after_lines, name=decl_name)
        if line_no and _has_sorry_token(get_declaration_snippet(abs_file, line=line_no)):
            offenders.append(decl_name)
    return offenders


def _find_decl_line_by_name(lines: list[str], *, name: str) -> int | None:
    """
    Find a 1-based line number where a declaration named `name` starts (best-effort).
    """
    if not name:
        return None
    name_re = re.compile(
        rf"^\s*(theorem|lemma|def|abbrev|example|instance)\s+{re.escape(name)}\b"
    )
    for i, line in enumerate(lines, start=1):
        if name_re.match(line):
            return i
    return None


def _decl_snippet_from_lines(lines: list[str], *, line: int, max_chars: int = 8000) -> str:
    """
    Like `lean_runner.get_declaration_snippet`, but operates on an in-memory `lines` list.
    """
    if not lines:
        return ""
    idx = max(0, min(len(lines) - 1, line - 1))
    start = idx
    while start > 0 and lines[start].strip() != "":
        start -= 1
    if lines[start].strip() == "" and start < idx:
        start += 1
    end = idx
    while end + 1 < len(lines) and lines[end + 1].strip() != "":
        end += 1
    snippet = "\n".join(lines[start : end + 1])
    if len(snippet) > max_chars:
        return snippet[:max_chars]
    return snippet


def _decl_header_signature_from_text(*, file_text: str, name: str) -> str | None:
    """
    Extract a normalized declaration header/signature (up to `:=`) for the named declaration.
    Used to guard against accidental theorem statement changes.
    """
    if not file_text or not name:
        return None
    lines = file_text.splitlines()
    decl_line = _find_decl_line_by_name(lines, name=name)
    if not decl_line:
        return None
    idx = max(0, decl_line - 1)
    header_parts: list[str] = []
    for j in range(idx, len(lines)):
        raw = lines[j]
        # strip inline comments for stable comparison
        clean = raw.split("--", 1)[0].strip()
        if not clean:
            if header_parts:
                break
            continue
        if ":=" in clean:
            clean = clean.split(":=", 1)[0].strip()
            if clean:
                header_parts.append(clean)
            break
        header_parts.append(clean)
    if not header_parts:
        return None
    sig = " ".join(header_parts)
    sig = re.sub(r"\s+", " ", sig).strip()
    return sig or None


def _count_sorry_tokens_in_named_decl_text(*, file_text: str, name: str) -> int | None:
    """
    Count `sorry` tokens in the declaration snippet for `name`, using only `file_text`.
    """
    if not file_text or not name:
        return None
    lines = file_text.splitlines()
    decl_line = _find_decl_line_by_name(lines, name=name)
    if not decl_line:
        return None
    snippet = _decl_snippet_from_lines(lines, line=decl_line)
    return _count_sorry_tokens(snippet)


def _new_proved_named_decls(*, before_text: str, after_text: str) -> list[str]:
    """
    Name the declarations that exist only in `after_text` and whose in-memory
    snippet contains no `sorry` token.

    Names are returned in sorted order; new names whose line cannot be located
    are skipped.
    """
    added = sorted(_named_decl_names(after_text) - _named_decl_names(before_text))
    if not added:
        return []

    after_lines = after_text.splitlines()
    finished: list[str] = []
    for decl_name in added:
        line_no = _find_decl_line_by_name(after_lines, name=decl_name)
        if not line_no:
            continue
        if _has_sorry_token(_decl_snippet_from_lines(after_lines, line=line_no)):
            continue
        finished.append(decl_name)
    return finished


def _truncate_text(text: str, *, max_chars: int) -> str:
    if not text:
        return ""
    if len(text) <= max_chars:
        return text
    return text[:max_chars] + "... [truncated]"


def _scaffold_sorry_budget_for_file(args: argparse.Namespace, *, lean_file_rel: Path) -> int:
    """
    How many new sorry-bearing helper decls (and net-new `sorry` tokens) are allowed as scaffolding.

    Defaults:
    - Question_bench files: 5
    - Other files: 0
    """
    raw = getattr(args, "scaffold_sorry_budget", None)
    if raw is not None:
        try:
            return max(0, int(raw))
        except Exception:
            return 0
    return 5 if _is_bench_file(lean_file_rel) else 0




def _list_section_files(*, chapter: int | None = None) -> list[Path]:
    """
    All chapter section files (relative to LEAN_ROOT), sorted for stable processing.

    With `chapter` given, only that chapter's directory (from `chapter_dir`)
    is searched; otherwise everything under `LEAN_SRC_CHAPTERS` is. Files are
    ordered numerically by chapter, section and part. Aggregate
    `sectionNN.lean` files for which `section_part_files` reports parts are
    left out. Returns [] when the searched directory does not exist.
    """
    if chapter is None and not LEAN_SRC_CHAPTERS.exists():
        return []

    def _chapter_number(path: Path) -> int | None:
        """Number from the first `Chap<digits>` path component, if any."""
        for part in path.parts:
            m_chap = re.match(r"Chap(\d+)", part)
            if m_chap:
                return int(m_chap.group(1))
        return None

    def _section_sort_key(path: Path) -> tuple[int, int, int, str]:
        """
        Sort by chapter, section, part number (so section02_part2 comes after section02_part1).
        """
        chap_num = _chapter_number(path) or 0
        sec_num = 0
        part_num = 0
        # NOTE: in a raw string the dot must be escaped as `\.`; `\\.` would
        # demand a literal backslash and never match a real filename, which
        # silently reduced the ordering to plain string order.
        m = re.match(r"section(\d+)(?:_part(\d+))?\.lean$", path.name)
        if m:
            sec_num = int(m.group(1))
            if m.group(2):
                part_num = int(m.group(2))
        return (chap_num, sec_num, part_num, str(path))

    def _is_aggregate_with_parts(path: Path) -> bool:
        """True for a `sectionNN.lean` aggregate whose section has part files."""
        m = re.match(r"section(\d+)\.lean$", path.name)
        if not m:
            return False
        chap_num = _chapter_number(path)
        if chap_num is None:
            return False
        return bool(section_part_files(chap_num, int(m.group(1))))

    if chapter is not None:
        search_root = chapter_dir(chapter)
        if not search_root.exists():
            return []
    else:
        search_root = LEAN_SRC_CHAPTERS
    files = sorted(search_root.rglob("section*.lean"), key=_section_sort_key)
    return [p.relative_to(LEAN_ROOT) for p in files if not _is_aggregate_with_parts(p)]


def _is_splittable_section_aggregate(path: Path) -> tuple[int, int] | None:
    """
    Return (chapter, section) if `path` looks like `Chapters/ChapXX/sectionYY.lean` (aggregate, not part).
    """
    if not path.parts or len(path.parts) < 3:
        return None
    if path.parts[0] != "Chapters":
        return None
    m_chap = re.match(r"Chap(\d+)$", path.parts[1])
    if not m_chap:
        return None
    m_sec = re.match(r"section(\d+)\.lean$", path.name)
    if not m_sec:
        return None
    chap_num = int(m_chap.group(1))
    sec_num = int(m_sec.group(1))
    return chap_num, sec_num


def _list_section_files_for_chapters(chapters: list[int]) -> list[Path]:
    files: list[Path] = []
    for chapter in chapters:
        files.extend(_list_section_files(chapter=chapter))
    # de-dup while preserving order
    seen: set[Path] = set()
    ordered: list[Path] = []
    for path in files:
        if path in seen:
            continue
        seen.add(path)
        ordered.append(path)
    return ordered


def _parse_chapter_list(raw: str) -> list[int]:
    chapters: list[int] = []
    for part in raw.split(","):
        part = part.strip()
        if not part:
            continue
        chapters.append(int(part))
    return chapters


def _parse_section_spec(raw: str) -> tuple[int, int]:
    """
    Parse a chapter.section spec like "4.18" into (4, 18).
    """
    text = (raw or "").strip()
    m = re.match(r"^(\d+)\s*\.\s*(\d+)$", text)
    if not m:
        raise ValueError(f"Invalid --only-section value: {raw} (expected CHAPTER.SECTION like 4.18).")
    chapter = int(m.group(1))
    section = int(m.group(2))
    if chapter <= 0 or section <= 0:
        raise ValueError(f"Invalid --only-section value: {raw} (chapter/section must be positive).")
    return chapter, section


def _normalize_rel_to_lean_root(path: Path) -> Path:
    """
    Re-express a user-supplied path relative to `LEAN_ROOT` (e.g. `M2F/`).

    Handled forms:
    - an absolute path under `LEAN_ROOT` -> the part below it;
    - any path containing the `LEAN_ROOT` directory name as a component
      (e.g. `M2F/...` or `_worktrees/prover-exp/M2F/...`) -> everything after
      the first such component, or `.` if nothing follows;
    - anything else (including a path already relative to `LEAN_ROOT`) is
      returned unchanged.
    """
    if path.is_absolute():
        try:
            return path.relative_to(LEAN_ROOT)
        except ValueError:
            # Not under LEAN_ROOT; fall through to the component search.
            pass

    root_name = LEAN_ROOT.name
    components = path.parts
    if root_name not in components:
        return path
    remainder = components[components.index(root_name) + 1 :]
    if not remainder:
        return Path(".")
    return Path(*remainder)


def _natural_token_key(s: str) -> list[tuple[int, object]]:
    """
    Tokenize a string into tagged parts for natural sorting.
    """
    parts = re.split(r"(\d+)", s)
    out: list[tuple[int, object]] = []
    for part in parts:
        if not part:
            continue
        if part.isdigit():
            out.append((0, int(part)))
        else:
            out.append((1, part.lower()))
    return out


def _bench_filename_key(path: Path) -> tuple:
    """
    Build the sort key used to order bench files.

    The key is the natural-sort tokenization of the file stem (digit runs compare
    as integers), followed by the lower-cased full path as a final tie-breaker.
    """
    tie_breaker = (2, str(path).lower())
    return (*_natural_token_key(path.stem), tie_breaker)


def _bench_part_files(rel: Path) -> list[Path]:
    """
    For a bench file like `Question_bench/FateH/23.lean`, return sibling part files like:
    `Question_bench/FateH/23_part01.lean`, `.../23_part02.lean`, sorted by part number.

    Only siblings named exactly `<stem>_part<digits>.lean` are returned, which is the
    same `_part<digits>` convention that `_is_bench_part_file` checks. Look-alike names
    such as `23_partial.lean` are ignored, so they cannot make `23.lean` look split.

    Returns an empty list when `rel` is not a `.lean` file under the bench root or when
    its directory does not exist. Returned paths are relative to `LEAN_ROOT`.
    """
    if rel.suffix != ".lean" or not _is_bench_file(rel):
        return []
    abs_path = LEAN_ROOT / rel
    parent = abs_path.parent
    if not parent.is_dir():
        return []

    # Match names against the literal (escaped) stem rather than globbing, so glob
    # metacharacters in the stem (`[`, `*`, `?`) are not interpreted as a pattern.
    part_name = re.compile(rf"{re.escape(abs_path.stem)}_part(\d+)\.lean")
    numbered: list[tuple[int, str, Path]] = []
    for sibling in parent.iterdir():
        found = part_name.fullmatch(sibling.name)
        if found is not None:
            numbered.append((int(found.group(1)), sibling.name, sibling))
    # Order by part number; the name breaks ties such as `_part1` vs `_part01`.
    numbered.sort(key=lambda item: item[:2])

    parts_rel: list[Path] = []
    for _, _, sibling in numbered:
        try:
            parts_rel.append(sibling.relative_to(LEAN_ROOT))
        except ValueError:
            continue
    return parts_rel


def _is_bench_file(rel: Path) -> bool:
    """
    Tell whether `rel` (a path relative to `LEAN_ROOT`) lies under the bench root.

    Returns False as well when `LEAN_BENCH_ROOT` cannot be expressed relative to
    `LEAN_ROOT`.
    """
    try:
        bench_prefix = LEAN_BENCH_ROOT.relative_to(LEAN_ROOT)
    except Exception:
        return False

    try:
        rel.relative_to(bench_prefix)
    except ValueError:
        return False
    else:
        return True


def _is_under_bench_root(rel: Path) -> bool:
    """
    Report whether the `LEAN_ROOT`-relative path `rel` is located under the bench root.

    If `LEAN_BENCH_ROOT` is not itself relative to `LEAN_ROOT`, the answer is False.
    """
    try:
        bench_rel = LEAN_BENCH_ROOT.relative_to(LEAN_ROOT)
    except Exception:
        return False

    inside = True
    try:
        rel.relative_to(bench_rel)
    except ValueError:
        inside = False
    return inside


def _is_bench_part_file(rel: Path) -> bool:
    """Return True for a bench file whose stem ends in `_part<digits>`."""
    if not _is_bench_file(rel):
        return False
    return bool(re.search(r"_part\d+$", rel.stem))


def _list_bench_files(*, order: str = "asc") -> list[Path]:
    """
    List the benchmark `.lean` files under `LEAN_BENCH_ROOT` as paths relative to `LEAN_ROOT`.

    A file that has part files is replaced by its parts, so the same content is not
    processed twice. Only files for which `find_sorry_locations` reports something are
    kept. The result is ordered by `_bench_filename_key`, reversed when `order` is "desc".
    """
    if not LEAN_BENCH_ROOT.exists():
        return []

    discovered = sorted(f for f in LEAN_BENCH_ROOT.rglob("*.lean") if f.is_file())
    candidates = [f.relative_to(LEAN_ROOT) for f in discovered]

    # Part files are always kept; any other file is kept only if it has no parts.
    without_split_originals = [
        c for c in candidates if _is_bench_part_file(c) or not _bench_part_files(c)
    ]
    pending = [c for c in without_split_originals if find_sorry_locations(LEAN_ROOT / c)]
    return sorted(pending, key=_bench_filename_key, reverse=(order == "desc"))


def _resolve_only_dir(path: Path) -> tuple[Path, Path]:
    """
    Resolve an `--only-dir` argument into `(path relative to LEAN_ROOT, absolute path)`.

    Raises ValueError when the directory is not under `LEAN_ROOT`, does not exist,
    or is not a directory.
    """
    normalized = _normalize_rel_to_lean_root(path)
    abs_dir = normalized if normalized.is_absolute() else LEAN_ROOT / normalized
    try:
        rel_dir = abs_dir.relative_to(LEAN_ROOT)
    except ValueError:
        raise ValueError(f"--only-dir must be under {LEAN_ROOT} (got: {path})")

    problem = None
    if not abs_dir.exists():
        problem = f"--only-dir not found: {abs_dir}"
    elif not abs_dir.is_dir():
        problem = f"--only-dir is not a directory: {abs_dir}"
    if problem is not None:
        raise ValueError(problem)
    return rel_dir, abs_dir


def _list_lean_files_under_dir(path: Path, *, only_with_sorry: bool = True) -> list[Path]:
    """
    Recursively list the `.lean` files below `path`, sorted, relative to `LEAN_ROOT`.

    With `only_with_sorry` (the default) only files for which `find_sorry_locations`
    reports something are returned. `path` is validated by `_resolve_only_dir`, whose
    ValueError is propagated.
    """
    abs_dir = _resolve_only_dir(path)[1]
    found = [f.relative_to(LEAN_ROOT) for f in abs_dir.rglob("*.lean") if f.is_file()]
    found.sort()
    if not only_with_sorry:
        return found
    return [f for f in found if find_sorry_locations(LEAN_ROOT / f)]


def _load_env_file(path: Path) -> None:
    """
    Load KEY=VALUE lines from a file into os.environ (does not override existing keys).
    """
    if not path.exists():
        print(f"[env] file not found: {path}")
        return
    for raw in path.read_text(encoding="utf-8").splitlines():
        line = raw.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, value = line.split("=", 1)
        key = key.strip()
        value = value.strip().strip('"').strip("'")
        if key and key not in os.environ:
            os.environ[key] = value


def _terminal_report_math_blocker(
    *,
    source: str,
    lean_file_rel: Path,
    task_id: str | None,
    target: dict[str, Any] | None,
    decl_name: str | None,
    message: str,
    details: str | None = None,
) -> None:
    """
    Emit a prominent terminal hint when a target appears mathematically false or otherwise unprovable.
    """
    loc = ""
    if isinstance(target, dict):
        try:
            line = int(target.get("line", 0) or 0)
            col = int(target.get("col", 0) or 0)
            if line > 0:
                loc = f":{line}" + (f":{col}" if col > 0 else "")
        except Exception:
            loc = ""
    head = f"[FINAL][MATH-BLOCKER][{source}] {lean_file_rel}{loc}"
    if decl_name:
        head += f" ({decl_name})"
    if task_id:
        head += f" task={task_id}"
    print(head)
    print(f"  {message}")
    if details:
        details = str(details).strip()
        if details:
            # Keep this short to avoid flooding the terminal.
            preview = details if len(details) <= 400 else details[:400] + "... [truncated]"
            print(f"  details: {preview}")


def main() -> None:
    parser = argparse.ArgumentParser(description="Run final sorry-elimination orchestrator.")
    parser.add_argument(
        "--env-file",
        type=Path,
        default=None,
        help="Optional env file with KEY=VALUE lines (loaded before other settings).",
    )
    parser.add_argument(
        "--start-file-index",
        type=int,
        default=None,
        help="Start processing from this file index (overrides saved state).",
    )
    parser.add_argument(
        "--only-file",
        type=Path,
        default=None,
        help="Process only this file (path relative to the Lean project root).",
    )
    parser.add_argument(
        "--only-dir",
        type=Path,
        default=None,
        help=(
            "Process all `.lean` files under this directory recursively (default: only files still containing `sorry`). "
            "Path may be relative to Lean root or contain `/M2F/`."
        ),
    )
    parser.add_argument(
        "--only-section",
        type=str,
        default=None,
        help="Process only this section (format: CHAPTER.SECTION, e.g., 4.18 for Chap04/section18).",
    )
    parser.add_argument(
        "--only-chapter",
        type=int,
        default=None,
        help="Process only this chapter number (e.g., 2 for Chap02).",
    )
    parser.add_argument(
        "--only-chapters",
        type=str,
        default=None,
        help="Process only these chapters (comma-separated, e.g., 2,3,5).",
    )
    parser.add_argument(
        "--only-bench",
        action="store_true",
        help="Process all benchmark Lean files under <LeanRoot>/Question_bench/.",
    )
    parser.add_argument(
        "--bench-order",
        type=str,
        choices=["asc", "desc"],
        default="asc",
        help="When using --only-bench, order files by filename number (asc/desc).",
    )
    parser.add_argument(
        "--enable-nl-hints",
        dest="enable_nl_hints",
        action="store_true",
        default=None,
        help="Call external NL hint API before planning each sorry (default via ENABLE_NL_HINTS env).",
    )
    parser.add_argument(
        "--disable-nl-hints",
        dest="enable_nl_hints",
        action="store_false",
        help="Disable NL hint API calls even if ENABLE_NL_HINTS is set.",
    )
    parser.add_argument(
        "--nl-hint-api-key",
        type=str,
        default=None,
        help="Override API key for NL hint API (fallback: AIHUBMIX_API_KEY env).",
    )
    parser.add_argument(
        "--nl-hint-model",
        type=str,
        default=None,
        help="Model id for NL hint API (default: AIHUBMIX_MODEL env or gpt-4o-mini).",
    )
    parser.add_argument(
        "--nl-hint-url",
        type=str,
        default=None,
        help="Endpoint URL for NL hint API (default: AIHUBMIX_API_URL env).",
    )
    parser.add_argument(
        "--nl-hint-context-lines",
        type=int,
        default=40,
        help="How many context lines around the target line to send to the NL hint API.",
    )
    parser.add_argument(
        "--print-agent-a-prompt",
        action="store_true",
        help="Print Agent A prompt (truncated) to the terminal before sending.",
    )
    parser.add_argument(
        "--list-files",
        action="store_true",
        help="List selected files (with indices) and exit.",
    )
    parser.add_argument(
        "--start-from-file",
        type=Path,
        default=None,
        help="Start processing from this file (path relative to the Lean project root).",
    )
    parser.add_argument(
        "--max-files",
        type=int,
        default=None,
        help="Process at most this many files (useful for batching).",
    )
    parser.add_argument(
        "--max-sorries-per-file",
        type=int,
        default=1,
        help="Eliminate at most this many `sorry` per file in one run (default: 1).",
    )
    parser.add_argument(
        "--clean-warnings-with-agent-b",
        action=argparse.BooleanOptionalAction,
        default=False,
        help="Whether to call Final Agent B to clean non-sorry warnings reported by Lean (default: false).",
    )
    parser.add_argument(
        "--write-history",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Persist Agent C/A/B plan & fix history to a JSONL file under log/<project>/final_logs/ (default: true).",
    )
    parser.add_argument(
        "--use-history",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Load prior history for the same Lean file and pass it into Agent prompts (default: true).",
    )
    parser.add_argument(
        "--history-file",
        type=Path,
        default=None,
        help="Optional path for final history JSONL (default: log/<project>/final_logs/final_history.jsonl).",
    )
    parser.add_argument(
        "--progress-file",
        type=Path,
        default=None,
        help="Optional path for final progress JSON (default: log/<project>/final_logs/final_progress.json).",
    )
    parser.add_argument(
        "--job-name",
        type=str,
        default=None,
        help=(
            "Optional job name to isolate progress/history files when running FINAL in parallel. "
            "If set and --history-file/--progress-file are not provided, defaults become: "
            "log/<project>/final_logs/jobs/<job-name>/(final_history.jsonl, final_progress.json)."
        ),
    )
    parser.add_argument(
        "--history-max-records",
        type=int,
        default=5,
        help="How many recent history records to load per Lean file (default: 5).",
    )
    parser.add_argument(
        "--final-agent-config",
        type=Path,
        default=None,
        help="Path to a TOML file controlling per-agent model/reasoning for FINAL stage. "
        "Default: use $FINAL_AGENT_CONFIG_FILE, else repo `agent_configs/final_agents.toml` if present.",
    )
    parser.add_argument(
        "--split-with-agent-d",
        action=argparse.BooleanOptionalAction,
        default=(
            os.getenv("FINAL_SPLIT_WITH_AGENT_D", "").lower() in {"1", "true", "yes", "on"}
            or os.getenv("SPLIT_WITH_AGENT_D", "").lower() in {"1", "true", "yes", "on"}
        ),
        help="If enabled, call final Agent D to split oversized Lean files into part files (default: false).",
    )
    parser.add_argument(
        "--max-lines-per-part",
        type=int,
        default=int(os.getenv("FINAL_MAX_LINES_PER_PART", os.getenv("MAX_LINES_PER_PART", "1000"))),
        help="When splitting with Agent D, target at most this many lines per part file (default: 1000).",
    )
    parser.add_argument(
        "--max-no-progress-retries",
        type=int,
        default=1,
        help="If Agent A makes no progress (no reduction in `sorry` count), retry this target up to N times with stricter instructions (default: 1).",
    )
    parser.add_argument(
        "--scaffold-sorry-budget",
        type=int,
        default=None,
        help=(
            "Bench/prover scaffolding budget: allow introducing up to N new sorry-bearing helper declarations "
            "(and up to N net-new `sorry` tokens) in a single Agent A attempt, without triggering the strict "
            "FINAL new-sorry guardrail. Intended for long-horizon bench proving. "
            "Default: 5 for Question_bench, 0 otherwise."
        ),
    )
    parser.add_argument(
        "--max-b-retries",
        type=int,
        default=3,
        help="Max retries for Agent B when Lean fails after a fix (default: 3).",
    )
    parser.add_argument(
        "--max-c-replans",
        type=int,
        default=1,
        help="Max additional Agent C re-plans when Agent A requests re-planning (default: 1).",
    )
    parser.add_argument(
        "--bad-statement-policy",
        choices=["fail", "skip", "continue"],
        default=None,
        help=(
            "How to handle Agent A reporting `status=failed_bad_statement`. "
            "fail=stop run and keep this file as next; skip=record and continue to next file; "
            "continue=ignore and keep trying (not recommended). "
            "Default: skip."
        ),
    )
    parser.add_argument(
        "--bad-statement-auto-recover-rounds",
        type=int,
        default=BAD_STATEMENT_AUTO_RECOVER_ROUNDS_DEFAULT,
        help=(
            "When Agent A reports `failed_bad_statement`, automatically roll back that attempt and try an alternative "
            "route via Agent C re-plan up to N times before honoring --bad-statement-policy "
            f"(default: {BAD_STATEMENT_AUTO_RECOVER_ROUNDS_DEFAULT})."
        ),
    )
    parser.add_argument(
        "--helper-bad-statement-auto-repair-rounds",
        type=int,
        default=HELPER_BAD_STATEMENT_AUTO_REPAIR_ROUNDS_DEFAULT,
        help=(
            "When `failed_bad_statement` is reported on a likely helper declaration, "
            "auto-roll back and force a statement-repair route (fix helper statement + update downstream call sites) "
            f"up to N times before falling back to normal bad-statement handling (default: {HELPER_BAD_STATEMENT_AUTO_REPAIR_ROUNDS_DEFAULT})."
        ),
    )
    parser.add_argument(
        "--missing-theory-policy",
        choices=["fail", "skip", "continue"],
        default="continue",
        help=(
            "How to handle Agent A reporting `status=failed_missing_theory` (blocked on missing library/theory). "
            "fail=stop run and keep this file as next; skip=record and continue to next file; "
            "continue=ignore and keep trying (default: continue)."
        ),
    )
    parser.add_argument(
        "--auto-infra-sprint",
        action=argparse.BooleanOptionalAction,
        default=None,
        help=(
            "Bench default: when processing `Question_bench/...` via --only-file or --only-dir and "
            "Agent A reports `failed_missing_theory`, automatically launch the infra sub-pipeline "
            "(generate infra_plan.json → statement→proof→final) under `Question_bench/.../infra_<id>/`. "
            "Default: enabled for --only-file/--only-dir under Question_bench, disabled otherwise."
        ),
    )
    parser.add_argument(
        "--auto-infra-min-local-rounds",
        type=int,
        default=AUTO_INFRA_MIN_LOCAL_ROUNDS_DEFAULT,
        help=(
            "Before auto infra launch is allowed, force at least N local failed_missing_theory replan rounds "
            f"(default: {AUTO_INFRA_MIN_LOCAL_ROUNDS_DEFAULT})."
        ),
    )
    parser.add_argument(
        "--auto-infra-classic-only",
        action=argparse.BooleanOptionalAction,
        default=True,
        help=(
            "Only allow auto infra launch when missing-theory signal looks like a classical math theorem gap "
            "(default: true)."
        ),
    )
    parser.add_argument(
        "--auto-infra-classic-min-score",
        type=int,
        default=AUTO_INFRA_CLASSIC_MIN_SCORE_DEFAULT,
        help=(
            "Minimum classic-theory keyword score for auto-infra gate when --auto-infra-classic-only is enabled "
            f"(default: {AUTO_INFRA_CLASSIC_MIN_SCORE_DEFAULT})."
        ),
    )
    parser.add_argument(
        "--infra-public-api-cap",
        type=int,
        default=30,
        help=(
            "Max allowed entries in `infra_<id>/PUBLIC_API.json` during infra pipeline (default: 30)."
        ),
    )
    parser.add_argument(
        "--infra-plan-generate-attempts",
        type=int,
        default=5,
        help="Max plan-agent retries for initial infra plan JSON generation (default: 5).",
    )
    parser.add_argument(
        "--infra-plan-check-rounds",
        type=int,
        default=INFRA_PLAN_CHECK_ROUNDS_DEFAULT,
        help=(
            "Max plan check+auto-fix rounds in infra pipeline "
            f"(default: {INFRA_PLAN_CHECK_ROUNDS_DEFAULT}; set 0 to disable)."
        ),
    )
    parser.add_argument(
        "--infra-statement-max-b-retries",
        type=int,
        default=None,
        help=(
            "Max retries for Agent B in infra statement stage only. "
            "Default: use --max-b-retries."
        ),
    )
    parser.add_argument(
        "--infra-expand-max-rounds",
        type=int,
        default=2,
        help=(
            "When infra final sweep still reports missing theory, run up to N expansion rounds "
            "(writes `infra_plan_expand_<k>.json`; old `infra_plan_expend_<k>.json` is still accepted) "
            "and re-runs statement→proof→final. "
            "Default: 2; set 0 to disable."
        ),
    )
    parser.add_argument(
        "--infra-agent-config",
        type=Path,
        default=None,
        help=(
            "Path to a TOML file controlling infra plan/check agent model and reasoning. "
            "Default: use $INFRA_AGENT_CONFIG_FILE, else repo `agent_configs/infra_agents.toml` if present."
        ),
    )
    parser.add_argument(
        "--infra-exec-mode",
        type=str,
        choices=["legacy", "direct_item"],
        default="direct_item",
        help=(
            "Infra execution mode used by auto-infra-sprint. "
            "direct_item=item-by-item statement->check->proof->promote, "
            "legacy=statement->proof->final."
        ),
    )
    parser.add_argument(
        "--infra-direct-simulate-success",
        action=argparse.BooleanOptionalAction,
        default=False,
        help=(
            "When infra-exec-mode=direct_item, simulate item success and promote scaffold placeholders "
            "(debug/testing only)."
        ),
    )
    parser.add_argument(
        "--infra-direct-chunk-item-limit",
        type=int,
        default=20,
        help="When infra-exec-mode=direct_item, max items per GeneratedPrefix chunk (default: 20).",
    )
    parser.add_argument(
        "--infra-direct-chunk-line-limit",
        type=int,
        default=1800,
        help="When infra-exec-mode=direct_item, soft max lines per GeneratedPrefix chunk (default: 1800).",
    )
    parser.add_argument(
        "--infra-direct-max-items",
        type=int,
        default=None,
        help="When infra-exec-mode=direct_item, process at most this many items in one infra run.",
    )
    parser.add_argument(
        "--infra-direct-start-index",
        type=int,
        default=None,
        help="When infra-exec-mode=direct_item, force the item cursor start index.",
    )
    parser.add_argument(
        "--infra-direct-statement-max-b-retries",
        type=int,
        default=None,
        help=(
            "When infra-exec-mode=direct_item, statement stage max Agent B retries per item. "
            "Default: use --infra-statement-max-b-retries if set, else --max-b-retries."
        ),
    )
    parser.add_argument(
        "--infra-direct-proof-max-b-retries",
        type=int,
        default=None,
        help=(
            "When infra-exec-mode=direct_item, proof stage max Agent B retries per item. "
            "Default: use --max-b-retries."
        ),
    )
    parser.add_argument(
        "--infra-direct-proof-max-c-replans",
        type=int,
        default=None,
        help=(
            "When infra-exec-mode=direct_item, proof stage max Agent C replans per item. "
            "Default: use --max-c-replans."
        ),
    )
    parser.add_argument(
        "--force-stage",
        choices=["final", "infra"],
        default=None,
        help="Force Codex stage/AGENTS for all Final Agent A/B/C calls (default: use final stage).",
    )
    args = parser.parse_args()

    mode_flags = [
        args.only_file is not None,
        args.only_dir is not None,
        args.only_section is not None,
        bool(args.only_bench),
        args.only_chapter is not None,
        args.only_chapters is not None,
    ]
    if sum(1 for x in mode_flags if x) > 1:
        print(
            "Please specify only one of --only-file / --only-dir / --only-section / --only-bench / "
            "--only-chapter / --only-chapters."
        )
        return

    if args.env_file:
        _load_env_file(args.env_file)

    if args.enable_nl_hints is None:
        args.enable_nl_hints = get_enable_nl_hints_default()

    if args.bad_statement_policy is None:
        # Default: skip bad statements so long runs can continue (recorded in failures/history).
        args.bad_statement_policy = "skip"

    # Default infra pipeline behavior: for one-off bench runs via --only-file or --only-dir.
    if args.auto_infra_sprint is None:
        only_file_rel = _normalize_rel_to_lean_root(args.only_file) if args.only_file else None
        only_dir_rel = _normalize_rel_to_lean_root(args.only_dir) if args.only_dir else None
        args.auto_infra_sprint = bool(
            (only_file_rel and _is_bench_file(only_file_rel))
            or (only_dir_rel and _is_under_bench_root(only_dir_rel))
        )
    args.auto_infra_min_local_rounds = max(0, int(args.auto_infra_min_local_rounds))
    args.auto_infra_classic_min_score = max(1, int(args.auto_infra_classic_min_score))
    args.bad_statement_auto_recover_rounds = max(0, int(args.bad_statement_auto_recover_rounds))

    if args.only_file:
        files = [args.only_file]
    elif args.only_dir:
        try:
            files = _list_lean_files_under_dir(args.only_dir, only_with_sorry=True)
        except ValueError as e:
            print(str(e))
            return
    elif args.only_section:
        try:
            chap, sec = _parse_section_spec(args.only_section)
        except ValueError as e:
            print(str(e))
            return
        parts = section_part_files(chap, sec)
        if parts:
            files = [p.relative_to(LEAN_ROOT) for p in parts]
        else:
            target = section_file(chap, sec)
            if not target.exists():
                try:
                    rel = target.relative_to(LEAN_ROOT)
                except Exception:
                    rel = target
                print(f"Section file not found: {rel}")
                return
            files = [target.relative_to(LEAN_ROOT)]
    elif args.only_bench:
        files = _list_bench_files(order=args.bench_order)
    elif args.only_chapters:
        try:
            chapters = _parse_chapter_list(args.only_chapters)
        except ValueError:
            print(f"Invalid --only-chapters value: {args.only_chapters}")
            return
        files = _list_section_files_for_chapters(chapters)
    else:
        files = _list_section_files(chapter=args.only_chapter)
    if not files:
        if args.only_bench:
            try:
                bench_rel = LEAN_BENCH_ROOT.relative_to(LEAN_ROOT)
            except Exception:
                bench_rel = Path("Question_bench")
            print(f"No bench files found under {bench_rel}/; nothing to do.")
        elif args.only_chapters:
            print(f"No section files found for chapters={args.only_chapters}; nothing to do.")
        elif args.only_dir:
            print(f"No `.lean` files with `sorry` found under --only-dir={args.only_dir}.")
        elif args.only_chapter is not None:
            print(f"No section files found for Chap{args.only_chapter:02d}; nothing to do.")
        else:
            print("No section files found; nothing to do.")
        return

    env_start = os.environ.get("FINAL_START_FILE_INDEX")
    start_file_index = args.start_file_index
    if start_file_index is None and env_start:
        try:
            start_file_index = int(env_start)
        except ValueError:
            pass

    # Resolve per-job history/progress destinations (useful for parallel FINAL).
    job_name = (args.job_name or os.environ.get("FINAL_JOB_NAME", "")).strip()
    job_dir: Path | None = None
    if job_name:
        safe_job = re.sub(r"[^A-Za-z0-9._-]+", "_", job_name).strip("_") or "job"
        job_dir = FINAL_LOGS_DIR / "jobs" / safe_job
        job_dir.mkdir(parents=True, exist_ok=True)

    history_file = args.history_file
    progress_file = args.progress_file
    if job_dir is not None:
        if history_file is None:
            history_file = job_dir / "final_history.jsonl"
        if progress_file is None:
            progress_file = job_dir / "final_progress.json"
    if history_file is None:
        history_file = FINAL_LOGS_DIR / "final_history.jsonl"
    if progress_file is None:
        progress_file = FINAL_PROGRESS_FILE

    loaded_state = load_state(progress_file)
    state = {
        "next_file_index": loaded_state.get("next_file_index", 0),
        "next_file": loaded_state.get("next_file"),
    }

    env_cfg = os.environ.get("FINAL_AGENT_CONFIG_FILE")
    default_cfg = ROOT / "agent_configs/final_agents.toml"
    final_agent_cfg_path = args.final_agent_config
    if final_agent_cfg_path is None and env_cfg:
        final_agent_cfg_path = Path(env_cfg)
    if final_agent_cfg_path is None and default_cfg.exists():
        final_agent_cfg_path = default_cfg

    agent_settings = resolve_final_agents_settings(final_agent_cfg_path)
    if final_agent_cfg_path is not None and final_agent_cfg_path.exists() and agent_settings.source_path is None:
        print(
            f"Warning: failed to parse final agent config at {final_agent_cfg_path}; falling back to defaults/env."
        )
    if agent_settings.source_path:
        print(f"[Final agent config] using {agent_settings.source_path}")
    else:
        print("[Final agent config] using defaults/env (no config file)")
    print(
        "[Final agent models] "
        f"A={agent_settings.agent_a.model}/{agent_settings.agent_a.reasoning_effort} "
        f"(strict_retry={agent_settings.agent_a_strict_retry.model or agent_settings.agent_a.model}/"
        f"{agent_settings.agent_a_strict_retry.reasoning_effort or agent_settings.agent_a.reasoning_effort}), "
        f"B={agent_settings.agent_b.model}/{agent_settings.agent_b.reasoning_effort}, "
        f"C={agent_settings.agent_c.model}/{agent_settings.agent_c.reasoning_effort}"
        + (
            f", D={agent_settings.agent_d.model}/{agent_settings.agent_d.reasoning_effort}"
            if args.split_with_agent_d
            else ""
        )
    )

    run_id = start_run(
        "final",
        stage=3,
        name_tag=("bench" if args.only_bench else "sections"),
        data_file=None,
        extra={
            "only_bench": bool(args.only_bench),
            "only_file": str(args.only_file) if args.only_file else None,
            "only_dir": str(args.only_dir) if args.only_dir else None,
            "only_section": args.only_section,
            "only_chapter": args.only_chapter,
            "only_chapters": args.only_chapters,
            "split_with_agent_d": bool(args.split_with_agent_d),
            "max_lines_per_part": args.max_lines_per_part,
            "final_agent_config_path": str(agent_settings.source_path) if agent_settings.source_path else None,
            "final_agent_a_model": agent_settings.agent_a.model,
            "final_agent_a_reasoning_effort": agent_settings.agent_a.reasoning_effort,
            "final_agent_b_model": agent_settings.agent_b.model,
            "final_agent_b_reasoning_effort": agent_settings.agent_b.reasoning_effort,
            "final_agent_c_model": agent_settings.agent_c.model,
            "final_agent_c_reasoning_effort": agent_settings.agent_c.reasoning_effort,
            "final_agent_d_model": agent_settings.agent_d.model,
            "final_agent_d_reasoning_effort": agent_settings.agent_d.reasoning_effort,
            "job_name": job_name or None,
            "history_file": str(history_file),
            "progress_file": str(progress_file),
        },
    )
    run_start = time.monotonic()
    processed_files = 0
    total_tokens_used = 0
    total_files_failed = 0

    def _split_oversized_files() -> None:
        nonlocal total_tokens_used, files
        if not args.split_with_agent_d:
            return
        if args.max_lines_per_part <= 0:
            return

        # Determine candidate aggregate files to split.
        candidates: list[Path] = []
        if args.only_file:
            rel = _normalize_rel_to_lean_root(args.only_file)
            if _is_splittable_section_aggregate(rel):
                candidates = [rel]
            elif _is_bench_file(rel) and not _is_bench_part_file(rel):
                candidates = [rel]
        elif args.only_dir:
            for rel in files:
                if _is_splittable_section_aggregate(rel):
                    candidates.append(rel)
                elif _is_bench_file(rel) and not _is_bench_part_file(rel):
                    candidates.append(rel)
        elif args.only_section:
            try:
                chap, sec = _parse_section_spec(args.only_section)
            except ValueError:
                return
            rel = section_file(chap, sec).relative_to(LEAN_ROOT)
            if _is_splittable_section_aggregate(rel):
                candidates = [rel]
        elif args.only_bench:
            candidates = [rel for rel in files if _is_bench_file(rel) and not _is_bench_part_file(rel)]
        else:
            for rel in files:
                if _is_splittable_section_aggregate(rel):
                    candidates.append(rel)
            # de-dup preserving order
            seen: set[Path] = set()
            candidates = [p for p in candidates if not (p in seen or seen.add(p))]

        for rel in candidates:
            chap_sec = _is_splittable_section_aggregate(rel)
            abs_file = LEAN_ROOT / rel
            try:
                line_count = len(abs_file.read_text(encoding="utf-8").splitlines())
            except FileNotFoundError:
                continue
            if line_count <= args.max_lines_per_part:
                continue

            if chap_sec is not None:
                chap, sec = chap_sec
                if section_part_files(chap, sec):
                    continue
                task_id = f"split_{chap}_{sec}"
                maintainability = (
                    "Maintainability requirement:\n"
                    "- Prefer splitting at existing comment/label boundaries.\n"
                    "- Keep declarations in original order; do not rewrite unrelated code.\n"
                    "- Ensure each `sectionYY_partZZ.lean` remains standalone and compilable.\n"
                    "- Update `sectionYY.lean` to reference parts in a minimal, maintainable way.\n"
                )
            elif _is_bench_file(rel):
                if _bench_part_files(rel):
                    continue
                task_id = f"split_bench_{rel.parent.name}_{rel.stem}"
                maintainability = (
                    "Maintainability requirement:\n"
                    "- Prefer splitting at existing comment/label boundaries.\n"
                    "- Keep declarations in original order; do not rewrite unrelated code.\n"
                    "- Create sibling part files next to the original:\n"
                    f"  - {rel.stem}_part01.lean, {rel.stem}_part02.lean, ...\n"
                    "- Keep each part file standalone and compilable.\n"
                    "- Update the original file to reference parts in a minimal, maintainable way (e.g. import parts).\n"
                )
            else:
                continue

            print(f"[Final Agent D] splitting oversized file ({line_count} lines): {rel}")
            d_start = time.monotonic()
            d_res = run_final_agent_d(
                lean_file=abs_file,
                max_lines=args.max_lines_per_part,
                task_id=task_id,
                model=agent_settings.agent_d.model,
                reasoning_effort=agent_settings.agent_d.reasoning_effort,
                extra_instructions=maintainability,
            )
            d_seconds = time.monotonic() - d_start
            total_tokens_used += d_res.tokens_used or 0
            log_event(
                run_id,
                "agent_d_result",
                {
                    "lean_file": str(rel),
                    "chapter": chap_sec[0] if chap_sec else None,
                    "section": chap_sec[1] if chap_sec else None,
                    "is_bench": bool(_is_bench_file(rel)),
                    "code": d_res.code,
                    "seconds": d_seconds,
                    "line_count": line_count,
                    "max_lines": args.max_lines_per_part,
                    "tokens_used": d_res.tokens_used,
                    "log_path": str(d_res.log_path) if d_res.log_path else None,
                },
            )

            if d_res.code != 0:
                print(f"[Final Agent D] split failed for {rel}: {d_res.stderr}")
                FINAL_FAILURE_LOG.parent.mkdir(parents=True, exist_ok=True)
                with FINAL_FAILURE_LOG.open("a", encoding="utf-8") as f:
                    f.write(
                        f"file={rel}\n"
                        f"task_id={task_id}\n"
                        "note=final_agent_d_split_failed\n"
                        f"lean_code={d_res.code}\n"
                        + _truncate_text(d_res.stderr or "", max_chars=12000)
                        + "\n\n"
                    )
                if args.write_history:
                    append_history(
                        history_file,
                        pipeline="final",
                        run_id=run_id,
                        lean_file=str(rel),
                        task_id=task_id,
                        kind="agent_d_split_failure",
                        summary="final_agent_d_split_failed",
                        log_path=str(d_res.log_path) if d_res.log_path else None,
                        payload={"stderr": d_res.stderr, "code": d_res.code},
                    )
                continue

            # Validate aggregate file after split (and optionally fix with Agent B).
            base_rel = section_file(chap_sec[0], chap_sec[1]).relative_to(LEAN_ROOT) if chap_sec else rel
            code, out, err = lake_env_lean(base_rel)
            lean_output = "\n".join(part for part in (err, out) if part)
            non_sorry_blocks = _non_sorry_warning_blocks(lean_output)
            if code != 0 or (non_sorry_blocks and args.clean_warnings_with_agent_b):
                print("[Final Agent D] aggregate file needs cleanup after split; calling Final Agent B.")
                if code == 0 and non_sorry_blocks:
                    warnings_text = "\n\n".join(non_sorry_blocks)
                    err_for_b = "Lean produced the following non-sorry warnings. Please remove them:\n\n" + warnings_text
                else:
                    err_for_b = lean_output
                b_start = time.monotonic()
                b_res = run_final_agent_b(
                    lean_file=base_rel,
                    error_log=err_for_b,
                    task_id=f"{task_id}_post",
                    model=agent_settings.agent_b.model,
                    reasoning_effort=agent_settings.agent_b.reasoning_effort,
                )
                b_seconds = time.monotonic() - b_start
                total_tokens_used += b_res.tokens_used or 0
                log_event(
                    run_id,
                    "agent_b_post_split_result",
                    {
                        "lean_file": str(base_rel),
                        "task_id": f"{task_id}_post",
                        "code": b_res.code,
                        "seconds": b_seconds,
                        "tokens_used": b_res.tokens_used,
                        "log_path": str(b_res.log_path) if b_res.log_path else None,
                    },
                )

        # Refresh file list after splitting so parts are processed.
        if args.only_file:
            rel = _normalize_rel_to_lean_root(args.only_file)
            chap_sec = _is_splittable_section_aggregate(rel)
            if chap_sec and section_part_files(*chap_sec):
                chap, sec = chap_sec
                parts = [p.relative_to(LEAN_ROOT) for p in section_part_files(chap, sec)]
                files = parts or [rel]
            elif _is_bench_file(rel) and _bench_part_files(rel):
                files = _bench_part_files(rel)
        elif args.only_dir:
            try:
                files = _list_lean_files_under_dir(args.only_dir, only_with_sorry=True)
            except ValueError:
                return
        elif args.only_section:
            try:
                chap, sec = _parse_section_spec(args.only_section)
            except ValueError:
                return
            parts = section_part_files(chap, sec)
            if parts:
                files = [p.relative_to(LEAN_ROOT) for p in parts]
            else:
                files = [section_file(chap, sec).relative_to(LEAN_ROOT)]
        elif args.only_bench:
            files = _list_bench_files(order=args.bench_order)
        elif args.only_chapter is not None:
            files = _list_section_files(chapter=args.only_chapter)
        elif args.only_chapters:
            files = _list_section_files_for_chapters(_parse_chapter_list(args.only_chapters))
        else:
            files = _list_section_files()

    _split_oversized_files()

    # --list-files mode: print every selected file with its index, then exit
    # without processing anything.
    if args.list_files:
        for i, p in enumerate(files):
            print(f"[{i}] {p}")
        return

    # Resolve --start-from-file to an index into `files`.
    if args.start_from_file is not None:
        start_path = _normalize_rel_to_lean_root(args.start_from_file)
        try:
            start_file_index = files.index(start_path)
        except ValueError:
            # The requested path is not in `files`. If it is a section aggregate
            # or bench file that has part files, start from its first part
            # instead; otherwise report the problem and stop.
            chap_sec = _is_splittable_section_aggregate(start_path)
            if chap_sec and section_part_files(*chap_sec):
                chap, sec = chap_sec
                first_part = section_part_files(chap, sec)[0].relative_to(LEAN_ROOT)
                try:
                    start_file_index = files.index(first_part)
                except ValueError:
                    print(f"--start-from-file not found in selected files: {start_path}")
                    print("Tip: run with --list-files to see the exact paths/indices.")
                    return
            elif _is_bench_file(start_path) and _bench_part_files(start_path):
                first_part = _bench_part_files(start_path)[0]
                try:
                    start_file_index = files.index(first_part)
                except ValueError:
                    print(f"--start-from-file not found in selected files: {start_path}")
                    print("Tip: run with --list-files to see the exact paths/indices.")
                    return
            else:
                print(f"--start-from-file not found in selected files: {start_path}")
                print("Tip: run with --list-files to see the exact paths/indices.")
                return

    # Resume position from the loaded state. When no explicit start index was
    # resolved, the stored `next_file` path takes precedence over the stored
    # numeric `next_file_index`.
    # NOTE(review): this first read is not coerced with `int(... or 0)` like the
    # fallback branches below; if the state can hold None here, the `min(...)`
    # clamp further down would raise TypeError — confirm against save_state.
    default_start = state.get("next_file_index", 0)
    next_file = state.get("next_file")
    if start_file_index is None and isinstance(next_file, str) and next_file.strip():
        try:
            default_start = files.index(Path(next_file))
        except ValueError:
            # Same part-file fallback as for --start-from-file, except that a
            # miss silently falls back to the stored numeric index.
            chap_sec = _is_splittable_section_aggregate(Path(next_file))
            if chap_sec and section_part_files(*chap_sec):
                chap, sec = chap_sec
                first_part = section_part_files(chap, sec)[0].relative_to(LEAN_ROOT)
                try:
                    default_start = files.index(first_part)
                except ValueError:
                    default_start = int(state.get("next_file_index", 0) or 0)
            elif _is_bench_file(Path(next_file)) and _bench_part_files(Path(next_file)):
                first_part = _bench_part_files(Path(next_file))[0]
                try:
                    default_start = files.index(first_part)
                except ValueError:
                    default_start = int(state.get("next_file_index", 0) or 0)
            else:
                default_start = int(state.get("next_file_index", 0) or 0)

    # An explicitly resolved start index wins over the resumed one; the result
    # is clamped into [0, len(files)].
    effective_start = start_file_index if start_file_index is not None else default_start
    effective_start = max(0, min(effective_start, len(files)))
    # Record how the starting point was derived for this run.
    log_event(
        run_id,
        "run_config",
        {
            "file_count": len(files),
            "effective_start_file_index": effective_start,
            "start_file_index_arg": start_file_index,
            "loaded_state_next_file_index": state.get("next_file_index"),
            "loaded_state_next_file": state.get("next_file"),
        },
    )

    def record_failure(
        *,
        lean_file_rel: Path,
        task_id: str | None,
        note: str,
        target: dict[str, Any] | None = None,
        plan: dict[str, Any] | None = None,
        feedback: dict[str, Any] | None = None,
        agent_b_history: list[str] | None = None,
        replan_history: list[str] | None = None,
        lean_code: int | None = None,
        lean_output: str | None = None,
    ) -> None:
        """
        Persist a compact failure record (human log + optional JSONL history).

        Intended for cases where: replans are exhausted and/or Agent B completed, yet
        the target decl / planned lemmas still contain `sorry` or Lean still errors.

        Args:
            lean_file_rel: Path of the failing file, joined onto ``LEAN_ROOT``.
            task_id: Task identifier written verbatim into both records.
            note: Short failure tag; written as ``note=...`` in the log and used
                as the history summary. The value ``"failed_bad_statement"``
                additionally dumps selected string fields of ``feedback``.
            target: Optional target description. When its ``"line"`` entry is an
                int, the line and enclosing declaration snippets are captured.
            plan: Optional plan; its declaration names are reported next to the
                target declaration.
            feedback: Optional feedback dict, summarized into the history payload.
            agent_b_history: Optional entries copied into the history payload.
            replan_history: Optional entries copied into the history payload.
            lean_code: Lean exit code to record.
            lean_output: Lean output to record.

        If either ``lean_code`` or ``lean_output`` is ``None``, the compile entry
        for the file is re-run with ``lake_env_lean`` and *both* values are
        replaced by the fresh result.

        Side effects: appends to ``FINAL_FAILURE_LOG`` (creating its parent
        directory if needed) and, when ``args.write_history`` is set, appends a
        ``final_failure`` record via ``append_history``.
        """
        abs_file = LEAN_ROOT / lean_file_rel
        # Probe the filesystem once so every derived field below describes the
        # same snapshot, rather than re-checking existence before each helper
        # call (and once per declaration in the report loop).
        file_exists = abs_file.exists()
        lines = abs_file.read_text(encoding="utf-8").splitlines() if file_exists else []
        current_sorries = find_sorry_locations(abs_file) if file_exists else []
        target_line_snippet = None
        target_decl_snippet = None
        target_decl_name = None
        target_decl_has_sorry = None

        # Collect "related" decl names: target decl + plan decls.
        decl_names: list[str] = []
        if target and isinstance(target.get("line"), int):
            line0 = int(target["line"])
            if file_exists:
                target_line_snippet = get_line_snippet(abs_file, line=line0)
                target_decl_snippet = get_declaration_snippet(abs_file, line=line0)
            target_decl_has_sorry = _has_sorry_token(target_decl_snippet or "")
            target_decl_name = _extract_decl_name_from_snippet(target_decl_snippet or "")
            if target_decl_name:
                decl_names.append(target_decl_name)
        decl_names.extend(_plan_decl_names(plan))
        # De-dup preserving first-seen order (dicts keep insertion order).
        decl_names = list(dict.fromkeys(decl_names))

        decl_reports: list[dict[str, Any]] = []
        for decl_name in decl_names:
            line_no = _find_decl_line_by_name(lines, name=decl_name)
            # A falsy line number is treated as "declaration not found", so the
            # report then carries an empty snippet.
            snippet = (
                get_declaration_snippet(abs_file, line=line_no)
                if (file_exists and line_no)
                else ""
            )
            decl_reports.append(
                {
                    "name": decl_name,
                    "line": line_no,
                    "has_sorry": _has_sorry_token(snippet),
                    "snippet": _truncate_text(snippet, max_chars=3000),
                }
            )

        compile_rel = compile_entry_for(ensure_section_aggregate_exists(lean_file_rel))
        if lean_code is None or lean_output is None:
            # Caller did not supply a complete Lean result: recompile and use
            # the fresh exit code and combined stderr/stdout.
            code, out, err = lake_env_lean(compile_rel)
            lean_code = code
            lean_output = "\n".join(part for part in (err, out) if part)

        # Human-readable record: key=value header lines followed by the
        # (truncated) Lean output and a blank-line separator.
        FINAL_FAILURE_LOG.parent.mkdir(parents=True, exist_ok=True)
        with FINAL_FAILURE_LOG.open("a", encoding="utf-8") as f:
            f.write(
                f"file={lean_file_rel}\n"
                f"compiled_file={compile_rel}\n"
                f"task_id={task_id}\n"
                f"note={note}\n"
                f"lean_code={lean_code}\n"
                f"sorry_count={len(current_sorries)}\n"
            )
            if target:
                f.write(f"target={json.dumps(target, ensure_ascii=False)}\n")
            if note == "failed_bad_statement" and isinstance(feedback, dict):
                for key in (
                    "counterexample_or_contradiction",
                    "lean_checkable_conflict",
                    "missing_assumptions",
                ):
                    value = feedback.get(key)
                    if isinstance(value, str) and value.strip():
                        f.write(f"{key}={value.strip()}\n")
            if decl_reports:
                bad = [d["name"] for d in decl_reports if d.get("has_sorry")]
                f.write(f"decls_with_sorry={bad}\n")
            f.write(_truncate_text(lean_output or "", max_chars=12000) + "\n\n")

        # Structured record; only written when history persistence is enabled.
        if args.write_history:
            append_history(
                history_file,
                pipeline="final",
                run_id=run_id,
                lean_file=str(lean_file_rel),
                task_id=task_id,
                kind="final_failure",
                summary=note,
                payload={
                    "note": note,
                    "lean_code": lean_code,
                    "sorry_count": len(current_sorries),
                    "target": target,
                    "target_decl_name": target_decl_name,
                    "target_decl_has_sorry": target_decl_has_sorry,
                    "target_line_snippet": _truncate_text(target_line_snippet or "", max_chars=3000),
                    "target_decl_snippet": _truncate_text(target_decl_snippet or "", max_chars=6000),
                    "plan_summary": _summarize_plan(plan),
                    "feedback_summary": _summarize_feedback(feedback),
                    "decl_reports": decl_reports,
                    "agent_b_history": list(agent_b_history or []),
                    "replan_history": list(replan_history or []),
                    "lean_output": lean_output or "",
                },
            )

    for file_index, lean_file_rel in enumerate(files):
        lean_file_rel = _normalize_rel_to_lean_root(lean_file_rel)
        if file_index < effective_start:
            continue
        if args.max_files is not None and processed_files >= args.max_files:
            break

        print(f"=== final file[{file_index}]={lean_file_rel} ===")
        file_start = time.monotonic()
        log_event(
            run_id,
            "file_start",
            {"file_index": file_index, "lean_file": str(lean_file_rel)},
        )

        sorries_eliminated = 0
        abandon_file = False
        abandon_reason: str | None = None

        abs_file = LEAN_ROOT / lean_file_rel
        compile_file_rel = compile_entry_for(ensure_section_aggregate_exists(lean_file_rel))
        agent_b_history: list[str] = []
        scaffold_sorry_budget = _scaffold_sorry_budget_for_file(args, lean_file_rel=lean_file_rel)
        file_text_scaffold_baseline: str | None = None
        infra_public_api_cap = max(0, int(getattr(args, "infra_public_api_cap", 30) or 30))
        infra_plan_check_rounds = max(
            0,
            int(
                getattr(
                    args,
                    "infra_plan_check_rounds",
                    INFRA_PLAN_CHECK_ROUNDS_DEFAULT,
                )
                or INFRA_PLAN_CHECK_ROUNDS_DEFAULT
            ),
        )
        auto_infra_sprint = bool(getattr(args, "auto_infra_sprint", False)) and bool(
            (args.only_file or args.only_dir)
            and _is_bench_file(lean_file_rel)
        )
        nl_answer = lookup_reference_nl_answer_for_bench(lean_file_rel)
        nl_answer_text = nl_answer.text if nl_answer else None
        if nl_answer_text:
            preview = (
                nl_answer_text
                if len(nl_answer_text) < 800
                else nl_answer_text[:800] + "... [truncated]"
            )
            print(f"[NL answer] provided from {nl_answer.source}:\n" + preview)

        persisted_replan_history: list[str] = []
        persisted_agent_b_history: list[str] = []
        if args.use_history and args.history_max_records > 0:
            history_exists = history_file.exists()
            recs = load_recent_history(
                history_file,
                lean_file=str(lean_file_rel),
                max_records=args.history_max_records,
            )
            loaded_replan = 0
            loaded_b = 0
            for rec in recs:
                kind = rec.get("kind")
                summary = rec.get("summary")
                if not isinstance(kind, str):
                    continue
                if not isinstance(summary, str) or not summary.strip():
                    continue
                if kind in {"agent_c_plan", "agent_a_feedback"}:
                    persisted_replan_history.append(f"- (prev) {kind}: {summary}")
                    loaded_replan += 1
                if kind in {"agent_b_fix", "agent_b_warning_cleanup"}:
                    persisted_agent_b_history.append(f"- (prev) {kind}: {summary}")
                    loaded_b += 1
            print(
                f"[History] loaded {len(recs)} record(s) from {history_file}"
                + ("" if history_exists else " (file not found; starting fresh)")
                + f"; replan={loaded_replan}, agent_b={loaded_b}"
            )
        elif not args.use_history:
            print("[History] disabled (--no-use-history)")

        while True:
            codex_stage = (args.force_stage or "final").strip().lower()
            code, out, err = lake_env_lean(compile_file_rel)
            lean_output = "\n".join(part for part in (err, out) if part)

            if code != 0:
                task_id = f"{file_index}_compile"
                print("Lean failed; calling final Agent B...")
                b_success = False
                for attempt in range(1, args.max_b_retries + 1):
                    print(f"[Final Agent B attempt {attempt}/{args.max_b_retries}]")
                    history_text = _format_agent_b_history(persisted_agent_b_history + agent_b_history)
                    file_text_before_b = abs_file.read_text(encoding="utf-8") if abs_file.exists() else ""
                    before_sorry_tokens_b = _count_sorry_tokens(file_text_before_b)
                    b_start = time.monotonic()
                    b_res = run_final_agent_b(
                        lean_file=lean_file_rel,
                        error_log=lean_output,
                        task_id=task_id,
                        history=agent_b_history,
                        extra_instructions=history_text,
                        nl_answer=nl_answer_text,
                        model=agent_settings.agent_b.model,
                        reasoning_effort=agent_settings.agent_b.reasoning_effort,
                        stage=codex_stage,
                    )
                    b_seconds = time.monotonic() - b_start
                    total_tokens_used += b_res.tokens_used or 0
                    log_event(
                        run_id,
                        "agent_b_result",
                        {
                            "file_index": file_index,
                            "lean_file": str(lean_file_rel),
                            "task_id": task_id,
                            "attempt": attempt,
                            "code": b_res.code,
                            "seconds": b_seconds,
                            "tokens_used": b_res.tokens_used,
                            "log_path": str(b_res.log_path) if b_res.log_path else None,
                            "model": agent_settings.agent_b.model,
                            "reasoning_effort": agent_settings.agent_b.reasoning_effort,
                        },
                    )
                    summary = _summarize_agent_b_output(b_res.stdout, b_res.stderr)
                    entry = f"- attempt {attempt}: summary={summary}"
                    agent_b_history.append(entry)
                    if args.write_history:
                        append_history(
                            history_file,
                            pipeline="final",
                            run_id=run_id,
                            lean_file=str(lean_file_rel),
                            task_id=task_id,
                            kind="agent_b_fix",
                            summary=summary,
                            log_path=str(b_res.log_path) if b_res.log_path else None,
                            payload={
                                "attempt": attempt,
                                "code": b_res.code,
                                "error_log": lean_output,
                            },
                        )
                    print(f"[Final Agent B summary] {entry}")
                    if b_res.code != 0:
                        print(f"Final Agent B failed with code {b_res.code}.\n{b_res.stderr}")
                        break

                    file_text_after_b = abs_file.read_text(encoding="utf-8") if abs_file.exists() else ""
                    after_sorry_tokens_b = _count_sorry_tokens(file_text_after_b)
                    new_sorry_decls_b = _new_sorry_bearing_decls(
                        before_text=file_text_before_b,
                        after_text=file_text_after_b,
                        abs_file=abs_file,
                    )
                    if after_sorry_tokens_b > before_sorry_tokens_b or new_sorry_decls_b:
                        abs_file.write_text(file_text_before_b, encoding="utf-8")
                        log_event(
                            run_id,
                            "final_agent_b_new_sorry_violation",
                            {
                                "file_index": file_index,
                                "lean_file": str(lean_file_rel),
                                "task_id": task_id,
                                "attempt": attempt,
                                "before_sorry_tokens": before_sorry_tokens_b,
                                "after_sorry_tokens": after_sorry_tokens_b,
                                "new_sorry_decls": new_sorry_decls_b,
                            },
                        )
                        print(
                            "Final Agent B introduced new `sorry` (or a new sorry-bearing decl). "
                            "Reverting and retrying if attempts remain."
                        )
                        continue
                    code, out, err = lake_env_lean(compile_file_rel)
                    lean_output = "\n".join(part for part in (err, out) if part)
                    if code == 0:
                        b_success = True
                        break
                    print("Lean still failing after final Agent B; retrying if attempts remain.")

                if not b_success:
                    print("Giving up on this file for now; recording failure.")
                    record_failure(
                        lean_file_rel=lean_file_rel,
                        task_id=task_id,
                        note="lean_failed_after_final_agent_b",
                        lean_code=code,
                        lean_output=lean_output,
                        agent_b_history=persisted_agent_b_history + agent_b_history,
                    )
                    abandon_file = True
                if abandon_file:
                    break
                continue

            non_sorry_blocks = _non_sorry_warning_blocks(lean_output)
            if non_sorry_blocks and args.clean_warnings_with_agent_b:
                task_id = f"{file_index}_warnings"
                warnings_text = "\n\n".join(non_sorry_blocks)
                print("Lean has non-sorry warnings; calling final Agent B to clean.")
                history_text = _format_agent_b_history(persisted_agent_b_history + agent_b_history)
                file_text_before_b = abs_file.read_text(encoding="utf-8") if abs_file.exists() else ""
                before_sorry_tokens_b = _count_sorry_tokens(file_text_before_b)
                b_start = time.monotonic()
                b_res = run_final_agent_b(
                    lean_file=lean_file_rel,
                    error_log="Lean produced the following non-sorry warnings. Please remove them:\n\n"
                    + warnings_text,
                    task_id=task_id,
                    history=agent_b_history,
                    extra_instructions=history_text,
                    nl_answer=nl_answer_text,
                    model=agent_settings.agent_b.model,
                    reasoning_effort=agent_settings.agent_b.reasoning_effort,
                    stage=codex_stage,
                )
                b_seconds = time.monotonic() - b_start
                total_tokens_used += b_res.tokens_used or 0
                log_event(
                    run_id,
                    "agent_b_result",
                    {
                        "file_index": file_index,
                        "lean_file": str(lean_file_rel),
                        "task_id": task_id,
                        "attempt": None,
                        "code": b_res.code,
                        "seconds": b_seconds,
                        "tokens_used": b_res.tokens_used,
                        "log_path": str(b_res.log_path) if b_res.log_path else None,
                        "model": agent_settings.agent_b.model,
                        "reasoning_effort": agent_settings.agent_b.reasoning_effort,
                    },
                )
                summary = _summarize_agent_b_output(b_res.stdout, b_res.stderr)
                entry = f"- warnings cleanup: summary={summary}"
                agent_b_history.append(entry)
                if args.write_history:
                    append_history(
                        history_file,
                        pipeline="final",
                        run_id=run_id,
                        lean_file=str(lean_file_rel),
                        task_id=task_id,
                        kind="agent_b_warning_cleanup",
                        summary=summary,
                        log_path=str(b_res.log_path) if b_res.log_path else None,
                        payload={
                            "code": b_res.code,
                            "warnings": warnings_text,
                        },
                    )
                print(f"[Final Agent B summary] {entry}")
                if b_res.code != 0:
                    print(f"Final Agent B failed with code {b_res.code}.\n{b_res.stderr}")
                    record_failure(
                        lean_file_rel=lean_file_rel,
                        task_id=task_id,
                        note="final_agent_b_warning_cleanup_failed",
                        lean_code=code,
                        lean_output=lean_output,
                        agent_b_history=persisted_agent_b_history + agent_b_history,
                    )
                    abandon_file = True
                    break
                file_text_after_b = abs_file.read_text(encoding="utf-8") if abs_file.exists() else ""
                after_sorry_tokens_b = _count_sorry_tokens(file_text_after_b)
                new_sorry_decls_b = _new_sorry_bearing_decls(
                    before_text=file_text_before_b,
                    after_text=file_text_after_b,
                    abs_file=abs_file,
                )
                if after_sorry_tokens_b > before_sorry_tokens_b or new_sorry_decls_b:
                    abs_file.write_text(file_text_before_b, encoding="utf-8")
                    log_event(
                        run_id,
                        "final_agent_b_new_sorry_violation",
                        {
                            "file_index": file_index,
                            "lean_file": str(lean_file_rel),
                            "task_id": task_id,
                            "attempt": None,
                            "before_sorry_tokens": before_sorry_tokens_b,
                            "after_sorry_tokens": after_sorry_tokens_b,
                            "new_sorry_decls": new_sorry_decls_b,
                        },
                    )
                    print("Final Agent B introduced new `sorry` during warning cleanup; reverted.")
                continue
            elif non_sorry_blocks:
                print(
                    "Lean has non-sorry warnings, but warning cleanup is disabled "
                    "(--no-clean-warnings-with-agent-b); continuing."
                )

            if file_text_scaffold_baseline is None:
                file_text_scaffold_baseline = abs_file.read_text(encoding="utf-8") if abs_file.exists() else ""

            sorry_locs = find_sorry_locations(abs_file)
            if not sorry_locs:
                print("No `sorry` warnings; file is complete.")
                break

            if args.max_sorries_per_file is not None and sorries_eliminated >= args.max_sorries_per_file:
                print(f"Reached max-sorries-per-file={args.max_sorries_per_file}; stopping this file for now.")
                break

            progress_attempt = 0
            while True:
                sorry_locs = find_sorry_locations(abs_file)
                if not sorry_locs:
                    print("No `sorry` warnings; file is complete.")
                    break
                before_sorry_count = len(sorry_locs)
                target = sorry_locs[0]
                line = int(target.get("line", 1))
                task_id = f"{file_index}_L{line}"
                target_decl_snippet = get_declaration_snippet(abs_file, line=line)
                target_decl_kind_before, target_decl_before = _extract_decl_kind_and_name_from_snippet(
                    target_decl_snippet
                )

                if nl_answer_text:
                    log_event(
                        run_id,
                        "nl_answer",
                        {
                            "file_index": file_index,
                            "lean_file": str(lean_file_rel),
                            "task_id": task_id,
                            "source": nl_answer.source,
                            "meta": nl_answer.meta,
                            "chars": len(nl_answer_text),
                        },
                    )

                nl_hint_text = None
                nl_hint_error = None
                if args.enable_nl_hints:
                    snippet = abs_file.read_text(encoding="utf-8")
                    nl_hint_text, nl_hint_error = fetch_nl_hint(
                        snippet=snippet,
                        lean_file=lean_file_rel,
                        target=target,
                        api_key=args.nl_hint_api_key,
                        api_url=args.nl_hint_url,
                        model=args.nl_hint_model,
                    )
                if nl_hint_error:
                    print(f"[NL hint] skipped: {nl_hint_error}")
                    nl_hint_text = None
                elif nl_hint_text:
                    preview = (
                        nl_hint_text
                        if len(nl_hint_text) < 800
                        else nl_hint_text[:800] + "... [truncated]"
                    )
                    print("[NL hint] received:\n" + preview)

                plan_data = None
                plan_raw_block = None
                feedback_for_c = None
                missing_theory_rounds = 0
                bad_statement_recoveries = 0
                helper_bad_statement_repairs = 0
                helper_statement_repair_mode = False
                extra_infra_round = (
                    int(args.auto_infra_min_local_rounds) + 1
                    if auto_infra_sprint
                    else 0
                )
                max_plan_rounds = args.max_c_replans + 1 + extra_infra_round
                replan_history: list[str] = list(persisted_replan_history)
                strict_progress_nudge = None
                agent_a_model_override = None
                agent_a_reasoning_effort_override = None
                if progress_attempt > 0:
                    strict_progress_nudge = (
                        f"STRICT NO-PROGRESS RETRY (attempt {progress_attempt}/{args.max_no_progress_retries}): "
                        "The previous pass did not reduce the number of `sorry` tokens. "
                        "You must eliminate the targeted `sorry` in this attempt. "
                        "Do NOT claim 'Mathlib lacks facilities' without concrete evidence (failed lemma names / failed tactics / exact subgoal). "
                        "If no ready-made lemma is found, unfold definitions and prove from first principles; "
                        "add and prove small helper lemmas as needed. "
                        "Do not introduce new `sorry`."
                    )
                    # On strict retries, spend more reasoning budget for Agent A (configurable).
                    agent_a_model_override = agent_settings.agent_a_strict_retry.model
                    agent_a_reasoning_effort_override = (
                        agent_settings.agent_a_strict_retry.reasoning_effort or "high"
                    )

                for plan_round in range(1, max_plan_rounds + 1):
                    progress_nudge = None
                    if plan_round > 1:
                        progress_nudge = (
                            f"PROGRESS REQUIREMENT (plan round {plan_round}): "
                            "make concrete Lean progress this pass. Add small helper lemmas or partial proof steps "
                            "that shrink the target `sorry`; avoid no-op edits. The file must be strictly more proved "
                            "than in the previous round (still compiling, only scoped `sorry` if you explicitly request re-plan)."
                        )
                    history_text = _format_replan_history(replan_history)
                    helper_statement_repair_nudge = None
                    if helper_statement_repair_mode:
                        helper_statement_repair_nudge = (
                            "HELPER STATEMENT REPAIR MODE (hard):\n"
                            "- Previous attempt proved current target helper declaration is false/over-strong.\n"
                            "- In this round, repair that helper statement first (add missing assumptions or weaken conclusion),\n"
                            "  then update all downstream call-sites in this file.\n"
                            "- Keep the file compiling after edits, and continue pushing the main proof route.\n"
                            "- Do not just retry the old proof of the old statement."
                        )
                    extra_for_c = _combine_extra_instructions(
                        helper_statement_repair_nudge, strict_progress_nudge, history_text
                    )
                    print(f"[Final Agent C planning {plan_round}/{max_plan_rounds}]")
                    c_start = time.monotonic()
                    c_res = run_final_agent_c(
                        lean_file=lean_file_rel,
                        target=target,
                        task_id=task_id,
                        feedback_from_agent_a=feedback_for_c,
                        prior_plan=plan_data,
                        nl_hint=nl_hint_text,
                        nl_answer=nl_answer_text,
                        extra_instructions=extra_for_c,
                        model=agent_settings.agent_c.model,
                        reasoning_effort=agent_settings.agent_c.reasoning_effort,
                        stage=codex_stage,
                    )
                    c_seconds = time.monotonic() - c_start
                    total_tokens_used += c_res.tokens_used or 0
                    log_event(
                        run_id,
                        "agent_c_result",
                        {
                            "file_index": file_index,
                            "lean_file": str(lean_file_rel),
                            "task_id": task_id,
                            "plan_round": plan_round,
                            "code": c_res.code,
                            "seconds": c_seconds,
                            "tokens_used": c_res.tokens_used,
                            "log_path": str(c_res.log_path) if c_res.log_path else None,
                            "model": agent_settings.agent_c.model,
                            "reasoning_effort": agent_settings.agent_c.reasoning_effort,
                        },
                    )
                    if c_res.code != 0:
                        print(f"Final Agent C failed with code {c_res.code}.\n{c_res.stderr}")
                        abandon_file = True
                        break

                    plan_data, plan_raw_block = extract_marked_json(
                        c_res.stdout, AGENT_C_PLAN_START, AGENT_C_PLAN_END
                    )
                    if plan_raw_block is None:
                        plan_raw_block = c_res.stdout
                    if isinstance(plan_data, dict) and plan_data.get("status") == "failed":
                        _terminal_report_math_blocker(
                            source="agent_c_plan_failed",
                            lean_file_rel=lean_file_rel,
                            task_id=task_id,
                            target=target,
                            decl_name=target_decl_before,
                            message="Agent C reports the target is likely mathematically unprovable/blocked as written.",
                            details=plan_data.get("failure_reason") or plan_data.get("strategy"),
                        )
                    if args.write_history:
                        append_history(
                            history_file,
                            pipeline="final",
                            run_id=run_id,
                            lean_file=str(lean_file_rel),
                            task_id=task_id,
                            kind="agent_c_plan",
                            summary=_summarize_plan(plan_data),
                            log_path=str(c_res.log_path) if c_res.log_path else None,
                            payload={
                                "plan_round": plan_round,
                                "plan": plan_data,
                                "plan_raw": plan_raw_block,
                                "strict_retry": bool(progress_attempt > 0),
                            },
                        )

                    feedback_for_c = None
                    bench_long_horizon = ""
                    if _is_bench_file(lean_file_rel):
                        bench_long_horizon = (
                            "PROVER LONG-HORIZON MODE (Question_bench):\n"
                            "- Make as much **correct mathematical progress** as possible in this attempt.\n"
                            "- You may eliminate additional `sorry` in the same file if you can do so correctly.\n"
                            "- If you introduce helper lemmas, they must be mathematically correct; prefer proving them immediately.\n"
                            "- Avoid tiny no-op patches; prioritize large, correct proof progress.\n"
                            "- Avoid re-plans: aim to finish the target theorem in this attempt; request re-plan only if you can report an exact Lean blocker.\n"
                        )
                    extra_for_a = _combine_extra_instructions(
                        helper_statement_repair_nudge,
                        bench_long_horizon,
                        strict_progress_nudge,
                        progress_nudge,
                        history_text,
                    )
                    file_text_before_a = abs_file.read_text(encoding="utf-8")
                    before_sorry_tokens = _count_sorry_tokens(file_text_before_a)
                    a_start = time.monotonic()
                    a_res = run_final_agent_a(
                        lean_file=lean_file_rel,
                        target=target,
                        task_id=task_id,
                        plan=plan_data,
                        plan_raw=plan_raw_block,
                        attempt=plan_round,
                        extra_instructions=extra_for_a,
                        nl_hint=nl_hint_text,
                        nl_answer=nl_answer_text,
                        echo_prompt=args.print_agent_a_prompt,
                        model=agent_a_model_override or agent_settings.agent_a.model,
                        reasoning_effort=agent_a_reasoning_effort_override
                        or agent_settings.agent_a.reasoning_effort,
                        stage=codex_stage,
                    )
                    a_seconds = time.monotonic() - a_start
                    total_tokens_used += a_res.tokens_used or 0
                    log_event(
                        run_id,
                        "agent_a_result",
                        {
                            "file_index": file_index,
                            "lean_file": str(lean_file_rel),
                            "task_id": task_id,
                            "plan_round": plan_round,
                            "code": a_res.code,
                            "seconds": a_seconds,
                            "tokens_used": a_res.tokens_used,
                            "log_path": str(a_res.log_path) if a_res.log_path else None,
                            "model": agent_a_model_override or agent_settings.agent_a.model,
                            "reasoning_effort": agent_a_reasoning_effort_override
                            or agent_settings.agent_a.reasoning_effort,
                        },
                    )
                    if a_res.code != 0:
                        print(f"Final Agent A failed with code {a_res.code}.\n{a_res.stderr}")
                        abandon_file = True
                        break
                    axiom_decls = find_axiom_decls(abs_file)
                    if axiom_decls:
                        print("Detected forbidden `axiom` declarations after Final Agent A; requesting cleanup.")
                        cleanup_instructions = build_axiom_cleanup_instructions(lean_file_rel, axiom_decls)
                        cleanup_extra = _combine_extra_instructions(
                            cleanup_instructions,
                            strict_progress_nudge,
                            progress_nudge,
                            history_text,
                        )
                        cleanup_start = time.monotonic()
                        cleanup_res = run_final_agent_a(
                            lean_file=lean_file_rel,
                            target=target,
                            task_id=task_id,
                            plan=plan_data,
                            plan_raw=plan_raw_block,
                            attempt=plan_round,
                            extra_instructions=cleanup_extra,
                            nl_hint=nl_hint_text,
                            nl_answer=nl_answer_text,
                            echo_prompt=args.print_agent_a_prompt,
                            model=agent_a_model_override or agent_settings.agent_a.model,
                            reasoning_effort=agent_a_reasoning_effort_override
                            or agent_settings.agent_a.reasoning_effort,
                            log_name=f"final_agent_a_{task_id}_axiom_cleanup.log",
                            stage=codex_stage,
                        )
                        cleanup_seconds = time.monotonic() - cleanup_start
                        total_tokens_used += cleanup_res.tokens_used or 0
                        log_event(
                            run_id,
                            "agent_a_axiom_cleanup_result",
                            {
                                "file_index": file_index,
                                "lean_file": str(lean_file_rel),
                                "task_id": task_id,
                                "plan_round": plan_round,
                                "code": cleanup_res.code,
                                "seconds": cleanup_seconds,
                                "tokens_used": cleanup_res.tokens_used,
                                "log_path": str(cleanup_res.log_path)
                                if cleanup_res.log_path
                                else None,
                                "model": agent_a_model_override or agent_settings.agent_a.model,
                                "reasoning_effort": agent_a_reasoning_effort_override
                                or agent_settings.agent_a.reasoning_effort,
                            },
                        )
                        if cleanup_res.code != 0:
                            print(
                                f"Final Agent A axiom cleanup failed with code {cleanup_res.code}.\n"
                                f"{cleanup_res.stderr}"
                            )
                            abandon_file = True
                            break
                        axiom_decls = find_axiom_decls(abs_file)
                        if axiom_decls:
                            print("Axiom cleanup failed; `axiom` declarations still present.")
                            print(format_axiom_report(axiom_decls))
                            abandon_file = True
                            break
                        a_res = cleanup_res

                    a_feedback, _ = extract_marked_json(
                        a_res.stdout, AGENT_A_FEEDBACK_START, AGENT_A_FEEDBACK_END
                    )

                    file_text_after_a = abs_file.read_text(encoding="utf-8")
                    after_sorry_tokens = _count_sorry_tokens(file_text_after_a)
                    new_sorry_decls = _new_sorry_bearing_decls(
                        before_text=file_text_before_a,
                        after_text=file_text_after_a,
                        abs_file=abs_file,
                    )
                    protected_main_decl = bool(
                        target_decl_before
                        and (target_decl_kind_before or "").strip().lower() == "theorem"
                        and not _name_looks_helper_like(target_decl_before)
                    )
                    protected_main_decl_header_before = None
                    protected_main_decl_header_after = None
                    protected_main_decl_violation_reason = None
                    if protected_main_decl and target_decl_before:
                        protected_main_decl_header_before = _decl_header_signature_from_text(
                            file_text=file_text_before_a,
                            name=target_decl_before,
                        )
                        protected_main_decl_header_after = _decl_header_signature_from_text(
                            file_text=file_text_after_a,
                            name=target_decl_before,
                        )
                        if (
                            protected_main_decl_header_before
                            and protected_main_decl_header_after
                            and protected_main_decl_header_before != protected_main_decl_header_after
                        ):
                            protected_main_decl_violation_reason = (
                                "protected theorem declaration header/signature changed"
                            )
                        elif protected_main_decl_header_before and not protected_main_decl_header_after:
                            protected_main_decl_violation_reason = (
                                "protected theorem declaration missing/renamed after edit"
                            )
                    if scaffold_sorry_budget <= 0:
                        guardrail_violation = bool(
                            after_sorry_tokens > before_sorry_tokens
                            or new_sorry_decls
                            or protected_main_decl_violation_reason
                        )
                        violation_details: dict[str, Any] = {
                            "protected_main_decl": protected_main_decl,
                        }
                        if protected_main_decl_violation_reason:
                            violation_details["protected_main_decl_violation_reason"] = (
                                protected_main_decl_violation_reason
                            )
                            violation_details["protected_main_decl_header_before"] = (
                                protected_main_decl_header_before
                            )
                            violation_details["protected_main_decl_header_after"] = (
                                protected_main_decl_header_after
                            )
                    else:
                        delta_sorry_tokens = after_sorry_tokens - before_sorry_tokens
                        new_sorry_decls_since_baseline = _new_sorry_bearing_decls(
                            before_text=(file_text_scaffold_baseline or ""),
                            after_text=file_text_after_a,
                            abs_file=abs_file,
                        )
                        target_sorry_tokens_before = (
                            _count_sorry_tokens_in_named_decl_text(
                                file_text=file_text_before_a, name=target_decl_before
                            )
                            if target_decl_before
                            else None
                        )
                        target_sorry_tokens_after = (
                            _count_sorry_tokens_in_named_decl_text(
                                file_text=file_text_after_a, name=target_decl_before
                            )
                            if target_decl_before
                            else None
                        )
                        violation_reasons: list[str] = []
                        if delta_sorry_tokens > scaffold_sorry_budget:
                            violation_reasons.append(
                                f"net-new sorry tokens in this attempt exceed budget "
                                f"(delta={delta_sorry_tokens}, budget={scaffold_sorry_budget})"
                            )
                        if delta_sorry_tokens > 0 and not new_sorry_decls:
                            violation_reasons.append(
                                "introduced net-new `sorry` without introducing a new sorry-bearing helper decl"
                            )
                        if len(new_sorry_decls_since_baseline) > scaffold_sorry_budget:
                            violation_reasons.append(
                                f"too many sorry-bearing helper decls since file start "
                                f"(count={len(new_sorry_decls_since_baseline)}, budget={scaffold_sorry_budget})"
                            )
                        if (
                            target_sorry_tokens_before is not None
                            and target_sorry_tokens_after is not None
                            and target_sorry_tokens_after > target_sorry_tokens_before
                        ):
                            violation_reasons.append(
                                f"added new `sorry` inside target declaration `{target_decl_before}` "
                                f"(before={target_sorry_tokens_before}, after={target_sorry_tokens_after})"
                            )
                        if protected_main_decl_violation_reason:
                            violation_reasons.append(protected_main_decl_violation_reason)
                        guardrail_violation = bool(violation_reasons)
                        violation_details = {
                            "scaffold_sorry_budget": scaffold_sorry_budget,
                            "delta_sorry_tokens": delta_sorry_tokens,
                            "new_sorry_decls_this_round": new_sorry_decls,
                            "new_sorry_decls_since_baseline": new_sorry_decls_since_baseline,
                            "target_sorry_tokens_before": target_sorry_tokens_before,
                            "target_sorry_tokens_after": target_sorry_tokens_after,
                            "violation_reasons": violation_reasons,
                            "protected_main_decl": protected_main_decl,
                            "protected_main_decl_violation_reason": protected_main_decl_violation_reason,
                            "protected_main_decl_header_before": protected_main_decl_header_before,
                            "protected_main_decl_header_after": protected_main_decl_header_after,
                        }

                    if guardrail_violation:
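                        # Guardrail: do not "move sorries around", create too many new sorry-bearing
                        # helpers, or change/remove a protected main theorem's declaration header.
                        # On any violation, restore the file text captured before Agent A ran.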
                        abs_file.write_text(file_text_before_a, encoding="utf-8")
                        payload = {
                            "file_index": file_index,
                            "lean_file": str(lean_file_rel),
                            "task_id": task_id,
                            "plan_round": plan_round,
                            "before_sorry_tokens": before_sorry_tokens,
                            "after_sorry_tokens": after_sorry_tokens,
                            "new_sorry_decls": new_sorry_decls,
                            **violation_details,
                        }
                        log_event(run_id, "final_new_sorry_violation", payload)
                        extra_reason = ""
                        reasons = violation_details.get("violation_reasons")
                        if isinstance(reasons, list) and reasons:
                            extra_reason = " Reasons: " + "; ".join(str(r) for r in reasons[:5])
                        if protected_main_decl_violation_reason and not (
                            after_sorry_tokens > before_sorry_tokens or new_sorry_decls
                        ):
                            msg = (
                                "Final guardrail triggered: protected main theorem declaration was modified. "
                                "Reverting and requesting re-plan."
                                + extra_reason
                            )
                        else:
                            msg = (
                                f"Final guardrail triggered: introduced new `sorry` tokens "
                                f"(before={before_sorry_tokens}, after={after_sorry_tokens}) "
                                f"or new sorry-bearing declarations ({', '.join(new_sorry_decls) or 'none'}). "
                                "Reverting and requesting re-plan."
                                + extra_reason
                            )
                        print(msg)
                        replan_history.append(f"- round {plan_round}: {msg}")
                        if plan_round >= max_plan_rounds:
                            record_failure(
                                lean_file_rel=lean_file_rel,
                                task_id=task_id,
                                note="final_new_sorry_violation",
                                target=target,
                                plan=plan_data,
                                feedback=a_feedback,
                                agent_b_history=persisted_agent_b_history + agent_b_history,
                                replan_history=replan_history,
                            )
                            abandon_file = True
                            abandon_reason = "final_new_sorry_violation"
                            break
                        feedback_for_c = {
                            "status": "needs_replan",
                            "reason": msg,
                            "notes": (
                                "Revise the plan to avoid introducing new `sorry` and avoid adding new helper decls "
                                "whose proof contains `sorry`. If a helper lemma is truly needed, its statement must be "
                                "mathematically correct and you must plan for (and execute) a complete proof of it in the same attempt. "
                                "Do not modify protected main theorem statements; if a statement issue exists, repair helper lemmas instead."
                            ),
                        }
                        continue

                    if a_feedback and a_feedback.get("status") == "needs_replan":
                        promoted_feedback = _promote_same_core_needs_replan_to_missing_theory(
                            feedback=a_feedback,
                            lean_file_rel=lean_file_rel,
                            target_decl_name=target_decl_before,
                        )
                        if promoted_feedback is not None:
                            a_feedback = promoted_feedback
                            print(
                                "[needs-replan-upgrade] detected `same_core_blocker=yes`; "
                                "promoted to `failed_missing_theory` for infra escalation."
                            )

                    if a_feedback:
                        replan_history.append(
                            f"- round {plan_round}: {json.dumps(a_feedback, ensure_ascii=False)}"
                        )
                        if args.write_history:
                            append_history(
                                history_file,
                                pipeline="final",
                                run_id=run_id,
                                lean_file=str(lean_file_rel),
                                task_id=task_id,
                                kind="agent_a_feedback",
                                summary=_summarize_feedback(a_feedback),
                                log_path=str(a_res.log_path) if a_res.log_path else None,
                                payload={"plan_round": plan_round, "feedback": a_feedback},
                            )
                    if not (a_feedback and a_feedback.get("status") == "failed_bad_statement"):
                        helper_statement_repair_mode = False
                    if a_feedback and a_feedback.get("status") == "failed_bad_statement":
                        ok, why = _validate_bad_statement_report(a_feedback)
                        if not ok:
                            print(
                                "Final Agent A reported `failed_bad_statement`, but the report is invalid: "
                                + (why or "unknown error")
                            )
                            # Treat as a normal failure (keeps the file compiling) and stop this file.
                            record_failure(
                                lean_file_rel=lean_file_rel,
                                task_id=task_id,
                                note="invalid_bad_statement_report",
                                target=target,
                                plan=plan_data,
                                feedback=a_feedback,
                                agent_b_history=persisted_agent_b_history + agent_b_history,
                                replan_history=replan_history,
                            )
                            abandon_file = True
                            abandon_reason = "invalid_bad_statement_report"
                            break

                        bad_stmt_details = (
                            a_feedback.get("counterexample_or_contradiction")
                            or a_feedback.get("lean_checkable_conflict")
                            or a_feedback.get("missing_assumptions")
                            or a_feedback.get("reason")
                            or "no details provided"
                        )
                        bad_stmt_target_info = _classify_bad_statement_target(
                            file_text=file_text_before_a,
                            target_line=line,
                            target_decl_name=target_decl_before,
                            target_decl_kind=target_decl_kind_before,
                        )
                        helper_repair_budget = int(args.helper_bad_statement_auto_repair_rounds)
                        can_helper_auto_repair = bool(
                            helper_repair_budget > 0
                            and bool(bad_stmt_target_info.get("is_helper_like"))
                            and helper_bad_statement_repairs < helper_repair_budget
                            and plan_round < max_plan_rounds
                        )
                        if can_helper_auto_repair:
                            helper_bad_statement_repairs += 1
                            helper_statement_repair_mode = True
                            abs_file.write_text(file_text_before_a, encoding="utf-8")
                            repair_msg = (
                                "Agent A reported `failed_bad_statement` on a likely helper declaration; "
                                "auto-recover will repair helper statement + downstream call-sites. "
                                f"(repair {helper_bad_statement_repairs}/{helper_repair_budget})"
                            )
                            print(f"[bad-statement-helper-repair] {repair_msg}")
                            replan_history.append(
                                "- round "
                                + str(plan_round)
                                + ": auto-helper-repair from failed_bad_statement"
                                + f" | decl={target_decl_before or 'unknown'}"
                                + f" | details={str(bad_stmt_details)[:600]}"
                            )
                            log_event(
                                run_id,
                                "helper_bad_statement_auto_repair",
                                {
                                    "file_index": file_index,
                                    "lean_file": str(lean_file_rel),
                                    "task_id": task_id,
                                    "plan_round": plan_round,
                                    "repair_used": helper_bad_statement_repairs,
                                    "repair_budget": helper_repair_budget,
                                    "target_decl": target_decl_before,
                                    "target_decl_kind": target_decl_kind_before,
                                    "target_info": bad_stmt_target_info,
                                    "details": str(bad_stmt_details)[:4000],
                                },
                            )
                            feedback_for_c = {
                                "status": "needs_replan",
                                "reason": (
                                    "Current target helper declaration appears mathematically false/over-strong. "
                                    "Repair the helper statement and update all downstream call-sites before continuing."
                                ),
                                "notes": (
                                    "Produce a plan that first restates the helper declaration to a correct form "
                                    "(add missing assumptions or weaken conclusion), then updates all dependent uses. "
                                    "In `notes_for_agent_a`, require a short `-- Route correction:` comment that explains "
                                    "why old helper statement was false and how the repaired statement is used."
                                ),
                                "path_error_reason": str(bad_stmt_details)[:4000],
                                "helper_statement_repair": {
                                    "target_decl_name": target_decl_before,
                                    "target_decl_kind": target_decl_kind_before,
                                    "target_line": line,
                                    "target_info": bad_stmt_target_info,
                                },
                                "prior_failed_bad_statement": {
                                    "counterexample_or_contradiction": a_feedback.get(
                                        "counterexample_or_contradiction"
                                    ),
                                    "lean_checkable_conflict": a_feedback.get("lean_checkable_conflict"),
                                    "missing_assumptions": a_feedback.get("missing_assumptions"),
                                    "reason": a_feedback.get("reason"),
                                },
                            }
                            continue

                        recover_budget = int(args.bad_statement_auto_recover_rounds)
                        can_auto_recover = bool(
                            recover_budget > 0
                            and bad_statement_recoveries < recover_budget
                            and plan_round < max_plan_rounds
                        )
                        if can_auto_recover:
                            bad_statement_recoveries += 1
                            helper_statement_repair_mode = False
                            abs_file.write_text(file_text_before_a, encoding="utf-8")
                            recover_msg = (
                                "Agent A reported `failed_bad_statement`; auto-recover will roll back this attempt and "
                                "force an alternative proof route. "
                                f"(recovery {bad_statement_recoveries}/{recover_budget})"
                            )
                            print(f"[bad-statement-recover] {recover_msg}")
                            replan_history.append(
                                f"- round {plan_round}: auto-recover from failed_bad_statement | details={str(bad_stmt_details)[:600]}"
                            )
                            log_event(
                                run_id,
                                "bad_statement_auto_recover",
                                {
                                    "file_index": file_index,
                                    "lean_file": str(lean_file_rel),
                                    "task_id": task_id,
                                    "plan_round": plan_round,
                                    "recovery_used": bad_statement_recoveries,
                                    "recovery_budget": recover_budget,
                                    "details": str(bad_stmt_details)[:4000],
                                },
                            )
                            feedback_for_c = {
                                "status": "needs_replan",
                                "reason": (
                                    "Previous route likely incorrect/over-strong (Agent A emitted failed_bad_statement). "
                                    "Do not repeat that route; plan a materially different strategy."
                                ),
                                "notes": (
                                    "Route-correction required: explain why prior route failed, then propose an alternative "
                                    "dependency-closed strategy. In `notes_for_agent_a`, require a short `-- Route correction:` "
                                    "comment in Lean describing old-route failure and new-route plan. Keep proving locally; "
                                    "only use failed_bad_statement again if contradiction still persists after the alternative route."
                                ),
                                "path_error_reason": str(bad_stmt_details)[:4000],
                                "prior_failed_bad_statement": {
                                    "counterexample_or_contradiction": a_feedback.get(
                                        "counterexample_or_contradiction"
                                    ),
                                    "lean_checkable_conflict": a_feedback.get("lean_checkable_conflict"),
                                    "missing_assumptions": a_feedback.get("missing_assumptions"),
                                    "reason": a_feedback.get("reason"),
                                },
                            }
                            continue

                        _terminal_report_math_blocker(
                            source="agent_a_failed_bad_statement",
                            lean_file_rel=lean_file_rel,
                            task_id=task_id,
                            target=target,
                            decl_name=target_decl_before,
                            message="Agent A reports: statement is mathematically false/unprovable as written.",
                            details=bad_stmt_details,
                        )
                        record_failure(
                            lean_file_rel=lean_file_rel,
                            task_id=task_id,
                            note="failed_bad_statement",
                            target=target,
                            plan=plan_data,
                            feedback=a_feedback,
                            agent_b_history=persisted_agent_b_history + agent_b_history,
                            replan_history=replan_history,
                        )
                        abandon_file = True
                        abandon_reason = "failed_bad_statement"
                        break
                    if a_feedback and a_feedback.get("status") == "failed_missing_theory":
                        require_infra = bool(auto_infra_sprint and args.missing_theory_policy == "continue")
                        ok, why = _validate_missing_theory_report(
                            a_feedback, require_infra_requests=require_infra
                        )
                        if not ok:
                            print(
                                "Final Agent A reported `failed_missing_theory`, but the report is invalid: "
                                + (why or "unknown error")
                            )
                            record_failure(
                                lean_file_rel=lean_file_rel,
                                task_id=task_id,
                                note="invalid_missing_theory_report",
                                target=target,
                                plan=plan_data,
                                feedback=a_feedback,
                                agent_b_history=persisted_agent_b_history + agent_b_history,
                                replan_history=replan_history,
                            )
                            abandon_file = True
                            abandon_reason = "invalid_missing_theory_report"
                            break

                        print("Final Agent A reported: blocked on missing library/theory support.")
                        record_failure(
                            lean_file_rel=lean_file_rel,
                            task_id=task_id,
                            note="failed_missing_theory",
                            target=target,
                            plan=plan_data,
                            feedback=a_feedback,
                            agent_b_history=persisted_agent_b_history + agent_b_history,
                            replan_history=replan_history,
                        )
                        msg = a_feedback.get("reason") or "missing theory"
                        missing_theory_rounds += 1
                        classic_assessment = _assess_missing_theory_classicity(
                            a_feedback,
                            min_score=int(args.auto_infra_classic_min_score),
                        )
                        gate_min_rounds = int(args.auto_infra_min_local_rounds)
                        gate_rounds_ready = bool(
                            gate_min_rounds <= 0 or missing_theory_rounds >= gate_min_rounds
                        )
                        gate_classic_only = bool(args.auto_infra_classic_only)
                        gate_classic_ready = bool(
                            (not gate_classic_only) or classic_assessment.get("is_classic")
                        )
                        promoted_from_same_core = bool(
                            a_feedback.get("promoted_from_same_core_needs_replan")
                        )
                        if promoted_from_same_core:
                            # Upgrade path: repeated same-core blocker should directly enter
                            # infra escalation rather than spinning on equivalent replans.
                            gate_rounds_ready = True
                            gate_classic_ready = True
                        gate_summary = (
                            "classic_score="
                            f"{int(classic_assessment.get('classic_score', 0))}"
                            f", nonclassic_score={int(classic_assessment.get('nonclassic_score', 0))}"
                            f", gate_score={int(classic_assessment.get('gate_score', 0))}"
                            f", classic_hits={classic_assessment.get('classic_hits', [])}"
                            f", nonclassic_hits={classic_assessment.get('nonclassic_hits', [])}"
                            f", promoted_from_same_core={promoted_from_same_core}"
                        )

                        # Only-bench default: auto-launch infra sub-pipeline for missing theory.
                        if auto_infra_sprint and args.missing_theory_policy == "continue":
                            if not gate_rounds_ready:
                                gate_msg = (
                                    "[infra-gate] hold auto infra: "
                                    f"local failed_missing_theory rounds {missing_theory_rounds}/{gate_min_rounds}."
                                )
                                print(gate_msg)
                                replan_history.append(
                                    f"- round {plan_round}: failed_missing_theory: {msg} ({gate_msg})"
                                )
                                if plan_round >= max_plan_rounds:
                                    abandon_file = True
                                    abandon_reason = "failed_missing_theory"
                                    break
                                feedback_for_c = {
                                    "status": "needs_replan",
                                    "reason": (
                                        f"Missing-theory blocker seen {missing_theory_rounds}/{gate_min_rounds} times. "
                                        f"Auto infra is gated until at least {gate_min_rounds} local strategy rounds are attempted."
                                    ),
                                    "notes": (
                                        "Do another local attempt first: split the blocked goal, try a different proof route, "
                                        "and provide concrete Lean evidence (failed lemma names, exact subgoals)."
                                    ),
                                }
                                continue
                            if not gate_classic_ready:
                                gate_msg = (
                                    "[infra-gate] hold auto infra: blocker does not look like a classical theorem gap. "
                                    + gate_summary
                                )
                                print(gate_msg)
                                replan_history.append(
                                    f"- round {plan_round}: failed_missing_theory: {msg} ({gate_msg})"
                                )
                                if plan_round >= max_plan_rounds:
                                    abandon_file = True
                                    abandon_reason = "failed_missing_theory"
                                    break
                                feedback_for_c = {
                                    "status": "needs_replan",
                                    "reason": (
                                        "Missing-theory report does not pass classical-theorem gate; "
                                        "continue local proving/decomposition instead of infra expansion."
                                    ),
                                    "notes": (
                                        "Only escalate to infra for classic reusable math theorem gaps. "
                                        "If this is a local bridge/decomposition issue, refine the current file directly."
                                    ),
                                }
                                continue
                            print(f"[infra-gate] passed ({gate_summary})")
                            print("[infra] launching infra sub-pipeline (statement→proof→final).")
                            infra_ok = run_infra_pipeline(
                                bench_file=lean_file_rel,
                                missing_theory_signal=a_feedback,
                                infra_public_api_cap=infra_public_api_cap,
                                max_b_retries=args.max_b_retries,
                                infra_statement_max_b_retries=(
                                    int(args.infra_statement_max_b_retries)
                                    if args.infra_statement_max_b_retries is not None
                                    else None
                                ),
                                max_c_replans=args.max_c_replans,
                                infra_plan_generate_attempts=int(args.infra_plan_generate_attempts),
                                max_plan_check_rounds=infra_plan_check_rounds,
                                infra_expand_max_rounds=int(args.infra_expand_max_rounds),
                                infra_agent_config=args.infra_agent_config,
                                infra_exec_mode=str(args.infra_exec_mode),
                                infra_direct_simulate_success=bool(args.infra_direct_simulate_success),
                                infra_direct_chunk_item_limit=int(args.infra_direct_chunk_item_limit),
                                infra_direct_chunk_line_limit=int(args.infra_direct_chunk_line_limit),
                                infra_direct_max_items=(
                                    int(args.infra_direct_max_items)
                                    if args.infra_direct_max_items is not None
                                    else None
                                ),
                                infra_direct_start_index=(
                                    int(args.infra_direct_start_index)
                                    if args.infra_direct_start_index is not None
                                    else None
                                ),
                                infra_direct_statement_max_b_retries=(
                                    int(args.infra_direct_statement_max_b_retries)
                                    if args.infra_direct_statement_max_b_retries is not None
                                    else (
                                        int(args.infra_statement_max_b_retries)
                                        if args.infra_statement_max_b_retries is not None
                                        else int(args.max_b_retries)
                                    )
                                ),
                                infra_direct_proof_max_b_retries=(
                                    int(args.infra_direct_proof_max_b_retries)
                                    if args.infra_direct_proof_max_b_retries is not None
                                    else int(args.max_b_retries)
                                ),
                                infra_direct_proof_max_c_replans=(
                                    int(args.infra_direct_proof_max_c_replans)
                                    if args.infra_direct_proof_max_c_replans is not None
                                    else int(args.max_c_replans)
                                ),
                            )
                            replan_history.append(
                                f"- round {plan_round}: failed_missing_theory: {msg} "
                                f"(infra pipeline={'ok' if infra_ok else 'failed'}; {gate_summary})"
                            )
                            if not infra_ok:
                                abandon_file = True
                                abandon_reason = "infra_pipeline_failed"
                                break
                            if plan_round >= max_plan_rounds:
                                abandon_file = True
                                abandon_reason = "failed_missing_theory"
                                break
                            feedback_for_c = {
                                "status": "needs_replan",
                                "reason": f"Missing theory blocker: {msg}. Infra pipeline completed.",
                                "notes": (
                                    "Re-plan using the new infra under `Question_bench/.../infra_<id>/` and "
                                    "attempt the target proof again."
                                ),
                            }
                            continue

                        if args.missing_theory_policy == "continue":
                            replan_history.append(
                                f"- round {plan_round}: failed_missing_theory: {msg} ({gate_summary})"
                            )
                            if plan_round >= max_plan_rounds:
                                abandon_file = True
                                abandon_reason = "failed_missing_theory"
                                break
                            feedback_for_c = {
                                "status": "needs_replan",
                                "reason": f"Agent A reported missing theory: {msg}",
                                "notes": (
                                    "If the target is believed true, consider changing strategy to avoid the missing theory "
                                    "(e.g., prove from first principles, or use a different characterization). "
                                    "Otherwise, confirm the missing-theory diagnosis with concrete failed lemma names / "
                                    "subgoal shapes and keep edits compiling."
                                ),
                            }
                            continue
                        abandon_file = True
                        abandon_reason = "failed_missing_theory"
                        break
                    if a_feedback and a_feedback.get("status") == "needs_replan":
                        reason = a_feedback.get("reason") or "no reason given"
                        print(f"Final Agent A requested re-plan: {reason}")
                        if plan_round >= max_plan_rounds:
                            print("Reached max Agent C re-plan attempts; abandoning this file for now.")
                            record_failure(
                                lean_file_rel=lean_file_rel,
                                task_id=task_id,
                                note="max_replans_reached",
                                target=target,
                                plan=plan_data,
                                feedback=a_feedback,
                                agent_b_history=persisted_agent_b_history + agent_b_history,
                                replan_history=replan_history,
                            )
                            abandon_file = True
                            break
                        feedback_for_c = a_feedback
                        continue

                    break

                if abandon_file:
                    break

                # After Agent A, recount `sorry` locations and re-inspect the target declaration to confirm progress.
                after_sorry_count = len(find_sorry_locations(abs_file))
                target_solved = None
                if target_decl_before:
                    lines_now = abs_file.read_text(encoding="utf-8").splitlines()
                    decl_line_now = _find_decl_line_by_name(lines_now, name=target_decl_before)
                    snippet_now = (
                        get_declaration_snippet(abs_file, line=decl_line_now) if decl_line_now else ""
                    )
                    target_solved = not _has_sorry_token(snippet_now)

                if (target_solved is True) or (target_solved is None and after_sorry_count < before_sorry_count):
                    delta = before_sorry_count - after_sorry_count
                    sorries_eliminated += delta if delta > 0 else (1 if target_solved is True else 0)
                    break

                proved_new_decls = _new_proved_named_decls(
                    before_text=file_text_before_a, after_text=file_text_after_a
                )
                target_sorry_tokens_before = (
                    _count_sorry_tokens_in_named_decl_text(
                        file_text=file_text_before_a, name=target_decl_before
                    )
                    if target_decl_before
                    else None
                )
                target_sorry_tokens_after = (
                    _count_sorry_tokens_in_named_decl_text(
                        file_text=file_text_after_a, name=target_decl_before
                    )
                    if target_decl_before
                    else None
                )
                target_sorry_shrunk = (
                    target_sorry_tokens_before is not None
                    and target_sorry_tokens_after is not None
                    and target_sorry_tokens_after < target_sorry_tokens_before
                )
                if proved_new_decls or target_sorry_shrunk:
                    log_event(
                        run_id,
                        "progress_without_sorry_reduction",
                        {
                            "file_index": file_index,
                            "lean_file": str(lean_file_rel),
                            "task_id": task_id,
                            "target": target,
                            "proved_new_decls": proved_new_decls,
                            "target_sorry_tokens_before": target_sorry_tokens_before,
                            "target_sorry_tokens_after": target_sorry_tokens_after,
                        },
                    )
                    print(
                        "Progress detected (proved helpers / shrunk target) without reducing global sorry warnings; continuing."
                    )
                    continue

                progress_attempt += 1
                log_event(
                    run_id,
                    "no_progress_after_agent_a",
                    {
                        "file_index": file_index,
                        "lean_file": str(lean_file_rel),
                        "task_id": task_id,
                        "target": target,
                        "before_sorry_count": before_sorry_count,
                        "after_sorry_count": after_sorry_count,
                        "attempt": progress_attempt,
                    },
                )
                if progress_attempt <= args.max_no_progress_retries:
                    print(
                        "Warning: no reduction in `sorry` warnings after Agent A; retrying with stricter instructions."
                    )
                    continue

                print("Warning: no reduction in `sorry` warnings after Agent A; abandoning this file for now.")
                code, out, err = lake_env_lean(compile_file_rel)
                lean_output = "\n".join(part for part in (err, out) if part)
                record_failure(
                    lean_file_rel=lean_file_rel,
                    task_id=task_id,
                    note="no_progress_after_final_agent_a",
                    target=target,
                    plan=plan_data,
                    feedback=feedback_for_c,
                    agent_b_history=persisted_agent_b_history + agent_b_history,
                    replan_history=replan_history,
                    lean_code=code,
                    lean_output=lean_output,
                )
                abandon_file = True
                break

        if abandon_file:
            # Special handling for "bad statement" reports in prover/bench mode.
            if abandon_reason == "failed_bad_statement" and args.bad_statement_policy == "continue":
                print("Ignoring failed_bad_statement report (--bad-statement-policy=continue); continuing (not recommended).")
                abandon_file = False
                abandon_reason = None
            else:
                total_files_failed += 1
                status = abandon_reason or "failed"
                if abandon_reason == "failed_bad_statement" and args.bad_statement_policy == "skip":
                    status = "skipped_bad_statement"
                if abandon_reason == "failed_missing_theory" and args.missing_theory_policy == "skip":
                    status = "skipped_missing_theory"
                log_event(
                    run_id,
                    "file_end",
                    {
                        "file_index": file_index,
                        "lean_file": str(lean_file_rel),
                        "status": status,
                        "seconds": time.monotonic() - file_start,
                        "sorries_eliminated": sorries_eliminated,
                    },
                )
                if abandon_reason == "failed_bad_statement" and args.bad_statement_policy == "skip":
                    processed_files += 1
                    state["next_file_index"] = file_index + 1
                    save_state(state, progress_file, run_id=run_id)
                    continue
                if abandon_reason == "failed_missing_theory" and args.missing_theory_policy == "skip":
                    processed_files += 1
                    state["next_file_index"] = file_index + 1
                    save_state(state, progress_file, run_id=run_id)
                    continue
                # do not advance progress; keep this file as next on rerun
                state["next_file_index"] = file_index
                save_state(state, progress_file, run_id=run_id)
                break

        # Maintain per-project Book.lean and compile it (errors delegated to final Agent B).
        project = os.environ.get("FORMAL_PROJECT", "").strip()
        if project and not args.only_bench:
            try:
                # Ensure Book.lean exists; keep imports at chapter granularity:
                # - `<project>/Chapters/ChapXX.lean` imports aggregate sections
                # - `<project>/Book.lean` imports only chapter aggregates
                book_rel = ensure_book_exists(project=project)
                if (
                    len(compile_file_rel.parts) >= 4
                    and compile_file_rel.parts[0] == project
                    and compile_file_rel.parts[1] == "Chapters"
                ):
                    chap_update = ensure_chapter_imports(
                        project=project, section_aggregate_rel=compile_file_rel
                    )
                    ensure_book_imports(project=project, chapter_aggregate_rel=chap_update.chapter_rel)

                # Rule: before checking Book.lean, always compile the target file first.
                target_code, target_out, target_err = lake_env_lean(compile_file_rel)
                log_event(
                    run_id,
                    "lean_check",
                    {
                        "file_index": file_index,
                        "lean_file": str(lean_file_rel),
                        "phase": "pre_book_target_check",
                        "code": target_code,
                        "compiled_file": str(compile_file_rel),
                    },
                )
                if target_code != 0:
                    print("[Book] target compile failed before Book.lean check; recording failure and stopping.")
                    record_failure(
                        lean_file_rel=lean_file_rel,
                        task_id=f"{file_index}_pre_book_target_compile",
                        note="target_failed_before_book_check",
                        lean_code=target_code,
                        lean_output="\n".join(part for part in (target_err, target_out) if part),
                        agent_b_history=persisted_agent_b_history + agent_b_history,
                    )
                    total_files_failed += 1
                    break

                book_code, book_out, book_err = lake_env_lean(book_rel)
                log_event(
                    run_id,
                    "book_lean_check",
                    {
                        "file_index": file_index,
                        "lean_file": str(lean_file_rel),
                        "project": project,
                        "book_file": str(book_rel),
                        "code": book_code,
                    },
                )
                if book_code != 0:
                    print("Book.lean failed to compile; calling final Agent B...")
                    task_id = f"{file_index}_book_compile"
                    b_res = run_final_agent_b_book(
                        lean_file=LEAN_ROOT / book_rel,
                        error_log="\n".join(part for part in (book_err, book_out) if part),
                        task_id=task_id,
                        extra_instructions=(
                            "Fix the project's Book.lean aggregator only (imports). "
                            "Do not edit any section/part files."
                        ),
                        model=agent_settings.agent_b.model,
                        reasoning_effort=agent_settings.agent_b.reasoning_effort,
                    )
                    total_tokens_used += b_res.tokens_used or 0
                    log_event(
                        run_id,
                        "agent_b_book_fix_result",
                        {
                            "file_index": file_index,
                            "lean_file": str(lean_file_rel),
                            "task_id": task_id,
                            "code": b_res.code,
                            "tokens_used": b_res.tokens_used,
                            "log_path": str(b_res.log_path) if b_res.log_path else None,
                            "book_file": str(book_rel),
                        },
                    )
                    if b_res.code != 0:
                        record_failure(
                            lean_file_rel=lean_file_rel,
                            task_id=task_id,
                            note="book_failed_after_final_agent_b",
                            lean_code=b_res.code,
                            lean_output=b_res.stderr,
                            agent_b_history=persisted_agent_b_history + agent_b_history,
                        )
                        total_files_failed += 1
                        break
                    book_code2, book_out2, book_err2 = lake_env_lean(book_rel)
                    if book_code2 != 0:
                        record_failure(
                            lean_file_rel=lean_file_rel,
                            task_id=task_id,
                            note="book_still_failing_after_final_agent_b",
                            lean_code=book_code2,
                            lean_output="\n".join(part for part in (book_err2, book_out2) if part),
                            agent_b_history=persisted_agent_b_history + agent_b_history,
                        )
                        total_files_failed += 1
                        break
            except Exception as e:
                print(f"[Book] maintenance failed: {e}")

        processed_files += 1
        log_event(
            run_id,
            "file_end",
            {
                "file_index": file_index,
                "lean_file": str(lean_file_rel),
                "status": "ok",
                "seconds": time.monotonic() - file_start,
                "sorries_eliminated": sorries_eliminated,
                "remaining_sorries": len(find_sorry_locations(LEAN_ROOT / lean_file_rel)),
            },
        )
        if args.only_file:
            break

        # if file still has sorries (because of max-sorries-per-file), do not advance
        if find_sorry_locations(LEAN_ROOT / lean_file_rel):
            state["next_file_index"] = file_index
            state["next_file"] = str(files[file_index]) if 0 <= file_index < len(files) else None
        else:
            state["next_file_index"] = file_index + 1
            state["next_file"] = (
                str(files[file_index + 1]) if 0 <= file_index + 1 < len(files) else None
            )
        save_state(state, progress_file, run_id=run_id)

    finish_run(
        run_id,
        {
            "pipeline": "final",
            "stage": 3,
            "run_id": run_id,
            "processed_files": processed_files,
            "next_file_index": state.get("next_file_index", 0),
            "files_failed": total_files_failed,
            "tokens_used_total": total_tokens_used,
            "seconds_total": time.monotonic() - run_start,
            "paths": {
                "progress_file": str(progress_file),
                "job_name": job_name or None,
                "final_logs_dir": str(FINAL_LOGS_DIR),
                "final_failure_log": str(FINAL_FAILURE_LOG),
                "final_history_file": str(history_file),
                "metrics_dir": str(METRICS_DIR),
            },
        },
    )

    print(
        f"Processed {processed_files} files. Next file index: {load_state(progress_file).get('next_file_index', 0)}."
    )


# Script entry guard: call main() only when this module is executed as the
# program entry point, never as a side effect of being imported.
# NOTE: this module uses package-relative imports (e.g. `from .config import ...`),
# so it has to be launched as a module inside its package
# (`python -m <package>.<module>`) rather than by file path.
if __name__ == "__main__":
    main()
