from __future__ import annotations

import argparse
import os
import re
import time
from pathlib import Path
from typing import Any

from .codex_client import (
    run_proof_agent_a,
    run_proof_agent_b,
    run_proof_agent_b_bookcheck,
    run_proof_agent_c,
    run_proof_agent_d,
)
from .agent_settings import resolve_stage_agents_settings
from .axiom_guard import (
    build_axiom_cleanup_instructions,
    find_axiom_decls,
    format_axiom_report,
)
from .book_maintainer import (
    compile_entry_for,
    ensure_book_exists,
    ensure_book_imports,
    ensure_chapter_imports,
    ensure_section_aggregate_exists,
)
from .config import (
    LEAN_ROOT,
    METRICS_DIR,
    PROOF_DATA_PATH,
    PROOF_FAILURE_LOG,
    PROOF_LOGS_DIR,
    ROOT,
    resolve_item_target_file,
    section_file,
)
from .lean_runner import lake_env_lean
from .loader import filter_items, load_items
from .log_utils import file_snapshot, snapshot_delta
from .metrics import finish_run, log_event, start_run
from .history_store import append_history, load_recent_history
from .protocol import (
    AGENT_A_FEEDBACK_END,
    AGENT_A_FEEDBACK_START,
    AGENT_C_PLAN_END,
    AGENT_C_PLAN_START,
    extract_marked_json,
)
from .state import PROOF_PROGRESS_FILE, load_state, save_state


def _prebuild_project_aggregates(*, project: str, items: list[object]) -> Path:
    """
    Before running a whole proof JSON, pre-create/update aggregation modules:
    - `<project>/Chapters/ChapXX.lean` imports all touched `sectionYY.lean`
    - `<project>/Book.lean` imports only `Chapters/ChapXX.lean`

    Returns the relative path to `<project>/Book.lean` for compilation checks.
    """
    project = (project or "").strip()
    if not project:
        raise ValueError("project must be non-empty")

    book_rel = ensure_book_exists(project=project)

    chapters: set[int] = set()
    seen_sections: set[tuple[int, int]] = set()
    for it in items:
        chap = int(getattr(it, "chapter"))
        sec = int(getattr(it, "section"))
        chapters.add(chap)
        seen_sections.add((chap, sec))

    for chap, sec in sorted(seen_sections):
        section_abs = LEAN_ROOT / project / "Chapters" / f"Chap{chap:02d}" / f"section{sec:02d}.lean"
        section_abs.parent.mkdir(parents=True, exist_ok=True)
        if not section_abs.exists():
            section_abs.write_text("import Mathlib\n", encoding="utf-8")
        section_rel = section_abs.relative_to(LEAN_ROOT)
        section_rel = ensure_section_aggregate_exists(section_rel)
        chap_update = ensure_chapter_imports(project=project, section_aggregate_rel=section_rel)
        ensure_book_imports(project=project, chapter_aggregate_rel=chap_update.chapter_rel)

    for chap in sorted(chapters):
        chap_rel = Path(project) / "Chapters" / f"Chap{chap:02d}.lean"
        ensure_book_imports(project=project, chapter_aggregate_rel=chap_rel)

    return book_rel


def _non_sorry_warning_blocks(lean_output: str) -> list[str]:
    """
    Extract multi-line warning blocks that do not mention 'sorry'.
    """
    if not lean_output:
        return []

    lines = lean_output.splitlines()
    blocks: list[str] = []
    i = 0
    while i < len(lines):
        line = lines[i]
        lowered = line.lower()
        if "warning" in lowered:
            block_lines = [line]
            block_lines_lower = [lowered]
            i += 1
            # accumulate lines until the next warning starts
            while i < len(lines):
                next_line = lines[i]
                next_lower = next_line.lower()
                if "warning" in next_lower:
                    break
                block_lines.append(next_line)
                block_lines_lower.append(next_lower)
                i += 1
            block_text_lower = "\n".join(block_lines_lower)
            if "sorry" not in block_text_lower:
                blocks.append("\n".join(block_lines).strip())
            continue
        i += 1

    return blocks


def _has_non_sorry_warning(lean_output: str) -> bool:
    """
    Tell whether lean output carries at least one warning block that is not a 'sorry' warning.

    Lean writes warnings to stdout, so callers should pass the combined stdout/stderr text.
    """
    remaining_blocks = _non_sorry_warning_blocks(lean_output)
    return len(remaining_blocks) > 0


_SORRY_TOKEN_RE = re.compile(r"\bsorry\b")
_DECL_NAME_RE = re.compile(r"^\s*(theorem|lemma|def|abbrev|example|instance)\s+([A-Za-z0-9_'.]+)\b")


def _count_sorry_tokens(text: str) -> int:
    if not text:
        return 0
    return len(_SORRY_TOKEN_RE.findall(text))


def _named_decl_names(text: str) -> set[str]:
    names: set[str] = set()
    if not text:
        return names
    for line in text.splitlines():
        m = _DECL_NAME_RE.match(line)
        if not m:
            continue
        name = (m.group(2) or "").strip()
        if name:
            names.add(name)
    return names


def _find_decl_line_by_name(lines: list[str], *, name: str) -> int | None:
    if not name:
        return None
    name_re = re.compile(rf"^\s*(theorem|lemma|def|abbrev|example|instance)\s+{re.escape(name)}\b")
    for i, line in enumerate(lines, start=1):
        if name_re.match(line):
            return i
    return None


def _decl_snippet_from_lines(lines: list[str], *, line: int, max_chars: int = 8000) -> str:
    if not lines:
        return ""
    idx = max(0, min(len(lines) - 1, line - 1))
    start = idx
    while start > 0 and lines[start].strip() != "":
        start -= 1
    if lines[start].strip() == "" and start < idx:
        start += 1
    end = idx
    while end + 1 < len(lines) and lines[end + 1].strip() != "":
        end += 1
    snippet = "\n".join(lines[start : end + 1])
    if len(snippet) > max_chars:
        return snippet[:max_chars]
    return snippet


def _count_sorry_tokens_in_named_decl_text(*, file_text: str, name: str) -> int | None:
    if not file_text or not name:
        return None
    lines = file_text.splitlines()
    decl_line = _find_decl_line_by_name(lines, name=name)
    if not decl_line:
        return None
    snippet = _decl_snippet_from_lines(lines, line=decl_line)
    return _count_sorry_tokens(snippet)


def _new_proved_named_decls(*, before_text: str, after_text: str) -> list[str]:
    """
    List, in sorted order, the declarations that appear in `after_text` but not in
    `before_text` and whose surrounding snippet contains no `sorry` token.
    """
    known = _named_decl_names(before_text)
    current = _named_decl_names(after_text)
    added = sorted(current - known)
    if not added:
        return []

    after_lines = after_text.splitlines()

    def is_sorry_free(decl_name: str) -> bool:
        where = _find_decl_line_by_name(after_lines, name=decl_name)
        if not where:
            return False
        return _count_sorry_tokens(_decl_snippet_from_lines(after_lines, line=where)) == 0

    return [decl_name for decl_name in added if is_sorry_free(decl_name)]


def _combine_extra_instructions(*parts: str | None) -> str | None:
    cleaned = [p.strip() for p in parts if p and p.strip()]
    if not cleaned:
        return None
    return "\n\n".join(cleaned)


def _format_plan_history(history: list[str]) -> str | None:
    if not history:
        return None
    return "PLAN HISTORY (past proof attempts):\n" + "\n".join(history)


def _summarize_plan(plan: dict[str, Any] | None) -> str | None:
    if not isinstance(plan, dict):
        return None
    parts: list[str] = []
    status = plan.get("status")
    if isinstance(status, str) and status.strip():
        parts.append(f"status={status.strip()}")
    main_decl = plan.get("main_declaration")
    if isinstance(main_decl, dict):
        name = main_decl.get("name")
        if isinstance(name, str) and name.strip():
            parts.append(f"main={name.strip()}")
    lemma_names: list[str] = []
    lemma_plan = plan.get("lemma_plan")
    if isinstance(lemma_plan, list):
        for entry in lemma_plan[:8]:
            if not isinstance(entry, dict):
                continue
            name = entry.get("name")
            if isinstance(name, str) and name.strip():
                lemma_names.append(name.strip())
    if lemma_names:
        parts.append("lemmas=" + ", ".join(lemma_names))
    failure_reason = plan.get("failure_reason")
    if isinstance(failure_reason, str) and failure_reason.strip():
        parts.append("failure_reason=" + failure_reason.strip()[:300])
    return " | ".join(parts) if parts else None


def _summarize_feedback(feedback: dict[str, Any] | None) -> str | None:
    if not isinstance(feedback, dict):
        return None
    parts: list[str] = []
    status = feedback.get("status")
    if isinstance(status, str) and status.strip():
        parts.append(f"status={status.strip()}")
    reason = feedback.get("reason")
    if isinstance(reason, str) and reason.strip():
        parts.append("reason=" + reason.strip()[:400])
    pending = feedback.get("pending_goals")
    if isinstance(pending, list) and pending:
        parts.append(f"pending_goals={len(pending)}")
    return " | ".join(parts) if parts else None


def _format_retry_feedback_for_agent_a(feedback: dict[str, Any] | None) -> str | None:
    if not isinstance(feedback, dict):
        return None
    lines: list[str] = ["RETRY CONTEXT FROM PREVIOUS AGENT A FEEDBACK:"]
    status = feedback.get("status")
    if isinstance(status, str) and status.strip():
        lines.append(f"- previous_status: {status.strip()}")
    reason = feedback.get("reason")
    if isinstance(reason, str) and reason.strip():
        lines.append(f"- previous_reason: {reason.strip()[:500]}")
    pending = feedback.get("pending_goals")
    if isinstance(pending, list) and pending:
        lines.append("- previous_pending_goals:")
        for goal in pending[:8]:
            if isinstance(goal, str) and goal.strip():
                lines.append(f"  - {goal.strip()[:300]}")
    requested = feedback.get("requested_lemmas")
    if isinstance(requested, list) and requested:
        lines.append("- previous_requested_lemmas:")
        for entry in requested[:8]:
            if not isinstance(entry, dict):
                continue
            name = entry.get("name")
            statement_hint = entry.get("statement_hint")
            lemma_reason = entry.get("reason")
            detail_parts: list[str] = []
            if isinstance(name, str) and name.strip():
                detail_parts.append(f"name={name.strip()}")
            if isinstance(statement_hint, str) and statement_hint.strip():
                detail_parts.append(f"statement={statement_hint.strip()[:260]}")
            if isinstance(lemma_reason, str) and lemma_reason.strip():
                detail_parts.append(f"reason={lemma_reason.strip()[:260]}")
            if detail_parts:
                lines.append("  - " + " | ".join(detail_parts))
    if len(lines) == 1:
        return None
    lines.append(
        "Address this feedback concretely in this retry; do not return another no-op or purely narrative re-plan."
    )
    return "\n".join(lines)


def _history_line_from_record(rec: dict[str, Any]) -> str | None:
    kind = rec.get("kind")
    summary = rec.get("summary")
    if not isinstance(kind, str) or not kind.strip():
        return None
    if isinstance(summary, str) and summary.strip():
        return f"- (prev) {kind.strip()}: {summary.strip()}"
    payload = rec.get("payload")
    if isinstance(payload, dict):
        reason = payload.get("reason")
        if isinstance(reason, str) and reason.strip():
            return f"- (prev) {kind.strip()}: reason={reason.strip()[:300]}"
    return f"- (prev) {kind.strip()}"


def _should_skip_empty_proof_item(item: object) -> bool:
    """
    Proof-stage guard: if the JSON entry explicitly includes a `proof` field (or close variants)
    but it is empty/blank, skip this item entirely.

    Rationale: some datasets include statement entries whose proof text was not extracted yet.
    Users want the proof pipeline to ignore those items rather than invoking agents.
    """
    raw: dict[str, Any] = getattr(item, "raw", {}) or {}
    if not isinstance(raw, dict):
        return False

    proof_keys = ("proof", "proof_text", "formal_proof", "informal_proof")
    if not any(k in raw for k in proof_keys):
        return False

    for k in proof_keys:
        v = raw.get(k)
        if isinstance(v, str) and v.strip():
            return False
    return True


def main() -> None:
    parser = argparse.ArgumentParser(description="Run Codex-to-Lean proof orchestrator.")
    parser.add_argument(
        "--start-index",
        type=int,
        default=None,
        help="Start processing from this global index (overrides saved state).",
    )
    parser.add_argument(
        "--max-items",
        type=int,
        default=None,
        help="Process at most this many items (useful for batching).",
    )
    parser.add_argument(
        "--max-b-retries",
        type=int,
        default=3,
        help="Max retries for Agent B when Lean still fails after a fix (default: 3).",
    )
    parser.add_argument(
        "--max-c-replans",
        type=int,
        default=1,
        help="Max additional Agent C re-plans when Agent A requests re-planning (default: 1).",
    )
    parser.add_argument(
        "--max-no-progress-retries",
        type=int,
        default=2,
        help=(
            "If Agent A makes no meaningful progress (no sorry reduction, no proved helper, "
            "and no target-decl sorry reduction), retry this plan round up to N times (default: 2)."
        ),
    )
    parser.add_argument(
        "--clean-warnings-with-agent-b",
        action=argparse.BooleanOptionalAction,
        default=False,
        help="Whether to call proof Agent B to clean non-sorry warnings reported by Lean (default: false).",
    )
    parser.add_argument(
        "--book-check-every",
        type=int,
        default=int(os.getenv("PROOF_BOOK_CHECK_EVERY", "10")),
        help=(
            "How often to run `lake env lean <project>/Book.lean` within the same chapter (default: 10). "
            "1 = always (legacy behavior), 0 = never. "
            "Regardless of this value, Book.lean is checked once at chapter boundaries when project mode is enabled."
        ),
    )
    parser.add_argument(
        "--data-file",
        type=Path,
        default=None,
        help="Path to the JSON data file to process (default: config.PROOF_DATA_PATH).",
    )
    parser.add_argument(
        "--proof-agent-config",
        type=Path,
        default=None,
        help="Path to a TOML file controlling per-agent model/reasoning for PROOF stage. "
        "Default: use $PROOF_AGENT_CONFIG_FILE, else repo `agent_configs/proof_agents.toml` if present.",
    )
    parser.add_argument(
        "--write-history",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Persist proof Agent C/A plan & feedback history to JSONL (default: true).",
    )
    parser.add_argument(
        "--use-history",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Load prior proof history for the same Lean file and pass it into Agent prompts (default: true).",
    )
    parser.add_argument(
        "--history-file",
        type=Path,
        default=None,
        help="Optional path for proof history JSONL (default: log/<project>/proof_logs/proof_history.jsonl).",
    )
    parser.add_argument(
        "--history-max-records",
        type=int,
        default=5,
        help="How many recent history records to load per Lean file (default: 5).",
    )
    parser.add_argument(
        "--split-with-agent-d",
        action=argparse.BooleanOptionalAction,
        default=(
            os.getenv("SPLIT_WITH_AGENT_D", "").strip() == ""
            or os.getenv("SPLIT_WITH_AGENT_D", "").lower() in {"1", "true", "yes", "on"}
        ),
        help=(
            "If enabled, call proof Agent D to split oversized section files into part files "
            "(default: true; set SPLIT_WITH_AGENT_D=0/false/no/off or pass --no-split-with-agent-d to disable)."
        ),
    )
    parser.add_argument(
        "--max-lines-per-part",
        type=int,
        default=int(os.getenv("MAX_LINES_PER_PART", "1000")),
        help="When splitting with Agent D, target at most this many lines per part file (default: 1000).",
    )
    args = parser.parse_args()

    # proof pipeline 起点：确定从哪个 index 开始
    # 优先级：命令行 --start-index > 环境变量 PROOF_START_INDEX/START_INDEX > proof_progress.json 的 next_index
    env_start = os.environ.get("PROOF_START_INDEX") or os.environ.get("START_INDEX")
    start_index = args.start_index
    if start_index is None and env_start:
        try:
            start_index = int(env_start)
        except ValueError:
            pass

    data_path = args.data_file or PROOF_DATA_PATH
    print(f"Using proof data file: {data_path}")
    if not Path(data_path).exists():
        print(f"Data file not found: {data_path}")
        print("Put your JSON list under `data/` and pass it via --data-file. See `data/README.md`.")
        return
    items = load_items(data_path=data_path)
    state = load_state(PROOF_PROGRESS_FILE)
    default_start = state.get("next_index", 1)
    effective_start = start_index if start_index is not None else default_start

    items = list(filter_items(items, min_index=effective_start))

    project = os.environ.get("FORMAL_PROJECT", "").strip()
    book_rel: Path | None = None
    if project:
        try:
            book_rel = _prebuild_project_aggregates(project=project, items=list(items))
            print(f"[Aggregates] prebuilt project aggregators; book={book_rel}")
        except Exception as e:
            print(f"[Aggregates] prebuild failed: {e}")
            book_rel = None

    book_check_every = int(args.book_check_every or 0)
    if book_check_every < 0:
        book_check_every = 0
    current_chapter: int | None = None
    chapter_item_count = 0

    default_cfg = ROOT / "agent_configs/proof_agents.toml"
    proof_agent_cfg = args.proof_agent_config or (
        Path(os.environ["PROOF_AGENT_CONFIG_FILE"]) if os.environ.get("PROOF_AGENT_CONFIG_FILE") else None
    )
    agent_keys = ["A", "B", "C"] + (["D"] if args.split_with_agent_d else [])
    agent_settings = resolve_stage_agents_settings(
        stage_prefix="PROOF_AGENT",
        agent_keys=agent_keys,
        config_path=proof_agent_cfg,
        default_config_path=default_cfg,
    )
    agent_a_strict = agent_settings.strict_agents.get("A")
    history_file = args.history_file or (PROOF_LOGS_DIR / "proof_history.jsonl")

    run_id = start_run(
        "proof",
        stage=2,
        name_tag=Path(data_path).name,
        data_file=str(data_path),
        extra={
            "start_index": effective_start,
            "clean_warnings_with_agent_b": args.clean_warnings_with_agent_b,
            "book_check_every": book_check_every,
            "proof_agent_config": str(agent_settings.source_path) if agent_settings.source_path else None,
            "proof_agent_a_model": agent_settings.agents["A"].model,
            "proof_agent_a_reasoning_effort": agent_settings.agents["A"].reasoning_effort,
            "proof_agent_a_strict_retry_model": (agent_a_strict.model if agent_a_strict else None),
            "proof_agent_a_strict_retry_reasoning_effort": (
                agent_a_strict.reasoning_effort if agent_a_strict else None
            ),
            "proof_agent_b_model": agent_settings.agents["B"].model,
            "proof_agent_b_reasoning_effort": agent_settings.agents["B"].reasoning_effort,
            "proof_agent_c_model": agent_settings.agents["C"].model,
            "proof_agent_c_reasoning_effort": agent_settings.agents["C"].reasoning_effort,
            "history_file": str(history_file),
            "write_history": bool(args.write_history),
            "use_history": bool(args.use_history),
            "history_max_records": int(args.history_max_records),
            "max_no_progress_retries": int(args.max_no_progress_retries),
        },
    )
    run_start = time.monotonic()
    processed = 0
    last_success = state.get("next_index", 1) - 1
    total_tokens_used = 0
    total_items_failed = 0
    total_items_failed_bad_statement = 0
    if not args.use_history:
        print("[History] disabled (--no-use-history)")
    print(
        "[Agent A strict retry] "
        f"model={agent_a_strict.model if agent_a_strict and agent_a_strict.model else (agent_settings.agents['A'].model or 'default')}, "
        f"reasoning_effort={agent_a_strict.reasoning_effort if agent_a_strict and agent_a_strict.reasoning_effort else (agent_settings.agents['A'].reasoning_effort or 'default')}"
    )

    try:
        def _split_and_validate(*, item: object, lean_file_abs: Path, lean_seconds: float) -> bool:
            if not args.split_with_agent_d:
                return True
            if args.max_lines_per_part <= 0:
                return True

            try:
                line_count = len(lean_file_abs.read_text(encoding="utf-8").splitlines())
            except FileNotFoundError:
                return True
            if line_count <= args.max_lines_per_part:
                return True

            settings_d = agent_settings.agents.get("D")
            d_start = time.monotonic()
            d_res = run_proof_agent_d(
                lean_file=lean_file_abs,
                max_lines=args.max_lines_per_part,
                task_id=f"idx{getattr(item, 'index', 'na')}_split",
                model=(settings_d.model if settings_d else None),
                reasoning_effort=(settings_d.reasoning_effort if settings_d else None),
            )
            d_seconds = time.monotonic() - d_start
            nonlocal total_tokens_used
            total_tokens_used += d_res.tokens_used or 0
            log_event(
                run_id,
                "agent_d_result",
                {
                    "index": getattr(item, "index", None),
                    "label": getattr(item, "label", None),
                    "code": d_res.code,
                    "seconds": d_seconds,
                    "line_count": line_count,
                    "max_lines": args.max_lines_per_part,
                    "tokens_used": d_res.tokens_used,
                    "log_path": str(d_res.log_path) if d_res.log_path else None,
                },
            )
            if d_res.code != 0:
                failure_entry = (
                    f"index={getattr(item, 'index', None)}, label={getattr(item, 'label', None)}, "
                    f"file={lean_file_abs.relative_to(LEAN_ROOT)}\n"
                    f"note=agent_d_failed\n"
                    f"stderr:\n{d_res.stderr}\n\n"
                )
                PROOF_FAILURE_LOG.parent.mkdir(parents=True, exist_ok=True)
                with PROOF_FAILURE_LOG.open("a", encoding="utf-8") as f:
                    f.write(failure_entry)
                return False

            # Validate by compiling the aggregate section file (after split).
            try:
                base_abs = section_file(getattr(item, "chapter"), getattr(item, "section"))
                base_rel = base_abs.relative_to(LEAN_ROOT)
            except Exception:
                base_rel = lean_file_abs.relative_to(LEAN_ROOT)

            failed_mode: str | None = None  # "target" | "book" | None
            code, out, err = lake_env_lean(base_rel)
            lean_output = "\n".join(part for part in (err, out) if part)
            non_sorry_blocks = _non_sorry_warning_blocks(lean_output)
            if code != 0:
                failed_mode = "target"
            elif non_sorry_blocks and args.clean_warnings_with_agent_b:
                failed_mode = "target"
                warnings_text = "\n\n".join(non_sorry_blocks)
                err = "Lean produced the following non-sorry warnings. Please remove them:\n\n" + warnings_text

            if failed_mode is None and book_rel is not None:
                book_code, book_out, book_err = lake_env_lean(book_rel)
                book_output = "\n".join(part for part in (book_err, book_out) if part)
                book_non_sorry = _non_sorry_warning_blocks(book_output)
                if book_code != 0:
                    failed_mode = "book"
                    out, err = book_out, book_err
                    non_sorry_blocks = book_non_sorry
                elif book_non_sorry and args.clean_warnings_with_agent_b:
                    failed_mode = "book"
                    warnings_text = "\n\n".join(book_non_sorry)
                    err = (
                        "Lean produced the following non-sorry warnings (via Book.lean). Please remove them:\n\n"
                        + warnings_text
                    )
                    non_sorry_blocks = book_non_sorry

            if failed_mode is None and (not non_sorry_blocks or not args.clean_warnings_with_agent_b):
                return True

            if failed_mode == "book":
                print("Post-split Book.lean check failed; calling proof Agent B (Book-check mode)...")
            else:
                print("Post-split check failed; calling proof Agent B...")

            for attempt in range(1, args.max_b_retries + 1):
                print(f"[Proof Agent B (post-split) attempt {attempt}/{args.max_b_retries}]")
                run_b_post_split = (
                    run_proof_agent_b_bookcheck
                    if (book_rel is not None and failed_mode == "book")
                    else run_proof_agent_b
                )
                b_res = run_b_post_split(
                    LEAN_ROOT / base_rel,
                    error_log=err,
                    item_index=getattr(item, "index"),
                    item_context=getattr(item, "context"),
                    item_content=getattr(item, "content"),
                    item_dependencies=getattr(item, "dependencies"),
                    item_notes=getattr(item, "notes", None),
                    model=agent_settings.agents["B"].model,
                    reasoning_effort=agent_settings.agents["B"].reasoning_effort,
                )
                total_tokens_used += b_res.tokens_used or 0
                log_event(
                    run_id,
                    "agent_b_post_split_result",
                    {
                        "index": getattr(item, "index", None),
                        "label": getattr(item, "label", None),
                        "attempt": attempt,
                        "code": b_res.code,
                        "tokens_used": b_res.tokens_used,
                        "log_path": str(b_res.log_path) if b_res.log_path else None,
                    },
                )
                if b_res.code != 0:
                    break

                failed_mode = None
                code, out, err2 = lake_env_lean(base_rel)
                lean_output2 = "\n".join(part for part in (err2, out) if part)
                non_sorry_blocks2 = _non_sorry_warning_blocks(lean_output2)
                err = err2
                if code != 0:
                    failed_mode = "target"
                    err = lean_output2
                elif non_sorry_blocks2 and args.clean_warnings_with_agent_b:
                    failed_mode = "target"
                    warnings_text = "\n\n".join(non_sorry_blocks2)
                    err = "Lean still reports these non-sorry warnings. Please remove them:\n\n" + warnings_text

                if failed_mode is None and book_rel is not None:
                    book_code2, book_out2, book_err2 = lake_env_lean(book_rel)
                    book_output2 = "\n".join(part for part in (book_err2, book_out2) if part)
                    book_non_sorry2 = _non_sorry_warning_blocks(book_output2)
                    if book_code2 != 0:
                        failed_mode = "book"
                        err = book_output2
                    elif book_non_sorry2 and args.clean_warnings_with_agent_b:
                        failed_mode = "book"
                        warnings_text = "\n\n".join(book_non_sorry2)
                        err = (
                            "Lean still reports these non-sorry warnings (via Book.lean). Please remove them:\n\n"
                            + warnings_text
                        )

                if failed_mode is None and (
                    not non_sorry_blocks2 or not args.clean_warnings_with_agent_b
                ):
                    return True

            failure_entry = (
                f"index={getattr(item, 'index', None)}, label={getattr(item, 'label', None)}, file={base_rel}\n"
                f"note=split_aggregate_failed\n"
                f"last_error:\n{err}\n\n"
            )
            PROOF_FAILURE_LOG.parent.mkdir(parents=True, exist_ok=True)
            with PROOF_FAILURE_LOG.open("a", encoding="utf-8") as f:
                f.write(failure_entry)
            log_event(
                run_id,
                "item_end",
                {
                    "index": getattr(item, "index", None),
                    "label": getattr(item, "label", None),
                    "status": "failed_after_split",
                    "seconds": time.monotonic() - item_start,
                    "lean_seconds": lean_seconds,
                    "error_snippet": err[:1000] if err else None,
                },
            )
            return False

        for idx, item in enumerate(items):
            next_item = items[idx + 1] if idx + 1 < len(items) else None
            # 每条 proof 数据的管线：
            # Agent C 拆题规划 → Agent A 生成/更新证明（可请求 re-plan） → 单文件 lean 检查 → Agent B（如需清理错误/警告） → lean 复查 → 记录进度
            print(f"=== proof index={item.index} label={item.label} ({item.chapter}.{item.section}.{item.local_index}) ===")
            item_start = time.monotonic()
            log_event(
                run_id,
                "item_start",
                {
                    "index": item.index,
                    "label": item.label,
                    "chapter": item.chapter,
                    "section": item.section,
                    "local_index": item.local_index,
                },
            )

            if _should_skip_empty_proof_item(item):
                print(f"Skipping proof item index={item.index} label={item.label}: empty `proof` in data.")
                state["next_index"] = item.index + 1
                save_state(state, PROOF_PROGRESS_FILE, run_id=run_id)
                processed += 1
                last_success = item.index
                log_event(
                    run_id,
                    "item_end",
                    {
                        "index": item.index,
                        "label": item.label,
                        "status": "skipped_empty_proof",
                        "seconds": time.monotonic() - item_start,
                    },
                )
                continue

            lean_file_abs = resolve_item_target_file(
                item.chapter, item.section, label=item.label, target_file=getattr(item, "target_file", None)
            )
            lean_file_abs.parent.mkdir(parents=True, exist_ok=True)
            if not lean_file_abs.exists():
                print(f"Target file {lean_file_abs} does not exist; skipping.")
                state["next_index"] = item.index + 1
                save_state(state, PROOF_PROGRESS_FILE, run_id=run_id)
                processed += 1
                last_success = item.index
                log_event(
                    run_id,
                    "item_end",
                    {
                        "index": item.index,
                        "label": item.label,
                        "status": "skipped_missing_file",
                        "seconds": time.monotonic() - item_start,
                    },
                )
                continue
            lean_file_rel = lean_file_abs.relative_to(LEAN_ROOT)
            compile_file_rel = compile_entry_for(ensure_section_aggregate_exists(lean_file_rel))

            plan_history: list[str] = []
            if args.use_history and args.history_max_records > 0:
                history_exists = history_file.exists()
                recs = load_recent_history(
                    history_file,
                    lean_file=str(lean_file_rel),
                    max_records=int(args.history_max_records),
                    kinds={"agent_c_plan", "agent_a_feedback", "item_end"},
                    task_id=str(item.index),
                    item_index=int(item.index),
                    label=item.label,
                )
                for rec in recs:
                    line = _history_line_from_record(rec)
                    if line:
                        plan_history.append(line)
                print(
                    f"[History] loaded {len(recs)} record(s) from {history_file} for {lean_file_rel}"
                    + ("" if history_exists else " (file not found; starting fresh)")
                )

            # Step 1: Agent C decomposes the item into a plan, then Agent A fills in / repairs the proof (supports multiple re-plan rounds)
            plan_data = None
            plan_raw_block = None
            feedback_for_c = None
            abandon_item = False
            skip_current_item = False
            max_plan_rounds = args.max_c_replans + 1

            for plan_round in range(1, max_plan_rounds + 1):
                print(f"[Proof Agent C planning {plan_round}/{max_plan_rounds}]")
                history_text = _format_plan_history(plan_history)
                c_start = time.monotonic()
                c_res = run_proof_agent_c(
                    item,
                    feedback_from_agent_a=feedback_for_c,
                    prior_plan=plan_data,
                    extra_instructions=history_text,
                    model=agent_settings.agents["C"].model,
                    reasoning_effort=agent_settings.agents["C"].reasoning_effort,
                )
                c_seconds = time.monotonic() - c_start
                total_tokens_used += c_res.tokens_used or 0
                log_event(
                    run_id,
                    "agent_c_result",
                    {
                        "index": item.index,
                        "label": item.label,
                        "plan_round": plan_round,
                        "code": c_res.code,
                        "seconds": c_seconds,
                        "tokens_used": c_res.tokens_used,
                        "log_path": str(c_res.log_path) if c_res.log_path else None,
                    },
                )
                if c_res.code != 0:
                    print(f"Proof Agent C failed with code {c_res.code}. Stopping.\n{c_res.stderr}")
                    abandon_item = True
                    break

                plan_data, plan_raw_block = extract_marked_json(
                    c_res.stdout, AGENT_C_PLAN_START, AGENT_C_PLAN_END
                )
                if plan_raw_block is None:
                    plan_raw_block = c_res.stdout
                if plan_data is None:
                    print("Warning: could not parse Agent C plan JSON; passing raw output to Agent A.")
                plan_summary = _summarize_plan(plan_data) or "unparsed_plan"
                plan_history.append(f"- round {plan_round} C: {plan_summary}")
                main_decl_name = None
                if isinstance(plan_data, dict):
                    main_decl = plan_data.get("main_declaration")
                    if isinstance(main_decl, dict):
                        main_name = main_decl.get("name")
                        if isinstance(main_name, str) and main_name.strip():
                            main_decl_name = main_name.strip()
                if args.write_history:
                    append_history(
                        history_file,
                        pipeline="proof",
                        run_id=run_id,
                        lean_file=str(lean_file_rel),
                        task_id=str(item.index),
                        kind="agent_c_plan",
                        summary=plan_summary,
                        log_path=(str(c_res.log_path) if c_res.log_path else None),
                        payload={
                            "index": item.index,
                            "label": item.label,
                            "plan_round": plan_round,
                            "status": (plan_data.get("status") if isinstance(plan_data, dict) else None),
                            "main_declaration": main_decl_name,
                        },
                    )

                feedback_for_c = None  # By default the next round carries no feedback, unless Agent A requests a re-plan
                request_replan = False
                replan_feedback: dict[str, Any] | None = None
                prior_no_progress_feedback: dict[str, Any] | None = None

                for progress_attempt in range(0, int(args.max_no_progress_retries) + 1):
                    stricter_no_progress = None
                    retry_feedback_context = None
                    agent_a_model = agent_settings.agents["A"].model
                    agent_a_reasoning_effort = agent_settings.agents["A"].reasoning_effort
                    if progress_attempt > 0:
                        stricter_no_progress = (
                            "NO-PROGRESS RETRY: previous attempt did not make meaningful proof progress. "
                            "This retry must produce at least one of: "
                            "(1) reduce total `sorry` tokens in file; "
                            "(2) reduce `sorry` tokens in target declaration; "
                            "(3) add and fully prove at least one new helper lemma. "
                            "Do not make cosmetic edits."
                        )
                        retry_feedback_context = _format_retry_feedback_for_agent_a(prior_no_progress_feedback)
                        if agent_a_strict and agent_a_strict.model:
                            agent_a_model = agent_a_strict.model
                        if agent_a_strict and agent_a_strict.reasoning_effort:
                            agent_a_reasoning_effort = agent_a_strict.reasoning_effort
                        print(
                            f"[Proof Agent A strict retry {progress_attempt}/{args.max_no_progress_retries}] "
                            f"model={agent_a_model or 'default'}/{agent_a_reasoning_effort or 'default'}"
                        )

                    file_before_a = file_snapshot(lean_file_abs)
                    before_text_a = lean_file_abs.read_text(encoding="utf-8")
                    before_sorry_total = _count_sorry_tokens(before_text_a)
                    before_target_sorry = (
                        _count_sorry_tokens_in_named_decl_text(
                            file_text=before_text_a,
                            name=main_decl_name,
                        )
                        if main_decl_name
                        else None
                    )

                    a_start = time.monotonic()
                    a_res = run_proof_agent_a(
                        item,
                        plan=plan_data,
                        plan_raw=plan_raw_block,
                        attempt=plan_round,
                        extra_instructions=_combine_extra_instructions(
                            _format_plan_history(plan_history),
                            stricter_no_progress,
                            retry_feedback_context,
                        ),
                        model=agent_a_model,
                        reasoning_effort=agent_a_reasoning_effort,
                    )
                    a_seconds = time.monotonic() - a_start
                    file_after_a = file_snapshot(lean_file_abs)
                    after_text_a = lean_file_abs.read_text(encoding="utf-8")
                    after_sorry_total = _count_sorry_tokens(after_text_a)
                    after_target_sorry = (
                        _count_sorry_tokens_in_named_decl_text(
                            file_text=after_text_a,
                            name=main_decl_name,
                        )
                        if main_decl_name
                        else None
                    )
                    total_tokens_used += a_res.tokens_used or 0
                    log_event(
                        run_id,
                        "agent_a_result",
                        {
                            "index": item.index,
                            "label": item.label,
                            "plan_round": plan_round,
                            "progress_attempt": progress_attempt + 1,
                            "code": a_res.code,
                            "seconds": a_seconds,
                            "tokens_used": a_res.tokens_used,
                            "log_path": str(a_res.log_path) if a_res.log_path else None,
                            "model": agent_a_model,
                            "reasoning_effort": agent_a_reasoning_effort,
                            "file_before": file_before_a,
                            "file_after": file_after_a,
                            "file_delta": snapshot_delta(file_before_a, file_after_a),
                        },
                    )
                    if a_res.code != 0:
                        print(f"Proof Agent A failed with code {a_res.code}. Stopping.\n{a_res.stderr}")
                        abandon_item = True
                        break

                    axiom_decls = find_axiom_decls(lean_file_abs)
                    if axiom_decls:
                        print("Detected forbidden `axiom` declarations after proof Agent A; requesting cleanup.")
                        cleanup_instructions = build_axiom_cleanup_instructions(lean_file_rel, axiom_decls)
                        cleanup_start = time.monotonic()
                        cleanup_res = run_proof_agent_a(
                            item,
                            plan=plan_data,
                            plan_raw=plan_raw_block,
                            attempt=plan_round,
                            extra_instructions=cleanup_instructions,
                            model=agent_a_model,
                            reasoning_effort=agent_a_reasoning_effort,
                        )
                        cleanup_seconds = time.monotonic() - cleanup_start
                        total_tokens_used += cleanup_res.tokens_used or 0
                        log_event(
                            run_id,
                            "agent_a_axiom_cleanup_result",
                            {
                                "index": item.index,
                                "label": item.label,
                                "plan_round": plan_round,
                                "progress_attempt": progress_attempt + 1,
                                "code": cleanup_res.code,
                                "seconds": cleanup_seconds,
                                "tokens_used": cleanup_res.tokens_used,
                                "log_path": str(cleanup_res.log_path) if cleanup_res.log_path else None,
                                "model": agent_a_model,
                                "reasoning_effort": agent_a_reasoning_effort,
                            },
                        )
                        if cleanup_res.code != 0:
                            print(
                                f"Proof Agent A axiom cleanup failed with code {cleanup_res.code}. "
                                f"Stopping.\n{cleanup_res.stderr}"
                            )
                            abandon_item = True
                            break
                        axiom_decls = find_axiom_decls(lean_file_abs)
                        if axiom_decls:
                            print("Axiom cleanup failed; `axiom` declarations still present.")
                            print(format_axiom_report(axiom_decls))
                            abandon_item = True
                            break
                        after_text_a = lean_file_abs.read_text(encoding="utf-8")
                        after_sorry_total = _count_sorry_tokens(after_text_a)
                        after_target_sorry = (
                            _count_sorry_tokens_in_named_decl_text(
                                file_text=after_text_a,
                                name=main_decl_name,
                            )
                            if main_decl_name
                            else None
                        )

                    a_feedback, _ = extract_marked_json(
                        a_res.stdout, AGENT_A_FEEDBACK_START, AGENT_A_FEEDBACK_END
                    )
                    if a_feedback and a_feedback.get("status") == "failed_bad_statement":
                        reason = (a_feedback.get("reason") or "no reason provided").strip()
                        print(f"[REPORT] Agent A flagged item as failed_bad_statement: {reason}")
                        if args.write_history:
                            append_history(
                                history_file,
                                pipeline="proof",
                                run_id=run_id,
                                lean_file=str(lean_file_rel),
                                task_id=str(item.index),
                                kind="agent_a_feedback",
                                summary=f"status=failed_bad_statement | reason={reason[:300]}",
                                log_path=(str(a_res.log_path) if a_res.log_path else None),
                                payload={
                                    "index": item.index,
                                    "label": item.label,
                                    "plan_round": plan_round,
                                    "feedback": a_feedback,
                                },
                            )
                        PROOF_FAILURE_LOG.parent.mkdir(parents=True, exist_ok=True)
                        with PROOF_FAILURE_LOG.open("a", encoding="utf-8") as f:
                            f.write(
                                f"index={item.index}, label={item.label}, file={lean_file_rel}\n"
                                "note=failed_bad_statement\n"
                                f"reason={reason}\n\n"
                            )
                        if args.write_history:
                            append_history(
                                history_file,
                                pipeline="proof",
                                run_id=run_id,
                                lean_file=str(lean_file_rel),
                                task_id=str(item.index),
                                kind="item_end",
                                summary="failed_bad_statement",
                                payload={
                                    "index": item.index,
                                    "label": item.label,
                                    "status": "failed_bad_statement",
                                    "reason": reason,
                                },
                            )
                        total_items_failed += 1
                        total_items_failed_bad_statement += 1
                        log_event(
                            run_id,
                            "item_end",
                            {
                                "index": item.index,
                                "label": item.label,
                                "status": "failed_bad_statement",
                                "reason": reason,
                                "seconds": time.monotonic() - item_start,
                            },
                        )
                        state["next_index"] = item.index + 1
                        save_state(state, PROOF_PROGRESS_FILE, run_id=run_id)
                        processed += 1
                        last_success = item.index
                        skip_current_item = True
                        break

                    proved_new_decls = _new_proved_named_decls(
                        before_text=before_text_a,
                        after_text=after_text_a,
                    )
                    target_sorry_shrunk = (
                        before_target_sorry is not None
                        and after_target_sorry is not None
                        and after_target_sorry < before_target_sorry
                    )
                    meaningful_progress = (
                        after_sorry_total < before_sorry_total
                        or bool(proved_new_decls)
                        or target_sorry_shrunk
                    )
                    if not meaningful_progress:
                        prior_no_progress_feedback = a_feedback if isinstance(a_feedback, dict) else None
                        log_event(
                            run_id,
                            "no_progress_after_agent_a",
                            {
                                "index": item.index,
                                "label": item.label,
                                "plan_round": plan_round,
                                "progress_attempt": progress_attempt + 1,
                                "before_sorry_total": before_sorry_total,
                                "after_sorry_total": after_sorry_total,
                                "target_before": before_target_sorry,
                                "target_after": after_target_sorry,
                                "proved_new_decls": proved_new_decls,
                            },
                        )
                        if progress_attempt < int(args.max_no_progress_retries):
                            print(
                                "Warning: no meaningful progress after proof Agent A; "
                                "retrying with stricter instructions."
                            )
                            continue
                        print(
                            "Warning: no meaningful progress after proof Agent A after retries; "
                            "recording failure and continuing."
                        )
                        PROOF_FAILURE_LOG.parent.mkdir(parents=True, exist_ok=True)
                        with PROOF_FAILURE_LOG.open("a", encoding="utf-8") as f:
                            f.write(
                                f"index={item.index}, label={item.label}, file={lean_file_rel}\n"
                                "note=no_progress_after_proof_agent_a\n\n"
                            )
                        if args.write_history:
                            append_history(
                                history_file,
                                pipeline="proof",
                                run_id=run_id,
                                lean_file=str(lean_file_rel),
                                task_id=str(item.index),
                                kind="item_end",
                                summary="failed_no_progress",
                                payload={
                                    "index": item.index,
                                    "label": item.label,
                                    "status": "failed_no_progress",
                                },
                            )
                        total_items_failed += 1
                        log_event(
                            run_id,
                            "item_end",
                            {
                                "index": item.index,
                                "label": item.label,
                                "status": "failed_no_progress",
                                "seconds": time.monotonic() - item_start,
                            },
                        )
                        state["next_index"] = item.index + 1
                        save_state(state, PROOF_PROGRESS_FILE, run_id=run_id)
                        processed += 1
                        last_success = item.index
                        skip_current_item = True
                        break

                    if a_feedback and a_feedback.get("status") == "needs_replan":
                        reason = a_feedback.get("reason") or "no reason given"
                        print(f"Agent A requested re-plan: {reason}")
                        feedback_summary = _summarize_feedback(a_feedback) or reason
                        plan_history.append(f"- round {plan_round} A: {feedback_summary}")
                        if args.write_history:
                            append_history(
                                history_file,
                                pipeline="proof",
                                run_id=run_id,
                                lean_file=str(lean_file_rel),
                                task_id=str(item.index),
                                kind="agent_a_feedback",
                                summary=feedback_summary,
                                log_path=(str(a_res.log_path) if a_res.log_path else None),
                                payload={
                                    "index": item.index,
                                    "label": item.label,
                                    "plan_round": plan_round,
                                    "feedback": a_feedback,
                                },
                            )
                        request_replan = True
                        replan_feedback = a_feedback
                        break

                    # The proof stage made meaningful progress and no re-plan was requested; move on to the Lean checks
                    break

                if skip_current_item:
                    break
                if abandon_item:
                    break
                if request_replan:
                    if plan_round >= max_plan_rounds:
                        print("Reached max Agent C re-plan attempts; stopping.")
                        abandon_item = True
                        break
                    feedback_for_c = replan_feedback
                    continue

                # No re-plan was requested; end the planning/proof stage
                break

            if skip_current_item:
                if args.max_items is not None and processed >= args.max_items:
                    print(f"Reached max-items={args.max_items}, stopping batch.")
                    break
                continue

            if abandon_item:
                total_items_failed += 1
                if args.write_history:
                    append_history(
                        history_file,
                        pipeline="proof",
                        run_id=run_id,
                        lean_file=str(lean_file_rel),
                        task_id=str(item.index),
                        kind="item_end",
                        summary="abandoned_in_planning",
                        payload={"index": item.index, "label": item.label, "status": "abandoned_in_planning"},
                    )
                log_event(
                    run_id,
                    "item_end",
                    {
                        "index": item.index,
                        "label": item.label,
                        "status": "abandoned_in_planning",
                        "seconds": time.monotonic() - item_start,
                    },
                )
                break

            # Step 2: first compilation check
            # Rule: before checking Book.lean, always check the target compile entry first.
            failed_mode: str | None = None  # "target" | "book" | None
            failed_reason_after_a: str | None = None
            target_non_sorry: list[str] = []
            book_non_sorry: list[str] = []
            should_check_book = False
            book_check_reason = None
            if book_rel is not None:
                chap = int(item.chapter)
                if current_chapter is None or current_chapter != chap:
                    current_chapter = chap
                    chapter_item_count = 0
                chapter_item_count += 1
                chapter_boundary = next_item is None or int(next_item.chapter) != chap
                if book_check_every <= 0:
                    should_check_book = chapter_boundary
                    book_check_reason = "chapter_boundary_only" if chapter_boundary else "disabled"
                elif book_check_every == 1:
                    should_check_book = True
                    book_check_reason = "always"
                else:
                    should_check_book = chapter_boundary or (chapter_item_count % book_check_every == 0)
                    book_check_reason = (
                        "chapter_boundary"
                        if chapter_boundary
                        else ("every_n" if should_check_book else "throttled")
                    )

            lean_start = time.monotonic()
            code, out, err = lake_env_lean(compile_file_rel)
            lean_seconds = time.monotonic() - lean_start
            lean_output = "\n".join(part for part in (err, out) if part)
            target_non_sorry = _non_sorry_warning_blocks(lean_output)
            log_event(
                run_id,
                "lean_check",
                {
                    "index": item.index,
                    "label": item.label,
                    "phase": "post_agent_a_target",
                    "code": code,
                    "seconds": lean_seconds,
                    "has_non_sorry_warnings": bool(target_non_sorry),
                    "compiled_file": str(compile_file_rel),
                },
            )

            if code != 0:
                failed_mode = "target"
                failed_reason_after_a = "lean_error"
            elif target_non_sorry and args.clean_warnings_with_agent_b:
                failed_mode = "target"
                failed_reason_after_a = "non_sorry_warnings"
                warnings_text = "\n\n".join(target_non_sorry)
                err = "Lean produced the following non-sorry warnings. Please remove them:\n\n" + warnings_text

            if failed_mode is None and book_rel is not None and should_check_book:
                lean_start = time.monotonic()
                book_code, book_out, book_err = lake_env_lean(book_rel)
                book_seconds = time.monotonic() - lean_start
                book_output = "\n".join(part for part in (book_err, book_out) if part)
                book_non_sorry = _non_sorry_warning_blocks(book_output)
                log_event(
                    run_id,
                    "lean_check",
                    {
                        "index": item.index,
                        "label": item.label,
                        "phase": "post_agent_a_book",
                        "code": book_code,
                        "seconds": book_seconds,
                        "has_non_sorry_warnings": bool(book_non_sorry),
                        "compiled_file": str(book_rel),
                    },
                )
                if book_code != 0:
                    failed_mode = "book"
                    failed_reason_after_a = "book_lean_error"
                    out, err = book_out, book_err
                elif book_non_sorry and args.clean_warnings_with_agent_b:
                    failed_mode = "book"
                    failed_reason_after_a = "book_non_sorry_warnings"
                    warnings_text = "\n\n".join(book_non_sorry)
                    err = (
                        "Lean produced the following non-sorry warnings (via Book.lean). Please remove them:\n\n"
                        + warnings_text
                    )
            elif failed_mode is None and book_rel is not None and not should_check_book:
                log_event(
                    run_id,
                    "lean_check_skipped",
                    {
                        "index": item.index,
                        "label": item.label,
                        "phase": "post_agent_a_book",
                        "reason": book_check_reason,
                        "chapter": int(item.chapter),
                        "chapter_item_count": chapter_item_count,
                        "book_check_every": book_check_every,
                        "compiled_file": str(book_rel),
                    },
                )

            if failed_mode is None:
                status = (
                    "ok_with_warnings"
                    if (target_non_sorry or book_non_sorry) and not args.clean_warnings_with_agent_b
                    else "ok"
                )
                if status == "ok_with_warnings":
                    print(
                        "Lean OK (proof), but has non-sorry warnings (--no-clean-warnings-with-agent-b); continuing."
                    )
                else:
                    print("Lean OK (proof).")

                if not _split_and_validate(item=item, lean_file_abs=lean_file_abs, lean_seconds=lean_seconds):
                    total_items_failed += 1
                    break
                state["next_index"] = item.index + 1
                save_state(state, PROOF_PROGRESS_FILE, run_id=run_id)
                processed += 1
                last_success = item.index
                log_event(
                    run_id,
                    "item_end",
                    {
                        "index": item.index,
                        "label": item.label,
                        "status": status,
                        "seconds": time.monotonic() - item_start,
                    },
                )
                if args.max_items is not None and processed >= args.max_items:
                    print(f"Reached max-items={args.max_items}, stopping batch.")
                    break
                continue

            if failed_mode == "book":
                if failed_reason_after_a == "book_non_sorry_warnings":
                    print("Book.lean has non-sorry warnings after proof Agent A; calling proof Agent B (Book-check mode)...")
                else:
                    print("Book.lean check failed after proof Agent A; calling proof Agent B (Book-check mode)...")
            else:
                if failed_reason_after_a == "non_sorry_warnings":
                    print("Lean produced non-sorry warnings after proof Agent A; calling proof Agent B to clean.")
                else:
                    print("Lean failed after proof Agent A, calling proof Agent B...")
            success_after_b = False
            ok_after_b_status = "ok_after_b"
            force_book_check_in_retry = bool(book_rel is not None and failed_mode == "book")
            for attempt in range(1, args.max_b_retries + 1):
                print(f"[Proof Agent B attempt {attempt}/{args.max_b_retries}]")
                file_before_b = file_snapshot(lean_file_abs)
                before_text_b = lean_file_abs.read_text(encoding="utf-8")
                b_start = time.monotonic()
                run_b = (
                    run_proof_agent_b_bookcheck
                    if (book_rel is not None and failed_mode == "book")
                    else run_proof_agent_b
                )
                b_res = run_b(
                    lean_file_abs,
                    error_log=err,
                    item_index=item.index,
                    item_context=item.context,
                    item_content=item.content,
                    item_dependencies=item.dependencies,
                    item_notes=getattr(item, "notes", None),
                    label=item.label,
                    model=agent_settings.agents["B"].model,
                    reasoning_effort=agent_settings.agents["B"].reasoning_effort,
                )
                b_seconds = time.monotonic() - b_start
                file_after_b = file_snapshot(lean_file_abs)
                after_text_b = lean_file_abs.read_text(encoding="utf-8")
                total_tokens_used += b_res.tokens_used or 0
                log_event(
                    run_id,
                    "agent_b_result",
                    {
                        "index": item.index,
                        "label": item.label,
                        "attempt": attempt,
                        "code": b_res.code,
                        "seconds": b_seconds,
                        "tokens_used": b_res.tokens_used,
                        "log_path": str(b_res.log_path) if b_res.log_path else None,
                        "file_before": file_before_b,
                        "file_after": file_after_b,
                        "file_delta": snapshot_delta(file_before_b, file_after_b),
                    },
                )
                if b_res.code != 0:
                    print(f"Proof Agent B failed with code {b_res.code}. Stopping.\n{b_res.stderr}")
                    success_after_b = False
                    break

                # Re-check: always compile target first, then Book (if enabled).
                failed_mode = None
                target_non_sorry = []
                book_non_sorry = []

                lean_start = time.monotonic()
                code, out, err = lake_env_lean(compile_file_rel)
                lean_seconds = time.monotonic() - lean_start
                lean_output = "\n".join(part for part in (err, out) if part)
                target_non_sorry = _non_sorry_warning_blocks(lean_output)
                log_event(
                    run_id,
                    "lean_check",
                    {
                        "index": item.index,
                        "label": item.label,
                        "phase": f"post_agent_b_attempt{attempt}",
                        "code": code,
                        "seconds": lean_seconds,
                        "has_non_sorry_warnings": bool(target_non_sorry),
                        "compiled_file": str(compile_file_rel),
                    },
                )
                if code != 0:
                    failed_mode = "target"
                elif target_non_sorry and args.clean_warnings_with_agent_b:
                    failed_mode = "target"
                    warnings_text = "\n\n".join(target_non_sorry)
                    err = "Lean still reports these non-sorry warnings. Please remove them:\n\n" + warnings_text

                if failed_mode is None and book_rel is not None and (should_check_book or force_book_check_in_retry):
                    lean_start = time.monotonic()
                    book_code, book_out, book_err = lake_env_lean(book_rel)
                    book_seconds = time.monotonic() - lean_start
                    book_output = "\n".join(part for part in (book_err, book_out) if part)
                    book_non_sorry = _non_sorry_warning_blocks(book_output)
                    log_event(
                        run_id,
                        "lean_check",
                        {
                            "index": item.index,
                            "label": item.label,
                            "phase": f"post_agent_b_attempt{attempt}_book",
                            "code": book_code,
                            "seconds": book_seconds,
                            "has_non_sorry_warnings": bool(book_non_sorry),
                            "compiled_file": str(book_rel),
                        },
                    )
                    if book_code != 0:
                        failed_mode = "book"
                        out, err = book_out, book_err
                    elif book_non_sorry and args.clean_warnings_with_agent_b:
                        failed_mode = "book"
                        warnings_text = "\n\n".join(book_non_sorry)
                        err = (
                            "Lean still reports these non-sorry warnings (via Book.lean). Please remove them:\n\n"
                            + warnings_text
                        )
                elif failed_mode is None and book_rel is not None and not (should_check_book or force_book_check_in_retry):
                    log_event(
                        run_id,
                        "lean_check_skipped",
                        {
                            "index": item.index,
                            "label": item.label,
                            "phase": f"post_agent_b_attempt{attempt}_book",
                            "reason": book_check_reason,
                            "chapter": int(item.chapter),
                            "chapter_item_count": chapter_item_count,
                            "book_check_every": book_check_every,
                            "compiled_file": str(book_rel),
                        },
                    )

                if failed_mode is None and (
                    (not target_non_sorry and not book_non_sorry) or not args.clean_warnings_with_agent_b
                ):
                    if (target_non_sorry or book_non_sorry) and not args.clean_warnings_with_agent_b:
                        print(
                            "Lean OK after proof Agent B, but has non-sorry warnings (--no-clean-warnings-with-agent-b); continuing."
                        )
                        ok_after_b_status = "ok_after_b_with_warnings"
                    else:
                        print("Lean OK after proof Agent B.")
                        ok_after_b_status = "ok_after_b"
                    if not _split_and_validate(item=item, lean_file_abs=lean_file_abs, lean_seconds=lean_seconds):
                        total_items_failed += 1
                        success_after_b = False
                        break
                    state["next_index"] = item.index + 1
                    save_state(state, PROOF_PROGRESS_FILE, run_id=run_id)
                    processed += 1
                    last_success = item.index
                    if args.max_items is not None and processed >= args.max_items:
                        print(f"Reached max-items={args.max_items}, stopping batch.")
                        success_after_b = True
                        break
                    success_after_b = True
                    break
                if failed_mode == "book":
                    print("Book.lean check still failing after proof Agent B; will retry if attempts remain.")
                elif (target_non_sorry or book_non_sorry) and args.clean_warnings_with_agent_b:
                    print("Lean still has non-sorry warnings after proof Agent B; will retry if attempts remain.")
                else:
                    print("Lean still failing after proof Agent B attempt; will retry if attempts remain.")

            if success_after_b:
                log_event(
                    run_id,
                    "item_end",
                    {
                        "index": item.index,
                        "label": item.label,
                        "status": ok_after_b_status,
                        "seconds": time.monotonic() - item_start,
                    },
                )
                if args.max_items is not None and processed >= args.max_items:
                    break
                continue

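            # Proof Agent B did not succeed for this item (retries exhausted, Agent B exited
            # non-zero, or post-check split/validation failed): record failure and continue to next item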
            print("Lean still failing after proof Agent B retries; recording failure and continuing.")
            total_items_failed += 1
            failure_entry = (
                f"index={item.index}, label={item.label}, file={lean_file_rel}\n"
                f"last_error:\n{err}\n\n"
            )
            PROOF_FAILURE_LOG.parent.mkdir(parents=True, exist_ok=True)
            with PROOF_FAILURE_LOG.open("a", encoding="utf-8") as f:
                f.write(failure_entry)
            log_event(
                run_id,
                "item_end",
                {
                    "index": item.index,
                    "label": item.label,
                    "status": "failed_after_b",
                    "seconds": time.monotonic() - item_start,
                    "error_snippet": err[:1000] if err else None,
                },
            )
            # advance progress to avoid reprocessing the same item
            state["next_index"] = item.index + 1
            save_state(state, PROOF_PROGRESS_FILE, run_id=run_id)
            processed += 1
            continue
    finally:
        finish_run(
            run_id,
            {
                "pipeline": "proof",
                "stage": 2,
                "run_id": run_id,
                "data_file": str(data_path),
                "processed": processed,
                "last_success_index": last_success,
                "next_index": state.get("next_index", 1),
                "items_failed": total_items_failed,
                "items_failed_bad_statement": total_items_failed_bad_statement,
                "tokens_used_total": total_tokens_used,
                "seconds_total": time.monotonic() - run_start,
                "paths": {
                    "progress_file": str(PROOF_PROGRESS_FILE),
                    "history_file": str(history_file),
                    "proof_logs_dir": str(PROOF_LOGS_DIR),
                    "proof_failure_log": str(PROOF_FAILURE_LOG),
                    "metrics_dir": str(METRICS_DIR),
                },
            },
        )

    print(
        f"Processed {processed} proof items. Last successful index: {last_success}. "
        f"Next index: {state.get('next_index', 1)}."
    )


# Script entry point: call main() only when this module is executed directly,
# not when it is imported by another module.
# NOTE: this file uses package-relative imports (`from .config import ...`), so it
# has to be launched as a module of its package (`python -m <package>.<module>`)
# rather than by file path.
if __name__ == "__main__":
    main()
