from __future__ import annotations

import argparse
import json
import os
import re
import subprocess
import sys
import threading
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

from .config import LEAN_ROOT, ROOT
from .infra_prefix_store import (
    InfraPrefixStore,
    PrefixStoreConfig,
    module_import_for_rel_lean_file,
    safe_decl_name,
)
from .lean_runner import lake_env_lean
from .metrics import finish_run, log_event, start_run

# File names used by the infra item loop.
# NOTE(review): INFRA_PLAN_FILENAME, PLAN_ACTIVE_FILENAME, ITEM_LOOP_STATE_FILENAME
# and CURRENT_ITEM_FILENAME are not referenced in the part of the file reviewed
# here; presumably they are resolved relative to an infra directory — confirm.
INFRA_PLAN_FILENAME = "infra_plan.json"
PLAN_ACTIVE_FILENAME = "plan_active.json"
ITEM_LOOP_STATE_FILENAME = "item_loop_state.json"
# Sub-directory names created under the infra directory by `_ensure_directories`.
WORKING_DIRNAME = "Working"
CURRENT_ITEM_FILENAME = "CurrentItem.lean"
FAILURE_REPORTS_DIRNAME = "failure_reports"
PLAN_HISTORY_DIRNAME = "plan_history"
# Matches `sorry` as a whole word anywhere in a string (comments included).
_SORRY_TOKEN_RE = re.compile(r"\bsorry\b")
# Matches a declaration head at the very start of a string: optional
# private/protected/noncomputable modifiers, one of the listed keywords, then
# the declared name (captured as group 1). Callers apply it with `.match` to
# individual lines, so declarations with leading whitespace do not match.
_DECL_HEAD_RE = re.compile(
    r"^(?:private\s+|protected\s+|noncomputable\s+)*(?:theorem|lemma|def|abbrev|example|instance)\s+([A-Za-z0-9_'.]+)\b"
)

# Matches a path segment of the form `infra_<id>`; group 1 is `<id>`.
_INFRA_DIR_RE = re.compile(r"^infra_(.+)$")


def _utc_now_compact() -> str:
    return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")


def _normalize_rel_to_root(path: Path, root: Path) -> Path:
    if path.is_absolute():
        try:
            return path.relative_to(root)
        except ValueError:
            return path
    if path.parts and path.parts[0] == root.name:
        return Path(*path.parts[1:])
    return path


def _resolve_infra_root(bench_file: Path) -> tuple[Path, Path]:
    """
    Resolve effective Lean root for infra assets.

    Candidate roots are tried in order: ``LEAN_ROOT`` first, then
    ``ROOT / "M2F"``. A candidate is accepted when it contains a
    ``Question_bench`` entry or when the bench file (normalized against that
    candidate) exists under it. If neither qualifies, ``LEAN_ROOT`` is used.

    Returns ``(root, bench_file_relative_to_root)``.
    """
    for candidate_root in (LEAN_ROOT, ROOT / "M2F"):
        candidate_rel = _normalize_rel_to_root(bench_file, candidate_root)
        has_bench_dir = (candidate_root / "Question_bench").exists()
        if has_bench_dir or (candidate_root / candidate_rel).exists():
            return candidate_root, candidate_rel
    return LEAN_ROOT, _normalize_rel_to_root(bench_file, LEAN_ROOT)


def _infer_bench_and_infra_from_input(path_rel: Path) -> tuple[Path, Path, str] | None:
    """Derive bench file, infra directory and infra id from a path.

    The first path segment of the form ``infra_<id>`` (with a non-blank id)
    decides the result:

    - bench file: ``<segments before it>/<id>.lean``
    - infra directory: the path up to and including that segment
    - infra id: ``<id>`` with surrounding whitespace removed

    Returns ``None`` when no segment qualifies.
    """
    segments = path_rel.parts
    for pos, segment in enumerate(segments):
        matched = _INFRA_DIR_RE.match(segment)
        infra_id = (matched.group(1) or "").strip() if matched else ""
        if infra_id:
            infra_dir_rel = Path(*segments[: pos + 1])
            bench_rel = Path(*segments[:pos]) / f"{infra_id}.lean"
            return bench_rel, infra_dir_rel, infra_id
    return None


def _safe_stem(text: str) -> str:
    return re.sub(r"[^A-Za-z0-9._-]+", "_", text).strip("_") or "infra"


def _resolve_path_against(lean_root: Path, p: Path) -> Path:
    """Return ``p`` anchored at ``lean_root``.

    ``p`` is first normalized relative to ``lean_root``. If it is still
    absolute afterwards (i.e. it lives outside ``lean_root``) it is returned
    unchanged; otherwise it is joined onto ``lean_root``.
    """
    normalized = _normalize_rel_to_root(p, lean_root)
    return normalized if normalized.is_absolute() else lean_root / normalized


def _run_subprocess(
    module: str,
    args: list[str],
    *,
    env_overrides: dict[str, str] | None,
) -> tuple[int, str, str]:
    """Run ``python -m <module> <args>`` with cwd ``ROOT`` and capture its output.

    The child inherits the current environment, updated with
    ``env_overrides`` when given. Its stdout and stderr are read line by line
    on two background threads so neither pipe can fill up while the other is
    being read. Unless ``ORCH_PRINT_AGENT_INTERACTIONS`` (looked up in the
    child's environment) is one of ``0``/``false``/``no``/``off``, each line
    is also echoed to this process's stdout/stderr as it arrives.

    Returns:
        ``(return_code, captured_stdout, captured_stderr)``.
    """
    cmd = [sys.executable, "-m", module, *args]
    env = os.environ.copy()
    if env_overrides:
        env.update(env_overrides)
    echo_flag = str(env.get("ORCH_PRINT_AGENT_INTERACTIONS", "1")).strip().lower()
    stream_interactions = echo_flag not in {"0", "false", "no", "off"}

    proc = subprocess.Popen(
        cmd,
        cwd=ROOT,
        env=env,
        text=True,
        # An undecodable byte must not raise inside a pump thread: the thread
        # would die, the pipe would stop being drained, and the child could
        # block forever on a full pipe while we sit in proc.wait().
        errors="replace",
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        bufsize=1,
    )

    stdout_chunks: list[str] = []
    stderr_chunks: list[str] = []

    def _echo_stdout(line: str) -> None:
        print(line, end="")

    def _echo_stderr(line: str) -> None:
        sys.stderr.write(line)

    def _pump(stream: Any, chunks: list[str], echo: Any) -> None:
        """Drain ``stream`` line by line into ``chunks``, echoing if enabled."""
        if stream is None:
            return
        try:
            for line in iter(stream.readline, ""):
                chunks.append(line)
                if stream_interactions:
                    echo(line)
        finally:
            # Close even if echoing failed, so the child sees a closed pipe
            # rather than blocking on a reader that no longer exists.
            stream.close()

    t_out = threading.Thread(
        target=_pump, args=(proc.stdout, stdout_chunks, _echo_stdout), daemon=True
    )
    t_err = threading.Thread(
        target=_pump, args=(proc.stderr, stderr_chunks, _echo_stderr), daemon=True
    )
    t_out.start()
    t_err.start()
    return_code = int(proc.wait())
    # Join after wait() so all trailing output has been collected.
    t_out.join()
    t_err.join()

    return return_code, "".join(stdout_chunks), "".join(stderr_chunks)


def _truncate_text(text: str, *, max_chars: int = 1600) -> str:
    s = (text or "").strip()
    if len(s) <= max_chars:
        return s
    return s[: max_chars - 3] + "..."


def _first_error_line(text: str) -> str | None:
    if not text:
        return None
    for raw in text.splitlines():
        line = raw.strip()
        if not line:
            continue
        if "error:" in line.lower():
            return line
    for raw in text.splitlines():
        line = raw.strip()
        if line:
            return line
    return None


def _extract_relevant_lines(
    text: str,
    *,
    keywords: list[str],
    max_lines: int = 8,
) -> list[str]:
    if not text:
        return []
    out: list[str] = []
    seen: set[str] = set()
    lowered_keywords = [k.lower() for k in keywords if k]
    for raw in text.splitlines():
        line = raw.strip()
        if not line:
            continue
        low = line.lower()
        if not any(k in low for k in lowered_keywords):
            continue
        norm = " ".join(line.split())
        if norm in seen:
            continue
        seen.add(norm)
        out.append(norm[:320])
        if len(out) >= max_lines:
            break
    return out


# Ordered classification rules for `_classify_sorry_remaining_blocker`.
# The first rule with any signal present in the lowercased log wins. The
# signals also serve as the evidence keywords, so every phrase that can
# trigger a class is also extracted as evidence for it.
_SORRY_BLOCKER_RULES: tuple[dict[str, Any], ...] = (
    {
        "failure_class": "math_incorrect_or_unprovable",
        "confidence": 0.9,
        "diagnosis": (
            "agent feedback indicates the current statement/assumptions are likely mathematically false or "
            "unprovable as written."
        ),
        "planner_guidance": (
            "Revise the failed item statement (weaken/adjust assumptions or conclusion), then regenerate only "
            "from the failed index onward."
        ),
        "signals": (
            "failed_bad_statement",
            "mathematically unprovable/blocked",
            "mathematically unprovable/false",
            "may be mathematically unprovable",
        ),
    },
    {
        "failure_class": "missing_theory_or_library_gap",
        "confidence": 0.85,
        "diagnosis": (
            "agent feedback indicates missing theory/library support rather than local proof decomposition."
        ),
        "planner_guidance": (
            "Add prerequisite infra lemmas/theory bridge items before the failed item; keep suffix dependencies "
            "strictly earlier-only."
        ),
        "signals": (
            "failed_missing_theory",
            "missing theory",
            "missing library/theory",
            "unknown identifier",
            "don't know how to synthesize",
        ),
    },
    {
        "failure_class": "decomposition_too_coarse_or_plan_gap",
        "confidence": 0.8,
        "diagnosis": (
            "planner/executor loop suggests current item decomposition is too coarse or dependency decomposition is "
            "insufficient."
        ),
        "planner_guidance": (
            "Split the failed item into finer helper lemmas, keep each lemma single-purpose and provable from "
            "frozen prefix + earlier suffix only."
        ),
        "signals": (
            "agent a requested re-plan",
            "requested re-plan",
            "reached max agent c re-plan attempts",
            "max_replans_reached",
            "proof_too_coarse",
        ),
    },
    {
        "failure_class": "proof_search_stalled",
        "confidence": 0.72,
        "diagnosis": "proof search made no meaningful progress under current decomposition/instructions.",
        "planner_guidance": (
            "Retain valid prefix, then refine suffix proof plan with tighter intermediate lemmas and more explicit "
            "proof steps."
        ),
        "signals": (
            "no meaningful progress after proof agent a",
            "failed_no_progress",
            "no reduction in `sorry` warnings",
        ),
    },
)

# Used when no rule matches; `signals` here are only generic evidence keywords.
_SORRY_BLOCKER_FALLBACK: dict[str, Any] = {
    "failure_class": "likely_decomposition_too_coarse",
    "confidence": 0.58,
    "diagnosis": (
        "no explicit math-false or missing-theory signal was found; most likely the item is still too coarse."
    ),
    "planner_guidance": (
        "Prefer decomposition refinement from the failed index: smaller lemmas, explicit dependency chain, "
        "and avoid introducing equivalent rename-only items."
    ),
    "signals": ("sorry", "blocked", "failed", "re-plan", "replan"),
}


def _classify_sorry_remaining_blocker(*, proof_stdout: str, proof_stderr: str) -> dict[str, Any]:
    """Classify why a proof run ended with `sorry` still present.

    The stdout and stderr of the proof stage are concatenated and scanned,
    case-insensitively, for known signal phrases. Rules are checked in a
    fixed priority order (bad statement, missing theory, decomposition too
    coarse, search stalled); if none matches, a low-confidence
    "likely too coarse" verdict is returned.

    Returns a dict with:
        failure_class, confidence, diagnosis, planner_guidance:
            taken from the matching rule (or the fallback).
        core_blocker_signature:
            text following ``core_blocker_signature=`` on its line, if any.
        evidence_lines:
            up to 8 log lines containing one of the rule's signals.
        goal_lines:
            up to 8 log lines mentioning goals / Lean error excerpts.
    """
    combined = "\n".join(part for part in (proof_stdout, proof_stderr) if part)
    low = combined.lower()
    m_sig = re.search(r"core_blocker_signature=([^\n]+)", combined)
    core_blocker_signature = m_sig.group(1).strip() if m_sig else None

    rule = next(
        (
            candidate
            for candidate in _SORRY_BLOCKER_RULES
            if any(signal in low for signal in candidate["signals"])
        ),
        _SORRY_BLOCKER_FALLBACK,
    )

    evidence_lines = _extract_relevant_lines(
        combined,
        keywords=list(rule["signals"]),
        max_lines=8,
    )
    goal_lines = _extract_relevant_lines(
        combined,
        keywords=["⊢", "goal", "pending_goals", "lean_error_excerpt", "goal_excerpt"],
        max_lines=8,
    )
    return {
        "failure_class": rule["failure_class"],
        "confidence": rule["confidence"],
        "diagnosis": rule["diagnosis"],
        "planner_guidance": rule["planner_guidance"],
        "core_blocker_signature": core_blocker_signature,
        "evidence_lines": evidence_lines,
        "goal_lines": goal_lines,
    }


def _collect_sorry_context(*, lean_text: str, max_entries: int = 12) -> dict[str, Any]:
    """Summarize where `sorry` occurs in a Lean source text.

    Each line containing the word `sorry` is attributed to the nearest
    declaration head at or above it (``None`` if there is none). Sampling
    stops once ``max_entries`` occurrences have been recorded.

    Returns a dict with:
        sorry_count_total: number of lines in the whole text containing `sorry`.
        decls_with_sorry: sorted declaration names seen among the sampled lines.
        sample: list of ``{"line", "decl_name", "preview"}`` records, where
            ``line`` is 1-based and ``preview`` is the whitespace-collapsed
            line cut to 220 characters.
    """
    source_lines = lean_text.splitlines()

    samples: list[dict[str, Any]] = []
    owners: set[str] = set()
    current_decl: str | None = None
    for line_no, raw in enumerate(source_lines, start=1):
        head = _DECL_HEAD_RE.match(raw)
        if head:
            # A declaration head on this very line owns a `sorry` on the same line.
            current_decl = str(head.group(1)).strip()
        if not _SORRY_TOKEN_RE.search(raw):
            continue
        if current_decl:
            owners.add(current_decl)
        samples.append(
            {
                "line": int(line_no),
                "decl_name": current_decl,
                "preview": " ".join(raw.strip().split())[:220],
            }
        )
        if len(samples) >= max_entries:
            break

    total = sum(_SORRY_TOKEN_RE.search(raw) is not None for raw in source_lines)
    return {
        "sorry_count_total": total,
        "decls_with_sorry": sorted(owners),
        "sample": samples,
    }


def _duplicate_nonempty(values: list[str]) -> list[str]:
    seen: set[str] = set()
    dup: set[str] = set()
    for raw in values:
        v = str(raw).strip()
        if not v:
            continue
        if v in seen:
            dup.add(v)
        else:
            seen.add(v)
    return sorted(dup)


def _coerce_int(value: Any) -> int | None:
    if isinstance(value, bool):
        return None
    if isinstance(value, int):
        return int(value)
    if isinstance(value, str):
        s = value.strip()
        if re.fullmatch(r"-?\d+", s):
            try:
                return int(s)
            except Exception:
                return None
    return None


@dataclass(frozen=True, slots=True)
class PlanItem:
    """One entry of an infra plan, as produced by ``_load_plan_items``.

    The dataclass is frozen, so attributes cannot be rebound; the
    ``dependencies`` list and ``raw`` dict themselves remain mutable objects.
    """

    # Ordering key: the entry's integer "index", or its 1-based position in
    # the plan array when no integer index is given.
    index: int
    # The entry's "item_id" when it is a non-blank string, else "I<index:05d>".
    item_id: str
    # Required, non-empty; the loader rejects plans with duplicate labels.
    label: str
    # The entry's "env"; the loader falls back to "thm" when missing or blank.
    env: str
    # Required, non-empty statement text.
    content: str
    # Required, non-empty target path string, as written in the plan.
    target_file: str
    # Non-blank strings taken from "depends_on", or from "dependencies" when
    # "depends_on" is not a list.
    dependencies: list[str]
    # The original plan entry dict (the same object, not a copy).
    raw: dict[str, Any]


def _load_plan_items(plan_abs: Path) -> list[PlanItem]:
    """Load and validate the plan JSON at ``plan_abs``.

    The file must contain a JSON array. Array elements that are not objects
    are skipped. For each object:

    - ``index``: used when it is an integer (booleans do not count),
      otherwise the element's 1-based position is used.
    - ``label``, ``content``, ``target_file``: required, non-empty after
      stripping. A JSON ``null`` counts as missing; it used to slip through
      as the literal string ``"None"``.
    - ``item_id``: used when it is a non-blank string, else ``I<index:05d>``.
    - ``env``: defaults to ``"thm"`` when missing, ``null`` or blank.
    - dependencies: non-blank strings from ``depends_on``, or from
      ``dependencies`` when ``depends_on`` is not a list.

    Returns the items sorted by ``index``.

    Raises:
        ValueError: if the top level is not an array, a required field is
            missing, or index / item_id / label values are duplicated.
    """
    raw = json.loads(plan_abs.read_text(encoding="utf-8"))
    if not isinstance(raw, list):
        raise ValueError(f"Expected JSON array in {plan_abs}")

    def _text_field(entry: dict[str, Any], key: str) -> str:
        """Return ``entry[key]`` as stripped text; missing or null gives ''."""
        value = entry.get(key)
        return "" if value is None else str(value).strip()

    items: list[PlanItem] = []
    for pos, entry in enumerate(raw, start=1):
        if not isinstance(entry, dict):
            continue
        idx = entry.get("index")
        # bool is a subclass of int; `true` is not a meaningful index.
        if isinstance(idx, bool) or not isinstance(idx, int):
            idx = pos
        label = _text_field(entry, "label")
        if not label:
            raise ValueError(f"item[{pos}] missing label")
        item_id_raw = entry.get("item_id")
        if isinstance(item_id_raw, str) and item_id_raw.strip():
            item_id = item_id_raw.strip()
        else:
            item_id = f"I{int(idx):05d}"
        env = _text_field(entry, "env") or "thm"
        content = _text_field(entry, "content")
        if not content:
            raise ValueError(f"item[{pos}] missing content")
        target_file = _text_field(entry, "target_file")
        if not target_file:
            raise ValueError(f"item[{pos}] missing target_file")
        deps_raw = entry.get("depends_on")
        if not isinstance(deps_raw, list):
            deps_raw = entry.get("dependencies")
        deps: list[str] = []
        if isinstance(deps_raw, list):
            deps = [dep.strip() for dep in deps_raw if isinstance(dep, str) and dep.strip()]
        items.append(
            PlanItem(
                index=int(idx),
                item_id=item_id,
                label=label,
                env=env,
                content=content,
                target_file=target_file,
                dependencies=deps,
                raw=entry,
            )
        )
    items.sort(key=lambda it: it.index)
    dup_indices = _duplicate_nonempty([str(it.index) for it in items])
    if dup_indices:
        raise ValueError(f"plan contains duplicate index values: {', '.join(dup_indices[:8])}")
    dup_item_ids = _duplicate_nonempty([it.item_id for it in items])
    if dup_item_ids:
        raise ValueError(f"plan contains duplicate item_id values: {', '.join(dup_item_ids[:8])}")
    dup_labels = _duplicate_nonempty([it.label for it in items])
    if dup_labels:
        raise ValueError(f"plan contains duplicate label values: {', '.join(dup_labels[:6])}")
    return items


def _default_loop_state(*, plan_rel: Path) -> dict[str, Any]:
    return {
        "version": 1,
        "status": "idle",
        "cursor_index": 1,
        "last_success_index": 0,
        "frozen_count": 0,
        "plan_active": str(plan_rel),
        "updated_at": datetime.now(timezone.utc).isoformat(),
        "last_error": None,
    }


def _load_loop_state(state_file: Path, *, plan_rel: Path) -> dict[str, Any]:
    """Read the persisted loop state, falling back to defaults.

    A missing, unreadable or non-JSON state file yields the default state.
    Otherwise the stored object (if it is a JSON object) is layered over the
    defaults. ``plan_active`` is always set to ``str(plan_rel)``, whatever
    the file says.
    """
    persisted: Any = None
    if state_file.exists():
        try:
            persisted = json.loads(state_file.read_text(encoding="utf-8"))
        except Exception:
            # Best-effort: a corrupt state file is treated like an absent one.
            persisted = None

    state = _default_loop_state(plan_rel=plan_rel)
    if isinstance(persisted, dict):
        state.update(persisted)
    state["plan_active"] = str(plan_rel)
    return state


def _save_loop_state(state_file: Path, state: dict[str, Any], *, run_id: str) -> None:
    payload = dict(state)
    payload["updated_at"] = datetime.now(timezone.utc).isoformat()
    payload["last_run_id"] = run_id
    state_file.parent.mkdir(parents=True, exist_ok=True)
    state_file.write_text(json.dumps(payload, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")


def _ensure_directories(infra_dir_abs: Path) -> dict[str, Path]:
    """Create the working sub-directories of an infra directory.

    Returns a mapping from logical name (``generated_prefix``, ``working``,
    ``failure_reports``, ``plan_history``) to the corresponding directory
    path; each directory exists when the function returns.
    """
    layout = (
        ("generated_prefix", "GeneratedPrefix"),
        ("working", WORKING_DIRNAME),
        ("failure_reports", FAILURE_REPORTS_DIRNAME),
        ("plan_history", PLAN_HISTORY_DIRNAME),
    )
    created = {key: infra_dir_abs / dirname for key, dirname in layout}
    for directory in created.values():
        directory.mkdir(parents=True, exist_ok=True)
    return created


def _write_current_item_file(*, current_item_abs: Path, prefix_index_rel: Path) -> None:
    """(Re)create the working Lean file with only its import header.

    The file imports Mathlib plus the module line derived from
    ``prefix_index_rel`` and carries a marker comment; any previous content
    is overwritten. Parent directories are created as needed.
    """
    header_lines = (
        "import Mathlib",
        module_import_for_rel_lean_file(prefix_index_rel),
        "",
        "-- Auto-generated working file for infra direct-item execution.",
        "",
    )
    current_item_abs.parent.mkdir(parents=True, exist_ok=True)
    current_item_abs.write_text("\n".join(header_lines), encoding="utf-8")


def _frozen_items_from_manifest(manifest: dict[str, Any]) -> list[dict[str, Any]]:
    raw = manifest.get("frozen_items")
    if not isinstance(raw, list):
        return []
    return [rec for rec in raw if isinstance(rec, dict)]


def _assert_no_duplicate_frozen_items(
    *,
    frozen_items: list[dict[str, Any]],
    context: str,
) -> None:
    """Raise ``ValueError`` if frozen records repeat an ``item_id`` or ``label``.

    ``item_id`` is checked first. The error message starts with ``context``
    and lists at most 8 duplicate ids or 6 duplicate labels.
    """
    for field, shown in (("item_id", 8), ("label", 6)):
        repeated = _duplicate_nonempty([str(rec.get(field, "")) for rec in frozen_items])
        if repeated:
            raise ValueError(f"{context} duplicate {field}: {', '.join(repeated[:shown])}")


def _expected_prefix_for_cursor(*, items: list[PlanItem], cursor_index: int) -> list[PlanItem]:
    return [it for it in items if it.index < int(cursor_index)]


def _first_prefix_mismatch(
    *,
    items: list[PlanItem],
    cursor_index: int,
    frozen_items: list[dict[str, Any]],
) -> dict[str, Any] | None:
    """Compare the frozen records with the plan prefix before ``cursor_index``.

    Records are compared position by position against the expected prefix.
    A record mismatches when its ``item_id`` or ``label`` differs, or when
    its ``metadata.index`` is present (coercible to int) and differs.

    Returns ``None`` when both sequences agree completely, otherwise a
    report dict with ``reason``, 1-based ``mismatch_pos``, ``keep_count``
    (number of leading records that agree), ``rollback_index`` and the
    ``expected`` / ``got`` descriptions (either may be ``None`` when one
    side simply ran out).
    """
    expected = _expected_prefix_for_cursor(items=items, cursor_index=cursor_index)
    n_expected = len(expected)
    n_frozen = len(frozen_items)

    def _observed(rec: dict[str, Any]) -> dict[str, Any]:
        # Description of a frozen record as stored in the manifest.
        meta = rec.get("metadata")
        return {
            "index": _coerce_int(meta.get("index")) if isinstance(meta, dict) else None,
            "item_id": str(rec.get("item_id", "")).strip(),
            "label": str(rec.get("label", "")).strip(),
        }

    def _wanted(plan_item: PlanItem) -> dict[str, Any]:
        # Description of what the plan expects at a position.
        return {
            "index": plan_item.index,
            "item_id": plan_item.item_id,
            "label": plan_item.label,
        }

    # 1) Pairwise comparison over the overlapping part.
    for pos, (exp, rec) in enumerate(zip(expected, frozen_items)):
        got = _observed(rec)
        same_identity = got["item_id"] == exp.item_id and got["label"] == exp.label
        same_index = got["index"] is None or got["index"] == exp.index
        if same_identity and same_index:
            continue
        return {
            "reason": "prefix_plan_mismatch",
            "mismatch_pos": int(pos + 1),
            "keep_count": int(pos),
            "rollback_index": int(exp.index),
            "expected": _wanted(exp),
            "got": got,
        }

    # 2) Fewer frozen records than the cursor implies.
    if n_frozen < n_expected:
        missing = expected[n_frozen]
        return {
            "reason": "frozen_prefix_shorter_than_cursor",
            "mismatch_pos": int(n_frozen + 1),
            "keep_count": int(n_frozen),
            "rollback_index": int(missing.index),
            "expected": _wanted(missing),
            "got": None,
        }

    # 3) More frozen records than the cursor implies.
    if n_frozen > n_expected:
        if n_expected < len(items):
            rollback_index = int(items[n_expected].index)
        elif items:
            rollback_index = int(items[-1].index + 1)
        else:
            rollback_index = 1
        return {
            "reason": "frozen_prefix_longer_than_cursor",
            "mismatch_pos": int(n_expected + 1),
            "keep_count": int(n_expected),
            "rollback_index": int(rollback_index),
            "expected": None,
            "got": _observed(frozen_items[n_expected]),
        }

    return None


def _single_item_json_payload(*, item: PlanItem, target_rel: Path) -> list[dict[str, Any]]:
    entry = dict(item.raw)
    entry["index"] = int(item.index)
    entry["item_id"] = str(item.item_id)
    entry["label"] = str(item.label)
    entry["env"] = str(item.env)
    entry["content"] = str(item.content)
    entry["target_file"] = str(target_rel)
    entry["dependencies"] = list(item.dependencies)
    return [entry]


def _write_single_item_json(
    *,
    abs_path: Path,
    item: PlanItem,
    target_rel: Path,
) -> None:
    """Write the single-item plan payload for ``item`` to ``abs_path``.

    The payload is serialized as indented UTF-8 JSON followed by a newline;
    parent directories are created as needed.
    """
    records = _single_item_json_payload(item=item, target_rel=target_rel)
    abs_path.parent.mkdir(parents=True, exist_ok=True)
    rendered = json.dumps(records, ensure_ascii=False, indent=2)
    abs_path.write_text(f"{rendered}\n", encoding="utf-8")


def _double_check_statement(*, lean_root: Path, target_rel: Path, item: PlanItem) -> tuple[bool, str]:
    """Re-verify the statement stage's output file.

    The check passes when ``lake_env_lean(target_rel)`` exits with code 0 and
    the file at ``lean_root / target_rel`` can be read and contains the
    item's label as a substring.

    Returns ``(ok, reason)`` where ``reason`` is a short machine-readable
    string describing the outcome.
    """
    exit_code, stdout_text, stderr_text = lake_env_lean(target_rel)
    if exit_code != 0:
        # stderr first, so the headline prefers Lean's own error lines.
        lean_output = "\n".join(filter(None, (stderr_text, stdout_text)))
        headline = _first_error_line(lean_output) or "statement_postcheck_failed"
        return False, f"statement_double_check_lean_failed: {headline}"

    source_path = lean_root / target_rel
    try:
        written = source_path.read_text(encoding="utf-8")
    except Exception as exc:
        return False, f"statement_double_check_read_failed: {exc}"

    if item.label in written:
        return True, "statement_double_check_ok"
    return False, f"statement_double_check_label_missing: {item.label}"


def _double_check_proof(
    *,
    lean_root: Path,
    target_rel: Path,
    item: PlanItem,
) -> tuple[bool, str, dict[str, Any] | None]:
    """Re-verify the proof stage's output file.

    The check passes when ``lake_env_lean(target_rel)`` exits with code 0,
    the file at ``lean_root / target_rel`` is readable, contains no `sorry`
    token, and still contains the item's label as a substring.

    Returns ``(ok, reason, detail)``; ``detail`` is ``None`` on success and a
    dict of diagnostic excerpts on failure.
    """
    exit_code, stdout_text, stderr_text = lake_env_lean(target_rel)
    lean_output = "\n".join(filter(None, (stderr_text, stdout_text)))

    if exit_code != 0:
        headline = _first_error_line(lean_output) or "proof_postcheck_failed"
        failure_detail = {
            "first_error_line": headline,
            "lean_output_excerpt": _truncate_text(lean_output),
        }
        return False, f"proof_double_check_lean_failed: {headline}", failure_detail

    source_path = lean_root / target_rel
    try:
        written = source_path.read_text(encoding="utf-8")
    except Exception as exc:
        return False, f"proof_double_check_read_failed: {exc}", {"read_error": str(exc)}

    if _SORRY_TOKEN_RE.search(written) is not None:
        sorry_detail = {
            "first_error_line": _first_error_line(lean_output),
            "lean_output_excerpt": _truncate_text(lean_output),
            "postcheck_warning_lines": _extract_relevant_lines(
                lean_output,
                keywords=["warning", "sorry", "⊢", "goal", "unknown identifier", "type mismatch"],
                max_lines=10,
            ),
        }
        return False, "proof_double_check_sorry_remaining", sorry_detail

    if item.label in written:
        return True, "proof_double_check_ok", None
    return (
        False,
        f"proof_double_check_label_missing: {item.label}",
        {"lean_output_excerpt": _truncate_text(lean_output)},
    )


def _extract_promotable_block(*, current_item_abs: Path) -> tuple[str | None, str | None]:
    try:
        text = current_item_abs.read_text(encoding="utf-8")
    except Exception as e:
        return None, f"promote_read_failed: {e}"

    lines = text.splitlines()
    i = 0
    while i < len(lines):
        s = lines[i].strip()
        if s.startswith("import "):
            i += 1
            continue
        if not s:
            i += 1
            continue
        break
    body = "\n".join(lines[i:]).strip()
    if not body:
        return None, "promote_empty_body"
    return body, None


def _extract_decl_candidates_from_body(*, body: str) -> tuple[str, list[tuple[str, str]]]:
    """
    Split a CurrentItem body at its declaration heads.

    Returns ``(prelude, candidates)``:
    - prelude: stripped text before the first line matching a declaration head
      (the whole stripped body when there is no such line)
    - candidates: ``(decl_name, decl_block)`` pairs in source order, each
      block running up to the next declaration head or the end of the body
    """
    lines = body.splitlines()
    heads = [
        (pos, found.group(1))
        for pos, line in enumerate(lines)
        if (found := _DECL_HEAD_RE.match(line))
    ]
    if not heads:
        return body.strip(), []

    prelude = "\n".join(lines[: heads[0][0]]).strip()
    # Each block ends where the next head starts; the last one at end of body.
    boundaries = [pos for pos, _ in heads[1:]] + [len(lines)]
    candidates: list[tuple[str, str]] = []
    for (start, name), end in zip(heads, boundaries):
        block = "\n".join(lines[start:end]).strip()
        if block:
            candidates.append((name.strip(), block))
    return prelude, candidates


def _check_snippet_prefix_only(
    *,
    lean_root: Path,
    infra_dir_rel: Path,
    prefix_index_rel: Path,
    snippet: str,
    probe_tag: str,
) -> tuple[bool, str]:
    """Check a snippet in a throwaway probe file via ``lake_env_lean``.

    The probe file lives in the infra working directory, imports Mathlib and
    the module line derived from ``prefix_index_rel``, and contains only
    ``snippet``. It is removed again afterwards (best-effort).

    Returns ``(True, "ok")`` on exit code 0, otherwise ``(False, summary)``
    with the first error line of the Lean output.
    """
    probe_rel = (
        infra_dir_rel / WORKING_DIRNAME / f"_CurrentItemPrefixProbe_{_safe_stem(probe_tag)}.lean"
    )
    probe_abs = lean_root / probe_rel
    probe_abs.parent.mkdir(parents=True, exist_ok=True)
    probe_source = "\n".join(
        (
            "import Mathlib",
            module_import_for_rel_lean_file(prefix_index_rel),
            "",
            snippet.strip(),
            "",
        )
    )
    try:
        probe_abs.write_text(probe_source, encoding="utf-8")
        exit_code, stdout_text, stderr_text = lake_env_lean(probe_rel)
    finally:
        # Cleanup must never mask the probe result or the original error.
        try:
            if probe_abs.exists():
                probe_abs.unlink()
        except Exception:
            pass

    if exit_code == 0:
        return True, "ok"
    lean_output = "\n".join(filter(None, (stderr_text, stdout_text)))
    return False, _first_error_line(lean_output) or "prefix_only_probe_failed"


def _salvage_proved_prefix_only_blocks(
    *,
    lean_root: Path,
    infra_dir_rel: Path,
    current_item_abs: Path,
    prefix_index_rel: Path,
    prefix_store: InfraPrefixStore,
    failed_item: PlanItem,
) -> dict[str, Any]:
    """
    Best-effort salvage for a blocked item.

    Declaration blocks in CurrentItem that contain no `sorry` are probed one
    by one, each together with the prelude and the blocks accepted so far,
    against the prefix index alone. Blocks that pass are appended (with the
    prelude) to the prefix store's primary chunk via
    ``append_aux_block_to_primary_chunk``.

    Returns a status dict: ``no_body``, ``no_decl_candidates``,
    ``no_promotable`` or ``promoted``; the latter two include up to 12
    skipped declarations with the reason each was skipped.
    """
    body, promote_err = _extract_promotable_block(current_item_abs=current_item_abs)
    if body is None:
        return {"status": "no_body", "reason": str(promote_err or "promote_extract_failed")}

    prelude, candidates = _extract_decl_candidates_from_body(body=body)
    if not candidates:
        return {"status": "no_decl_candidates"}

    prelude_clean = prelude.strip()

    def _assemble(blocks: list[str]) -> str:
        # Prelude (if any) followed by the given blocks, blank-line separated.
        leading = [prelude_clean] if prelude_clean else []
        return "\n\n".join(part for part in leading + blocks if part.strip())

    promoted_names: list[str] = []
    promoted_blocks: list[str] = []
    rejected: list[dict[str, str]] = []
    visited: set[str] = set()

    for decl_name, block in candidates:
        first_occurrence = decl_name not in visited
        visited.add(decl_name)
        if not first_occurrence:
            rejected.append({"name": decl_name, "reason": "duplicate_decl_in_current_item"})
            continue
        if _SORRY_TOKEN_RE.search(block):
            rejected.append({"name": decl_name, "reason": "contains_sorry"})
            continue

        trimmed = block.strip()
        ok, reason = _check_snippet_prefix_only(
            lean_root=lean_root,
            infra_dir_rel=infra_dir_rel,
            prefix_index_rel=prefix_index_rel,
            snippet=_assemble([*promoted_blocks, trimmed]),
            probe_tag=f"{failed_item.item_id}_{decl_name}",
        )
        if ok:
            promoted_names.append(decl_name)
            promoted_blocks.append(trimmed)
        else:
            rejected.append({"name": decl_name, "reason": reason})

    if not promoted_blocks:
        return {
            "status": "no_promotable",
            "candidate_count": len(candidates),
            "skipped": rejected[:12],
        }

    salvage_metadata = {
        "source": "blocked_current_item_salvage",
        "failed_index": int(failed_item.index),
        "failed_item_id": str(failed_item.item_id),
        "failed_label": str(failed_item.label),
        "promoted_names": promoted_names,
        "timestamp_utc": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
    }
    chunk_rel = prefix_store.append_aux_block_to_primary_chunk(
        text_block=_assemble(promoted_blocks),
        metadata=salvage_metadata,
    )
    return {
        "status": "promoted",
        "candidate_count": len(candidates),
        "promoted_count": len(promoted_names),
        "promoted_names": promoted_names,
        "chunk_path": str(chunk_rel),
        "skipped": rejected[:12],
    }


def _build_scaffold_declaration(item: PlanItem) -> str:
    """Render a placeholder Lean theorem (``True``) documenting ``item``.

    The declaration name is derived from the item id via ``safe_decl_name``.
    The doc comment carries the item's label and its whitespace-collapsed
    content, cut to 220 characters.

    Label and content are free text, so any ``/-`` or ``-/`` sequence in
    them is broken up with a space before it is placed inside the
    ``/-- ... -/`` doc comment. Otherwise text such as ``1/-2`` could open a
    nested comment or close the doc comment early and make the generated
    declaration unparsable.
    """

    def _comment_safe(text: str) -> str:
        # Neutralize Lean block-comment delimiters inside comment text.
        return text.replace("/-", "/ -").replace("-/", "- /")

    name = safe_decl_name(f"infra_item_{item.item_id}_frozen")
    label = _comment_safe(item.label)
    # Sanitize before truncating so the 220-character cap still holds.
    short_content = _comment_safe(" ".join(item.content.split()))
    if len(short_content) > 220:
        short_content = short_content[:217] + "..."
    return "\n".join(
        [
            "/--",
            "[SCAFFOLD PLACEHOLDER] Frozen item for pipeline wiring.",
            f"label: {label}",
            f"intent: {short_content}",
            "Replace this theorem with real translated+proved declaration in phase 3.",
            "-/",
            f"theorem {name} : True := by",
            "  trivial",
        ]
    )


def _execute_item_real(
    *,
    item: PlanItem,
    lean_root: Path,
    item_data_file_abs: Path,
    target_rel: Path,
    project: str,
    task_prefix: str,
    statement_max_b_retries: int,
    proof_max_b_retries: int,
    proof_max_c_replans: int,
    env_overrides: dict[str, str] | None,
) -> tuple[bool, str, str | None, dict[str, Any] | None]:
    """
    Run the real executor for a single plan item.

    Sequence: statement subprocess, statement double-check, proof subprocess,
    proof double-check, promotable-block extraction. The first step that
    fails ends the run for this item.

    Returns ``(ok, reason, promote_text, detail)``. ``promote_text`` is only
    set when every step succeeded; ``detail`` always names the stage that
    produced the outcome and, on failure, carries whatever diagnostics that
    stage collected.
    """
    item_task = f"{task_prefix}_idx{int(item.index):04d}_{safe_decl_name(item.item_id, fallback='item')}"

    def _announce(banner: str) -> None:
        # One console line per stage, identifying the item being worked on.
        print(banner + f"index={item.index} item_id={item.item_id} label={item.label}")

    def _stage_cli(stage: str, b_retries: int) -> list[str]:
        # Arguments common to both stage pipelines, in the order they are passed.
        return [
            "--data-file",
            str(item_data_file_abs),
            "--project",
            project,
            "--task-id",
            item_task,
            "--start-index",
            str(int(item.index)),
            "--only-label",
            item.label,
            "--stage",
            stage,
            "--max-items",
            "1",
            "--max-b-retries",
            str(int(b_retries)),
        ]

    def _subprocess_failure(
        stage: str, fallback: str, out: str, err: str
    ) -> tuple[str, dict[str, Any]]:
        # stderr is searched ahead of stdout for the first error line.
        merged = "\n".join(chunk for chunk in (err, out) if chunk)
        headline = _first_error_line(merged) or fallback
        return headline, {
            "stage": stage,
            "summary": headline,
            "stdout_excerpt": _truncate_text(out),
            "stderr_excerpt": _truncate_text(err),
        }

    # --- statement stage -------------------------------------------------
    _announce("[infra-item-direct] statement stage: ")
    stmt_cli = _stage_cli("infra_statement", statement_max_b_retries) + [
        "--semantic-check",
        "--semantic-check-policy",
        "fail",
    ]
    stmt_code, stmt_out, stmt_err = _run_subprocess(
        "orchestrator.item_statement_pipeline",
        stmt_cli,
        env_overrides=env_overrides,
    )
    if stmt_code != 0:
        headline, failure = _subprocess_failure(
            "statement_stage", "statement_stage_failed", stmt_out, stmt_err
        )
        return False, f"statement_stage_failed: {headline}", None, failure

    stmt_ok, stmt_reason = _double_check_statement(lean_root=lean_root, target_rel=target_rel, item=item)
    if not stmt_ok:
        return False, stmt_reason, None, {"stage": "statement_double_check", "summary": stmt_reason}

    # --- proof stage -----------------------------------------------------
    _announce("[infra-item-direct] proof stage: ")
    proof_cli = _stage_cli("infra_proof", proof_max_b_retries) + [
        "--max-c-replans",
        str(int(proof_max_c_replans)),
    ]
    proof_code, proof_out, proof_err = _run_subprocess(
        "orchestrator.item_proof_pipeline",
        proof_cli,
        env_overrides=env_overrides,
    )
    if proof_code != 0:
        headline, failure = _subprocess_failure(
            "proof_stage", "proof_stage_failed", proof_out, proof_err
        )
        return False, f"proof_stage_failed: {headline}", None, failure

    proof_ok, proof_reason, proof_check_detail = _double_check_proof(
        lean_root=lean_root,
        target_rel=target_rel,
        item=item,
    )

    # --- promote ---------------------------------------------------------
    if proof_ok:
        promote_text, promote_err = _extract_promotable_block(current_item_abs=lean_root / target_rel)
        if promote_text is not None:
            return True, "real_executor_ok", promote_text, {"stage": "promote", "summary": "ok"}
        extract_failure = promote_err or "promote_extract_failed"
        return False, extract_failure, None, {
            "stage": "promote_extract",
            "summary": str(extract_failure),
        }

    # --- proof double-check failed ---------------------------------------
    report: dict[str, Any] = {"stage": "proof_double_check", "summary": proof_reason}
    if isinstance(proof_check_detail, dict) and proof_check_detail:
        report["double_check"] = proof_check_detail
    if proof_reason != "proof_double_check_sorry_remaining":
        return False, proof_reason, None, report

    # A leftover `sorry` gets extra diagnostics for whoever reads the report.
    try:
        report["sorry_context"] = _collect_sorry_context(
            lean_text=(lean_root / target_rel).read_text(encoding="utf-8")
        )
    except Exception as exc:
        # Best-effort: an unreadable working file must not mask the real failure.
        report["sorry_context_error"] = str(exc)
    report["blocker_diagnosis"] = _classify_sorry_remaining_blocker(
        proof_stdout=proof_out,
        proof_stderr=proof_err,
    )
    feedback_keywords = [
        "failed_missing_theory",
        "failed_bad_statement",
        "core_blocker_signature",
        "pending_goals",
        "requested_lemmas",
        "unknown identifier",
        "type mismatch",
        "goal",
        "⊢",
    ]
    report["proof_feedback_lines"] = _extract_relevant_lines(
        "\n".join(chunk for chunk in (proof_out, proof_err) if chunk),
        keywords=feedback_keywords,
        max_lines=12,
    )
    report["stdout_excerpt"] = _truncate_text(proof_out)
    report["stderr_excerpt"] = _truncate_text(proof_err)
    return False, proof_reason, None, report


def _execute_item(
    *,
    item: PlanItem,
    lean_root: Path,
    simulate_success: bool,
    item_data_file_abs: Path,
    target_rel: Path,
    project: str,
    task_prefix: str,
    statement_max_b_retries: int,
    proof_max_b_retries: int,
    proof_max_c_replans: int,
    env_overrides: dict[str, str] | None,
) -> tuple[bool, str, str | None, dict[str, Any] | None]:
    """
    Execute one plan item, either for real or as a simulated success.

    In simulate mode no subprocess is run: the item is reported as successful
    with a scaffold placeholder declaration. Otherwise every argument except
    ``simulate_success`` is forwarded to ``_execute_item_real``.

    Returns the same ``(ok, reason, declaration_text, detail)`` tuple in both modes.
    """
    if not simulate_success:
        return _execute_item_real(
            item=item,
            lean_root=lean_root,
            item_data_file_abs=item_data_file_abs,
            target_rel=target_rel,
            project=project,
            task_prefix=task_prefix,
            statement_max_b_retries=statement_max_b_retries,
            proof_max_b_retries=proof_max_b_retries,
            proof_max_c_replans=proof_max_c_replans,
            env_overrides=env_overrides,
        )

    placeholder = _build_scaffold_declaration(item)
    simulate_detail = {"stage": "simulate", "summary": "simulated_success"}
    return True, "simulated_success", placeholder, simulate_detail


def _write_failure_report(
    *,
    infra_dir_abs: Path,
    run_id: str,
    plan_rel: Path,
    cursor_index: int,
    item: PlanItem,
    reason: str,
    detail: dict[str, Any] | None = None,
) -> Path:
    """
    Persist a JSON report describing why ``item`` blocked the loop.

    The report is written under ``<infra_dir>/failure_reports`` (created on
    demand) with a file name built from the zero-padded item index and a
    compact UTC timestamp. ``detail`` is included only when it is non-empty.

    Returns the absolute path of the written report.
    """
    reports_root = infra_dir_abs / FAILURE_REPORTS_DIRNAME
    reports_root.mkdir(parents=True, exist_ok=True)
    report_abs = reports_root / f"failure_idx{int(item.index):04d}_{_utc_now_compact()}.json"

    # Snapshot of the plan item exactly as it was attempted.
    failed_item = {
        "index": item.index,
        "item_id": item.item_id,
        "label": item.label,
        "env": item.env,
        "target_file": item.target_file,
        "dependencies": item.dependencies,
        "content": item.content,
    }
    payload = {
        "timestamp_utc": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "run_id": run_id,
        "status": "blocked",
        "plan_active": str(plan_rel),
        "cursor_index": int(cursor_index),
        "failed_item": failed_item,
        "reason": reason,
        "next_action": "planner_suffix_rewrite",
    }
    if detail:
        payload["detail"] = detail

    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
    report_abs.write_text(serialized + "\n", encoding="utf-8")
    return report_abs


def run_direct_item_pipeline(
    *,
    bench_file: Path,
    plan_file: Path | None,
    task_id: str | None,
    start_index: int | None,
    max_items: int | None,
    chunk_item_limit: int,
    chunk_line_limit: int,
    simulate_success: bool,
    statement_max_b_retries: int,
    proof_max_b_retries: int,
    proof_max_c_replans: int,
) -> bool:
    """
    Drive the per-item loop over the active plan and freeze each successful item.

    Phases, in order:

    1. Resolve the Lean root and infra directory from ``bench_file`` and make
       sure ``plan_active.json`` holds the plan to run (copied from the chosen
       source when that is a different file).
    2. Start a metrics run, load the persisted loop state and clamp the cursor.
    3. Compare the plan with the prefix manifest; on mismatch rewind the prefix
       store and move the cursor back.
    4. For every item at or after the cursor call ``_execute_item``. On success
       the declaration is appended to the prefix store and the cursor advances.
       On failure a salvage attempt is made (real mode only), a failure report
       is written, the state is marked ``blocked`` and the loop stops.
    5. Rewrite the working file, finish the metrics run and print a summary.

    Args:
        bench_file: Bench file or infra directory path used to locate the infra dir.
        plan_file: Optional plan source; when omitted an existing active plan is
            preferred over the base ``infra_plan.json``.
        task_id: Optional task id; blank values fall back to
            ``infra_item_direct_<infra_id>``. Also used as the project name.
        start_index: When given, overrides the persisted cursor index.
        max_items: When given, at most this many items are processed in this call.
        chunk_item_limit: Forwarded to ``PrefixStoreConfig``.
        chunk_line_limit: Forwarded to ``PrefixStoreConfig``.
        simulate_success: Forwarded to ``_execute_item``; also disables salvage
            and tags frozen items as ``scaffold_simulated``.
        statement_max_b_retries: Forwarded to ``_execute_item``.
        proof_max_b_retries: Forwarded to ``_execute_item``.
        proof_max_c_replans: Forwarded to ``_execute_item``.

    Returns:
        ``True`` when no item blocked the loop, ``False`` otherwise.

    Raises:
        FileNotFoundError: The selected plan source does not exist.
        ValueError: The active plan contains no items.
        RuntimeError: The frozen manifest length disagrees with the cursor after
            alignment, or a successful execution returned no declaration text.
    """
    # Locate the Lean root and the infra directory that belongs to this input.
    lean_root, input_rel = _resolve_infra_root(bench_file)
    inferred = _infer_bench_and_infra_from_input(input_rel)
    if inferred is not None:
        bench_rel, infra_dir_rel, infra_id = inferred
    else:
        # Fallback: treat the input as the bench file and use infra_<stem> beside it.
        bench_rel = input_rel
        infra_id = bench_rel.stem
        infra_dir_rel = bench_rel.parent / f"infra_{infra_id}"

    infra_dir_abs = lean_root / infra_dir_rel
    _ensure_directories(infra_dir_abs)

    active_plan_rel = infra_dir_rel / PLAN_ACTIVE_FILENAME
    active_plan_abs = lean_root / active_plan_rel

    # Plan source priority: explicit plan_file, then an existing active plan,
    # then the base infra plan.
    if plan_file is not None:
        plan_source_abs = _resolve_path_against(lean_root, plan_file)
    else:
        candidate_active = active_plan_abs
        candidate_base = lean_root / (infra_dir_rel / INFRA_PLAN_FILENAME)
        if candidate_active.exists():
            plan_source_abs = candidate_active
        else:
            plan_source_abs = candidate_base

    if not plan_source_abs.exists():
        raise FileNotFoundError(f"Plan file not found: {plan_source_abs}")
    # Copy the source into plan_active.json unless it already is that file.
    if plan_source_abs.resolve() != active_plan_abs.resolve():
        active_plan_abs.parent.mkdir(parents=True, exist_ok=True)
        active_plan_abs.write_text(plan_source_abs.read_text(encoding="utf-8"), encoding="utf-8")

    items = _load_plan_items(active_plan_abs)
    if not items:
        raise ValueError(f"No items found in {active_plan_abs}")

    # A missing or whitespace-only task id falls back to the infra-derived default.
    task = (task_id or f"infra_item_direct_{infra_id}").strip() or f"infra_item_direct_{infra_id}"
    project = task
    # Only set LEAN_PROJECT_DIR when lean_root lies under ROOT; otherwise no override.
    # NOTE(review): presumably read by the stage subprocesses — confirm in _run_subprocess.
    env_overrides = None
    try:
        rel_to_root = lean_root.relative_to(ROOT)
        env_overrides = {"LEAN_PROJECT_DIR": rel_to_root.as_posix()}
    except Exception:
        env_overrides = None

    # NOTE(review): there is no try/finally around the rest of this function, so
    # the raise statements below leave this run without a finish_run() call.
    run_id = start_run(
        "infra_item_direct",
        stage=0,
        name_tag=_safe_stem(task),
        data_file=str(active_plan_rel),
        extra={
            "bench_file": str(bench_rel),
            "infra_dir": str(infra_dir_rel),
            "plan_active": str(active_plan_rel),
            "simulate_success": bool(simulate_success),
            "chunk_item_limit": int(chunk_item_limit),
            "chunk_line_limit": int(chunk_line_limit),
            "statement_max_b_retries": int(statement_max_b_retries),
            "proof_max_b_retries": int(proof_max_b_retries),
            "proof_max_c_replans": int(proof_max_c_replans),
        },
    )

    loop_state_abs = infra_dir_abs / ITEM_LOOP_STATE_FILENAME
    state = _load_loop_state(loop_state_abs, plan_rel=active_plan_rel)
    # An explicit start index wins over the persisted cursor.
    if start_index is not None:
        state["cursor_index"] = int(start_index)

    # Clamp the cursor to [first plan index, last plan index + 1]; the upper
    # bound leaves nothing to process.
    # NOTE(review): assumes items are ordered by index — confirm in _load_plan_items.
    cursor_index = int(state.get("cursor_index", 1) or 1)
    min_plan_index = int(items[0].index)
    max_cursor_index = int(items[-1].index + 1)
    if cursor_index < min_plan_index:
        cursor_index = min_plan_index
    if cursor_index > max_cursor_index:
        cursor_index = max_cursor_index
    state["cursor_index"] = int(cursor_index)

    processed = 0
    blocked = False
    blocked_reason: str | None = None
    blocked_item: PlanItem | None = None

    prefix_store = InfraPrefixStore(
        lean_root=lean_root,
        infra_dir_rel=infra_dir_rel,
        config=PrefixStoreConfig(
            chunk_item_limit=int(chunk_item_limit),
            chunk_line_limit=int(chunk_line_limit),
        ),
    )
    prefix_store.ensure_layout()

    current_item_abs = infra_dir_abs / WORKING_DIRNAME / CURRENT_ITEM_FILENAME
    prefix_index_rel = infra_dir_rel / "PrefixIndex.lean"
    current_item_rel = infra_dir_rel / WORKING_DIRNAME / CURRENT_ITEM_FILENAME

    # Reconcile the already-frozen prefix with the plan before running anything.
    manifest = prefix_store.load_manifest()
    frozen_items = _frozen_items_from_manifest(manifest)
    mismatch = _first_prefix_mismatch(items=items, cursor_index=cursor_index, frozen_items=frozen_items)
    if mismatch is not None:
        keep_count = int(mismatch["keep_count"])
        rollback_index = int(mismatch["rollback_index"])
        print(
            "[infra-item-direct] prefix mismatch detected; "
            f"reason={mismatch['reason']} mismatch_pos={mismatch['mismatch_pos']} "
            f"keep={keep_count} rollback_cursor={rollback_index}"
        )
        log_event(
            run_id,
            "direct_prefix_plan_mismatch_detected",
            {
                "reason": str(mismatch["reason"]),
                "mismatch_pos": int(mismatch["mismatch_pos"]),
                "keep_count": int(keep_count),
                "rollback_index": int(rollback_index),
                "cursor_index_before": int(cursor_index),
                "expected": mismatch.get("expected"),
                "got": mismatch.get("got"),
            },
        )
        # Rewind the store to keep_count frozen items and resume at the rollback index.
        prefix_store.rewind_to_prefix_count(keep_count=keep_count)
        cursor_index = int(rollback_index)
        state["cursor_index"] = int(cursor_index)
        state["status"] = "running"
        state["frozen_count"] = int(keep_count)
        # Highest plan index strictly below the new cursor, or 0 when there is none.
        state["last_success_index"] = int(max((it.index for it in items if it.index < cursor_index), default=0))
        if keep_count > 0:
            # Rebuild last_success_item from the last record that survived the rewind.
            manifest_after = prefix_store.load_manifest()
            frozen_after = _frozen_items_from_manifest(manifest_after)
            last_rec = frozen_after[keep_count - 1] if len(frozen_after) >= keep_count else None
            if isinstance(last_rec, dict):
                last_meta = last_rec.get("metadata") if isinstance(last_rec.get("metadata"), dict) else {}
                last_idx = _coerce_int((last_meta or {}).get("index"))
                state["last_success_item"] = {
                    "index": int(last_idx if last_idx is not None else state["last_success_index"]),
                    "item_id": str(last_rec.get("item_id", "")).strip(),
                    "label": str(last_rec.get("label", "")).strip(),
                    "chunk_file": str(last_rec.get("chunk_path", "")).strip(),
                }
        else:
            state.pop("last_success_item", None)
        state["last_error"] = None
        _save_loop_state(loop_state_abs, state, run_id=run_id)

    # Reload after a possible rewind and check the manifest against the cursor.
    manifest = prefix_store.load_manifest()
    frozen_items = _frozen_items_from_manifest(manifest)
    # NOTE(review): by its name this presumably raises on duplicates — confirm.
    _assert_no_duplicate_frozen_items(
        frozen_items=frozen_items,
        context="prefix_manifest.frozen_items",
    )
    expected_prefix = _expected_prefix_for_cursor(items=items, cursor_index=cursor_index)
    if len(frozen_items) != len(expected_prefix):
        raise RuntimeError(
            "prefix manifest length mismatch after alignment: "
            f"frozen={len(frozen_items)} expected={len(expected_prefix)} cursor={cursor_index}"
        )

    log_event(
        run_id,
        "direct_loop_start",
        {
            "cursor_index": cursor_index,
            "total_items": len(items),
            "bench_file": str(bench_rel),
            "infra_dir": str(infra_dir_rel),
            "plan_active": str(active_plan_rel),
        },
    )

    for item in items:
        # Skip everything before the cursor; stop once the batch budget is used up.
        if item.index < cursor_index:
            continue
        if max_items is not None and processed >= int(max_items):
            break

        # Progress is positional: number of plan items below the cursor, plus one.
        completed_before = sum(1 for it in items if it.index < cursor_index)
        seq_no = completed_before + 1
        print(
            "[infra-item-direct] progress "
            f"{seq_no}/{len(items)} | cursor={cursor_index} | "
            f"index={item.index} item_id={item.item_id} label={item.label}"
        )
        # Regenerate the working file and write a one-item data file for this item.
        _write_current_item_file(current_item_abs=current_item_abs, prefix_index_rel=prefix_index_rel)
        item_data_file_abs = infra_dir_abs / WORKING_DIRNAME / f"CurrentItem_{_safe_stem(item.item_id)}.json"
        _write_single_item_json(abs_path=item_data_file_abs, item=item, target_rel=current_item_rel)
        log_event(
            run_id,
            "item_start",
            {
                "index": item.index,
                "item_id": item.item_id,
                "label": item.label,
                "target_file": item.target_file,
                "cursor_index": cursor_index,
                "working_target_file": str(current_item_rel),
            },
        )

        ok, reason, decl, detail = _execute_item(
            item=item,
            lean_root=lean_root,
            simulate_success=simulate_success,
            item_data_file_abs=item_data_file_abs,
            target_rel=current_item_rel,
            project=project,
            task_prefix=task,
            statement_max_b_retries=int(statement_max_b_retries),
            proof_max_b_retries=int(proof_max_b_retries),
            proof_max_c_replans=int(proof_max_c_replans),
            env_overrides=env_overrides,
        )
        if not ok:
            # Failure path: try to salvage, write a report, mark blocked, stop.
            salvage_detail: dict[str, Any] | None = None
            if not simulate_success:
                # Salvage is best-effort: an exception is recorded, not propagated.
                try:
                    salvage_detail = _salvage_proved_prefix_only_blocks(
                        lean_root=lean_root,
                        infra_dir_rel=infra_dir_rel,
                        current_item_abs=current_item_abs,
                        prefix_index_rel=prefix_index_rel,
                        prefix_store=prefix_store,
                        failed_item=item,
                    )
                except Exception as e:
                    salvage_detail = {"status": "error", "reason": f"salvage_exception: {e}"}
                if isinstance(salvage_detail, dict):
                    status = str(salvage_detail.get("status", "unknown"))
                    if status == "promoted":
                        promoted_count = int(salvage_detail.get("promoted_count", 0) or 0)
                        promoted_names = salvage_detail.get("promoted_names")
                        names_text = (
                            ", ".join(str(x) for x in promoted_names)
                            if isinstance(promoted_names, list) and promoted_names
                            else "<unknown>"
                        )
                        print(
                            "[infra-item-direct] salvaged proved declarations from failed item: "
                            f"count={promoted_count} names={names_text} "
                            f"chunk={salvage_detail.get('chunk_path', 'GeneratedPrefix/Prefix_0001.lean')}"
                        )
                    # These three statuses are not printed; every status is logged below.
                    elif status not in {"no_body", "no_decl_candidates", "no_promotable"}:
                        print(
                            "[infra-item-direct] salvage attempted but not promoted: "
                            f"status={status} reason={salvage_detail.get('reason', '')}"
                        )
                    log_event(
                        run_id,
                        "blocked_item_salvage",
                        {
                            "index": item.index,
                            "item_id": item.item_id,
                            "label": item.label,
                            "status": status,
                            "detail": salvage_detail,
                        },
                    )

            # Normalise the executor detail into a fresh dict so the salvage
            # outcome can be attached without mutating the executor's object.
            detail_payload: dict[str, Any] | None
            if isinstance(detail, dict):
                detail_payload = dict(detail)
            elif detail is None:
                detail_payload = {}
            else:
                detail_payload = {"raw_detail": str(detail)}
            if isinstance(salvage_detail, dict):
                detail_payload["salvage"] = salvage_detail

            blocked = True
            blocked_reason = reason
            blocked_item = item
            report_abs = _write_failure_report(
                infra_dir_abs=infra_dir_abs,
                run_id=run_id,
                plan_rel=active_plan_rel,
                cursor_index=cursor_index,
                item=item,
                reason=reason,
                detail=detail_payload if detail_payload else None,
            )
            # The cursor is left on the failed item; only status and last_error change.
            state["status"] = "blocked"
            state["last_error"] = {
                "index": item.index,
                "item_id": item.item_id,
                "label": item.label,
                "reason": reason,
                "failure_report": str(report_abs.relative_to(lean_root)),
            }
            _save_loop_state(loop_state_abs, state, run_id=run_id)
            log_event(
                run_id,
                "item_end",
                {
                    "index": item.index,
                    "item_id": item.item_id,
                    "label": item.label,
                    "status": "blocked",
                    "reason": reason,
                    "failure_report": str(report_abs),
                    "detail": detail_payload if detail_payload else None,
                },
            )
            print(
                "[infra-item-direct] blocked "
                f"index={item.index} item_id={item.item_id} reason={reason} "
                f"failure_report={report_abs.relative_to(lean_root)}"
            )
            # When the executor attached a blocker diagnosis, print a one-line digest of it.
            if isinstance(detail_payload, dict):
                diag = detail_payload.get("blocker_diagnosis")
                if isinstance(diag, dict):
                    failure_class = str(diag.get("failure_class", "")).strip() or "unknown"
                    conf = diag.get("confidence")
                    diag_txt = str(diag.get("diagnosis", "")).strip()
                    goal_lines = (
                        diag.get("goal_lines")
                        if isinstance(diag.get("goal_lines"), list)
                        else []
                    )
                    goal_hint = ""
                    if goal_lines:
                        # First non-blank string entry, truncated to 180 characters.
                        first = next(
                            (str(x).strip() for x in goal_lines if isinstance(x, str) and str(x).strip()),
                            "",
                        )
                        if first:
                            goal_hint = f" | goal_hint={first[:180]}"
                    conf_txt = f"{float(conf):.2f}" if isinstance(conf, (int, float)) else "?"
                    print(
                        "[infra-item-direct] diagnosis "
                        f"class={failure_class} confidence={conf_txt}"
                        + (f" | {diag_txt[:220]}" if diag_txt else "")
                        + goal_hint
                    )
            # Stop at the first blocked item.
            break

        if decl is None:
            raise RuntimeError("internal error: decl is None while ok=True")

        # Success path: freeze the declaration, then advance and persist the cursor.
        chunk_rel = prefix_store.append_verified_item(
            item_id=item.item_id,
            label=item.label,
            declaration_text=decl,
            metadata={
                "index": item.index,
                "env": item.env,
                "target_file": item.target_file,
                "dependencies": item.dependencies,
                "mode": ("scaffold_simulated" if simulate_success else "verified_real"),
            },
        )
        processed += 1
        cursor_index = item.index + 1
        state["status"] = "running"
        state["cursor_index"] = cursor_index
        state["last_success_index"] = item.index
        state["frozen_count"] = int(state.get("frozen_count", 0) or 0) + 1
        state["last_success_item"] = {
            "index": item.index,
            "item_id": item.item_id,
            "label": item.label,
            "chunk_file": str(chunk_rel),
        }
        state["last_error"] = None
        _save_loop_state(loop_state_abs, state, run_id=run_id)
        log_event(
            run_id,
            "item_end",
            {
                "index": item.index,
                "item_id": item.item_id,
                "label": item.label,
                "status": "ok",
                "reason": reason,
                "frozen_chunk": str(chunk_rel),
                "next_cursor_index": cursor_index,
            },
        )
        print(
            "[infra-item-direct] item ok "
            f"index={item.index} frozen_chunk={chunk_rel} next_cursor={cursor_index}"
        )

    if not blocked:
        # Final status: "paused" when the batch budget ran out with items left,
        # "completed" when nothing remains, otherwise "running".
        has_remaining = any(it.index >= cursor_index for it in items)
        if has_remaining and max_items is not None and processed >= int(max_items):
            state["status"] = "paused"
        else:
            state["status"] = "completed" if not has_remaining else "running"
        state["cursor_index"] = cursor_index
        _save_loop_state(loop_state_abs, state, run_id=run_id)

    # Rewrite the working file once more after the loop (logged as "cleared"),
    # on both the success and the blocked path.
    _write_current_item_file(current_item_abs=current_item_abs, prefix_index_rel=prefix_index_rel)
    log_event(
        run_id,
        "working_file_cleared",
        {
            "working_file": str(current_item_rel),
        },
    )

    summary = {
        "pipeline": "infra_item_direct",
        "stage": 0,
        "run_id": run_id,
        "status": "blocked" if blocked else "ok",
        "task_id": task,
        "bench_file": str(bench_rel),
        "infra_dir": str(infra_dir_rel),
        "plan_active": str(active_plan_rel),
        "processed": processed,
        "cursor_index": cursor_index,
        "blocked_reason": blocked_reason,
        "blocked_item_index": (blocked_item.index if blocked_item else None),
        "paths": {
            "state_file": str(loop_state_abs.relative_to(lean_root)),
            "working_file": str(current_item_abs.relative_to(lean_root)),
            "prefix_index": str(prefix_index_rel),
            "prefix_manifest": str(prefix_store.manifest_rel),
        },
    }
    finish_run(run_id, summary)

    if blocked:
        print(
            "[infra-item-direct] blocked at "
            f"index={blocked_item.index if blocked_item else '?'} "
            f"reason={blocked_reason or 'unknown'}"
        )
    else:
        print(
            f"[infra-item-direct] completed batch. processed={processed}, "
            f"next_cursor_index={cursor_index}"
        )
    return not blocked


def main() -> None:
    """
    Command-line entry point for the direct item pipeline.

    Parses the options, runs ``run_direct_item_pipeline`` and exits with
    status 0 when the run was not blocked, 1 otherwise.
    """
    cli = argparse.ArgumentParser(
        description="Infra direct item pipeline: per-item statement -> double-check -> proof -> promote."
    )

    # Input location and plan selection.
    cli.add_argument(
        "--bench-file",
        type=Path,
        required=True,
        help=(
            "Bench file path under LEAN_ROOT (e.g. Question_bench/FATEX/31.lean) "
            "or an infra dir path (e.g. Question_bench/FATEX/infra_31)."
        ),
    )
    cli.add_argument(
        "--plan-file",
        type=Path,
        default=None,
        help=(
            "Optional source plan JSON. If provided, it will be copied to "
            "<infra_dir>/plan_active.json before running."
        ),
    )
    cli.add_argument("--task-id", type=str, default=None, help="Optional run/task id for metrics.")

    # Batch window.
    cli.add_argument("--start-index", type=int, default=None, help="Override cursor start index for this run.")
    cli.add_argument("--max-items", type=int, default=None, help="Process at most this many items.")

    # Prefix chunk sizing.
    cli.add_argument(
        "--chunk-item-limit",
        type=int,
        default=20,
        help="Max frozen items per prefix chunk file (default: 20).",
    )
    cli.add_argument(
        "--chunk-line-limit",
        type=int,
        default=1800,
        help="Soft max lines per prefix chunk file (default: 1800).",
    )

    cli.add_argument(
        "--simulate-success",
        action="store_true",
        help=(
            "Scaffold-only mode: mark each item as success and append placeholder "
            "declarations to GeneratedPrefix."
        ),
    )

    # Retry budgets for the real executor; registered in the same order as before.
    retry_options = (
        (
            "--statement-max-b-retries",
            3,
            "Max Agent B retries for statement stage in real executor mode (default: 3).",
        ),
        (
            "--proof-max-b-retries",
            3,
            "Max Agent B retries for proof stage in real executor mode (default: 3).",
        ),
        (
            "--proof-max-c-replans",
            1,
            "Max Agent C replans for proof stage in real executor mode (default: 1).",
        ),
    )
    for flag, default_value, help_text in retry_options:
        cli.add_argument(flag, type=int, default=default_value, help=help_text)

    opts = cli.parse_args()

    succeeded = run_direct_item_pipeline(
        bench_file=opts.bench_file,
        plan_file=opts.plan_file,
        task_id=opts.task_id,
        start_index=opts.start_index,
        max_items=opts.max_items,
        chunk_item_limit=int(opts.chunk_item_limit),
        chunk_line_limit=int(opts.chunk_line_limit),
        simulate_success=bool(opts.simulate_success),
        statement_max_b_retries=int(opts.statement_max_b_retries),
        proof_max_b_retries=int(opts.proof_max_b_retries),
        proof_max_c_replans=int(opts.proof_max_c_replans),
    )
    exit_code = 0 if succeeded else 1
    raise SystemExit(exit_code)


# Run the CLI only when this module is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
