import os
import re
import shlex
from pathlib import Path

# Project roots
# ROOT is the parent of the directory that contains this file (after resolving
# symlinks); every data/, log/, prompts/ and bin/ path below is built from it.
ROOT = Path(__file__).resolve().parents[1]

def _as_nonempty_env(name: str) -> str | None:
    value = os.getenv(name, "")
    value = value.strip() if isinstance(value, str) else ""
    return value or None


# Optional per-formalization project scope. When FORMAL_PROJECT is set (non-blank), its
# sanitized directory name is used to root chapter paths, data and logs under:
# - <LEAN_PROJECT_DIR>/<FORMAL_PROJECT>/   (LEAN_PROJECT_DIR defaults to "M2F")
# - data/<FORMAL_PROJECT>/
# - log/<FORMAL_PROJECT>/                  (unless LOG_PROJECT overrides the log subdirectory)
# Both values are None when the corresponding variable is unset or blank.
FORMAL_PROJECT = _as_nonempty_env("FORMAL_PROJECT")
LOG_PROJECT = _as_nonempty_env("LOG_PROJECT")


def _project_dirname(project: str | None) -> str | None:
    if not project:
        return None
    # keep it path-safe and stable
    cleaned = re.sub(r"[^A-Za-z0-9._-]+", "_", project).strip("_")
    return cleaned or None


# Sanitized directory name derived from FORMAL_PROJECT; None when no project is set.
FORMAL_PROJECT_DIRNAME = _project_dirname(FORMAL_PROJECT)


def _default_data_path() -> Path:
    """
    Choose the default input JSON file.

    With a formal project set, look inside `data/<project>/` for, in order:
    `data.json`, the legacy `section01.json`, then the sole `*.json` file if the
    directory holds exactly one. If none of those apply (or no project is set),
    use `data/data.json` when it exists and `data/section01.json` otherwise.
    """
    data_root = ROOT / "data"

    if FORMAL_PROJECT_DIRNAME:
        project_dir = data_root / FORMAL_PROJECT_DIRNAME
        # Preferred name first, then the legacy naming.
        for filename in ("data.json", "section01.json"):
            named = project_dir / filename
            if named.exists():
                return named
        # An unambiguous single JSON file in the project directory also qualifies.
        if project_dir.exists():
            json_files = sorted(entry for entry in project_dir.glob("*.json") if entry.is_file())
            if len(json_files) == 1:
                return json_files[0]

    shared = data_root / "data.json"
    return shared if shared.exists() else data_root / "section01.json"


def _default_proof_data_path() -> Path:
    """Return `proof/ch-real-nums.json` under `data/<project>/`, or under `data/` when no project is set."""
    data_root = ROOT / "data"
    if FORMAL_PROJECT_DIRNAME:
        data_root = data_root / FORMAL_PROJECT_DIRNAME
    return data_root / "proof" / "ch-real-nums.json"


# Both data paths are resolved once, when this module is imported.
DATA_PATH = _default_data_path()
PROOF_DATA_PATH = _default_proof_data_path()
# Directory of the Lean project, joined onto ROOT; a blank or unset
# LEAN_PROJECT_DIR falls back to "M2F".
LEAN_PROJECT_DIR = _as_nonempty_env("LEAN_PROJECT_DIR") or "M2F"
LEAN_ROOT = ROOT / LEAN_PROJECT_DIR

def _default_lean_src_chapters() -> Path:
    """
    Locate the root directory that holds the chapter folders.

    - With FORMAL_PROJECT set: <LeanRoot>/<project>/Chapters
    - Without it: <LeanRoot>/Chapters (stable default; may not exist yet).
    """
    chapters_parent = LEAN_ROOT / FORMAL_PROJECT_DIRNAME if FORMAL_PROJECT_DIRNAME else LEAN_ROOT
    return chapters_parent / "Chapters"


LEAN_SRC_CHAPTERS = _default_lean_src_chapters()
# Bench directory is global within the Lean package (shared across projects).
# Put multiple banks under this root, e.g. `Question_bench/FateH/`, `Question_bench/OtherBank/`, ...
LEAN_BENCH_ROOT = LEAN_ROOT / "Question_bench"

# Codex working directory.
#
# We run Codex from the Lean project root (`M2F/`) so Codex CLI's default sandbox
# (`workspace-write`) can write to the actual target Lean files.
#
# Stage-specific rules are enforced by swapping `M2F/AGENTS.md` to the desired
# stage's `M2F/.codex_<stage>/AGENTS.md` before each Codex call (see `orchestrator/stage_agents.py`).
CODEX_STATEMENT_CWD = LEAN_ROOT
CODEX_PROOF_CWD = LEAN_ROOT
CODEX_FINAL_CWD = LEAN_ROOT
PROMPTS_DIR = ROOT / "prompts"
STATEMENT_PROMPTS_DIR = PROMPTS_DIR / "statement"
PROOF_PROMPTS_DIR = PROMPTS_DIR / "proof"
FINAL_PROMPTS_DIR = PROMPTS_DIR / "final"
INFRA_PROMPTS_DIR = PROMPTS_DIR / "infra"

# Log artifacts live under repo `log/` (see `log/README.md`).
# Logs are always written under a project subdirectory to keep `log/` clean:
# - If LOG_PROJECT is set: `log/<LOG_PROJECT>/...`
# - Else if FORMAL_PROJECT is set: `log/<FORMAL_PROJECT>/...`
# - Otherwise: `log/default/...`
#
# LOG_PROJECT overrides the log subdirectory without affecting the Lean project layout.
# This is useful for prover/bench runs (e.g. `--job-name FATEH_93_new`) where there is no
# `FORMAL_PROJECT` but we still want isolated log folders.
# LOG_PROJECT goes through the same name sanitization as FORMAL_PROJECT.
LOG_PROJECT_DIRNAME = _project_dirname(LOG_PROJECT) or FORMAL_PROJECT_DIRNAME or "default"
LOG_DIR = ROOT / "log" / LOG_PROJECT_DIRNAME
STATEMENT_LOGS_DIR = LOG_DIR / "statement_logs"
PROOF_LOGS_DIR = LOG_DIR / "proof_logs"
FINAL_LOGS_DIR = LOG_DIR / "final_logs"
INFRA_LOGS_DIR = LOG_DIR / "infra_logs"
# Failure log files, kept inside the matching per-stage log directory.
PROOF_FAILURE_LOG = PROOF_LOGS_DIR / "proof_failures.log"
FINAL_FAILURE_LOG = FINAL_LOGS_DIR / "final_failures.log"
METRICS_DIR = LOG_DIR / "metrics"


def chapter_dir(chapter: int) -> Path:
    """Map a chapter number to its directory under the chapters root (1 -> Chap01)."""
    dirname = "Chap{:02d}".format(chapter)
    return LEAN_SRC_CHAPTERS / dirname


def section_file(chapter: int, section: int) -> Path:
    """Map (chapter, section) to the section's Lean file (e.g., (1,1) -> .../section01.lean)."""
    filename = "section{:02d}.lean".format(section)
    return chapter_dir(chapter) / filename


def section_part_files(chapter: int, section: int) -> list[Path]:
    """
    Return any part files for a section, sorted by filename in natural order.

    Matches `section<NN>_part*.lean` in the chapter directory. Digit runs in the
    filename are compared numerically, so `..._part2.lean` sorts before
    `..._part10.lean`; a plain lexicographic sort would put `part10` first and
    make callers that take the last element pick the wrong part. Zero-padded
    names keep the same order they had under lexicographic sorting.
    """
    prefix = f"section{section:02d}_part"

    def _natural_key(path: Path) -> tuple[list[int | str], str]:
        # re.split with a capturing group alternates text (even index) and digit
        # runs (odd index), so keys always compare str-with-str and int-with-int.
        tokens = re.split(r"(\d+)", path.name)
        chunks = [int(tok) if idx % 2 else tok for idx, tok in enumerate(tokens)]
        # The raw name breaks ties such as "part2" vs "part02" deterministically.
        return chunks, path.name

    return sorted(chapter_dir(chapter).glob(f"{prefix}*.lean"), key=_natural_key)


def resolve_explicit_target_file(target_file: str | Path | None) -> Path | None:
    """
    Normalize an explicit target-file override.

    Relative paths are interpreted relative to `LEAN_ROOT`; absolute paths are passed through.
    """
    if target_file is None:
        return None
    raw = str(target_file).strip()
    if not raw:
        return None
    path = Path(raw)
    if path.is_absolute():
        return path
    return LEAN_ROOT / path


def resolve_item_target_file(
    chapter: int,
    section: int,
    *,
    label: str | None = None,
    target_file: str | Path | None = None,
) -> Path:
    """
    Pick the Lean file for an item.

    A non-blank `target_file` override wins; otherwise the file is resolved from
    the chapter/section (and `label`, when part files exist).
    """
    override = resolve_explicit_target_file(target_file)
    if override is None:
        return resolve_section_target_file(chapter, section, label=label)
    return override


# Lean keywords that open a declaration a docstring can be attached to.
_DECL_KEYWORDS = (
    "def",
    "abbrev",
    "lemma",
    "theorem",
    "instance",
    "structure",
    "class",
    "inductive",
    "opaque",
)

# Lean 4 declaration modifiers that may appear before the declaration keyword.
_DECL_MODIFIERS = (
    "private ",
    "protected ",
    "noncomputable ",
    "unsafe ",
    "partial ",
    "nonrec ",
)


def _is_decl_line(line: str) -> bool:
    """
    Return True if `line` starts a Lean declaration.

    Leading whitespace is ignored. Scoping/command lines (`section`, `end`,
    `namespace`, `open`, `variable`, `universe`) are never declarations. Any
    number of leading modifiers from `_DECL_MODIFIERS` is stripped, in any
    order, before the line is checked for a keyword from `_DECL_KEYWORDS`
    followed by a space.
    """
    s = line.lstrip()
    if not s:
        return False
    if s.startswith(("section ", "end ", "namespace ", "open ", "variable ", "universe ")):
        return False

    # Keep stripping until a full pass removes nothing, so stacked modifiers
    # (e.g. `private noncomputable unsafe def`) are all consumed.
    stripped_any = True
    while stripped_any:
        stripped_any = False
        for mod in _DECL_MODIFIERS:
            if s.startswith(mod):
                s = s[len(mod) :].lstrip()
                stripped_any = True

    return s.startswith(tuple(k + " " for k in _DECL_KEYWORDS))


def _docstring_starts_with_any_label(doc_text: str, labels: list[str]) -> bool:
    """
    Return True if the first content line of `doc_text` begins with one of `labels`.

    The first content line is whatever follows a leading `/--` on the first line
    or, if nothing follows it, the first later line that is neither blank nor a
    bare `-/`. Text that does not start with `/--` is matched from its first line.

    A label only counts when it ends at a boundary: the end of the line, or a
    non-alphanumeric character that does not continue a dotted number. So
    "Theorem 1.1" matches "Theorem 1.1: ..." and "Theorem 1.1. Statement", but
    neither "Theorem 1.10" nor "Theorem 1.1.2".
    """
    if not doc_text or not labels:
        return False
    lines = doc_text.splitlines()
    if not lines:
        return False

    head = lines[0].lstrip()
    if head.startswith("/--"):
        head = head[len("/--") :].lstrip()
        if not head:
            # Opening marker on its own line: take the first real content line.
            head = next(
                (s for s in (ln.strip() for ln in lines[1:]) if s and s != "-/"),
                "",
            )
    if not head:
        return False

    for label in labels:
        if not head.startswith(label):
            continue
        rest = head[len(label) :]
        if not rest:
            return True
        if rest[0].isalnum():
            # The label is only a prefix of a longer word or number.
            continue
        if rest[0] == "." and rest[1:2].isdigit():
            # ".<digit>" continues the item number (1.1 -> 1.1.2): a different item.
            continue
        return True
    return False


def _drop_leading_attributes(line: str) -> str:
    """
    Strip leading `@[...]` attribute groups from `line` and return what follows.

    Brackets are matched with nesting. Returns "" when a group is not closed on
    this line, so the caller treats the line as attribute-only.
    """
    rest = line.lstrip()
    while rest.startswith("@["):
        depth = 0
        close = -1
        for pos, ch in enumerate(rest):
            if ch == "[":
                depth += 1
            elif ch == "]":
                depth -= 1
                if depth == 0:
                    close = pos
                    break
        if close < 0:
            return ""
        rest = rest[close + 1 :].lstrip()
    return rest


def _part_has_main_docstring_label(*, text: str, labels: list[str]) -> bool:
    """
    Return True iff this part contains a declaration whose associated docstring
    starts with one of `labels` (main-declaration style match).

    A docstring runs from a line starting with `/--` to the first line containing
    `-/`. It is associated with the next line that is not blank, not an
    `attribute ...` command and not an attribute-only `@[...]` line. A
    declaration that carries its attribute on the same line
    (`@[simp] theorem foo ...`) is recognized as that declaration rather than
    skipped as an attribute line.
    """
    lines = text.splitlines()
    total = len(lines)
    i = 0
    while i < total:
        if not lines[i].lstrip().startswith("/--"):
            i += 1
            continue

        # Find the line that closes the docstring (possibly the opening line itself).
        j = i
        while j < total and "-/" not in lines[j]:
            j += 1
        if j >= total:
            # Unterminated docstring: nothing further can be matched.
            break

        doc_text = "\n".join(lines[i : j + 1])

        followed_by_decl = False
        k = j + 1
        while k < total:
            s = lines[k].strip()
            if not s or s.startswith("attribute "):
                k += 1
                continue
            if s.startswith("@["):
                # Attribute and declaration on one line: the remainder decides.
                if _is_decl_line(_drop_leading_attributes(s)):
                    followed_by_decl = True
                    break
                # Attribute-only line: keep looking for the declaration.
                k += 1
                continue
            followed_by_decl = _is_decl_line(lines[k])
            break

        if followed_by_decl and _docstring_starts_with_any_label(doc_text, labels):
            return True
        i = j + 1
    return False


def _label_candidates(label: str) -> list[str]:
    """
    Build the distinct, whitespace-normalized spellings of `label` to search for.

    In order: the stripped label, the label without trailing colons, the text
    before the first "(", and the leading "<Word> <number>" prefix (e.g.
    "Theorem 1.2" from "Theorem 1.2 Cauchy criterion"). Blank and duplicate
    entries are dropped.
    """
    raw_label = label.strip()
    variants = [raw_label]
    if raw_label.endswith(":"):
        variants.append(raw_label.rstrip(":"))
    if "(" in raw_label:
        variants.append(raw_label.split("(", 1)[0])
    # Raw string with single backslashes: \s, \d and \. are regex escapes here.
    numbered = re.match(r"^([A-Za-z]+\s*\d+(?:\.\d+)*)", raw_label)
    if numbered:
        variants.append(numbered.group(1))

    candidates: list[str] = []
    for variant in variants:
        normalized = " ".join(variant.split())
        if normalized and normalized not in candidates:
            candidates.append(normalized)
    return candidates


def resolve_section_target_file(
    chapter: int,
    section: int,
    *,
    label: str | None = None,
) -> Path:
    """
    Resolve the Lean file for a specific item.

    If section part files exist and `label` is given, prefer (1) the first part
    whose main-declaration docstring starts with the label, then (2) the first
    part whose text merely contains it. Otherwise fall back to the last part
    file, or to the plain section file when there are no parts.

    Part files that disappear before they can be read are skipped.
    """
    parts = section_part_files(chapter, section)
    candidates = _label_candidates(label) if label else []

    if candidates:
        # Prefer the part whose *main declaration* docstring starts with the label.
        # This avoids false hits from helper docstrings that merely mention the label.
        readable: list[tuple[Path, str]] = []
        for part in parts:
            try:
                text = part.read_text(encoding="utf-8")
            except FileNotFoundError:
                continue
            if _part_has_main_docstring_label(text=text, labels=candidates):
                return part
            # Keep the text so the fallback pass does not re-read the file.
            readable.append((part, text))

        # Fallback: legacy broad text match, also on whitespace-collapsed text.
        for part, text in readable:
            if any(candidate in text for candidate in candidates):
                return part
            normalized_text = " ".join(text.split())
            if any(candidate in normalized_text for candidate in candidates):
                return part

    if parts:
        return parts[-1]

    return section_file(chapter, section)


def _default_codex_bin() -> str:
    """
    Return the repo-local codex wrapper at `<ROOT>/bin/codex` when that path exists
    (isolated CODEX_HOME), and the bare command name `codex` (looked up on PATH) otherwise.
    Override with env CODEX_BIN=/path/to/codex.
    """
    local_wrapper = ROOT / "bin" / "codex"
    return str(local_wrapper) if local_wrapper.exists() else "codex"

# Codex executable: CODEX_BIN when set to a non-blank value, otherwise the default
# wrapper/`codex` lookup. A blank CODEX_BIN is treated as unset so the command
# never starts with an empty executable name.
CODEX_BIN = _as_nonempty_env("CODEX_BIN") or _default_codex_bin()
# Extra CLI arguments, split with shell-like quoting rules.
CODEX_EXTRA_ARGS = shlex.split(os.getenv("CODEX_EXTRA_ARGS", ""))
CODEX_CMD = [CODEX_BIN, "exec", "--full-auto"] + CODEX_EXTRA_ARGS


def lake_env_lean_cmd(lean_file: str) -> list[str]:
    """Build the argument list for running `lake env lean <lean_file>`."""
    command = ["lake", "env", "lean"]
    command.append(lean_file)
    return command


def get_aihubmix_api_key() -> str | None:
    return os.getenv("AIHUBMIX_API_KEY")


def get_aihubmix_api_url() -> str:
    """Return AIHUBMIX_API_URL, defaulting to the aihubmix chat-completions URL when unset."""
    default_url = "https://aihubmix.com/v1/chat/completions"
    return os.environ.get("AIHUBMIX_API_URL", default_url)


def get_aihubmix_model() -> str:
    """Return AIHUBMIX_MODEL, defaulting to "gpt-4o-mini" when unset."""
    default_model = "gpt-4o-mini"
    return os.environ.get("AIHUBMIX_MODEL", default_model)


def get_nl_hint_timeout() -> int:
    """
    Return the NL hint timeout from NL_HINT_TIMEOUT (default 30).

    An unset, empty or whitespace-only value yields the default instead of
    failing on `int("")`.

    Raises:
        ValueError: if the variable is set to something that is not an integer.
    """
    raw = os.getenv("NL_HINT_TIMEOUT", "").strip()
    if not raw:
        return 30
    try:
        return int(raw)
    except ValueError as exc:
        raise ValueError(f"NL_HINT_TIMEOUT must be an integer, got {raw!r}") from exc


def get_enable_nl_hints_default() -> bool:
    """
    Return True when ENABLE_NL_HINTS is set to a truthy word.

    Accepted values, case-insensitive and ignoring surrounding whitespace:
    "1", "true", "yes", "on". Anything else, including unset, is False.
    """
    flag = os.getenv("ENABLE_NL_HINTS", "").strip().lower()
    return flag in {"1", "true", "yes", "on"}


def _as_nonempty_env_default(name: str, default: str) -> str | None:
    value = os.getenv(name, default)
    value = value.strip() if isinstance(value, str) else ""
    return value or None


def get_final_agent_a_model() -> str | None:
    """Return FINAL_AGENT_A_MODEL (default "gpt-5.3-codex"), or None when it is blank."""
    env_name, fallback = "FINAL_AGENT_A_MODEL", "gpt-5.3-codex"
    return _as_nonempty_env_default(env_name, fallback)


def get_final_agent_a_reasoning_effort() -> str | None:
    """Return FINAL_AGENT_A_REASONING_EFFORT (default "medium"), or None when it is blank."""
    env_name, fallback = "FINAL_AGENT_A_REASONING_EFFORT", "medium"
    return _as_nonempty_env_default(env_name, fallback)


def get_final_agent_b_model() -> str | None:
    """Return FINAL_AGENT_B_MODEL (default "gpt-5.3-codex"), or None when it is blank."""
    env_name, fallback = "FINAL_AGENT_B_MODEL", "gpt-5.3-codex"
    return _as_nonempty_env_default(env_name, fallback)


def get_final_agent_b_reasoning_effort() -> str | None:
    """Return FINAL_AGENT_B_REASONING_EFFORT (default "low"), or None when it is blank."""
    env_name, fallback = "FINAL_AGENT_B_REASONING_EFFORT", "low"
    return _as_nonempty_env_default(env_name, fallback)


def get_final_agent_c_model() -> str | None:
    """Return FINAL_AGENT_C_MODEL (default "gpt-5.3-codex"), or None when it is blank."""
    env_name, fallback = "FINAL_AGENT_C_MODEL", "gpt-5.3-codex"
    return _as_nonempty_env_default(env_name, fallback)


def get_final_agent_c_reasoning_effort() -> str | None:
    """Return FINAL_AGENT_C_REASONING_EFFORT (default "high"), or None when it is blank."""
    env_name, fallback = "FINAL_AGENT_C_REASONING_EFFORT", "high"
    return _as_nonempty_env_default(env_name, fallback)


# Create the log, metrics and Codex working directories when this module is imported.
# NOTE(review): INFRA_LOGS_DIR is not created here — confirm whether that is intentional.
for _required_dir in (
    STATEMENT_LOGS_DIR,
    PROOF_LOGS_DIR,
    FINAL_LOGS_DIR,
    METRICS_DIR,
    CODEX_STATEMENT_CWD,
    CODEX_PROOF_CWD,
    CODEX_FINAL_CWD,
):
    _required_dir.mkdir(parents=True, exist_ok=True)
del _required_dir  # do not leave the loop variable in the module namespace
