from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path
from typing import Any


def _is_ident_char(ch: str) -> bool:
    return ch.isalnum() or ch in {"_", "'"}


def find_sorry_locations(lean_file: Path) -> list[dict[str, Any]]:
    """
    Scan the Lean file and return locations of `sorry` tokens (line/col),
    ignoring comments, string literals, and character literals.

    Regions that are skipped:
      * line comments (`--` up to the end of the line),
      * block comments (`/- ... -/`), which may nest,
      * double-quoted strings, where a backslash escapes the next character,
      * character literals such as `'a'`, `'"'` or `'\\n'`, so that a double
        quote inside one is not mistaken for the start of a string.

    Args:
        lean_file: Path of the file to scan; it is read as UTF-8.

    Returns:
        A list of dicts in source order, each with the keys "line" and "col"
        (both 1-based, counted in characters) and "text" (always "sorry").
        A match is only reported when the characters directly before and
        after it are not identifier characters per `_is_ident_char`.
    """
    text = lean_file.read_text(encoding="utf-8")
    size = len(text)
    locs: list[dict[str, Any]] = []
    i = 0
    line = 1
    col = 1
    in_line_comment = False
    block_comment_depth = 0
    in_string = False

    def advance(n: int = 1) -> None:
        """Move the cursor `n` characters forward, keeping line/col in sync."""
        nonlocal i, line, col
        for _ in range(n):
            if i >= size:
                return
            if text[i] == "\n":
                line += 1
                col = 1
            else:
                col += 1
            i += 1

    def char_literal_length() -> int:
        """Return the length of the character literal starting at `i`, else 0."""
        # An apostrophe right after an identifier character is a prime
        # (as in `x'`), not the opening quote of a literal.
        if i > 0 and _is_ident_char(text[i - 1]):
            return 0
        if text.startswith("\\", i + 1):
            # Escaped form. The longest shape handled is '\uXXXX' (8 chars),
            # so the closing quote must be found in text[i + 3 : i + 8];
            # text[i + 2] is the escaped character itself and is skipped.
            close = text.find("'", i + 3, i + 8)
            if close != -1 and "\n" not in text[i:close]:
                return close - i + 1
            return 0
        # Plain form: exactly one character between the two quotes.
        if i + 2 < size and text[i + 1] not in "'\n" and text[i + 2] == "'":
            return 3
        return 0

    while i < size:
        ch = text[i]
        nxt = text[i + 1] if i + 1 < size else ""

        if in_line_comment:
            if ch == "\n":
                in_line_comment = False
            advance(1)
            continue

        if block_comment_depth > 0:
            # Block comments nest, so track depth rather than a flag.
            if ch == "/" and nxt == "-":
                block_comment_depth += 1
                advance(2)
                continue
            if ch == "-" and nxt == "/":
                block_comment_depth -= 1
                advance(2)
                continue
            advance(1)
            continue

        if in_string:
            # Skip the escaped character so that \" does not end the string.
            if ch == "\\" and nxt:
                advance(2)
                continue
            if ch == "\"":
                in_string = False
                advance(1)
                continue
            advance(1)
            continue

        if ch == "-" and nxt == "-":
            in_line_comment = True
            advance(2)
            continue
        if ch == "/" and nxt == "-":
            block_comment_depth += 1
            advance(2)
            continue
        if ch == "'":
            # Consume a whole character literal at once; otherwise a literal
            # like '"' would open a bogus string and hide later tokens.
            literal_len = char_literal_length()
            if literal_len:
                advance(literal_len)
                continue
        if ch == "\"":
            in_string = True
            advance(1)
            continue

        if ch == "s" and text.startswith("sorry", i):
            prev = text[i - 1] if i > 0 else ""
            nextc = text[i + 5] if i + 5 < size else ""
            # Require token boundaries so names like `sorryAx` or `my_sorry`
            # are not reported.
            if (not prev or not _is_ident_char(prev)) and (not nextc or not _is_ident_char(nextc)):
                locs.append({"line": line, "col": col, "text": "sorry"})
                advance(5)
                continue

        advance(1)

    return locs


def _main() -> int:
    parser = argparse.ArgumentParser(description="Locate `sorry` tokens in a Lean file.")
    parser.add_argument(
        "--file",
        type=Path,
        required=True,
        help="Path to the Lean file to scan.",
    )
    parser.add_argument(
        "--pretty",
        action="store_true",
        help="Pretty-print JSON output.",
    )
    args = parser.parse_args()

    if not args.file.exists():
        print(f"File not found: {args.file}", file=sys.stderr)
        return 2

    locs = find_sorry_locations(args.file)
    if args.pretty:
        print(json.dumps(locs, ensure_ascii=False, indent=2))
    else:
        print(json.dumps(locs, ensure_ascii=False))
    return 0


# Script entry point: hand _main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    sys.exit(_main())
