from __future__ import annotations

import os
import re
import uuid
from datetime import datetime, timezone
from pathlib import Path


# Runs of characters that are not ASCII letters, digits, ".", "_" or "-";
# slugify() replaces each such run with a single "_".
_NON_SLUG_CHARS = re.compile(r"[^A-Za-z0-9._-]+")
# codex-cli output formats observed:
# - "tokens used 12345"
# - "tokens used\n12,345"
# The pattern is case-insensitive, tolerates an optional ":" or "=" and any
# whitespace (including a newline) before the number, and captures the digits
# together with their "," separators as group 1.
_TOKENS_USED_RE = re.compile(
    r"tokens used(?:\s*[:=]?\s*|\s*\n\s*)([0-9][0-9,]*)",
    re.IGNORECASE,
)


def utc_compact_timestamp() -> str:
    """Return the current UTC time formatted as ``YYYYMMDDTHHMMSSZ``."""
    now_utc = datetime.now(tz=timezone.utc)
    return now_utc.strftime("%Y%m%dT%H%M%SZ")


def make_log_id() -> str:
    """Build a log identifier of the form ``<utc timestamp>_<pid>_<hex>``.

    The three fields are the compact UTC timestamp, the current process id
    and the first eight hex digits of a freshly generated UUID4.
    """
    stamp = utc_compact_timestamp()
    pid = os.getpid()
    random_suffix = uuid.uuid4().hex[:8]
    return "_".join((stamp, str(pid), random_suffix))


def slugify(text: str | None, *, max_len: int = 60) -> str:
    """Convert *text* into a short token that is safe to embed in a filename.

    Every run of characters other than ASCII letters, digits, ``.``, ``_``
    and ``-`` (this includes whitespace and path separators) is replaced by a
    single ``_``. Leading and trailing ``_`` are removed and the result is cut
    to at most *max_len* characters.

    Args:
        text: Arbitrary input; ``None`` and the empty string are accepted.
        max_len: Maximum length of the slug. Values below 1 leave no room for
            any character, so the ``"na"`` placeholder is returned instead.

    Returns:
        The slug, or ``"na"`` when nothing usable remains.
    """
    if not text:
        return "na"
    # Whitespace and path separators are outside the allowed character set,
    # so this single substitution handles them as well.
    collapsed = _NON_SLUG_CHARS.sub("_", text)
    collapsed = re.sub(r"_+", "_", collapsed).strip("_")
    # Clamp so a negative max_len cannot slice from the end, and strip again
    # because the cut may land right after a separator ("abc_def"[:4]).
    truncated = collapsed[: max(max_len, 0)].rstrip("_")
    return truncated or "na"


def build_log_filename(*parts: str | None, ext: str = ".log") -> str:
    """Compose a log filename from descriptive parts plus a fresh log id.

    Falsy parts are skipped; each remaining part is slugified (capped at 80
    characters) and the slugs are joined with ``_``. When no part remains the
    base name is ``"codex"``. The log id and *ext* are appended to the base.
    """
    slugs = []
    for part in parts:
        if part:
            slugs.append(slugify(part, max_len=80))
    if slugs:
        base = "_".join(slugs)
    else:
        base = "codex"
    return f"{base}_{make_log_id()}{ext}"


def parse_tokens_used(text: str) -> int | None:
    """Return the count from the last "tokens used" report found in *text*.

    Falsy input is treated as an empty string. Commas inside the captured
    number are dropped before conversion. Returns ``None`` when no report
    is present.
    """
    last_number = None
    for match in _TOKENS_USED_RE.finditer(text or ""):
        # Keep overwriting so that only the final occurrence survives.
        last_number = match.group(1)
    if last_number is None:
        return None
    return int(last_number.replace(",", ""))


def extract_tokens_used_from_file(path: Path, *, tail_bytes: int = 256_000) -> int | None:
    """Parse the token count from the end of the file at *path*.

    Only the final *tail_bytes* bytes are read (the whole file when it is
    shorter). They are decoded as UTF-8 with undecodable bytes replaced, then
    handed to ``parse_tokens_used``.

    Returns:
        The parsed count, or ``None`` when the file does not exist or holds
        no token report in the inspected tail.
    """
    try:
        with path.open("rb") as handle:
            handle.seek(0, os.SEEK_END)
            start = handle.tell() - tail_bytes
            if start < 0:
                start = 0
            handle.seek(start)
            raw_tail = handle.read()
    except FileNotFoundError:
        return None
    decoded = raw_tail.decode("utf-8", errors="replace")
    return parse_tokens_used(decoded)


def file_snapshot(path: Path) -> dict[str, int | bool]:
    """Capture existence, size, mtime and newline count of *path*.

    Returns:
        ``{"exists": False}`` when the path is missing (including when it
        disappears between the existence check and the ``stat`` call).
        Otherwise a dict with ``exists``, ``size_bytes``, ``mtime_ns`` and
        ``line_count``. ``line_count`` is the number of ``\\n`` bytes in the
        file, or 0 when the content cannot be read.
    """
    if not path.exists():
        return {"exists": False}
    try:
        stat_result = path.stat()
    except FileNotFoundError:
        return {"exists": False}

    newline_total = 0
    try:
        with path.open("rb") as handle:
            # Read in 1 MiB blocks so large files are never loaded whole.
            while True:
                block = handle.read(1024 * 1024)
                if block == b"":
                    break
                newline_total += block.count(b"\n")
    except OSError:
        # Any read failure discards the partial count.
        newline_total = 0

    snapshot = {"exists": True}
    snapshot["size_bytes"] = int(getattr(stat_result, "st_size", 0))
    snapshot["mtime_ns"] = int(getattr(stat_result, "st_mtime_ns", 0))
    snapshot["line_count"] = int(newline_total)
    return snapshot


def snapshot_delta(before: dict[str, int | bool], after: dict[str, int | bool]) -> dict[str, int | bool]:
    """Compute the change between two ``file_snapshot``-style dicts.

    Returns:
        When both snapshots report the file as existing, a dict with
        ``exists`` set to ``True`` and the ``after - before`` differences of
        size, line count and mtime (missing fields count as 0). Otherwise a
        dict holding only ``exists``, taken from *after*.
    """

    def _change(field: str) -> int:
        return int(after.get(field, 0)) - int(before.get(field, 0))

    if before.get("exists") and after.get("exists"):
        return {
            "exists": True,
            "size_bytes_delta": _change("size_bytes"),
            "line_count_delta": _change("line_count"),
            "mtime_ns_delta": _change("mtime_ns"),
        }
    return {"exists": bool(after.get("exists", False))}
