#!/usr/bin/env python3
from __future__ import annotations

import argparse
import fnmatch
import json
import shutil
import subprocess
import sys
from pathlib import Path
from typing import List, Iterable

# Try to use jupytext if available (nicer formatting); otherwise fall back.
try:
    import jupytext  # type: ignore
except Exception:  # noqa: S110
    jupytext = None  # type: ignore

# Glob patterns (fnmatch syntax) that select files when --only is not given.
INCLUDE_PATTERNS = ("*.py", "*.ipynb")  # default patterns
# Names to leave out of the bundle: each entry is matched as a glob against
# the file name and compared verbatim against every component of the path.
EXCLUDE_PATTERNS = ("llm.py", "dump", "scripts")
# Banner written before each bundled file; "{path}" is filled via str.format.
SEPARATOR = (
    "\n# -----------------------------------------\n"
    "# {path}\n"
    "# -----------------------------------------\n"
)
# Heading placed between the bundled file contents and the directory tree.
TREE_HEADER = "\n\n# ==== Git-visible directory tree ====\n"

# ────────────────────── CLI ──────────────────────


def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the bundler's command-line options.

    Args:
        argv: Argument strings to parse, without the program name. When
            ``None`` (the default) argparse reads ``sys.argv[1:]``, so
            existing zero-argument callers behave exactly as before.

    Returns:
        A namespace whose ``only`` attribute is the list of patterns passed
        to ``--only``, or ``None`` when the flag was not given.
    """
    parser = argparse.ArgumentParser(
        description="Bundle git-visible files; render .ipynb as readable Python."
    )
    parser.add_argument(
        "--only",
        nargs="+",
        metavar="PATTERN",
        help='Override include patterns (space-separated). Example: --only "*.py" "lmkit/*" "*.ipynb"',
    )
    return parser.parse_args(argv)


# ────────────────────── Git helpers ──────────────────────


def git_visible_file_paths() -> List[Path]:
    """
    Return *all* paths Git sees (tracked or untracked, NOT ignored).

    Paths are exactly as ``git ls-files`` reports them, i.e. relative to the
    current working directory, de-duplicated and sorted by their string form.

    Exits the process with an error message when ``git`` is not installed or
    the command fails (for example outside a repository).
    """
    import os  # local import: only needed to decode the raw path bytes

    try:
        # -z emits NUL-terminated, verbatim paths. Without it Git wraps
        # "unusual" names (non-ASCII, quotes, tabs, ...) in double quotes with
        # C-style escapes, yielding strings that do not name real files.
        raw = subprocess.check_output(
            ["git", "ls-files", "-z", "--cached", "--others", "--exclude-standard"],
            stderr=subprocess.DEVNULL,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        sys.exit("Error: not a Git repo or `git` missing.")

    # Decode with the filesystem encoding so the names round-trip to disk.
    # The set drops repeats: an unmerged path is listed once per index stage.
    names = {os.fsdecode(entry) for entry in raw.split(b"\0") if entry}
    return sorted((Path(name) for name in names), key=str)


def git_visible_files_by_patterns(include_patterns: tuple[str, ...]) -> List[Path]:
    """Filter *git_visible_file_paths()* down to the files worth bundling.

    A path is kept when its forward-slash form or its bare file name matches
    at least one glob in *include_patterns*, and it is not ruled out by
    EXCLUDE_PATTERNS. The order of *git_visible_file_paths()* is preserved.
    """

    def wanted(candidate: Path) -> bool:
        # Try every include glob against both the full relative path
        # (forward slashes) and the file name alone.
        targets = (candidate.as_posix(), candidate.name)
        return any(
            fnmatch.fnmatch(target, glob)
            for glob in include_patterns
            for target in targets
        )

    def banned(candidate: Path) -> bool:
        # File-name globs first (e.g. "llm.py") ...
        for glob in EXCLUDE_PATTERNS:
            if fnmatch.fnmatch(candidate.name, glob):
                return True
        # ... then exact path-component names (e.g. "scripts", "dump").
        return not set(candidate.parts).isdisjoint(EXCLUDE_PATTERNS)

    return [
        candidate
        for candidate in git_visible_file_paths()
        if wanted(candidate) and not banned(candidate)
    ]


# ────────────────────── .ipynb → .py helpers ──────────────────────


def _join_source(src: Iterable[str] | str) -> str:
    """Return *src* as one string: unchanged if already a str, else its pieces concatenated."""
    return src if isinstance(src, str) else "".join(src)


def ipynb_to_python_text(path: Path) -> str:
    """
    Render the notebook at *path* as Python source text.

    When jupytext was imported successfully, the notebook is read with it and
    written out in its 'py:percent' format. If jupytext is unavailable, the
    built-in JSON fallback converter is used instead. If jupytext is present
    but raises, the fallback output is returned with a leading '# NOTE:' line
    that records the error.
    """
    if jupytext is None:
        return _ipynb_to_python_fallback(path)

    try:
        notebook = jupytext.read(path)
        rendered = jupytext.writes(notebook, fmt="py:percent")
    except Exception as err:  # noqa: S110
        # Keep the reason visible in the bundle, then degrade gracefully.
        note = f"# NOTE: jupytext failed on {path}: {err}\n"
        return note + _ipynb_to_python_fallback(path)
    return rendered


def _ipynb_to_python_fallback(path: Path) -> str:
    """Render a notebook as '# %%'-style Python using only its JSON on disk.

    Code cells are emitted verbatim (trailing newlines stripped). Markdown
    cells, and cells of any other type (labelled ``raw``), are emitted as
    ``# ``-prefixed comment lines. Cells are numbered from 1 in file order.

    A file that cannot be read, is not valid JSON, or whose JSON is not a
    notebook-shaped object (a dict whose ``cells`` is a list) produces a
    single ``# Failed to parse notebook ...`` comment line instead.
    """
    try:
        with path.open("r", encoding="utf-8") as f:
            nb = json.load(f)
    except Exception as e:  # noqa: S110
        return f"# Failed to parse notebook {path}: {e}\n"

    # Valid JSON is not necessarily a notebook: guard the shape before use so
    # a stray list/null degrades to a comment rather than an AttributeError.
    cells = nb.get("cells", []) if isinstance(nb, dict) else None
    if not isinstance(cells, list):
        return f"# Failed to parse notebook {path}: unexpected notebook structure\n"

    out_lines: list[str] = [f"# Converted from notebook: {path}", ""]
    for i, cell in enumerate(cells, start=1):
        if not isinstance(cell, dict):
            # Malformed entry: skip it but keep numbering aligned with the file.
            continue
        ctype = cell.get("cell_type", "")
        # `or ""` also covers an explicit JSON null for "source".
        src = _join_source(cell.get("source") or "")

        if ctype == "code":
            out_lines.append(f"# %% cell {i}")
            out_lines.append(src.rstrip("\n"))
        else:
            # Markdown, raw, or anything unknown is rendered as a comment block.
            label = "markdown" if ctype == "markdown" else "raw"
            out_lines.append(f"# %% [{label}] cell {i}")
            out_lines.extend("# " + line for line in src.splitlines())
        out_lines.append("")

    return "\n".join(out_lines).rstrip() + "\n"


def read_file_as_text(path: Path) -> str:
    """Return the text to bundle for *path*.

    Files with the ``.ipynb`` suffix are converted to Python text; every other
    file is read as UTF-8, with undecodable bytes dropped.
    """
    is_notebook = path.suffix == ".ipynb"
    if not is_notebook:
        with path.open("r", encoding="utf-8", errors="ignore") as handle:
            return handle.read()
    return ipynb_to_python_text(path)


# ────────────────────── builders ──────────────────────


def bundle(files: List[Path]) -> str:
    """Concatenate the text of *files*, each preceded by a SEPARATOR banner.

    Every file's content is guaranteed to end with a newline in the result.
    Bundling is best-effort: a file that cannot be read is skipped, and a
    warning naming it is written to stderr so the omission is visible.

    Args:
        files: Paths to include, in output order.

    Returns:
        The combined text, or an empty string when nothing could be read.
    """
    parts: list[str] = []
    for path in files:
        # Only the read is guarded; one bad file must not abort the bundle.
        try:
            content = read_file_as_text(path)
        except Exception as exc:
            print(f"Warning: skipped {path}: {exc}", file=sys.stderr)
            continue
        parts.append(SEPARATOR.format(path=path))
        parts.append(content)
        if not content.endswith("\n"):
            parts.append("\n")
    return "".join(parts)


def directory_tree(paths: List[Path]) -> str:
    """
    Render *paths* as an indented outline, one entry per line.

    Each nesting level is indented by four spaces, every directory is listed
    once with a trailing '/', and entries follow the sort order of the paths'
    component tuples. An empty input gives an empty string.
    """
    rendered: list[str] = []
    emitted: set[tuple[str, ...]] = set()
    for entry in sorted(paths, key=lambda item: item.parts):
        components = entry.parts
        leaf_depth = len(components) - 1
        for depth, label in enumerate(components):
            prefix = components[: depth + 1]
            if prefix in emitted:
                continue
            emitted.add(prefix)
            # Everything above the final component is a directory.
            marker = "/" if depth < leaf_depth else ""
            rendered.append("    " * depth + label + marker)
    return "\n".join(rendered)


# ────────────────────── clipboard helper ──────────────────────


def copy(text: str) -> None:
    """Copy *text* to the clipboard via pyperclip or OS tools; else print it.

    Order of attempts:
      1. ``pyperclip``, if it can be imported.
      2. A platform tool found on PATH: ``pbcopy`` (macOS), ``clip``
         (Windows), or ``xclip`` (any other platform that has it).
      3. Plain ``print(text)`` to stdout.

    A successful copy prints a one-line confirmation to stdout. A clipboard
    mechanism that is present but fails is reported on stderr, and the next
    option is tried, so the payload is never lost to a clipboard error.
    """
    try:
        import pyperclip  # type: ignore
    except ImportError:
        pyperclip = None

    if pyperclip is not None:
        try:
            pyperclip.copy(text)
        except Exception as exc:
            # pyperclip raises at call time when it finds no usable backend
            # (e.g. headless Linux); fall through to the other options.
            print(f"Warning: pyperclip failed ({exc}).", file=sys.stderr)
        else:
            print(f"Copied {len(text)} characters to clipboard with pyperclip.")
            return

    command: list[str] | None
    if sys.platform == "darwin" and shutil.which("pbcopy"):
        command = ["pbcopy"]
    elif sys.platform.startswith("win") and shutil.which("clip"):
        command = ["clip"]
    elif shutil.which("xclip"):
        command = ["xclip", "-selection", "clipboard"]
    else:
        command = None

    if command is not None:
        tool = command[0]
        try:
            subprocess.run(command, input=text, text=True, check=True)
        except (OSError, subprocess.SubprocessError, UnicodeError) as exc:
            print(
                f"Warning: {tool} failed ({exc}); printing to stdout instead.",
                file=sys.stderr,
            )
        else:
            print(f"Copied {len(text)} characters to clipboard with {tool}.")
            return

    print(text)


# ────────────────────── main ──────────────────────


def main() -> None:
    """Run the CLI: select files, build the bundle and tree, and copy the result.

    Uses the ``--only`` patterns when given, otherwise INCLUDE_PATTERNS.
    Exits with a message when no Git-visible file matches.
    """
    cli = parse_args()
    patterns = INCLUDE_PATTERNS if not cli.only else tuple(cli.only)

    selected = git_visible_files_by_patterns(patterns)
    if not selected:
        sys.exit("No files matching include patterns found that Git doesn’t ignore.")

    # Payload layout: file contents, tree heading, tree, final newline.
    sections = [
        bundle(selected),
        TREE_HEADER,
        directory_tree(selected),
        "\n",
    ]
    copy("".join(sections))


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
