#!/usr/bin/env python3
"""
Sanitize log files for release.

We deliberately remove lines that reveal:
  - per-epoch timing ("time cost:")
  - voltage range prints ("v max:", "v min:", "vmin=", "vmax=")

We keep the rest of the log intact (including timestamp prefixes).

Usage examples:
  python tools/sanitize_logs.py --in-place records/best_0p029828/log_*.txt
  python tools/sanitize_logs.py --in-place --backup-ext .bak 'runs/**/*.txt'
  (quote recursive patterns so this script, not the shell, expands "**")
"""

from __future__ import annotations

import argparse
import glob
import os
from pathlib import Path


# Markers of lines that must not appear in released logs. sanitize_text()
# drops any line containing one of these as a plain, case-sensitive substring.
DROP_SUBSTRINGS = (
    "time cost:",  # per-epoch timing
    "v max:",  # voltage range prints
    "v min:",
    "vmin=",
    "vmax=",
)


def sanitize_text(text: str) -> str:
    """Return *text* without the lines that contain a ``DROP_SUBSTRINGS`` marker.

    The text is split with ``str.splitlines(keepends=True)``, so every
    surviving line keeps its original line terminator and the result is the
    plain concatenation of the kept lines. Matching is a case-sensitive
    substring test against each whole line (terminator included).
    """

    def _is_kept(line: str) -> bool:
        # A line survives only if none of the markers occurs in it.
        return not any(marker in line for marker in DROP_SUBSTRINGS)

    return "".join(filter(_is_kept, text.splitlines(keepends=True)))


def expand_inputs(inputs: list[str]) -> list[Path]:
    """Expand file names / glob patterns into a de-duplicated list of paths.

    Each entry of *inputs* is passed to ``glob.glob(..., recursive=True)``.
    When a pattern matches nothing, the pattern itself is kept as a literal
    path so the caller can decide how to report it.

    Duplicates are removed while preserving first-seen order. Two entries are
    considered the same file when they resolve to the same real path, so
    ``a.txt``, ``dir/../a.txt`` and an absolute spelling of the same file
    yield a single result. The returned ``Path`` is always the first spelling
    encountered; paths are not rewritten to their resolved form.

    Args:
        inputs: File names or glob patterns (``**`` is supported).

    Returns:
        The unique paths, in the order they were first produced.
    """
    candidates: list[Path] = []
    for pattern in inputs:
        matches = glob.glob(pattern, recursive=True)
        if matches:
            candidates.extend(Path(m) for m in matches)
        else:
            # No match: keep the literal so a missing file is still visible
            # to the caller instead of silently disappearing.
            candidates.append(Path(pattern))

    seen: set[str] = set()
    unique: list[Path] = []
    for path in candidates:
        # realpath() does not require the path to exist; normcase() folds
        # case on platforms where the file system is case-insensitive.
        key = os.path.normcase(os.path.realpath(path))
        if key in seen:
            continue
        seen.add(key)
        unique.append(path)
    return unique


def main() -> int:
    """Command-line entry point: sanitize the given log files.

    Modes (one is required):
      * ``--stdout``   print the sanitized content of exactly one input.
      * ``--in-place`` rewrite each input file whose content changes.

    With ``--backup-ext EXT`` a byte-exact copy of the original file is
    written to ``<file>EXT`` before it is rewritten. An already existing
    backup is never overwritten.

    In-place rewriting round-trips the file through UTF-8 with the
    ``surrogateescape`` error handler and binary I/O, so bytes that are not
    valid UTF-8 and the original line endings of the kept lines are preserved
    exactly. Only the dropped lines disappear.

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: on invalid option combinations or when an input path does
            not exist. All inputs are checked before any file is modified.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--in-place",
        action="store_true",
        help="Rewrite files in place (required unless --stdout).",
    )
    parser.add_argument(
        "--stdout",
        action="store_true",
        help="Print sanitized content to stdout (only valid for a single input).",
    )
    parser.add_argument(
        "--backup-ext",
        default="",
        help="If set, write a backup next to the file (e.g. .bak) before rewriting.",
    )
    parser.add_argument("inputs", nargs="+", help="Files or glob patterns.")
    args = parser.parse_args()

    paths = expand_inputs(args.inputs)
    if args.stdout and len(paths) != 1:
        raise SystemExit("--stdout requires exactly one input")
    if not args.stdout and not args.in_place:
        raise SystemExit("Specify --in-place or --stdout")

    # Validate everything up front so a bad argument cannot abort the run
    # after some files have already been rewritten.
    for path in paths:
        if not path.exists():
            raise SystemExit(f"Missing file: {path}")

    for path in paths:
        if path.is_dir():
            continue

        if args.stdout:
            # Display only: undecodable bytes are shown as U+FFFD.
            text = path.read_text(encoding="utf-8", errors="replace")
            print(sanitize_text(text), end="")
            continue

        # Binary read + surrogateescape makes decode/encode lossless, so the
        # rewrite cannot corrupt non-UTF-8 bytes or change line endings.
        raw = path.read_bytes()
        text = raw.decode("utf-8", errors="surrogateescape")
        sanitized = sanitize_text(text)

        if sanitized == text:
            # Nothing to drop: leave the file (and its mtime) untouched.
            continue

        if args.backup_ext:
            backup_path = Path(str(path) + args.backup_ext)
            # Keep the first backup: on a re-run the file on disk is already
            # sanitized and must not replace the pristine copy.
            if not backup_path.exists():
                backup_path.write_bytes(raw)

        path.write_bytes(sanitized.encode("utf-8", errors="surrogateescape"))

    return 0


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())

