#!/usr/bin/env python3
from __future__ import annotations

import argparse
import json
import os
import sys
from collections import Counter
from datetime import datetime
from pathlib import Path
from typing import Any


# Parent of the directory that contains this file (symlinks resolved); the
# default log tree is looked up under ROOT / "log".
ROOT = Path(__file__).resolve().parents[1]


def _sanitize_project(name: str) -> str:
    """Turn a user-supplied project name into a single safe directory name.

    Surrounding whitespace is stripped, every character that is not
    alphanumeric or one of ``.``, ``_``, ``-`` becomes ``_`` (path separators
    included), runs of underscores are collapsed, and leading/trailing
    underscores are removed.

    Args:
        name: Raw project name; ``None`` or an empty string is tolerated.

    Returns:
        The sanitized name, or ``"project"`` when nothing usable remains or
        when the result consists only of dots (``"."``, ``".."``, ...).
    """
    cleaned = "".join(
        ch if (ch.isalnum() or ch in "._-") else "_" for ch in (name or "").strip()
    )
    cleaned = "_".join(part for part in cleaned.split("_") if part)
    # A dots-only name is a relative path reference rather than a directory
    # name: joined onto a base directory it would alias the base or escape
    # to its parent, so treat it like an empty name.
    if not cleaned.strip("."):
        return "project"
    return cleaned


def _resolve_log_dir(*, project: str | None, log_dir: str | None) -> Path:
    """Pick the directory that holds the log artifacts.

    Precedence: an explicit ``log_dir`` (``~`` expanded, then resolved to an
    absolute path); otherwise ``ROOT/log/<sanitized project>`` when a
    non-blank project is given; otherwise ``ROOT/log/default``.
    """
    if not log_dir:
        has_project = bool(project and project.strip())
        subdir = _sanitize_project(project) if has_project else "default"
        return ROOT / "log" / subdir
    explicit = Path(log_dir).expanduser()
    return explicit.resolve()


def _read_json(path: Path) -> dict[str, Any]:
    """Load a UTF-8 JSON file whose top-level value is an object.

    Args:
        path: File to read.

    Returns:
        The decoded JSON object.

    Raises:
        OSError: If the file cannot be read.
        ValueError: If the content is not valid UTF-8 or not valid JSON
            (``UnicodeDecodeError`` and ``json.JSONDecodeError`` are both
            subclasses), or if the top-level value is not an object.
    """
    payload = json.loads(path.read_text(encoding="utf-8"))
    # The return type promises a dict and callers use ``.get`` on it; reject
    # lists/scalars here instead of failing later with AttributeError.
    if not isinstance(payload, dict):
        raise ValueError(
            f"expected a JSON object in {path}, got {type(payload).__name__}"
        )
    return payload


def _iter_jsonl(path: Path) -> list[dict[str, Any]]:
    """Read a JSON Lines file and return its object records in file order.

    Blank lines, lines that are not valid JSON, and lines whose JSON value is
    not an object are skipped silently, so a partially written or corrupted
    log still yields its readable records.

    Args:
        path: UTF-8 encoded JSONL file.

    Returns:
        One dict per well-formed object line.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    records: list[dict[str, Any]] = []
    # Iterate the file handle rather than ``str.splitlines()``: splitlines
    # also breaks on U+0085/U+2028/U+2029, which may appear unescaped inside
    # JSON strings (e.g. when written with ``ensure_ascii=False``) and would
    # cut such a record in two, causing it to be dropped.
    with path.open(encoding="utf-8") as handle:
        for line in handle:
            line = line.strip()
            if not line:
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                continue
            if isinstance(record, dict):
                records.append(record)
    return records


def _guess_run_id_from_summary_path(path: Path) -> str | None:
    """Extract ``<run_id>`` from a ``run_<run_id>_summary.json`` file name.

    Only the final path component is inspected. Returns ``None`` when the
    name lacks that prefix or suffix; the extracted id can be an empty
    string when nothing lies between them.
    """
    prefix, suffix = "run_", "_summary.json"
    filename = path.name
    if filename.startswith(prefix) and filename.endswith(suffix):
        return filename[len(prefix) : len(filename) - len(suffix)]
    return None


def _fmt_seconds(seconds: Any) -> str:
    """Format a duration in seconds as a short human-readable string.

    Args:
        seconds: Anything ``float()`` accepts (a number or numeric string).

    Returns:
        ``"<x.x>s"`` below one minute, ``"<x.x>m"`` below one hour and
        ``"<x.xx>h"`` otherwise; ``"na"`` when the value is missing,
        non-numeric, too large for a float, NaN or infinite.
    """
    try:
        value = float(seconds)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: an int too large to be represented as a float.
        return "na"
    infinity = float("inf")
    # NaN fails every comparison, so this one range test rejects NaN as well
    # as +/-inf, which would otherwise be rendered as "nanh" / "infh".
    if not -infinity < value < infinity:
        return "na"
    if value < 60:
        return f"{value:.1f}s"
    if value < 3600:
        return f"{value / 60:.1f}m"
    return f"{value / 3600:.2f}h"


def _fmt_int(value: Any) -> str:
    """Render ``value`` as a base-10 integer string, or ``"na"`` if it can't be.

    Args:
        value: Anything ``int()`` accepts; floats are truncated toward zero.

    Returns:
        The integer as text, or ``"na"`` for ``None``, non-numeric input,
        NaN, or infinity.
    """
    # int(float("inf")) raises OverflowError, not ValueError; catch it too so
    # one odd value cannot abort the caller's whole listing.
    try:
        return str(int(value))
    except (TypeError, ValueError, OverflowError):
        return "na"


def cmd_list(args: argparse.Namespace) -> int:
    """Print the most recently modified run summaries in the metrics directory.

    Reads ``args.metrics_dir``, ``args.pipeline`` (optional exact-match
    filter), ``args.limit`` (maximum number of rows; zero or a negative value
    lists nothing) and ``args.format`` (``"json"`` or a plain-text table).

    Summary files that cannot be read, are not valid JSON, or do not hold a
    JSON object are skipped.

    Returns:
        ``0`` on success (including when no runs are found), ``1`` when the
        metrics directory does not exist.
    """
    metrics_dir: Path = args.metrics_dir
    if not metrics_dir.exists():
        print(f"metrics dir not found: {metrics_dir}")
        return 1

    def _mtime_ns(path: Path) -> int:
        # A summary can disappear between glob() and stat(); sort it last
        # instead of aborting (reading it later fails and it is skipped).
        try:
            return path.stat().st_mtime_ns
        except OSError:
            return -1

    summaries = sorted(
        metrics_dir.glob("run_*_summary.json"),
        key=_mtime_ns,
        reverse=True,
    )

    rows: list[dict[str, Any]] = []
    for path in summaries:
        # Checked before appending so a limit of zero (or less) yields no
        # rows rather than one.
        if len(rows) >= args.limit:
            break
        run_id = _guess_run_id_from_summary_path(path)
        if not run_id:
            continue
        try:
            data = _read_json(path)
        except (OSError, ValueError, RecursionError):
            # Unreadable, malformed or pathologically nested summary: the
            # listing is best-effort, so skip it.
            continue
        if not isinstance(data, dict):
            # Valid JSON but not an object; there are no fields to report.
            continue
        pipeline = data.get("pipeline") or "na"
        if args.pipeline and pipeline != args.pipeline:
            continue
        rows.append(
            {
                "run_id": run_id,
                "pipeline": pipeline,
                "stage": data.get("stage"),
                "tokens_used_total": data.get("tokens_used_total"),
                "seconds_total": data.get("seconds_total"),
                # Fall back to the alternative key names when the primary
                # ones are absent.
                "items_failed": data.get("items_failed", data.get("files_failed")),
                "processed": data.get("processed", data.get("processed_files")),
                "summary_path": str(path),
            }
        )

    if args.format == "json":
        print(json.dumps(rows, ensure_ascii=False, indent=2))
        return 0

    if not rows:
        print("no runs found")
        return 0

    # The header uses the same column widths as the rows so the columns line up.
    print(
        f"{'pipeline':8} {'stage':5} {'processed':9} {'failed':6} "
        f"{'tokens':8} {'seconds':7} run_id"
    )
    for row in rows:
        print(
            # str() keeps the width spec valid even if the JSON value is not
            # a string (e.g. a list).
            f"{str(row['pipeline']):8} {str(row.get('stage') or 'na'):5} "
            f"{_fmt_int(row.get('processed')):9} {_fmt_int(row.get('items_failed')):6} "
            f"{_fmt_int(row.get('tokens_used_total')):8} {_fmt_seconds(row.get('seconds_total')):7} "
            f"{row['run_id']}"
        )
    return 0


def cmd_show(args: argparse.Namespace) -> int:
    """Print one run's summary and, when available, its event statistics.

    Reads ``args.metrics_dir``, ``args.run_id``, ``args.pretty`` (dump the
    whole summary JSON and stop) and ``args.show_run_start`` (also dump the
    ``data`` payload of the first ``run_start`` event).

    Returns:
        ``0`` on success; ``1`` when the summary file is missing, unreadable
        or not a JSON object, or when the events file cannot be read.
    """
    metrics_dir: Path = args.metrics_dir
    summary_path = metrics_dir / f"run_{args.run_id}_summary.json"
    if not summary_path.exists():
        print(f"summary not found: {summary_path}")
        return 1

    # Report a corrupt or unreadable summary as a normal CLI error instead of
    # letting the traceback escape.
    try:
        summary = _read_json(summary_path)
    except (OSError, ValueError) as exc:
        print(f"failed to read summary: {summary_path} ({exc})")
        return 1
    events_path = metrics_dir / f"run_{args.run_id}.jsonl"

    print(f"run_id: {args.run_id}")
    print(f"summary: {summary_path}")
    if events_path.exists():
        print(f"events : {events_path}")
    else:
        print(f"events : (missing) {events_path}")

    if args.pretty:
        print(json.dumps(summary, ensure_ascii=False, indent=2))
        return 0

    # The one-line digest needs key lookups, which only an object supports.
    if not isinstance(summary, dict):
        print(f"summary is not a JSON object: {summary_path}")
        return 1

    pipeline = summary.get("pipeline") or "na"
    stage = summary.get("stage")
    # Fall back to the alternative key names when the primary ones are absent.
    processed = summary.get("processed", summary.get("processed_files"))
    failed = summary.get("items_failed", summary.get("files_failed"))
    tokens = summary.get("tokens_used_total")
    seconds = summary.get("seconds_total")
    print(
        f"pipeline={pipeline} stage={stage} processed={processed} failed={failed} "
        f"tokens={tokens} seconds={seconds}"
    )

    if not events_path.exists():
        return 0

    try:
        events = _iter_jsonl(events_path)
    except (OSError, ValueError) as exc:
        # ValueError covers UnicodeDecodeError from a non-UTF-8 events file.
        print(f"failed to read events: {events_path} ({exc})")
        return 1

    # Only string event names are counted; they are printed in sorted order.
    event_counts = Counter(e.get("event") for e in events if isinstance(e.get("event"), str))
    if event_counts:
        top = ", ".join(f"{k}={v}" for k, v in sorted(event_counts.items()))
        print(f"event_counts: {top}")

    # Print the first run_start event that carries a dict payload, if any.
    for e in events:
        if e.get("event") == "run_start" and isinstance(e.get("data"), dict):
            data = e["data"]
            ts = e.get("ts")
            print(f"run_start_ts: {ts}")
            if args.show_run_start:
                print(json.dumps(data, ensure_ascii=False, indent=2))
            break

    return 0


def cmd_history(args: argparse.Namespace) -> int:
    """Print the latest history records recorded for one Lean file.

    Reads ``args.log_dir``, ``args.history_file`` (optional override of the
    default ``<log_dir>/final_logs/final_history.jsonl``), ``args.lean_file``
    (exact match against each record's ``lean_file``), ``args.kinds``
    (optional comma-separated ``kind`` filter), ``args.limit`` (keep only the
    last N matches; zero or a negative value keeps none) and ``args.format``.

    Returns:
        ``0`` on success, ``1`` when the history file does not exist.
    """
    log_dir: Path = args.log_dir
    if args.history_file:
        history_path = Path(args.history_file)
    else:
        history_path = log_dir / "final_logs" / "final_history.jsonl"
    if not history_path.exists():
        print(f"history not found: {history_path}")
        return 1

    kinds: set[str] | None = None
    if args.kinds:
        kinds = {k.strip() for k in args.kinds.split(",") if k.strip()}

    records: list[dict[str, Any]] = [
        rec
        for rec in _iter_jsonl(history_path)
        if rec.get("lean_file") == args.lean_file
        and (kinds is None or rec.get("kind") in kinds)
    ]

    # ``records[-0:]`` is the whole list, so a plain negative slice would
    # return everything for a limit of 0; handle non-positive limits here.
    records = records[-args.limit :] if args.limit > 0 else []
    if args.format == "json":
        print(json.dumps(records, ensure_ascii=False, indent=2))
        return 0

    for rec in records:
        ts = rec.get("ts") or "na"
        kind = rec.get("kind") or "na"
        task_id = rec.get("task_id") or "na"
        summary = rec.get("summary") or ""
        run_id = rec.get("run_id") or "na"
        print(f"{ts} kind={kind} task_id={task_id} run_id={run_id}")
        if summary:
            print(f"  summary: {summary}")
    return 0


def cmd_failures(args: argparse.Namespace) -> int:
    """Print the last few blocks of a pipeline's human-readable failure log.

    Reads ``args.log_dir``, ``args.pipeline`` (``"final"`` or ``"proof"``),
    ``args.limit`` (keep only the last N blocks; zero or a negative value
    keeps none) and ``args.format`` (``"json"`` or plain text).

    The log is split into blocks on empty lines; undecodable bytes are
    replaced rather than raising.

    Returns:
        ``0`` on success, ``1`` when the log file does not exist, ``2`` for
        an unsupported pipeline.
    """
    log_dir: Path = args.log_dir
    if args.pipeline == "final":
        path = log_dir / "final_logs" / "final_failures.log"
    elif args.pipeline == "proof":
        path = log_dir / "proof_logs" / "proof_failures.log"
    else:
        print("unsupported pipeline for failures")
        return 2

    if not path.exists():
        print(f"failure log not found: {path}")
        return 1

    text = path.read_text(encoding="utf-8", errors="replace")
    blocks = [chunk.strip() for chunk in text.split("\n\n") if chunk.strip()]
    # ``blocks[-0:]`` is the whole list, so a plain negative slice would
    # return everything for a limit of 0; handle non-positive limits here.
    blocks = blocks[-args.limit :] if args.limit > 0 else []
    if args.format == "json":
        print(json.dumps(blocks, ensure_ascii=False, indent=2))
        return 0
    for block in blocks:
        print(block)
        print()
    return 0


def _build_parser() -> argparse.ArgumentParser:
    """Assemble the top-level parser with its global options and subcommands."""
    top = argparse.ArgumentParser(description="View log/metrics/history artifacts under ./log/<project>/")
    top.add_argument(
        "--project",
        default=os.getenv("FORMAL_PROJECT", ""),
        type=str,
        help="Optional formalization project name (uses log/<project>/ when set).",
    )
    top.add_argument(
        "--log-dir",
        default="",
        type=str,
        help="Override log directory (default: log/default/ or log/<project>/).",
    )
    commands = top.add_subparsers(dest="cmd", required=True)

    # list: recent runs, newest first.
    list_cmd = commands.add_parser("list", help="List recent runs from log/<project>/metrics/")
    list_cmd.add_argument("--pipeline", type=str, default=None, help="Filter by pipeline (statement/proof/final).")
    list_cmd.add_argument("--limit", type=int, default=20)
    list_cmd.add_argument("--format", choices=["table", "json"], default="table")
    list_cmd.set_defaults(func=cmd_list)

    # show: a single run's summary and event counts.
    show_cmd = commands.add_parser("show", help="Show a run summary + event counts")
    show_cmd.add_argument("run_id", type=str)
    show_cmd.add_argument("--pretty", action="store_true", help="Print the full summary JSON.")
    show_cmd.add_argument("--show-run-start", action="store_true", help="Also print run_start metadata JSON.")
    show_cmd.set_defaults(func=cmd_show)

    # history: per-file records from the final history log.
    history_cmd = commands.add_parser("history", help="Show recent final history records for a Lean file")
    history_cmd.add_argument(
        "--lean-file",
        type=str,
        required=True,
        help="Lean file path stored in history (e.g. Chapters/Chap01/section01.lean).",
    )
    history_cmd.add_argument(
        "--kinds",
        type=str,
        default=None,
        help="Comma-separated kinds to filter (e.g. final_failure,agent_b_fix).",
    )
    history_cmd.add_argument("--limit", type=int, default=20)
    history_cmd.add_argument("--history-file", type=str, default=None, help="Override history JSONL path.")
    history_cmd.add_argument("--format", choices=["text", "json"], default="text")
    history_cmd.set_defaults(func=cmd_history)

    # failures: tail of the human-readable failure logs.
    failures_cmd = commands.add_parser("failures", help="Show recent failure blocks (human logs)")
    failures_cmd.add_argument("--pipeline", choices=["final", "proof"], required=True)
    failures_cmd.add_argument("--limit", type=int, default=5)
    failures_cmd.add_argument("--format", choices=["text", "json"], default="text")
    failures_cmd.set_defaults(func=cmd_failures)

    return top


def main(argv: list[str] | None = None) -> int:
    """Parse the command line, resolve the log directories, and dispatch.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use ``sys.argv``.

    Returns:
        The selected subcommand's return value, coerced to ``int``.
    """
    namespace = _build_parser().parse_args(argv)
    # Replace the raw --log-dir string with the resolved Path and derive the
    # metrics directory from it; the subcommand handlers read both.
    namespace.log_dir = _resolve_log_dir(project=namespace.project, log_dir=namespace.log_dir)
    namespace.metrics_dir = Path(namespace.log_dir) / "metrics"
    handler = namespace.func
    return int(handler(namespace))


# Script entry point: exit with the status code returned by main().
if __name__ == "__main__":
    raise SystemExit(main())
