#!/usr/bin/env python3
"""Summarize the anonymized AI4Math workflow ledger.

The manuscript should cite this command as the source of truth for PR counts:

    python scripts/summarize_ledger.py workflow_ledger.csv
"""

from __future__ import annotations

import argparse
import csv
import re
import statistics
from collections import Counter, defaultdict
from pathlib import Path


# Ledger timestamps are written as an offset: "T+", hours, "h", minutes, "m".
TIME_RE = re.compile(r"^T\+(\d+)h(\d+)m$")


def parse_relative_minutes(value: str) -> int:
    """Convert a ``T+<hours>h<minutes>m`` ledger timestamp to whole minutes.

    Args:
        value: Timestamp text such as ``"T+12h34m"``. Both numeric fields may
            have any number of digits; the minutes field is not range-checked,
            so ``"T+0h75m"`` yields 75.

    Returns:
        ``hours * 60 + minutes`` as an integer.

    Raises:
        ValueError: If ``value`` is not a string or does not consist of
            exactly one timestamp (surrounding whitespace, including a
            trailing newline, is rejected).
    """
    # fullmatch is deliberate: with Pattern.match the trailing ``$`` also
    # matches just before a final newline, letting "T+1h02m\n" slip through.
    # Non-strings (e.g. the None that csv.DictReader uses for short rows) are
    # reported with the same ValueError instead of an opaque TypeError.
    match = TIME_RE.fullmatch(value) if isinstance(value, str) else None
    if match is None:
        raise ValueError(f"expected T+HHhMMm timestamp, got {value!r}")
    hours, minutes = match.groups()
    return int(hours) * 60 + int(minutes)


def fmt_duration(minutes: int) -> str:
    """Format a minute count as ``<hours>h<MM>m``.

    Args:
        minutes: Duration in whole minutes. May be negative.

    Returns:
        The duration with unpadded hours and zero-padded two-digit minutes,
        e.g. ``125 -> "2h05m"``. Negative values are rendered as the magnitude
        with a leading minus sign, e.g. ``-5 -> "-0h05m"``.
    """
    # divmod floors toward negative infinity, so divmod(-5, 60) == (-1, 55)
    # would print the misleading "-1h55m"; split the magnitude instead.
    sign = "-" if minutes < 0 else ""
    hours, mins = divmod(abs(minutes), 60)
    return f"{sign}{hours}h{mins:02d}m"


def concurrency_stats(rows: list[dict[str, str]]) -> tuple[int, float]:
    """Compute peak and time-averaged numbers of simultaneously open PRs.

    Args:
        rows: Ledger rows carrying integer-valued strings under the keys
            ``opened_min``, ``closed_min`` and ``duration_min``.

    Returns:
        A ``(max_active, mean_active)`` pair. ``max_active`` is the largest
        number of PRs open at once. ``mean_active`` is the sum of all
        ``duration_min`` values divided by the span from the earliest open to
        the latest close, or ``0.0`` when that span is zero. An empty ``rows``
        list yields ``(0, 0.0)``.
    """
    if not rows:
        # Nothing was ever open; avoid min()/max() on an empty sequence.
        return 0, 0.0

    spans = [(int(row["opened_min"]), int(row["closed_min"])) for row in rows]

    events: list[tuple[int, int]] = []
    for opened, closed in spans:
        events.append((opened, 1))
        events.append((closed, -1))
    # At equal timestamps opens (+1) sort before closes (-1). A PR that opens
    # in the minute another closes therefore counts as overlapping, and a PR
    # opened and closed in the same minute still registers as open once.
    events.sort(key=lambda event: (event[0], -event[1]))

    active = 0
    max_active = 0
    for _, delta in events:
        active += delta
        max_active = max(max_active, active)

    first_open = min(opened for opened, _ in spans)
    last_close = max(closed for _, closed in spans)
    window = last_close - first_open
    active_pr_minutes = sum(int(row["duration_min"]) for row in rows)
    mean_active = active_pr_minutes / window if window else 0.0
    return max_active, mean_active


def load_rows(path: Path) -> list[dict[str, str]]:
    """Read the ledger CSV and attach minute-based timing fields to each row.

    Every returned row keeps its original CSV columns and gains three string
    fields: ``opened_min`` and ``closed_min`` (the parsed ``opened_time`` and
    ``closed_time``) and ``duration_min`` (their difference).

    Args:
        path: Location of the ledger CSV file.

    Returns:
        The rows in file order, one dict per data line.

    Raises:
        SystemExit: If a required column is absent from the header, or the
            file has a valid header but no data rows.
        ValueError: Propagated from ``parse_relative_minutes`` when a
            timestamp cell is malformed.
    """
    # utf-8-sig reads plain UTF-8 and also drops a leading byte-order mark,
    # which would otherwise be glued onto the first column name.
    with path.open(newline="", encoding="utf-8-sig") as handle:
        reader = csv.DictReader(handle)
        # fieldnames is None for a completely empty file.
        header = reader.fieldnames or []
        rows = list(reader)
    required = {
        "pr_id",
        "target_theorem_family",
        "agent",
        "opened_time",
        "closed_time",
        "merged",
        "commit_sha_anonymized",
    }
    # Validate against the header itself, not the first data row, so that a
    # header-only ledger is not misreported as missing every column.
    missing = required.difference(header)
    if missing:
        raise SystemExit(f"ledger missing required columns: {sorted(missing)}")
    if not rows:
        raise SystemExit("ledger contains no PR rows")
    for row in rows:
        opened = parse_relative_minutes(row["opened_time"])
        closed = parse_relative_minutes(row["closed_time"])
        # Stored as strings so every value in the row dict stays a str.
        row["opened_min"] = str(opened)
        row["closed_min"] = str(closed)
        row["duration_min"] = str(closed - opened)
    return rows


def _agent_label(agent: str) -> str:
    """Return the display label for a raw ledger agent tag."""
    return "unattributed/operator" if agent == "human" else agent


def _merge_rate_rows(groups, relabel_agents: bool) -> list[str]:
    """Render one Markdown table row (with merge rate) per group, sorted by key."""
    lines: list[str] = []
    for key in sorted(groups):
        counts = groups[key]
        opened = counts["merged"] + counts["unmerged"]
        label = _agent_label(key) if relabel_agents else key
        lines.append(
            f"| {label} | {opened} | {counts['merged']} | {counts['unmerged']} | "
            f"{counts['merged'] / opened:.1%} |"
        )
    return lines


def summarize(rows: list[dict[str, str]]) -> str:
    """Build the Markdown summary report for a loaded ledger.

    Args:
        rows: Ledger rows carrying the keys ``merged`` (``"yes"``/``"no"``),
            ``agent``, ``target_theorem_family``, ``opened_min``,
            ``closed_min`` and ``duration_min``.

    Returns:
        The report as one newline-joined string: headline counts and timing
        figures, then per-agent, per-family, and family-by-agent tables, then
        a closing note about the ``human`` tag.

    Raises:
        SystemExit: If ``rows`` is empty or any ``merged`` value is neither
            ``"yes"`` nor ``"no"``.
    """
    total = len(rows)
    if total == 0:
        # Without this guard the rate and window computations below would fail
        # with an unhelpful ValueError/ZeroDivisionError.
        raise SystemExit("ledger contains no PR rows")
    merged = sum(row["merged"] == "yes" for row in rows)
    unmerged = sum(row["merged"] == "no" for row in rows)
    if merged + unmerged != total:
        raise SystemExit("merged column must contain only yes/no values")

    first_open = min(int(row["opened_min"]) for row in rows)
    last_close = max(int(row["closed_min"]) for row in rows)
    durations = [int(row["duration_min"]) for row in rows]
    merged_durations = [int(row["duration_min"]) for row in rows if row["merged"] == "yes"]
    max_active, mean_active = concurrency_stats(rows)

    # statistics.median raises on an empty list, which happens whenever no PR
    # in the ledger was merged; report that case explicitly instead.
    if merged_durations:
        merged_median = f"{statistics.median(merged_durations):g} min"
    else:
        merged_median = "n/a (no merged PRs)"

    by_agent: dict[str, Counter[str]] = defaultdict(Counter)
    by_family: dict[str, Counter[str]] = defaultdict(Counter)
    by_family_agent: dict[tuple[str, str], Counter[str]] = defaultdict(Counter)
    for row in rows:
        status = "merged" if row["merged"] == "yes" else "unmerged"
        by_agent[row["agent"]][status] += 1
        by_family[row["target_theorem_family"]][status] += 1
        by_family_agent[(row["target_theorem_family"], row["agent"])][status] += 1

    out: list[str] = [
        "# Workflow Ledger Summary",
        "",
        f"- Total PRs opened: {total}",
        f"- PRs merged: {merged}",
        f"- PRs closed without merge: {unmerged}",
        f"- Merge rate: {merged}/{total} = {merged / total:.1%}",
        f"- Window length: {fmt_duration(last_close - first_open)}",
        f"- Max simultaneously open PRs: {max_active}",
        f"- Mean open PRs over window: {mean_active:.2f}",
        f"- Median open-to-close time: {statistics.median(durations):g} min",
        f"- Median merged-PR open-to-close time: {merged_median}",
        "",
        "## By Agent Tag",
        "",
        "| Agent tag | Opened | Merged | Closed without merge | Merge rate |",
        "|---|---:|---:|---:|---:|",
    ]
    # Rows are ordered by the raw tag; only the displayed label is rewritten.
    out.extend(_merge_rate_rows(by_agent, relabel_agents=True))
    out.extend(
        [
            "",
            "## By Theorem Family",
            "",
            "| Family | Opened | Merged | Closed without merge | Merge rate |",
            "|---|---:|---:|---:|---:|",
        ]
    )
    out.extend(_merge_rate_rows(by_family, relabel_agents=False))
    out.extend(
        [
            "",
            "## Family x Agent Tag",
            "",
            "| Family | Agent tag | Opened | Merged | Closed without merge |",
            "|---|---|---:|---:|---:|",
        ]
    )
    for family, agent in sorted(by_family_agent):
        counts = by_family_agent[(family, agent)]
        opened = counts["merged"] + counts["unmerged"]
        out.append(
            f"| {family} | {_agent_label(agent)} | {opened} | "
            f"{counts['merged']} | {counts['unmerged']} |"
        )
    out.append("")
    out.append(
        "Note: the raw ledger uses `human` for branches without a tool-specific "
        "prefix; the summary displays those rows as `unattributed/operator`. "
        "Do not infer Claude-vs-Codex or agent-vs-human comparative performance "
        "from those rows "
        "unless a richer provenance ledger is supplied."
    )
    return "\n".join(out)


def main() -> None:
    """Read the ledger path from the command line and print its summary.

    Takes one positional argument, ``ledger``, converted to a ``Path``; the
    file is loaded with ``load_rows`` and the text returned by ``summarize``
    is written to standard output.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("ledger", type=Path)
    options = cli.parse_args()
    report = summarize(load_rows(options.ledger))
    print(report)


# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
