from __future__ import annotations
from pathlib import Path
import json, csv

# Column names, in output order, passed as ``fieldnames`` to the
# ``csv.DictWriter`` in ``build_csvs``.
COLS_COMMON = [
    "task",
    "case_id",
    "k",
    "pass",
    "valid",
    "equiv_ok",
    "equiv_status",
    "em",
    "unparseable",
    "latency_ms",
    "tok_in",
    "tok_out",
]

def _rows_from_task(task_dir: Path, task: str):
    """Collect one row per ``*_model_meta.json`` file found under *task_dir*.

    Every sub-directory of *task_dir* is treated as a case and its name becomes
    ``case_id``. Inside a case, each file matching ``*_model_meta.json``
    produces one row. The shot index ``k`` is the run of leading ASCII digits
    of the file name (``"03_model_meta.json"`` -> 3, ``"100_model_meta.json"``
    -> 100); files whose name does not start with a digit are skipped.

    Only ``latency_ms``, ``prompt_tokens`` and ``out_tokens`` are read from the
    meta JSON. The outcome columns (``pass``, ``valid``, ``equiv_ok``,
    ``equiv_status``, ``em``, ``unparseable``) are always ``None`` because this
    function does not read any metrics file.

    Args:
        task_dir: Directory holding one sub-directory per case.
        task: Task name stored verbatim in each row's ``"task"`` field.

    Returns:
        A list of row dicts, ordered by case directory name and then by meta
        file name.

    Raises:
        OSError: If a meta file cannot be read.
        ValueError: If a meta file is not valid UTF-8 or not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    ascii_digits = "0123456789"
    rows = []
    for case_dir in sorted(task_dir.glob("*")):
        if not case_dir.is_dir():
            continue
        case_id = case_dir.name
        for shot_file in sorted(case_dir.glob("*_model_meta.json")):
            name = shot_file.name
            # Take the whole leading digit run instead of a fixed two
            # characters, so "5_..." and "100_..." both parse correctly.
            index_text = name[: len(name) - len(name.lstrip(ascii_digits))]
            if not index_text:
                # No numeric shot index in the file name: not a shot file.
                continue
            meta = json.loads(shot_file.read_text(encoding="utf-8"))
            if not isinstance(meta, dict):
                # Valid JSON that is not an object (e.g. null) carries no
                # telemetry; keep the row and leave those columns empty.
                meta = {}
            rows.append({
                "task": task, "case_id": case_id, "k": int(index_text),
                "pass": None, "valid": None,
                "equiv_ok": None, "equiv_status": None,
                "em": None, "unparseable": None,
                "latency_ms": meta.get("latency_ms"),
                "tok_in": meta.get("prompt_tokens"),
                "tok_out": meta.get("out_tokens"),
            })
    return rows

def build_csvs(run_dir: Path):
    """Write ``<task>.shots.csv`` into *run_dir* for every task that has rows.

    For each of the tasks ``analysis``, ``repair``, ``refactor`` and
    ``transform``, rows are gathered from ``run_dir / "raw" / <task>`` via
    ``_rows_from_task``. A task is skipped when that directory does not exist
    or yields no rows. Each CSV has a header line and the ``COLS_COMMON``
    columns.

    Args:
        run_dir: Run directory that contains ``raw`` and receives the CSVs.

    Returns:
        The paths of the CSV files written, as strings, in task order.
    """
    written = []
    raw_root = run_dir / "raw"
    for task_name in ("analysis", "repair", "refactor", "transform"):
        task_dir = raw_root / task_name
        if not task_dir.exists():
            continue
        task_rows = _rows_from_task(task_dir, task_name)
        if not task_rows:
            continue
        target = run_dir / f"{task_name}.shots.csv"
        with target.open("w", newline="", encoding="utf-8") as handle:
            writer = csv.DictWriter(handle, fieldnames=COLS_COMMON)
            writer.writeheader()
            # Project each record onto the declared columns before writing.
            writer.writerows(
                {col: record.get(col) for col in COLS_COMMON}
                for record in task_rows
            )
        written.append(str(target))
    return written