# cirbench/analysis/runner.py
"""Analysis pipeline runner for CIRBench (per‑case only, no shard summary).

Per‑case artifacts are written under:
  runs/<RUN_ID>/raw/analysis.<mode>/<CASE_ID>/01_artifacts/
    - model.resp.txt
    - model_meta.json
    - golden.json
    - pred.json
    - metrics.json
    - repro.sh

Notes:
  * No analysis.summary.shard-*.json is produced.
  * Concurrency-safe because each worker writes only into its own case dir.
"""
from __future__ import annotations
from pathlib import Path
import os, json, time

from ..cfg import CIRBenchConfig
from ..registry import discover_cases
from ..utils.api.base import make_runner
from ..utils.prompt import build_prompt
from ..utils.parse import extract_structured
from ..utils.logging_utils import get_logger, one_line, debug_on, debug_full_on
from ..utils.metrics import json_exact
from ..utils.case_select import select_cases
from ..utils.runner_common import (
    run_id, get_run_dir, get_case_dir, report_mode,
    shot_paths, gen_with_retries, persist_model_io,
    write_text, write_json, sha256, write_repro_sh,
    maybe_short_circuit_prompt_only, maybe_materialize_external_io,
)

def _read_json(path: Path) -> tuple[object, Exception | None]:
    """Read and parse the JSON document at *path*.

    Returns ``(value, None)`` on success and ``(None, exc)`` when the file
    cannot be read or decoded. The error is returned rather than raised so
    callers can tell a valid JSON ``null`` apart from a failed parse.
    """
    try:
        return json.loads(path.read_text(encoding="utf-8")), None
    except Exception as exc:  # best-effort loader: the caller decides what to do
        return None, exc


def _exact_match(gold, pred) -> bool:
    """Return whether *gold* and *pred* serialize to the same JSON text.

    Both sides are dumped with ``sort_keys=True`` so dict key order does not
    affect the comparison done by ``json_exact``.
    """
    return bool(json_exact(json.dumps(gold, sort_keys=True), json.dumps(pred, sort_keys=True)))


def _write_case_artifacts(sp, cfg, case, gold, pred, *, em_ok, unparseable,
                          latency_ms=None, tokens_in=None, tokens_out=None, note=None):
    """Write golden.json, pred.json, metrics.json and repro.sh into ``sp["dir"]``.

    This is the single place that defines the per-case metrics schema, shared
    by the LLM path and every from-files path.

    Args:
        sp: Shot-path mapping as returned by ``shot_paths``; only ``sp["dir"]``
            is read here, the whole mapping is forwarded to ``write_repro_sh``.
        cfg: Benchmark configuration, forwarded to ``write_repro_sh``.
        case: Case object providing ``id`` and ``meta``.
        gold: Golden JSON object written verbatim to golden.json.
        pred: Predicted JSON object; a falsy value is written as ``{}``.
        em_ok: Exact-match outcome (coerced to ``bool``).
        unparseable: Whether the prediction could not be parsed (coerced to ``bool``).
        latency_ms: Model latency, or ``None`` when no model call was made.
        tokens_in: Prompt token count, or ``None`` when unknown.
        tokens_out: Output token count, or ``None`` when unknown.
        note: Optional free-text note; the ``"note"`` key is only emitted when given.
    """
    out_dir = sp["dir"]
    write_json(out_dir / "golden.json", gold)
    write_json(out_dir / "pred.json", pred or {})

    metrics = {
        "id": case.id,
        "difficulty": (case.meta.get("difficulty") if isinstance(case.meta, dict) else None),
        "em": bool(em_ok),
        "unparseable": bool(unparseable),
        "latency_ms": latency_ms,
        "tokens": {"in": tokens_in, "out": tokens_out},
    }
    if note is not None:
        metrics["note"] = note
    write_json(out_dir / "metrics.json", metrics)

    # minimal reproducibility helper (kept consistent with other runners)
    write_repro_sh(sp, task="analysis", cfg=cfg, func=None)


def run_task(cfg: CIRBenchConfig, proj_root: Path):
    """Run the analysis task over every selected case and write per-case artifacts.

    For each case the prompt is built from the case's raw IR and prompt
    template, then exactly one of the following happens:

      * prompt-only: ``maybe_short_circuit_prompt_only`` returns true, so only
        golden.json is written and the model is not called;
      * from-files "resp": a previously stored ``model.resp.txt`` is parsed
        and scored;
      * from-files "pred": a previously stored ``pred.json`` is scored (a
        stray ``variant.ll`` is recorded as unparseable);
      * otherwise the model is queried once via ``gen_with_retries`` and its
        output is parsed and scored.

    Scoring is an exact match between the golden JSON and the prediction.

    Args:
        cfg: Benchmark configuration; used to select the model and build prompts.
        proj_root: Project root; cases are discovered under
            ``proj_root / "cirbench" / "analysis"``.

    Returns:
        None. Results are written to disk and one summary line per case is
        printed to stdout.
    """
    logger = get_logger()

    # discover & select
    tdir = proj_root / "cirbench" / "analysis"
    cases = select_cases(discover_cases(tdir))
    if not cases:
        print("No analysis cases found.")
        return

    # runner + dirs
    from ..utils.api.base import make_runner, select_model_cfg
    runner_cfg = select_model_cfg(cfg)
    runner = make_runner(runner_cfg)
    try:
        mname = getattr(runner, "model", None) or getattr(runner, "model_name", None) or runner_cfg.get("name")
        logger.info(f"LLM runner = {type(runner).__name__} kind={runner_cfg.get('kind')} name={mname}")
    except Exception:
        # Purely informational banner: never let it abort the run.
        pass
    rdir = get_run_dir(proj_root)

    for case in cases:
        cdir = get_case_dir(rdir, "analysis", case.id)
        sp = shot_paths(cdir, 1)
        out_dir = sp["dir"]
        out_dir.mkdir(parents=True, exist_ok=True)

        raw_ir = case.raw_ir.read_text(encoding="utf-8")

        # load golden json if available
        gold = {}
        gold_path = getattr(case, "gold", None)
        if gold_path and gold_path.exists():
            loaded, load_err = _read_json(gold_path)
            if load_err is None:
                gold = loaded
            else:
                logger.info(f"{case.id}: could not load golden JSON ({load_err!r}); using empty labels")
        # Anything that is not a JSON object carrying "labels" is replaced by the
        # empty-labels default. The isinstance guard keeps non-object gold files
        # (numbers, strings, lists) from crashing the membership test or the
        # later ``gold.get`` call.
        # NOTE(review): the replacement also drops a "task" key from a gold
        # object that lacks "labels", so TASK below is "" for such cases --
        # confirm this is intended.
        if not isinstance(gold, dict) or "labels" not in gold:
            gold = {"labels": []}

        # build prompt
        prompt_path = getattr(case, "prompt", None)
        prompt_txt = prompt_path.read_text(encoding="utf-8") if prompt_path and prompt_path.exists() else ""
        prompt = build_prompt(cfg, "analysis", prompt_txt, {"RAW_IR": raw_ir, "TASK": gold.get("task", "")}, case_id=case.id)
        psha = sha256(prompt)

        if report_mode():
            write_text(cdir / "prompt.txt", prompt)
            write_text(cdir / "prompt.sha256", psha)

        # === Prompt-only: write prompt and stop before calling LLM ===
        if maybe_short_circuit_prompt_only(cfg, out_dir, prompt):
            write_json(out_dir / "golden.json", gold)
            print(f"{case.id}: prompt-only (no LLM)")
            continue

        # === From-files: optionally skip LLM and use external outputs ===
        mode = maybe_materialize_external_io(cfg, case.id, out_dir)
        if mode == "resp":
            resp_text = (out_dir / "model.resp.txt").read_text(encoding="utf-8")
            if report_mode():
                persist_model_io(cdir, 1, resp_text, {"source": "from-files", "kind": "resp"})
            pred, err = extract_structured(resp_text)
            em_ok = _exact_match(gold, pred)
            _write_case_artifacts(sp, cfg, case, gold, pred, em_ok=em_ok, unparseable=err)
            print(f"{case.id}: EM={'1' if em_ok else '0'} tokens_in=0 tokens_out=0 [from-files:resp]")
            continue

        if mode == "pred":
            pred_path = out_dir / "pred.json"
            if pred_path.exists():
                pred, pred_err = _read_json(pred_path)
                if pred_err is not None:
                    # A corrupt pred.json is scored as an empty prediction and
                    # flagged, instead of being reported as a clean parse.
                    logger.info(f"{case.id}: could not parse pred.json ({pred_err!r})")
                    pred = {}
                em_ok = _exact_match(gold, pred)
                _write_case_artifacts(sp, cfg, case, gold, pred,
                                      em_ok=em_ok, unparseable=pred_err is not None)
                print(f"{case.id}: EM={'1' if em_ok else '0'} tokens_in=0 tokens_out=0 [from-files:pred]")
                continue

            if (out_dir / "variant.ll").exists():
                # An IR variant is not a valid analysis prediction; record the miss.
                _write_case_artifacts(
                    sp, cfg, case, gold, {},
                    em_ok=False, unparseable=True,
                    note="variant.ll provided for analysis; expected pred.json",
                )
                print(f"{case.id}: EM=0 tokens_in=0 tokens_out=0 [from-files:pred-ll-unsupported]")
                continue
            # Neither pred.json nor variant.ll is present: fall through to the model.

        # ask model (single-shot; network retries handled inside gen_with_retries)
        outc = gen_with_retries(runner, prompt)
        if report_mode():
            persist_model_io(cdir, 1, outc.text, outc.meta)

        # parse model output and score it (order-insensitive via dumps+sort_keys)
        pred, err = extract_structured(outc.text)
        em_ok = _exact_match(gold, pred)

        # write per-case artifacts
        _write_case_artifacts(
            sp, cfg, case, gold, pred,
            em_ok=em_ok, unparseable=err,
            latency_ms=outc.meta.get("latency_ms"),
            tokens_in=outc.meta.get("prompt_tokens"),
            tokens_out=outc.meta.get("out_tokens"),
        )

        # console line
        tin = int(outc.meta.get("prompt_tokens") or 0)
        tout = int(outc.meta.get("out_tokens") or 0)
        print(f"{case.id}: EM={'1' if em_ok else '0'} tokens_in={tin} tokens_out={tout}")
