# cirbench/registry.py
"""Case registry and discovery utilities for CIRBench.

Defines the Case abstraction which loads metadata and paths for each benchmark
case, and provides discovery logic for locating cases under a task directory.
"""
from __future__ import annotations
from pathlib import Path
from typing import List, Optional, Dict, Any
import yaml

class Case:
    """A single benchmark case rooted at a directory that contains ``case.yaml``.

    Attributes:
        root: The case directory passed to the constructor.
        meta: Parsed top-level mapping from ``case.yaml`` (empty if the file is empty).
        id: Case identifier; ``meta["id"]`` when set, otherwise derived from the
            directory name. Always a string.
        before_ir: Resolved path from ``assets.before_ir``, else ``assets.raw_ir``,
            else ``before.ll``.
        after_ir: Resolved path from ``assets.after_ir``, else ``assets.gold``,
            else ``after.ll``.
        raw_ir: Resolved path from ``assets.raw_ir``, else ``raw.ll``.
        prompt: Resolved path from ``assets.prompt``, else ``prompt.txt``.
        gold: First existing file among ``assets.gold``, ``assets.golden``,
            ``gold.json`` and ``golden.json``; ``None`` if none exists.
        harness: Existing harness path, or ``None`` (see ``__init__``).
        has_gold: Whether ``gold`` is set and exists on disk.
    """

    # Conventional harness locations probed, in order, when case.yaml names none.
    _DEFAULT_HARNESS_CANDIDATES = ("harness/run.sh", "run.sh", "harness.sh", "harness")

    def __init__(self, root: Path):
        """Initialize a Case by loading its case.yaml and resolving asset paths.

        Args:
            root: Directory that contains ``case.yaml`` and the case assets.

        Raises:
            FileNotFoundError: If ``root / "case.yaml"`` does not exist.
            ValueError: If ``case.yaml`` parses to something other than a mapping.
        """
        self.root = root
        meta_path = root / "case.yaml"
        if not meta_path.exists():
            raise FileNotFoundError(f"case.yaml not found: {meta_path}")

        loaded = yaml.safe_load(meta_path.read_text(encoding="utf-8")) or {}
        if not isinstance(loaded, dict):
            # A list or scalar document would otherwise fail later with an
            # opaque AttributeError on ``.get``.
            raise ValueError(f"case.yaml must contain a mapping at top level: {meta_path}")
        self.meta: Dict[str, Any] = loaded

        # YAML parses a bare numeric ``id: 7`` as an int; coerce so ids are
        # uniformly strings and can be sorted against derived ids.
        self.id: str = str(self.meta.get("id") or self._derive_id(root))

        assets = self.meta.get("assets") or {}
        self.before_ir: Path = self._asset_path(root, assets, ["before_ir", "raw_ir"], "before.ll")
        self.after_ir: Optional[Path] = self._asset_path(root, assets, ["after_ir", "gold"], "after.ll")
        # raw_ir has its own default (raw.ll); it is not an alias of before_ir.
        self.raw_ir: Path = self._asset_path(root, assets, ["raw_ir"], "raw.ll")
        self.prompt: Path = self._asset_path(root, assets, ["prompt"], "prompt.txt")

        self.gold: Optional[Path] = self._resolve_first(
            root,
            [
                assets.get("gold"),
                assets.get("golden"),
                "gold.json",
                "golden.json",
            ],
        )

        # An explicitly configured harness is authoritative: if it is missing on
        # disk the conventional locations are NOT probed and harness stays None.
        harness_cfg = assets.get("harness")
        harness_candidates = [harness_cfg] if harness_cfg else list(self._DEFAULT_HARNESS_CANDIDATES)
        self.harness: Optional[Path] = self._resolve_first(root, harness_candidates)

        self.has_gold = bool(self.gold and self.gold.exists())

    # --- Internal helpers -------------------------------------------------------

    @staticmethod
    def _asset_path(root: Path, assets: Dict[str, Any], keys: List[str], default: str) -> Path:
        """Resolve an asset path from the first non-empty key in *keys*.

        Keys that are absent, ``None`` or empty are skipped, so a blank entry in
        case.yaml falls through to the next key and finally to *default*. The
        returned path is resolved but is not required to exist.
        """
        for key in keys:
            configured = assets.get(key)
            if configured:
                return (root / configured).resolve()
        return (root / default).resolve()

    @staticmethod
    def _resolve_first(root: Path, candidates: List[Optional[str]]) -> Optional[Path]:
        """Return the first existing path among given candidate filenames.

        Falsy candidates (``None`` or ``""``) are skipped. Returns ``None`` when
        no candidate exists under *root*.
        """
        for candidate in candidates:
            if not candidate:
                continue
            resolved = (root / candidate).resolve()
            if resolved.exists():
                return resolved
        return None

    @staticmethod
    def _derive_id(root: Path) -> str:
        """Derive a stable case id from directory name, e.g. A001_alias/005 -> A001_alias_005.

        Only all-digit directory names are prefixed with the parent directory
        name; any other name is returned unchanged.
        """
        name = root.name
        parent = root.parent.name
        if parent and name.isdigit():
            # A001_alias/005 -> A001_alias_005
            return f"{parent}_{name}"
        return name

def discover_cases(task_dir: Path) -> List[Case]:
    """Collect every case found one or two directory levels below the search root.

    The search root is ``task_dir / "bench"`` when that path exists, otherwise
    ``task_dir`` itself. A directory holding ``case.yaml`` is loaded as a case
    and not descended into; any other directory has its immediate
    subdirectories checked for ``case.yaml``. Deeper nesting is not searched.

    Returns:
        The discovered cases sorted by ``id``; an empty list when the search
        root does not exist.
    """
    bench_dir = task_dir / "bench"
    search_root = bench_dir if bench_dir.exists() else task_dir
    if not search_root.exists():
        return []

    found: List[Case] = []
    for top in sorted(search_root.iterdir()):
        if not top.is_dir():
            continue

        if (top / "case.yaml").exists():
            # Flat layout: the directory is itself a case.
            found.append(Case(top))
        else:
            # Grouped layout: cases live one level further down.
            for nested in sorted(top.iterdir()):
                if nested.is_dir() and (nested / "case.yaml").exists():
                    found.append(Case(nested))

    found.sort(key=lambda case: case.id)
    return found