# sae_data.py
"""
File discovery utilities for SAE steering JSON files.

We support multiple filename patterns, including:
  - steer_dlm_layer1_l0_80.json
  - steer_qwen2.5_layer1_l0_320.json
  - steer_layer2_l0_50.json   (newer naming without an extra token between steer_ and layer)

The search is recursive under a given root directory.
"""

from __future__ import annotations

from pathlib import Path
from typing import List, Union


def find_steering_json_files(root_dir: Union[str, Path]) -> List[Path]:
    """
    Recursively find all steering JSON files under the given root directory.

    Recognised filename shapes:
      1) steer_*_layer*_l0_*.json   (e.g. steer_dlm_layer1_l0_80.json)
      2) steer_layer*_l0_*.json     (e.g. steer_layer2_l0_50.json)
      3) steer*layer*_l0_*.json     (catch-all)

    Every name that matches (1) or (2) also matches (3), so a single
    recursive walk with the catch-all glob yields exactly the same files as
    globbing each pattern separately, without traversing the tree three times.

    Args:
        root_dir: Directory to search, given as a string or a Path.

    Returns:
        A sorted, de-duplicated list of Paths for which ``Path.is_file()``
        is true (directories with a matching name are skipped).

    Raises:
        FileNotFoundError: If ``root_dir`` does not exist.
    """
    root = Path(root_dir)
    if not root.exists():
        raise FileNotFoundError(f"Root directory does not exist: {root}")

    # The catch-all is a superset of the more specific patterns listed in the
    # docstring, so it is the only glob that needs to be evaluated.
    catch_all = "steer*layer*_l0_*.json"

    # A set keeps the de-duplication guarantee independent of how the
    # underlying recursive glob enumerates directories.
    matches = {path for path in root.rglob(catch_all) if path.is_file()}

    return sorted(matches)
