"""Model ID canonicalization utilities.

Provides ``canon_model_id`` to produce safe, normalized identifiers suitable for
dataset/group names and file paths.
"""

from __future__ import annotations

import re

# Matches each maximal run of characters outside ``[0-9a-z]``. Underscore is
# itself outside that class, so a single substitution pass with this pattern
# also collapses any repeated ``_`` in the input.
_NON_ALNUM = re.compile(r"[^0-9a-z]+")
# No longer used by ``canon_model_id`` (the pass above never leaves consecutive
# underscores behind); kept so existing importers of the name keep working.
_SQUEEZE = re.compile(r"_+")


def canon_model_id(model_id: str, *, max_len: int = 64) -> str:
    """Canonicalize a model identifier.

    Rules:
    - Lowercase the input.
    - Replace every run of characters outside ``[0-9a-z]`` with a single ``_``.
    - Trim leading/trailing underscores.
    - Cap to ``max_len`` characters (default: 64); an underscore left at the
      end by the cut is trimmed as well.

    The result is the empty string when nothing in ``[0-9a-z]`` remains after
    lowercasing (e.g. ``""`` or ``"///"``) or when ``max_len`` is ``0``.

    Args:
        model_id: Raw model identifier.
        max_len: Maximum length of the returned canonical ID. Must be >= 0.

    Returns:
        Canonicalized identifier string.

    Raises:
        ValueError: If ``max_len`` is negative.

    """
    # A negative bound would make the slice below count from the end of the
    # string instead of capping its length, so reject it up front.
    if max_len < 0:
        raise ValueError(f"max_len must be non-negative, got {max_len}")

    s = _NON_ALNUM.sub("_", model_id.lower()).strip("_")
    if len(s) > max_len:
        # The cut can land right after a separator; drop it so the result
        # never ends with "_".
        s = s[:max_len].rstrip("_")
    return s
