from __future__ import annotations

from typing import Any


def targets_in_span(
    *,
    tokenizer: Any,
    input_ids: list[int],
    span: tuple[int, int],
    mode: str = "whole",
) -> list[tuple[int, int]]:
    """Split the half-open token span ``(start, stop)`` into target ranges.

    Modes:
      * ``"whole"`` (default): the entire span is returned as one target.
      * ``"simple_sentences"``: every token in the span is decoded on its
        own, and a target is closed right after any token whose text
        contains ``.``, ``?``, ``!`` or a newline. Tokens remaining after
        the last such token form a final target.

    Working from per-token decodes keeps the returned indices aligned with
    ``input_ids`` without re-tokenizing any text.

    Args:
        tokenizer: Object exposing ``decode(ids, skip_special_tokens=...)``.
        input_ids: Token ids that ``span`` indexes into.
        span: ``(start, stop)`` token indices; ``stop`` is exclusive.
        mode: Either ``"whole"`` or ``"simple_sentences"``.

    Returns:
        Ordered, contiguous ``(start, stop)`` ranges covering the span, or
        an empty list when ``stop <= start``.

    Raises:
        ValueError: If ``mode`` is not one of the two supported values.
            The mode is only checked once the span is known to be
            non-empty.
    """
    start, stop = int(span[0]), int(span[1])
    if stop <= start:
        return []
    if mode == "whole":
        return [(start, stop)]
    if mode != "simple_sentences":
        raise ValueError("mode must be one of: whole, simple_sentences")

    boundary_marks = (".", "?", "!", "\n")

    def closes_sentence(pos: int) -> bool:
        # Decode one token at a time so a boundary maps to an exact index.
        text = tokenizer.decode([int(input_ids[pos])], skip_special_tokens=False)
        return any(mark in text for mark in boundary_marks)

    # Each cut is the exclusive end of a target, i.e. one past a boundary token.
    cuts = [pos + 1 for pos in range(start, stop) if closes_sentence(pos)]

    edges = [start, *cuts]
    if edges[-1] < stop:
        # Trailing tokens with no closing punctuation become the last target.
        edges.append(stop)
    return list(zip(edges, edges[1:]))
