import icu
import regex as re

def split_sentence_icu(text, locale=None):
    """Split *text* into sentences using ICU's sentence break iterator.

    Angle-bracketed markup (``<...>``, including nested brackets) is removed
    before segmentation, so the returned spans index into that cleaned
    string, not into the *text* argument as passed by the caller.

    Args:
        text: The string to segment.
        locale: An ``icu.Locale`` selecting the break rules. ``None`` uses
            the ICU root locale.

    Returns:
        A ``(sentences, spans)`` tuple of two lists of equal length.
        ``spans[i]`` is a ``(start, end)`` pair of Python ``str`` indices
        into the cleaned text, with surrounding whitespace trimmed, and
        ``sentences[i]`` is the corresponding substring. Segments that
        consist only of whitespace are omitted from both lists.
    """
    if locale is None:
        locale = icu.Locale.getRoot()
    bi = icu.BreakIterator.createSentenceInstance(locale)

    # Remove <...> markup using a recursive regex so nested <> are handled.
    text = re.sub(r'<(?:[^<>]|(?R))*>', '', text)

    bi.setText(text)

    # ICU reports boundaries as UTF-16 code-unit offsets, while Python
    # indexes str by code point. The two diverge as soon as the text holds a
    # character outside the BMP (e.g. emoji), so offsets must be translated
    # in that case. max() runs at C speed, keeping the common path cheap.
    needs_translation = max(text, default='') > '\uffff'

    sentences = []
    spans = []
    start = bi.first()  # 0 in both UTF-16 and code-point terms
    units = start       # UTF-16 offset corresponding to `start`
    for boundary in bi:  # 'boundary' is the next sentence boundary (UTF-16)
        if needs_translation:
            # Walk forward from the previous boundary, counting two UTF-16
            # units for every supplementary-plane character.
            end = start
            while units < boundary:
                units += 2 if text[end] > '\uffff' else 1
                end += 1
        else:
            end = boundary

        # Trim surrounding whitespace by moving indices rather than slicing.
        s, e = start, end
        while s < e and text[s].isspace():
            s += 1
        while e > s and text[e - 1].isspace():
            e -= 1

        # Record the sentence and its span together so both lists stay
        # aligned; whitespace-only segments are skipped in both.
        if e > s:
            spans.append((s, e))
            sentences.append(text[s:e])

        start = end  # the next segment begins at this boundary
    return sentences, spans


