from blingfire import text_to_sentences
import regex as re


def split_sentence_blingfire(text):
    """Split *text* into sentences with BlingFire and locate each one.

    Angle-bracket tags (``<...>``, including nested ones) are stripped
    before segmentation, so every returned offset indexes into the
    stripped text rather than into the string the caller passed in.

    Args:
        text: The raw input string, possibly containing ``<...>`` tags.

    Returns:
        A ``(sentences, spans)`` tuple.  ``spans[k]`` is a
        ``(start, end)`` pair of character offsets into the stripped
        text, and ``sentences[k]`` is the slice ``stripped[start:end]``.
    """
    # The recursive group ``(?R)`` lets one match swallow nested tags such
    # as ``<a<b>c>``; this needs the third-party ``regex`` module (imported
    # here as ``re``).
    text = re.sub(r'<(?:[^<>]|(?R))*>', '', text)
    total = len(text)

    sentences, spans = [], []
    cursor = 0
    # BlingFire hands back one sentence per line.
    for line in text_to_sentences(text).splitlines():
        # Step over the whitespace that separates consecutive sentences.
        while cursor < total and text[cursor].isspace():
            cursor += 1
        # NOTE(review): assumes the segmenter's sentence has the same
        # length as the matching stretch of ``text`` -- confirm; any
        # length-changing normalisation would make later spans drift.
        start, cursor = cursor, cursor + len(line)
        spans.append((start, cursor))
        sentences.append(text[start:cursor])
    return sentences, spans

