from .num_norm.cn_tn import CN_NSWNormalizer
from .num_norm.en_tn import EN_NSWNormalizer,EnglishTextNormalizer
from .num_norm.fr_tn import FR_NSWNormalizer
from .num_norm.de_tn import DE_NSWNormalizer
from .num_norm.es_tn import ES_NSWNormalizer
from .num_norm.it_tn import IT_NSWNormalizer
from .num_norm.multi_tn import MULTI_NSWNormalizer,BasicTextNormalizer
from .beautify_norm import beautifyText


# from num_norm.cn_tn import CN_NSWNormalizer
# from num_norm.en_tn import EN_NSWNormalizer
# from num_norm.fr_tn import FR_NSWNormalizer
# from num_norm.de_tn import DE_NSWNormalizer
# from num_norm.es_tn import ES_NSWNormalizer
# from num_norm.it_tn import IT_NSWNormalizer
# from num_norm.multi_tn import MULTI_NSWNormalizer
# from beautify_norm import beautifyText


import logging
import re
import string

from num2words import num2words
import zhon
import zhon.hanzi  # load the submodule explicitly; a bare `import zhon` may not expose zhon.hanzi

def has_numbers(text):
    """Return True when ``text`` contains at least one character matched by ``\\d``."""
    return re.search(r'\d', text) is not None

def clean_space(text):
    """Strip the spaces around CJK characters, listed punctuation and digit runs.

    The pattern collects four kinds of fragments: a CJK character or one of
    the listed punctuation marks followed by spaces, a digit run followed by
    spaces, spaces followed by a digit run, and runs of ASCII letters and
    spaces.  Each fragment is then replaced, everywhere it occurs in the
    text, by its stripped form.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string.
    """
    pattern = re.compile(u'[\u4e00-\u9fa5。\.,，:：《》、\(\)（）]{1} +(?<![a-zA-Z])|\d+ +| +\d+|[a-z A-Z]+')
    fragments = pattern.findall(text)
    # Longest fragments first, so a short fragment cannot rewrite part of a
    # longer one before the longer one has been handled (the sort is stable).
    fragments.sort(key=len, reverse=True)
    for fragment in fragments:
        # A fragment that is exactly one space is kept; stripping it would
        # delete every single space in the text.  NOTE(review): a fragment of
        # two or more spaces is not skipped and is removed everywhere.
        if fragment != u' ':
            text = text.replace(fragment, fragment.strip())
    return text

def clean_num(txt):
    """Remove every comma that sits directly between two digits.

    For example ``"1,234,567"`` becomes ``"1234567"``; commas next to a
    non-digit (``"1, 2"``, ``"a,b"``) are left alone.

    Args:
        txt: The string to process.

    Returns:
        ``txt`` without digit-separating commas.
    """
    # Look-arounds keep the neighbouring digits out of the match, so chained
    # groups such as "1,2,3" are handled in a single pass.
    return re.sub(r'(?<=\d),(?=\d)', '', txt)


# Language code -> NSW normalizer class imported for that language.
DICTS = dict(
    cn=CN_NSWNormalizer,
    en=EN_NSWNormalizer,
    fr=FR_NSWNormalizer,
    de=DE_NSWNormalizer,
    es=ES_NSWNormalizer,
    it=IT_NSWNormalizer,
)

def text_norm(text, lang='en', filt_num=True, TestNormalizer=False):
    """Normalize one piece of text for language ``lang``.

    Processing order:

    1. When ``filt_num`` is true and the text contains a digit, the
       non-standard-word normalizer is applied (``CN_NSWNormalizer`` for
       ``'cn'``, ``EN_NSWNormalizer`` for ``'en'``, ``MULTI_NSWNormalizer``
       for every other code).  This step is best effort: on failure a
       warning is logged and the text is kept as it was.
    2. When ``TestNormalizer`` is true, ``EnglishTextNormalizer`` is applied
       for ``'en'`` and ``BasicTextNormalizer`` for ``'de'``, ``'fr'``,
       ``'es'`` and ``'it'``; other languages are left untouched.
    3. Punctuation clean-up: bracketed asides are dropped and quote-like
       symbols removed.  For non-Chinese text, typographic apostrophes become
       ``'``, repeated dots and whitespace are collapsed, and the space in
       front of ``¡¿.,!?`` is removed.  For ``'cn'``, ASCII ``,.!?:`` become
       their full-width forms and the text is passed through
       ``beautifyText`` (also best effort).

    Args:
        text: The string to normalize.
        lang: Language code; ``'cn'`` selects the Chinese clean-up path.
        filt_num: Whether to run the number/NSW normalizer.
        TestNormalizer: Whether to additionally run the text normalizer of
            step 2.

    Returns:
        The normalized string.
    """
    logger = logging.getLogger(__name__)

    if filt_num and has_numbers(text):
        try:
            if lang == 'cn':
                text = CN_NSWNormalizer(text).normalize()
            elif lang == 'en':
                text = EN_NSWNormalizer(text).normalize()
            else:
                text = MULTI_NSWNormalizer(text, lang).normalize()
        except Exception as exc:
            # Best effort: keep the un-normalized text, but do not hide the failure.
            logger.warning("number normalization failed (lang=%r): %s", lang, exc)

    if TestNormalizer:
        if lang == 'en':
            text = EnglishTextNormalizer()(text)
        elif lang in ('de', 'fr', 'es', 'it'):
            text = BasicTextNormalizer(remove_diacritics=True, split_letters=False)(text)

    if lang != 'cn':
        # Drop bracketed asides, then any leftover quote/bracket symbols.
        text = re.sub(r'\(.*?\)|\[.*?\]|{.*?}', "", text)
        text = re.sub(r'"|<|>|\(|\)|‹|›|«|»|“|„|‘|…|”', "", text)
        # Dashes and arrows become word separators ('…' was already removed
        # by the previous substitution, so only the other symbols can match).
        text = re.sub(r"……|→|…|-|~|—", " ", text)
        # Typographic apostrophes -> ASCII apostrophe.  This has to be a regex
        # substitution: str.replace would treat '´|’' as one literal string.
        text = re.sub(r"´|’", "'", text)
        # Apostrophes used as quotes (next to a space or after a full stop)
        # are dropped; apostrophes inside a word survive.
        text = (
            text.replace(";", ",")
            .replace(".'", ".")
            .replace(" '", " ")
            .replace("' ", " ")
        )
        text = text.lstrip(" '")
        # Only a single trailing apostrophe is removed.
        if text.endswith("'"):
            text = text[:-1]
        text = re.sub(r"\.{2,}", ".", text)
        text = re.sub(r"\s+", " ", text)
        for punc in '¡¿.,!?':
            text = text.replace(f' {punc}', punc)
        return text

    # Chinese path: remove bracketed asides and quote-like symbols.
    text = re.sub(r'（.*?）', "", text)
    text = re.sub(r'\(.*?\)|\[.*?\]|{.*?}', "", text)
    text = re.sub(r'"|「|」|“|”|~|《|》|（|）|‘|’|／|……', "", text)
    # ASCII punctuation -> full-width equivalents.
    text = text.translate(str.maketrans(',.!?:', '，。！？：'))
    try:
        text = beautifyText(text, r"([\u4e00-\u9fff])", True)
    except Exception as exc:
        # Best effort: return the text without the beautify pass.
        logger.warning("beautifyText failed: %s", exc)
    else:
        text = re.sub(r" +", ' ', text)
    return text


def remove_punctuation(strs):
    """Delete punctuation from ``strs`` while keeping the ASCII apostrophe.

    Every character of ``string.punctuation`` except ``'`` and every
    character of ``zhon.hanzi.punctuation`` is removed.

    Args:
        strs: The string to process.

    Returns:
        ``strs`` without those punctuation characters.
    """
    # The apostrophe is spared only in the ASCII set, so words such as
    # "I've" stay intact.
    doomed = string.punctuation.replace('\'', '') + zhon.hanzi.punctuation
    return strs.translate(str.maketrans('', '', doomed))

def replace_punctuation_with_special_tokens(strs,cache='AbCdEf',special_tokens='<|endoftext|>'):
    """Replace each punctuation character in ``strs`` with ``special_tokens``.

    Every character of ``string.punctuation`` except ``'`` and every
    character of ``zhon.hanzi.punctuation`` is replaced.  The replacement is
    done in a single pass, so punctuation inside ``special_tokens`` itself
    (such as ``<``, ``|`` and ``>``) is never replaced again.

    Args:
        strs: The string to process.
        cache: Unused; kept so existing callers keep working.  It used to be
            an intermediate placeholder, which also rewrote any occurrence of
            the placeholder text already present in ``strs``.
        special_tokens: The text inserted for each punctuation character.

    Returns:
        ``strs`` with each punctuation character replaced.
    """
    targets = string.punctuation.replace('\'', '') + zhon.hanzi.punctuation
    table = str.maketrans(dict.fromkeys(targets, special_tokens))
    return strs.translate(table)


if __name__ == "__main__":
    # Manual smoke test.  NOTE(review): the imports at the top of this file
    # are relative, so this presumably has to be started with package context
    # (``python -m <package>.<module>``) rather than as a plain script.
    samples = [
        ('或者尝试想象一下第一次踏入\"贡多拉“     ”或看到地中海时的感受\"。会有那么一刻。我感觉到我体内有40马力的家务能力在发展。', 'cn'),
        ('¡In 40s, I (me) {wanted} {like}  want to <live> in a 40 square meter house ! as 1st.', 'en'),
        # French sentence, so it is normalized as French.
        (' Je n\'aime pas le bâtiment de 2.110,25% mètres de long.', 'fr'),
        (' Ho 2,5 kg di mele.', 'it'),
    ]
    for sample, sample_lang in samples:
        print(text_norm(sample, lang=sample_lang, filt_num=True))

    num = '10.25'
    print(num2words(num, lang='de'))

    strs = 'today is friday, so happy..!!! 今天“周五”，下班了，好开心呀。！！ I\'ve a apple!'
    print(remove_punctuation(strs))