import collections
import re


class SpellingCorrector():
    """Frequency-based spelling corrector built from local word lists.

    The word-frequency model ``WORDS`` is built once, while the class body
    is executed (i.e. when the module is imported), from the files listed
    in ``_CORPUS_FILES``.  The paths are relative, so they are resolved
    against the current working directory; a missing file raises the usual
    ``OSError`` from ``open`` at import time.

    Typical use::

        SpellingCorrector().correction('speling')
    """

    alphabet = 'abcdefghijklmnopqrstuvwxyz'

    # Word lists and text corpora used to train the frequency model.
    _CORPUS_FILES = (
        'util/words_db/2+2gfreq.txt',
        'util/words_db/2+2lemma.txt',
        'util/words_db/2of4brif.txt',
        'util/words_db/2of12.txt',
        'util/words_db/2of12inf.txt',
        'util/words_db/3esl.txt',
        'util/words_db/5desk.txt',
        'util/words_db/6of12.txt',
        'util/words_db/allwords.txt',
        'util/words_db/big.txt',
        'util/words_db/cracklib-words.txt',
        'util/words_db/neol2007.txt',
        'util/words_db/ul_words.txt',
    )

    def words(text):
        """Return every run of the letters a-z in `text`, lowercased, in order."""
        return re.findall('[a-z]+', text.lower())

    def train(features):
        """Count the items of `features`.

        Returns a ``defaultdict`` whose counts start at 1 (simple
        smoothing): an item seen ``k`` times maps to ``k + 1``.
        """
        model = collections.defaultdict(lambda: 1)
        for f in features:
            model[f] += 1
        return model

    def _corpus_words(paths, tokenize=words):
        """Yield the tokens of each file in `paths`, one file at a time.

        `tokenize` is bound as a default argument because names defined in
        a class body are not visible from inside functions defined there.
        Each file is tokenised separately so that the last word of one file
        can never fuse with the first word of the next, and so that the raw
        text is not kept alive after training.
        """
        for path in paths:
            # Decode deterministically instead of relying on the locale;
            # undecodable bytes are replaced rather than raising because
            # the tokenizer effectively keeps only ASCII letters anyway.
            with open(path, 'r', encoding='utf-8', errors='replace') as corpus_file:
                text = corpus_file.read()
            yield from tokenize(text)

    # Train the model
    WORDS = train(_corpus_words(_CORPUS_FILES))

    # The helpers above were needed as plain functions while the class body
    # ran; wrap them now so they also work when accessed through an
    # instance (where they would otherwise receive `self` as first argument).
    words = staticmethod(words)
    train = staticmethod(train)
    _corpus_words = staticmethod(_corpus_words)

    def P(self, word, N=sum(WORDS.values())):
        """Return the estimated probability of `word`.

        `N` defaults to the total of all counts in ``WORDS`` as computed
        once, when the class was defined.  Words missing from the model get
        the smoothing count 1.
        """
        # Use .get() rather than indexing: indexing the defaultdict would
        # insert the unseen word into WORDS, making a misspelling "known"
        # for every later lookup.
        return self.WORDS.get(word, 1) / N

    def correction(self, word):
        """Return the most probable spelling correction for `word`."""
        return max(self.candidates(word), key=self.P)

    def candidates(self, word):
        """Return the possible spelling corrections for `word`.

        Preference order: the word itself if known, else known words one
        edit away, else known words two edits away, else ``[word]``
        unchanged.
        """
        return (
            self.known([word])
            or self.known(self.edits1(word))
            or self.known(self.edits2(word))
            or [word]
        )

    def known(self, words):
        """Return the set of items from `words` that appear in ``WORDS``."""
        return {w for w in words if w in self.WORDS}

    def edits1(self, word):
        """Return the set of all strings one edit away from `word`.

        An edit is a single-character deletion, a transposition of two
        adjacent characters, a replacement of one character, or an
        insertion of one character drawn from ``self.alphabet``.
        """
        letters = self.alphabet
        # Every way of cutting `word` in two, including the empty halves.
        splits = [(word[:i], word[i:]) for i in range(len(word) + 1)]
        deletes = [L + R[1:] for L, R in splits if R]
        transposes = [L + R[1] + R[0] + R[2:] for L, R in splits if len(R) > 1]
        replaces = [L + c + R[1:] for L, R in splits if R for c in letters]
        inserts = [L + c + R for L, R in splits for c in letters]
        return set(deletes + transposes + replaces + inserts)

    def edits2(self, word):
        """Return a generator over all strings two edits away from `word`.

        The result is lazy and may contain duplicates.
        """
        return (e2 for e1 in self.edits1(word) for e2 in self.edits1(e1))
