import unittest
from utils.books import books_functions, book_dataloader

import pickle as pkl

import spacy

# Force spaCy onto the GPU. Per the spaCy documentation, require_gpu() raises
# when no GPU is available, so importing this module fails on CPU-only hosts.
spacy.require_gpu()
# Module-level pipeline used to build the Doc in test_spacy_to_str.
# The transformer pipeline below is a disabled alternative, kept for reference:
#nlp = spacy.load("en_core_web_trf")
nlp = spacy.load("en_core_web_lg")


class TestBookPreparation(unittest.TestCase):
    ''' Tests for the chunking and named-entity helpers in ``books_functions``. '''

    def test_adjust_length(self):
        ''' Check that adjust_length splits losslessly and respects max_length. '''

        test_string = "Hello there, I am a long string, with a looooooooooooong word in the middle."

        # Limits range from larger than the input down to a few characters.
        for max_len in (1500, 150, 15, 5, 3):
            # subTest reports which limit failed and still runs the others.
            with self.subTest(max_length=max_len):
                pieces = books_functions.adjust_length(test_string, max_length=max_len)

                # Concatenating the pieces must give back the exact input.
                self.assertEqual(''.join(pieces), test_string)
                self.assertTrue(all(len(chunk) <= max_len for chunk in pieces))

    def test_chunk_book(self):
        ''' Check that chunk_book splits a large text losslessly within max_length. '''

        test_string = "Hello there, I am a long string, with a looooooooooooong word in the middle." * 100

        for max_len in (1500, 150, 15, 5, 3):
            with self.subTest(max_length=max_len):
                chunks = books_functions.chunk_book(test_string, max_length=max_len)

                self.assertEqual(''.join(chunks), test_string)
                self.assertTrue(all(len(chunk) <= max_len for chunk in chunks))

    def test_rechunk_book(self):
        ''' Check that rechunk_book keeps the text intact for every limit combination. '''

        test_string = "Hello there, I am a long string, with a looooooooooooong word in the middle." * 100
        chunk_limits = (1500, 150, 15, 5, 3)

        for max_rechunk_len in (4, 6, 10, 140):
            for max_len in chunk_limits:
                with self.subTest(max_length=max_len, max_rechunk_len=max_rechunk_len):
                    # Chunk afresh for every combination so rechunk_book always
                    # receives its own, untouched list of chunks.
                    chunks = books_functions.chunk_book(test_string, max_length=max_len)
                    rechunked = books_functions.rechunk_book(chunks, max_rechunk_len)

                    self.assertEqual(''.join(rechunked), test_string)

                    # The length bound is only asserted when the rechunk limit
                    # is at least as large as the original chunk limit.
                    if max_rechunk_len >= max_len:
                        self.assertTrue(all(len(chunk) <= max_rechunk_len for chunk in rechunked))

    def test_spacy_to_str(self):
        ''' Check that collapsing the tokens of a spaCy Doc reproduces the original text. '''

        text = "John told himself:: he! , he was not(!).  Google. Not(!) mary. that he was not Mary. Yep, that is it!"
        doc = nlp(text)
        self.assertEqual(books_functions.collapse_spacy_token_list(list(doc)), text)

    def test_entity_extraction_and_reconstruction(self):
        ''' Check that extract_ne followed by reconstruct_chunk round-trips the text. '''

        text = "John told himself:: he! , he was not(!).  Google. Not(!) mary. that he was not Mary. Yep, that is it! Mary JohnJohn Mary, peter and mary peter, and mary!peter,why?whyis thispeter?"

        extracted = books_functions.extract_ne(text)
        reconstructed = books_functions.reconstruct_chunk(*extracted)

        self.assertEqual(reconstructed, text)

    def test_empty_string_handling(self):
        ''' Check that the extract/reconstruct round trip works for an empty string. '''

        text = ""

        extracted = books_functions.extract_ne(text)
        reconstructed = books_functions.reconstruct_chunk(*extracted)

        self.assertEqual(reconstructed, text)

    def test_near_empty_string_handling(self):
        ''' Check that the extract/reconstruct round trip works for a single space. '''

        text = " "

        extracted = books_functions.extract_ne(text)
        reconstructed = books_functions.reconstruct_chunk(*extracted)

        self.assertEqual(reconstructed, text)

    def test_entity_extraction_ordinal_removal(self):
        ''' Check the exclude_cardinal option of extract_ne.

        Despite the test name, the option exercised here is ``exclude_cardinal``:
        with the default settings the expected text has the digit tokens removed
        (the word "one" stays), and with ``exclude_cardinal=False`` the text must
        round-trip unchanged.
        '''

        text = "123 John told himself:: he! 11, one he was not(!). Google. Not(!) mary. that he was not Mary. Yep, that is it! Mary JohnJohn Mary, peter and mary peter, and mary!peter,why?whyis 12 thispeter? 94"
        text_without_numbers = "John told himself:: he! , one he was not(!). Google. Not(!) mary. that he was not Mary. Yep, that is it! Mary JohnJohn Mary, peter and mary peter, and mary!peter,why?whyis thispeter? "

        # Default call: numbers are expected to be missing after reconstruction.
        extracted = books_functions.extract_ne(text)
        reconstructed = books_functions.reconstruct_chunk(*extracted)

        self.assertEqual(reconstructed, text_without_numbers)

        # With the exclusion switched off the round trip must be lossless.
        extracted_with_numbers = books_functions.extract_ne(text, exclude_cardinal=False)
        reconstructed_with_numbers = books_functions.reconstruct_chunk(*extracted_with_numbers)

        self.assertEqual(reconstructed_with_numbers, text)

    def test_entity_substitution(self):
        ''' Check PERSON/ORG substitution with one candidate per type against a known output. '''

        text = "123 John told himself:: he! 11, one he was not(!). Not(!) mary. IBM is a nice place to work in. that he was not Mary. Yes, that is it! Mary JohnJohn Mary, peter and mary peter, and Mary Jane! Peter,why?whyis 12 thispeter? 94"
        expected = "ABRACADABRA told himself:: he! , one he was not(!). Not(!) ABRACADABRA. GOOGLOCADABRA is a nice place to work in. that he was not ABRACADABRA. Yes, that is it! ABRACADABRA, ABRACADABRA and ABRACADABRA, and ABRACADABRA! ABRACADABRA,why?whyis thispeter? "

        chunk_pieces, original_entities = books_functions.extract_ne(text, enttypes_to_process=("PERSON", "ORG"))

        substitution_entities = {"PERSON": ["ABRACADABRA"], "ORG": ["GOOGLOCADABRA"]}
        changed = books_functions.change_chunk(chunk_pieces, original_entities, substitution_entities)

        self.assertEqual(changed, expected)

    def test_entity_entity_sub_no_NE(self):
        ''' Check that a text for which no change is expected comes back unchanged. '''

        text = "They went very far."
        chunk_pieces, original_entities = books_functions.extract_ne(text, enttypes_to_process=("PERSON", "ORG"))
        substitution_entities = {"PERSON": {"Jane", "Alex"}, "ORG": ["GOOGLOCADABRA"]}
        changed = books_functions.change_chunk(chunk_pieces, original_entities, substitution_entities)

        self.assertEqual(changed, text)

    def test_entity_entity_swapping(self):
        ''' Check that two persons are swapped when they are the only candidates. '''

        text = "Jane told Alex that Jane does not love him."
        chunk_pieces, original_entities = books_functions.extract_ne(text, enttypes_to_process=("PERSON", "ORG"))
        substitution_entities = {"PERSON": {"Jane", "Alex"}, "ORG": ["GOOGLOCADABRA"]}
        changed = books_functions.change_chunk(chunk_pieces, original_entities, substitution_entities)

        self.assertEqual(changed, "Alex told Jane that Alex does not love him.")

    def test_entity_entity_swapping_insufficient_exception(self):
        ''' Check that change_chunk raises ValueError when the only candidate is the entity itself. '''

        text = "John does not love him."
        chunk_pieces, original_entities = books_functions.extract_ne(text, enttypes_to_process=("PERSON", "ORG"))

        substitution_entities = {"PERSON": {"John"}, "ORG": ["GOOGLOCADABRA"]}

        with self.assertRaises(ValueError) as ctx:
            books_functions.change_chunk(chunk_pieces, original_entities, substitution_entities)

        self.assertIn("Not enough candidates for entity renaming", str(ctx.exception))

    def test_extract_ne_dict(self):
        ''' Check that extract_ne_dict collects the same entities however the text is split. '''

        split_chunks = ["John does not love him. ", "Mary Jane also does not love him. Google is a good company."]
        single_chunk = ["John does not love him. Mary Jane also does not love him. Google is a good company."]

        for chunks in (split_chunks, single_chunk):
            with self.subTest(n_chunks=len(chunks)):
                ent_dict = books_functions.extract_ne_dict(chunks, enttypes_to_process=("PERSON", "ORG"))

                self.assertEqual(ent_dict["PERSON"], {"John", "Mary Jane"})
                self.assertEqual(ent_dict["ORG"], {"Google"})



class TestBookDataloader(unittest.TestCase):
    ''' Tests for ``book_dataloader.BookDataLoader`` on a preprocessed book pickle. '''

    def test_book_dataloader(self):
        ''' Check the output format, batch sizes and length of a dataloader run. '''

        # NOTE: this path is relative to the current working directory.
        books_path = "../../experiments/BookEmbedding/preprocessed_books_web_lg_random1000.pkl"
        # Arguments handed to get_run(); presumably (batch size, number of
        # batches), which is exactly what the assertions below verify.
        batch_size = 25
        num_batches = 1000

        # Keep the file open only while unpickling. Unpickling can execute
        # arbitrary code, so this must only ever point at a trusted fixture.
        with open(books_path, "rb") as f:
            reconstructed_books = pkl.load(f)

        # Smoke check: one stored chunk must be reconstructable without raising.
        books_functions.reconstruct_chunk(*reconstructed_books[0][2][30])

        tst_dataloader = book_dataloader.BookDataLoader([books_path])
        tst_run = tst_dataloader.get_run(batch_size, num_batches)

        for _ in range(num_batches):
            res = next(tst_run)
            self.assertEqual(len(res), 2, "Wrong output format")
            self.assertEqual(len(res[0]), batch_size, "Wrong batch size for instructions")
            self.assertEqual(len(res[1]), batch_size, "Wrong batch size for queries")

        # The run must be exhausted after exactly num_batches items.
        with self.assertRaises(StopIteration):
            next(tst_run)



# Allow running this test module directly as a script, in addition to
# discovery through a test runner.
if __name__ == '__main__':
    unittest.main()
