import json
import logging
import multiprocessing
import os
import shutil
from argparse import Namespace
import collections
import re
import string
import pickle

from contextlib import closing
from multiprocessing.pool import Pool
from tensorboardX import SummaryWriter
import wandb
import matplotlib.pyplot as plt
# Module-level matplotlib configuration. These statements run once, when this
# module is imported, and change the process-wide pyplot defaults (style,
# default figure size and font size) for any figure created afterwards.
plt.style.use('default')
plt.rcParams['figure.figsize'] = (8, 6)
plt.rcParams['font.size'] = 12

from transformers import AutoTokenizer
from transformers.models.bert.tokenization_bert import BasicTokenizer, whitespace_tokenize

from tqdm import tqdm

class SquadExample(object):
    """A single training/test example for the Squad dataset.

    For examples without an answer, the start and end position are -1.

    Args:
        qas_id: Identifier of the question/answer pair.
        question_text: The question as a single string.
        paragraph_text: The raw context paragraph.
        doc_tokens: The paragraph split into whitespace-delimited words.
        orig_answer_text: The annotated answer text, if any.
        start_position: Index (into `doc_tokens`) of the first answer word.
        end_position: Index (into `doc_tokens`) of the last answer word.
        answers: List of acceptable answer strings. Defaults to a fresh
            empty list for every instance.
        is_impossible: Whether the question has no answer in the paragraph.
    """

    def __init__(self,
                 qas_id,
                 question_text,
                 paragraph_text,
                 doc_tokens,
                 orig_answer_text=None,
                 start_position=None,
                 end_position=None,
                 answers=None,
                 is_impossible=None,):
        self.qas_id = qas_id
        self.question_text = question_text
        self.paragraph_text = paragraph_text
        self.doc_tokens = doc_tokens
        self.orig_answer_text = orig_answer_text
        self.start_position = start_position
        self.end_position = end_position
        # A mutable default (`answers=[]`) would be shared by every instance
        # created without an explicit list, so build a new list per instance.
        self.answers = [] if answers is None else answers
        self.is_impossible = is_impossible

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        parts = [
            "qas_id: %s\n" % self.qas_id,
            "question_text: %s\n" % self.question_text,
            "context: %s\n" % " ".join(self.doc_tokens),
            "answers: %s\n" % " / ".join(self.answers),
        ]
        # Compare against None explicitly: position 0 is a valid word index
        # and must not be dropped from the representation.
        if self.start_position is not None:
            parts.append(", start_position: %d" % self.start_position)
        if self.end_position is not None:
            parts.append(", end_position: %d" % self.end_position)
        if self.is_impossible:
            parts.append(", is_impossible: %r" % self.is_impossible)
        return "".join(parts)

class QAProcessor(object):
    """Reads JSON-lines QA splits from disk and builds `SquadExample` objects.

    Every non-blank line of a split file is parsed with `json.loads`. The
    resulting entries are consumed by `_create_examples`, which reads the keys
    "context", "episode_id", "turn_id", "question" and "answer" (plus
    "start_char" in training mode).
    """

    def __init__(self, args):
        # `args` is not read here; the parameter is kept so callers and
        # subclasses can keep passing their configuration object.
        self.train_file = "train.jsonl"
        self.dev_file = "dev.jsonl"
        self.test_file = "test.jsonl"

    def _read_jsonl(self, data_dir, filename):
        """Parse `<data_dir>/<filename>` as JSON lines, skipping blank lines."""
        print(f"DataProcessor: {filename}")
        input_file = os.path.join(data_dir, filename)
        with open(input_file, "r", encoding='utf-8') as reader:
            # A trailing empty line must not make json.loads fail.
            return [json.loads(line) for line in reader if line.strip()]

    def get_train_examples(self, data_dir):
        """Load the training split; answers are aligned to word positions."""
        input_data = self._read_jsonl(data_dir, self.train_file)
        return self._create_examples(input_data, is_training=True)

    def get_dev_examples(self, data_dir):
        """Load the dev split in evaluation mode (no answer positions)."""
        input_data = self._read_jsonl(data_dir, self.dev_file)
        return self._create_examples(input_data, is_training=False)

    def get_test_examples(self, data_dir):
        """Load the test split in evaluation mode (no answer positions)."""
        input_data = self._read_jsonl(data_dir, self.test_file)
        return self._create_examples(input_data, is_training=False)

    def _create_examples(self, input_data, is_training, version_2_with_negative=False):
        """Turn parsed entries into a list of `SquadExample`.

        Args:
            input_data: List of dict entries (one per QA pair).
            is_training: When True, the answer text and "start_char" offset are
                projected onto whitespace-token positions, and entries whose
                answer cannot be recovered from the context are skipped.
                When False, "answer" may be a string or a list of strings.
            version_2_with_negative: When True (training only), the entry's
                "is_impossible" flag is read and unanswerable entries get
                start/end position -1.

        Returns:
            The list of examples that were kept.
        """
        def is_whitespace(c):
            return c in (" ", "\t", "\r", "\n") or ord(c) == 0x202F

        examples = []

        # Only advanced for examples that are kept, so ids stay contiguous.
        unique_ind = 0
        for entry in tqdm(input_data, total=len(input_data)):
            paragraph_text = entry["context"]
            doc_tokens = []
            # char_to_word_offset[i] is the index of the word that character
            # i of the paragraph belongs to (or follows, for whitespace).
            char_to_word_offset = []
            prev_is_whitespace = True
            for c in paragraph_text:
                if is_whitespace(c):
                    prev_is_whitespace = True
                else:
                    if prev_is_whitespace:
                        doc_tokens.append(c)
                    else:
                        doc_tokens[-1] += c
                    prev_is_whitespace = False
                char_to_word_offset.append(len(doc_tokens) - 1)

            episode_id = entry["episode_id"]
            turn_id = entry["turn_id"]
            qas_id = f"{episode_id}_{turn_id}_{unique_ind}"
            question_text = " ".join(entry["question"])

            start_position = None
            end_position = None
            orig_answer_text = None
            is_impossible = False
            if is_training:
                if version_2_with_negative:
                    # The flag lives on the entry itself; there is no separate
                    # `qa` object in this flat JSON-lines format.
                    is_impossible = entry["is_impossible"]
                answers = []
                if not is_impossible:
                    answers.append(entry["answer"])
                    orig_answer_text = entry["answer"]
                    answer_offset = entry["start_char"]
                    answer_length = len(orig_answer_text)
                    start_position = char_to_word_offset[answer_offset]
                    end_position = char_to_word_offset[answer_offset +
                                                       answer_length - 1]
                    # Only add answers where the text can be exactly recovered from the
                    # document. If this CAN'T happen it's likely due to weird Unicode
                    # stuff so we will just skip the example.
                    #
                    # Note that this means for training mode, every example is NOT
                    # guaranteed to be preserved.
                    actual_text = " ".join(
                        doc_tokens[start_position:(end_position + 1)])
                    cleaned_answer_text = " ".join(
                        whitespace_tokenize(orig_answer_text))
                    if actual_text.find(cleaned_answer_text) == -1:
                        continue
                else:
                    start_position = -1
                    end_position = -1
                    orig_answer_text = ""
            else:
                if isinstance(entry["answer"], str):
                    answers = [entry["answer"]]
                else:
                    answers = entry["answer"]

            example = SquadExample(
                qas_id=qas_id,
                question_text=question_text,
                paragraph_text=paragraph_text,
                doc_tokens=doc_tokens,
                orig_answer_text=orig_answer_text,
                start_position=start_position,
                end_position=end_position,
                answers=answers,
                is_impossible=is_impossible,)
            examples.append(example)
            unique_ind += 1
        return examples


class QAProcessorForTest(QAProcessor):
    """Processor that builds evaluation examples from a prediction file.

    Instead of reading ready-made QA splits, the dev/test examples are
    assembled by `build_qa_dataset_for_test` from `data_dir` and
    `prediction_file`. Training examples are not supported.
    """

    def __init__(self, args):
        super().__init__(args)

    def get_train_examples(self, data_dir):
        raise NotImplementedError

    def get_dev_examples(self, data_dir, prediction_file, eval_target="all"):
        """Build evaluation examples for the "valid" fold."""
        print(f"DataProcessor: {prediction_file}")
        entries = build_qa_dataset_for_test(
            data_dir,
            prediction_file,
            "valid",
            eval_target=eval_target,
        )
        return self._create_examples(entries, is_training=False)

    def get_test_examples(self, data_dir, prediction_file, eval_target="all"):
        """Build evaluation examples for the "test" fold."""
        print(f"DataProcessor: {prediction_file}")
        entries = build_qa_dataset_for_test(
            data_dir,
            prediction_file,
            "test",
            eval_target=eval_target,
        )
        return self._create_examples(entries, is_training=False)


def is_whitespace(c):
    """Return True when `c` is a space, tab, CR, LF or the code point U+202F."""
    return c in (" ", "\t", "\r", "\n") or ord(c) == 0x202F

class InputFeatures(object):
    """Plain record holding the features built for one document span.

    Every constructor argument is stored unchanged on an attribute of the
    same name; `start_position`, `end_position` and `is_impossible` are
    optional and default to None.
    """

    def __init__(
        self,
        unique_id, qas_id, example_index, doc_span_index,
        tokens, token_to_orig_map, token_is_max_context,
        input_ids, answer_text, input_mask, segment_ids,
        start_position=None, end_position=None, is_impossible=None,
    ):
        # Bookkeeping: which example / which span this feature came from.
        self.unique_id = unique_id
        self.qas_id = qas_id
        self.example_index = example_index
        self.doc_span_index = doc_span_index

        # Token-level views and lookup tables for the span.
        self.tokens = tokens
        self.token_to_orig_map = token_to_orig_map
        self.token_is_max_context = token_is_max_context

        # Model inputs plus the answer text carried along with them.
        self.input_ids = input_ids
        self.answer_text = answer_text
        self.input_mask = input_mask
        self.segment_ids = segment_ids

        # Optional answer labels.
        self.start_position = start_position
        self.end_position = end_position
        self.is_impossible = is_impossible

def convert_examples_to_features(
    examples,
    tokenizer,
    max_seq_length,
    doc_stride,
    max_query_length,
    is_training=True,
):
    """Convert examples into fixed-length `InputFeatures` using sliding windows.

    Each feature is laid out as `[CLS] query [SEP] document-span [SEP]` and
    padded to `max_seq_length`. Documents that do not fit are split into
    overlapping spans that advance by `doc_stride` sub-tokens.

    Args:
        examples: Iterable of example objects exposing `qas_id`,
            `question_text`, `doc_tokens`, `orig_answer_text`,
            `start_position`, `end_position` and `is_impossible`.
        tokenizer: Tokenizer that is callable on text (returning an object with
            `input_ids`) and provides `tokenize`, `convert_tokens_to_ids`,
            `convert_ids_to_tokens`, `cls_token_id` and `sep_token_id`.
        max_seq_length: Length of every produced `input_ids` list.
        doc_stride: Step, in sub-tokens, between consecutive document spans.
        max_query_length: Queries longer than this are truncated.
        is_training: When True, answer start/end positions are computed.
            Spans that do not fully contain the answer, and impossible
            examples, get position 0 (the [CLS] slot).

    Returns:
        A list of `InputFeatures`, one per (example, document span).

    Raises:
        ValueError: If the query leaves no room for any document token.
    """
    _DocSpan = collections.namedtuple(  # pylint: disable=invalid-name
        "DocSpan", ["start", "length"])
    # Words are tokenized one at a time, so RoBERTa-style tokenizers need an
    # explicit prefix space to produce the same pieces as in running text.
    needs_prefix_space = "Roberta" in tokenizer.__class__.__name__
    # Pad with the tokenizer's own pad id (e.g. RoBERTa uses 1, not 0); fall
    # back to 0 when the tokenizer does not define one.
    pad_token_id = getattr(tokenizer, "pad_token_id", None)
    if pad_token_id is None:
        pad_token_id = 0

    features = []
    unique_id = 1000000000

    for example_index, example in enumerate(tqdm(examples)):
        qas_id = example.qas_id
        query_ids = tokenizer(
            example.question_text, add_special_tokens=False).input_ids
        query_ids = query_ids[:max_query_length]

        # Map between whitespace words and the sub-tokens they were split into.
        tok_to_orig_index = []
        orig_to_tok_index = []
        all_doc_tokens = []
        for (i, token) in enumerate(example.doc_tokens):
            orig_to_tok_index.append(len(all_doc_tokens))
            if needs_prefix_space:
                sub_tokens = tokenizer.tokenize(token, add_prefix_space=True)
            else:
                sub_tokens = tokenizer.tokenize(token)

            for sub_token in sub_tokens:
                tok_to_orig_index.append(i)
                all_doc_tokens.append(sub_token)
        all_doc_ids = tokenizer.convert_tokens_to_ids(all_doc_tokens)

        tok_start_position = None
        tok_end_position = None
        if is_training and example.is_impossible:
            tok_start_position = -1
            tok_end_position = -1
        if is_training and not example.is_impossible:
            tok_start_position = orig_to_tok_index[example.start_position]
            if example.end_position < len(example.doc_tokens) - 1:
                tok_end_position = orig_to_tok_index[example.end_position + 1] - 1
            else:
                tok_end_position = len(all_doc_tokens) - 1
            (tok_start_position, tok_end_position) = _improve_answer_span(
                all_doc_tokens, tok_start_position, tok_end_position, tokenizer,
                example.orig_answer_text)

        # The -3 accounts for [CLS], [SEP] and [SEP].
        # The budget must be based on the number of query ids, not on the
        # size of the tokenizer output container; otherwise spans are too
        # long and positions/maps can point past the end of the sequence.
        max_tokens_for_doc = max_seq_length - len(query_ids) - 3
        if max_tokens_for_doc <= 0:
            # A non-positive budget would make the window loop below spin
            # forever, so fail loudly instead.
            raise ValueError(
                "max_seq_length=%d leaves no room for document tokens after a "
                "query of %d tokens and 3 special tokens"
                % (max_seq_length, len(query_ids)))

        # We can have documents that are longer than the maximum sequence length.
        # To deal with this we do a sliding window approach, where we take chunks
        # of up to our max length with a stride of `doc_stride`.
        doc_spans = []
        start_offset = 0
        while start_offset < len(all_doc_tokens):
            length = min(len(all_doc_tokens) - start_offset, max_tokens_for_doc)
            doc_spans.append(_DocSpan(start=start_offset, length=length))
            if start_offset + length == len(all_doc_tokens):
                break
            start_offset += min(length, doc_stride)

        # Number of positions taken by "[CLS] query [SEP]" before the document.
        doc_offset = len(query_ids) + 2

        for (doc_span_index, doc_span) in enumerate(doc_spans):
            token_to_orig_map = {}
            token_is_max_context = {}

            # [CLS] + query + [SEP], all in segment 0.
            input_ids = [tokenizer.cls_token_id] + list(query_ids) + [tokenizer.sep_token_id]
            token_type_ids = [0] * len(input_ids)  # segment_ids

            # Doc Span
            for i in range(doc_span.length):
                split_token_index = doc_span.start + i
                token_to_orig_map[len(
                    input_ids)] = tok_to_orig_index[split_token_index]
                is_max_context = _check_is_max_context(doc_spans, doc_span_index,
                                                       split_token_index)
                token_is_max_context[len(input_ids)] = is_max_context
                input_ids.append(all_doc_ids[split_token_index])
                token_type_ids.append(1)

            # Last [SEP]. No truncation is needed here: the span length is
            # capped by max_tokens_for_doc, so the sequence always fits.
            input_ids.append(tokenizer.sep_token_id)
            token_type_ids.append(1)

            attention_mask = [1] * len(input_ids)

            # Keep the readable tokens for the unpadded part only.
            tokens = tokenizer.convert_ids_to_tokens(input_ids)

            padding = max_seq_length - len(input_ids)
            if padding > 0:
                input_ids += [pad_token_id] * padding
                attention_mask += [0] * padding
                token_type_ids += [0] * padding

            assert len(input_ids) == max_seq_length
            assert len(attention_mask) == max_seq_length
            assert len(token_type_ids) == max_seq_length

            start_position = None
            end_position = None

            if is_training and not example.is_impossible:
                # If this document chunk does not fully contain the annotated
                # answer, point both labels at position 0 instead.
                doc_start = doc_span.start
                doc_end = doc_span.start + doc_span.length - 1
                if tok_start_position >= doc_start and tok_end_position <= doc_end:
                    start_position = tok_start_position - doc_start + doc_offset
                    end_position = tok_end_position - doc_start + doc_offset
                else:
                    start_position = 0
                    end_position = 0

            if is_training and example.is_impossible:
                start_position = 0
                end_position = 0

            features.append(
                InputFeatures(
                    unique_id=unique_id,
                    qas_id=qas_id,
                    example_index=example_index,
                    doc_span_index=doc_span_index,
                    tokens=tokens,
                    token_to_orig_map=token_to_orig_map,
                    token_is_max_context=token_is_max_context,
                    input_ids=input_ids,
                    input_mask=attention_mask,  # attention_mask
                    answer_text=example.orig_answer_text,
                    segment_ids=token_type_ids,  # token_type_ids
                    start_position=start_position,
                    end_position=end_position,
                    is_impossible=example.is_impossible,
                )
            )
            unique_id += 1
    print(f" The number of Entire Features: {len(features)}")
    return features

def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer,
                         orig_answer_text):
    """Narrow a word-aligned answer span to the sub-tokens that match exactly.

    Answer annotations are character based and are first projected onto
    whitespace-delimited words, which can be wider than the answer itself.
    Example: for the context "The leader was John Smith (1895-1943)." and the
    answer "1895", the word-level span is "(1895-1943).", but after
    tokenization the pieces are "( 1895 - 1943 ) .", so the exact answer can
    be selected.

    This is not always possible: if the answer "Japan" was annotated inside
    the word "Japanese" and the tokenizer does not split that word, no
    sub-span matches and the original span is returned unchanged.

    Returns:
        A `(start, end)` tuple of inclusive indices into `doc_tokens`.
    """
    wanted = " ".join(tokenizer.tokenize(orig_answer_text))

    for start in range(input_start, input_end + 1):
        # For each start, try the widest candidate first and shrink from the
        # right until the span collapses to a single token.
        end = input_end
        while end >= start:
            candidate = " ".join(doc_tokens[start:end + 1])
            if candidate == wanted:
                return (start, end)
            end -= 1

    # No exact match inside the span: keep the original boundaries.
    return (input_start, input_end)

def _check_is_max_context(doc_spans, cur_span_index, position):
    """Tell whether span `cur_span_index` gives `position` the most context.

    With sliding windows a token can fall into several spans, e.g.

      Doc:    the man went to the store and bought a gallon of milk
      Span A: the man went to the
      Span B: to the store and bought
      Span C: and bought a gallon of

    'bought' is in spans B and C. A span's score for a token is the smaller
    of the token's left and right context inside that span, plus a small
    bonus (0.01 per token) for the span length. Here span C wins (1 left,
    3 right) over span B (4 left, 0 right). On equal scores the earliest
    span wins.

    Returns:
        True if `cur_span_index` is the index of the best-scoring span that
        contains `position`, otherwise False.
    """
    scored = []
    for idx, span in enumerate(doc_spans):
        last = span.start + span.length - 1
        if span.start <= position <= last:
            context = min(position - span.start, last - position)
            scored.append((context + 0.01 * span.length, idx))

    winner = None
    if scored:
        # max() returns the first maximal item, so ties go to the earliest span.
        winner = max(scored, key=lambda item: item[0])[1]

    return cur_span_index == winner

def build_qa_dataset_for_test(data_dir, prediction_file, fold, eval_target,
                              pickle_folder=None):
    """Build QA evaluation entries from dialogue data and model predictions.

    For every dialogue turn in `<data_dir>/<fold>.jsonl` that has a non-empty
    history and at least one matching QA pair, the context is the last
    history utterance followed by the corresponding prediction line. One
    entry is produced per distinct question of that turn. Its answers are the
    knowledge-graph tails for the question plus the gold answers, keeping
    only those that do not already occur (case-insensitively) in the last
    history utterance.

    Args:
        data_dir: Directory containing `<fold>.jsonl` and
            `opendialkg_triples.txt` (tab-separated head/relation/tail).
        prediction_file: Text file with one prediction per line, e.g.
            dev_candidates_step1300.txt. Lines are paired with the entries of
            `<fold>.jsonl` in order.
        fold: Name of the split file to read, without the ".jsonl" suffix.
        eval_target: Currently unused; kept for interface compatibility.
        pickle_folder: Directory containing the processed QA files
            "train.jsonl" and "test.jsonl". When omitted, a module-level
            `args.pickle_folder` is used if such a global exists.

    Returns:
        A list of dicts with keys 'episode_id', 'turn_id', 'context',
        'question' and 'answer' (a list of strings).

    Raises:
        ValueError: If `pickle_folder` is not given and no module-level
            `args` object is available.
    """
    if pickle_folder is None:
        # Backward compatibility: older callers relied on a global `args`
        # being injected into this module. Fail with a clear message instead
        # of a NameError when it is missing.
        legacy_args = globals().get("args")
        if legacy_args is None:
            raise ValueError(
                "pickle_folder must be provided (no module-level `args` found)")
        pickle_folder = legacy_args.pickle_folder

    with open(prediction_file, 'r', encoding='utf-8') as f:
        predictions = f.readlines()

    # Load processed QA dataset
    qas = []
    for qa_file in ("train.jsonl", "test.jsonl"):
        with open(os.path.join(pickle_folder, qa_file), encoding='utf-8') as f:
            qas.extend(json.loads(line) for line in f)

    # Index QA pairs by turn once, instead of scanning the whole list for
    # every dialogue turn. Insertion order within a turn is preserved.
    qas_by_turn = collections.defaultdict(list)
    for qa in qas:
        qas_by_turn[(qa["unique_id"], qa["turn_id"])].append(qa)

    original_file = os.path.join(data_dir, f"{fold}.jsonl")
    with open(original_file, 'r', encoding='utf-8') as f:
        dataset = [json.loads(data) for data in f]
    with open(os.path.join(data_dir, 'opendialkg_triples.txt'), 'r',
              encoding='utf-8') as f:
        entire_triplets = f.readlines()
    print(f"# Entire Triples: {len(entire_triplets)}")

    # Knowledge graph lookup: "head\trelation" -> set of (head, relation, tail).
    database = collections.defaultdict(set)
    for triplet in entire_triplets:
        fields = triplet.strip().split('\t')
        if len(fields) != 3:
            # Skip malformed lines (too few or too many columns).
            continue
        head, relation, tail = fields
        database[f"{head}\t{relation}"].add((head, relation, tail))

    input_data = []
    # NOTE: zip() stops at the shorter input, so a prediction file whose
    # length differs from the dataset is silently truncated.
    paired = zip(dataset, predictions)
    for data, pred in tqdm(paired, total=min(len(dataset), len(predictions))):
        if len(data["history"]) == 0:
            continue
        found_qas = qas_by_turn.get((data["unique_id"], data["turn_id"]))
        if not found_qas:
            continue

        context_a = data["history"][-1]
        context_a_lower = context_a.lower()
        context = context_a + '\n' + pred.strip()  # Add prediction

        unique_questions = []
        for _qa in found_qas:
            if _qa["question"] not in unique_questions:
                unique_questions.append(_qa["question"])

        # Episode and turn ids are taken from the first matching QA pair.
        pivot = found_qas[0]

        for question in unique_questions:
            # Questions that are absent from the knowledge graph simply
            # contribute no extra candidates. Sorting keeps the output order
            # reproducible, since set iteration order is arbitrary.
            triples = sorted(database.get('\t'.join(question), ()))
            possible_answers = [
                triple[2] for triple in triples
                if triple[2].lower() not in context_a_lower
            ]

            for _qa in found_qas:
                if _qa["question"] != question:
                    continue
                gold = _qa["answer"]
                if (gold not in possible_answers
                        and gold.lower() not in context_a_lower):
                    possible_answers.append(gold)

            input_data.append({
                'episode_id': pivot["episode_id"],
                'turn_id': pivot["turn_id"],
                'context': context,
                'question': question,
                'answer': possible_answers,
            })
    print("Done")
    return input_data

def save_dataset(examples, output_dir):
    """Write `str(example)` followed by a newline for every example.

    The output goes to `<output_dir>/generated_dataset.txt`; the directory is
    created if needed and an existing file is overwritten.

    Args:
        examples: Iterable of objects to serialize with `str()`.
        output_dir: Destination directory.
    """
    os.makedirs(output_dir, exist_ok=True)
    filename = os.path.join(output_dir, "generated_dataset.txt")
    # Examples may contain non-ASCII text, so do not depend on the platform's
    # default encoding.
    with open(filename, 'w', encoding='utf-8') as f:
        for example in examples:
            f.write(str(example))
            f.write('\n')
