"""Human evaluation latex generation for SST2."""
import abc
import dataclasses
import re
from typing import Optional, List, Sequence, Tuple

import numpy as np
from transformers import PreTrainedTokenizer

from npeff_torch.examination.top_examples import top_examples_common
from npeff_torch.examination.top_examples.human_evals import humev_top_examples_same_theme_latex
from npeff_torch.examination.top_examples.human_evals import humev_top_examples_theme_latex
from npeff_torch.examination.top_examples import top_examples_latex
from npeff_torch.util import latex_utils

###############################################################################
# Reviewer instructions returned by the single-group theme generator's
# make_latex_intro(). Kept as a raw string so the LaTeX backslashes are
# emitted verbatim.
_DEFAULT_INTRO = R"""
This document contains groupings of examples from SST2. SST2 is a sentiment analysis
task with examples taken from movie reviews. The goal of the task is to assign a label
of positive or negative sentiment to the review.

For each group of examples, please determine if there is some common theme among the examples
in the group. In the second column of the CSV, please write \texttt{yes}, \texttt{maybe}, or \texttt{no}
(and only those three options) depending on whether you detected the presence of a theme. If you put
\texttt{yes} or \texttt{maybe}, please put a brief description of the theme in the third column of
the CSV.

\section{Sample Annotated Groups}

\textbf{TODO}

"""
###############################################################################
# Reviewer instructions returned by the paired-group ("same theme") generator's
# make_latex_intro(). Kept as a raw string so the LaTeX backslashes are
# emitted verbatim.
_DEFAULT_SAME_THEME_LATEX_INTRO = R"""
This document contains groupings of examples from SST2. SST2 is a sentiment analysis
task with examples taken from movie reviews. The goal of the task is to assign a label
of positive or negative sentiment to the review.

This document contains pairs of groups of examples. For each pairing, please determine whether each group
individually contains a common theme among its examples. If both groups do contain a theme, then determine
whether the themes of the two groups are very similar. In the second column of the CSV, please write
\texttt{yes}, \texttt{maybe}, or \texttt{no} (and only those three options) depending on whether both groups
contain very similar themes. To be clear, write \texttt{yes} only if both groups contain a theme that is very
similar; write \texttt{no} if both groups contain different themes or if both groups do not contain a detectable
theme. If you put \texttt{yes} or \texttt{maybe}, please put a brief description of the theme in the third column of
the CSV.
"""
###############################################################################


class _HumevThemeMixin:
    """Example-rendering helpers shared by the SST2 human-evaluation generators.

    The methods read ``self.tokenizer``, which this mixin does not define; the
    class it is mixed into has to supply it.
    """

    def _parse_sentence(self, example_info: 'top_examples_common.TopExampleInfo') -> str:
        """Recover the LaTeX-escaped review text of a tokenized example.

        Positions whose attention-mask entry is zero are dropped before the
        remaining token ids are decoded. The decoded prompt is expected to be
        a ``Review: `` prefix, the review itself, then a line break followed
        by ``Sentiment:``. When the prompt does not have that form, the
        literal string ``'ERROR'`` is returned instead of raising.
        """
        token_ids = example_info.example['input_ids']
        mask = example_info.example['attention_mask']

        # assumes both entries support boolean-mask indexing (e.g. NumPy
        # arrays or tensors) -- TODO confirm against the dataset pipeline
        decoded = self.tokenizer.decode(token_ids[mask != 0])

        found = re.search(r'^Review: (.+)\nSentiment:$', decoded)
        if found is None:
            # Placeholder keeps the document generation going for prompts
            # that do not follow the expected template.
            return 'ERROR'

        return latex_utils.escape(found.group(1))

    #######################################################

    def make_example_latex_string(self, example_info: 'top_examples_common.TopExampleInfo') -> str:
        """Render one example as an unindented, monospaced LaTeX snippet.

        The parsed sentence is followed by 2mm of vertical space and a LaTeX
        line break.
        """
        sentence = self._parse_sentence(example_info)
        # NOTE: the closing brace of \texttt comes after the line break, so
        # the spacing commands sit inside the \texttt group.
        pieces = (R'\noindent\texttt{', sentence, R'\vspace{2mm} \\', '\n}')
        return ''.join(pieces)


###############################################################################


@dataclasses.dataclass
class Sst2HumevTopExamplesThemeLatexGeneratorForLm(
    _HumevThemeMixin,
    humev_top_examples_theme_latex.HumevTopExamplesThemeLatexGeneratorAbc,
):
    """SST2 generator for the single-group theme human-evaluation document.

    Per-example rendering is inherited from ``_HumevThemeMixin``; this class
    supplies the tokenizer that the mixin reads and the SST2 intro text.
    """

    # Tokenizer whose ``decode`` turns an example's token ids back into text.
    tokenizer: PreTrainedTokenizer

    # presumably a LaTeX font-size command name consumed by the base
    # generator -- verify against HumevTopExamplesThemeLatexGeneratorAbc
    components_fontsize: Optional[str] = 'footnotesize'

    #######################################################

    def make_latex_intro(self) -> str:
        """Return the SST2 task description and annotation instructions for reviewers."""
        return _DEFAULT_INTRO


@dataclasses.dataclass
class Sst2HumevTopExamplesSameThemeLatexGeneratorForLm(
    _HumevThemeMixin,
    humev_top_examples_same_theme_latex.HumevTopExamplesSameThemeLatexGeneratorAbc,
):
    """SST2 generator for the paired-group ("same theme") human-evaluation document.

    Per-example rendering is inherited from ``_HumevThemeMixin``; this class
    supplies the tokenizer that the mixin reads and the SST2 intro text for
    the pairwise comparison task.
    """

    # Tokenizer whose ``decode`` turns an example's token ids back into text.
    tokenizer: PreTrainedTokenizer

    # presumably a LaTeX font-size command name consumed by the base
    # generator -- verify against HumevTopExamplesSameThemeLatexGeneratorAbc
    components_fontsize: Optional[str] = 'footnotesize'

    #######################################################

    def make_latex_intro(self) -> str:
        """Return the SST2 task description and pairwise annotation instructions for reviewers."""
        return _DEFAULT_SAME_THEME_LATEX_INTRO
