
from itertools import islice

from efficiency_benchmark.dependencies.lm_eval.base import Task

# NOTE(review): the value assigned here is missing from the reviewed text,
# which makes the module a syntax error as written. The empty string is a
# placeholder that keeps the module importable — restore the dataset's
# citation entry (presumably BibTeX; confirm against the upstream task file).
_CITATION = ""


class NaturalQs(Task):
    """Task definition for the ``natural_questions`` dataset.

    Prompts are built from the question text and targets from the first
    long-answer annotation of each document. Request construction and
    scoring are not implemented: the four evaluation hooks raise
    ``NotImplementedError``.
    """

    VERSION = 0
    DATASET_PATH = "natural_questions"
    DATASET_NAME = None

    # Upper bound on how many leading training documents are read when
    # building the few-shot sampling pool.
    FEWSHOT_POOL_SIZE = 100000

    # Lazily built few-shot pool. Kept separate from ``_training_docs`` so
    # that building it never truncates what ``training_docs()`` returns.
    _fewshot_pool = None

    def has_training_docs(self):
        """Return True: a ``"train"`` split is read by ``training_docs``."""
        return True

    def has_validation_docs(self):
        """Return True: a ``"validation"`` split is read by ``validation_docs``."""
        return True

    def has_test_docs(self):
        """Return False: this task exposes no test split."""
        return False

    def training_docs(self):
        """Return the full training split as a list, cached after first use.

        The whole ``"train"`` split is materialised in memory on the first
        call and reused on subsequent calls.
        """
        # NOTE(review): assumes the base class initialises ``_training_docs``
        # to None before this is called — confirm against ``Task.__init__``.
        if self._training_docs is None:
            self._training_docs = list(self.dataset["train"])
        return self._training_docs

    def validation_docs(self):
        """Return the ``"validation"`` split of the dataset, uncached."""
        return self.dataset["validation"]

    def fewshot_examples(self, k, rnd):
        """Sample ``k`` few-shot documents from the training data.

        Args:
            k: Number of documents to sample.
            rnd: Object providing ``sample(population, k)``, e.g. a
                ``random.Random`` instance.

        Returns:
            Whatever ``rnd.sample`` returns for the pool and ``k``.

        If the full training split has already been cached by
        ``training_docs()``, it is used as the pool. Otherwise only the
        first ``FEWSHOT_POOL_SIZE`` training documents are read, straight
        from the dataset split, so the complete split is never materialised
        just to draw a few examples.
        """
        if self._training_docs is not None:
            pool = self._training_docs
        else:
            if self._fewshot_pool is None:
                # Slice the raw split rather than ``training_docs()``: the
                # latter would load (and cache) every training document
                # before the slice could take effect.
                self._fewshot_pool = list(
                    islice(self.dataset["train"], self.FEWSHOT_POOL_SIZE)
                )
            pool = self._fewshot_pool

        return rnd.sample(pool, k)

    def doc_to_text(self, doc):
        """Build the prompt ``"Q: <question text>\\n\\nA:"`` for ``doc``."""
        return "Q: " + doc["question"]["text"] + "\n\nA:"

    def should_decontaminate(self):
        """Return True: this task supplies a decontamination query."""
        return True

    def doc_to_decontamination_query(self, doc):
        """Return the question text of ``doc`` as the decontamination query."""
        return doc["question"]["text"]

    def doc_to_target(self, doc):
        """Return the first long answer of ``doc`` as plain text.

        The answer is the span of document tokens between the annotation's
        ``start_token`` and ``end_token``, with tokens flagged ``is_html``
        dropped and the remainder joined by single spaces.
        """
        first_long_answer = doc["annotations"]["long_answer"][0]
        start = first_long_answer["start_token"]
        end = first_long_answer["end_token"]

        tokens = doc["document"]["tokens"]
        span_tokens = tokens["token"][start:end]
        span_is_html = tokens["is_html"][start:end]

        text_tokens = [
            tok for tok, is_html in zip(span_tokens, span_is_html) if not is_html
        ]
        return " ".join(text_tokens)

    def construct_requests(self, doc, ctx):
        """Not implemented for this task.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("Evaluation not implemented")

    def process_results(self, doc, results):
        """Not implemented for this task.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("Evaluation not implemented")

    def aggregation(self):
        """Not implemented for this task.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("Evaluation not implemented")

    def higher_is_better(self):
        """Not implemented for this task.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("Evaluation not implemented")
