"""
TriviaQA: A Large Scale Distantly Supervised Challenge Dataset for Reading Comprehension
https://arxiv.org/pdf/1705.03551.pdf

TriviaQA is a reading comprehension dataset containing over 650K question-answer-evidence
triples. TriviaQA includes 95K question-answer pairs authored by trivia enthusiasts
and independently gathered evidence documents, six per question on average, that provide
high quality distant supervision for answering the questions.

Homepage: https://nlp.cs.washington.edu/triviaqa/
"""
import os
import json
import jsonlines
from lm_eval.base import Task, rf
from ..metrics import mean
from ..utils import sh
from best_download import download_file
from datasets import load_dataset

_CITATION = """
@InProceedings{JoshiTriviaQA2017,
    author = {Joshi, Mandar and Choi, Eunsol and Weld, Daniel S. and Zettlemoyer, Luke},
    title = {TriviaQA: A Large Scale Distantly Supervised Challenge Dataset for Reading Comprehension},
    booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics},
    month = {July},
    year = {2017},
    address = {Vancouver, Canada},
    publisher = {Association for Computational Linguistics},
}
"""


class Kilt_TriviaQA(Task):
    """Closed-book TriviaQA evaluation over the KILT ``triviaqa_support_only`` data.

    Validation documents come from the ``kilt_tasks`` dataset; their question
    text, answer and answer aliases are filled in from the matching record of
    the ``trivia_qa`` (``unfiltered.nocontext``) dataset.  A document counts as
    correct when any of its answer aliases is flagged as a prediction by the
    loglikelihood request issued in ``construct_requests``.
    """

    VERSION = 0

    def download(self):
        """Load both datasets and populate the KILT validation split.

        Sets ``self.kilt_triviaqa``, ``self.trivia_qa`` and
        ``self.triviaqa_map`` (TriviaQA question id -> row index in the
        TriviaQA validation split).  KILT validation rows whose id has no
        TriviaQA counterpart are dropped.
        """
        self.kilt_triviaqa = load_dataset("kilt_tasks", name="triviaqa_support_only")
        self.trivia_qa = load_dataset('trivia_qa', 'unfiltered.nocontext')

        def add_missing_data(x, trivia_qa_subset, triviaqa_map):
            # Overwrite the question and the first output's answer/aliases with
            # the values of the TriviaQA record that shares this KILT id
            # (presumably the KILT release ships without them -- TODO confirm).
            source = trivia_qa_subset[triviaqa_map[x['id']]]
            x['input'] = source['question']
            x['output'][0]['answer'] = source['answer']['value']
            x['output'][0]['aliases'] = source['answer']['aliases']
            return x

        # question_id -> row index, so each KILT row can be joined in O(1).
        self.triviaqa_map = {
            q_id: i
            for i, q_id in enumerate(self.trivia_qa['validation']['question_id'])
        }

        validation = self.kilt_triviaqa['validation']
        # Keep only the rows that can be joined; add_missing_data would raise
        # KeyError on any other id.
        validation = validation.filter(lambda x: x['id'] in self.triviaqa_map)
        validation = validation.map(
            add_missing_data,
            fn_kwargs=dict(
                trivia_qa_subset=self.trivia_qa['validation'],
                triviaqa_map=self.triviaqa_map,
            ),
        )
        self.kilt_triviaqa['validation'] = validation

    def has_training_docs(self):
        """Report whether the task provides training documents."""
        # NOTE(review): this advertises training docs although training_docs()
        # is not implemented -- confirm whether it should return False.
        return True

    def has_validation_docs(self):
        """Report whether the task provides validation documents."""
        return True

    def has_test_docs(self):
        """Report whether the task provides test documents."""
        return False

    def training_docs(self):
        """Training documents are not implemented for this task.

        Raises:
            NotImplementedError: always.
        """
        # Raise (rather than return) the exception so callers fail here instead
        # of later, when they try to iterate over an exception instance.
        raise NotImplementedError()

    def validation_docs(self):
        """Return the validation split prepared by ``download``."""
        return self.kilt_triviaqa['validation']

    def test_docs(self):
        """Test documents are not available for this task.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError()

    def doc_to_text(self, doc):
        """Build the prompt: nine fixed example Q/A pairs, then the document's question."""
        return f"Question: where is the Lincoln Memorial located?\nAnswer: Washington, DC, USA \n\nQuestion: How long is the Nile river\nAnswer: 6250 miles \n\nQuestion: who was elected president of the united states in 1928?\nAnswer: Herbert Hoover \n\nQuestion: what year did the September 11th attack occur?\nAnswer: 2001 \n\nQuestion: what elements of the periodic table are liquid at room temperature?\nAnswer: bromine, mercury \n\nQuestion: which pigment helps plant absorb energy from light?\nAnswer: chlorophyll \n\nQuestion: who was the commander of the japanese navy for the majority of World War II?\nAnswer: Isoroku Yamamoto \n\nQuestion: name of a famous highway without speed limits?\nAnswer: Autobahn \n\nQuestion: how many wheels does a semi truck have?\nAnswer: 18 \n\nQuestion: {doc['input']}\nAnswer:"

    def doc_to_target(self, doc):
        """Return the reference answer, prefixed with the space that follows ``Answer:``."""
        return " " + doc['output'][0]['answer']

    def _remove_prefixes(self, aliases):
        """Return the aliases, sorted, without any alias that extends a kept one.

        Optimization: an alias that starts with another alias is redundant,
        because if the shorter prefix is already accepted there is no need to
        look at the longer one.

        The input list is left untouched; an empty input yields an empty list.
        """
        # After sorting, every alias sits directly behind the aliases it
        # extends, so comparing against the last kept entry is sufficient.
        kept = []
        for alias in sorted(aliases):
            if not kept or not alias.startswith(kept[-1]):
                kept.append(alias)
        return kept

    def construct_requests(self, doc, ctx):
        """Create one loglikelihood request per de-duplicated answer alias.

        Only the second element of each ``rf.loglikelihood`` result is kept
        (presumably the is-greedy flag -- verify against ``lm_eval.base.rf``).
        """
        ret = []
        for alias in self._remove_prefixes(doc['output'][0]['aliases']):
            _, is_prediction = rf.loglikelihood(ctx, " " + alias)
            ret.append(is_prediction)
        return ret

    def process_results(self, doc, results):
        """Score a document 1.0 if any alias request came back truthy, else 0.0."""
        return {
            "acc": float(any(results))
        }

    def aggregation(self):
        """Map each metric name to the function that aggregates it."""
        return {
            "acc": mean,
        }

    def higher_is_better(self):
        """Map each metric name to whether larger values are better."""
        return {
            "acc": True
        }
