
import inspect

import efficiency_benchmark.dependencies.lm_eval.datasets.sat_analogies.sat_analogies
from efficiency_benchmark.dependencies.lm_eval.base import MultipleChoiceTask

_CITATION = 


class SATAnalogies(MultipleChoiceTask):
    """Multiple-choice analogy task built on the ``sat_analogies`` dataset script.

    Only a validation split is exposed; the training and test splits are
    reported as absent and yield no documents.
    """

    VERSION = 0
    # Filesystem path of the dataset module's source file.
    DATASET_PATH = inspect.getfile(efficiency_benchmark.dependencies.lm_eval.datasets.sat_analogies.sat_analogies)
    DATASET_NAME = None

    def __init__(self, data_dir: str):
        """Create the task, forwarding ``data_dir`` to the base constructor.

        NOTE(review): ``data_dir`` is presumably a local directory holding the
        dataset files -- confirm against the base class / dataset script.
        """
        super().__init__(data_dir=data_dir)

    def has_training_docs(self):
        """Report that no training split is available."""
        return False

    def has_validation_docs(self):
        """Report that a validation split is available."""
        return True

    def has_test_docs(self):
        """Report that no test split is available."""
        return False

    def training_docs(self):
        """Return an empty list, since there is no training split."""
        return []

    def validation_docs(self):
        """Return a lazy iterator of processed validation documents."""
        validation_split = self.dataset["validation"]
        return map(self._process_doc, validation_split)

    def test_docs(self):
        """Return an empty list, since there is no test split."""
        return []

    def _process_doc(self, doc):
        """Convert a raw dataset record into the multiple-choice document format.

        Reads the ``source``, ``stem``, ``choices`` and ``solution`` fields of
        ``doc`` and returns a dict with:

        * ``source``: copied through unchanged,
        * ``query``: the first two space-separated tokens of the stem,
        * ``choices``: one ``"<w1> is to <w2>"`` string per raw choice,
        * ``gold``: zero-based index of the solution letter among ``a``-``e``.
        """
        source = doc["source"]
        stem_words = doc["stem"].split(" ")[:2]

        pair_template = "{} is to {}"
        rendered_choices = []
        for raw_choice in doc["choices"]:
            choice_words = raw_choice.split(" ")[:2]
            rendered_choices.append(pair_template.format(*choice_words))

        # The solution is a letter label; surrounding whitespace is dropped
        # before it is mapped to a position.
        answer_letters = ["a", "b", "c", "d", "e"]
        gold_index = answer_letters.index(doc["solution"].strip())

        return {
            "source": source,
            "query": stem_words,
            "choices": rendered_choices,
            "gold": gold_index,
        }

    def doc_to_text(self, doc):
        """Render the prompt ``"<w1> is to <w2> as"`` from the document's query."""
        prompt_template = "{} is to {} as"
        return prompt_template.format(*doc["query"])

    def should_decontaminate(self):
        """Report that this task supports decontamination."""
        return True

    def doc_to_decontamination_query(self, doc):
        """Return the source and the space-joined query, separated by a newline."""
        source = doc["source"]
        query_text = " ".join(doc["query"])
        return source + "\n" + query_text
