















import datasets

# NOTE(review): the citation and description literals were missing from this
# file (bare `=` with no value, which is a SyntaxError). They are restored here
# from the LogiQA paper (arXiv:2007.08124) -- confirm wording against upstream.
_CITATION = """\
@misc{liu2020logiqa,
    title={LogiQA: A Challenge Dataset for Machine Reading Comprehension with Logical Reasoning},
    author={Jian Liu and Leyang Cui and Hanmeng Liu and Dandan Huang and Yile Wang and Yue Zhang},
    year={2020},
    eprint={2007.08124},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
"""

_DESCRIPTION = """\
LogiQA is a dataset for testing human logical reasoning. It consists of 8,678 QA
instances, covering multiple types of deductive reasoning. Results show that state-
of-the-art neural models perform by far worse than human ceiling. The dataset can
also serve as a benchmark for reinvestigating logical AI under the deep learning
NLP setting.
"""

_HOMEPAGE = "https://github.com/lgw863/LogiQA-dataset"


# Left empty: no license string is recorded in this script.
_LICENSE = ""

# Raw text file backing each split; keys are the split names used by the builder.
_URLS = {
    "train": "https://raw.githubusercontent.com/lgw863/LogiQA-dataset/master/Train.txt",
    "validation": "https://raw.githubusercontent.com/lgw863/LogiQA-dataset/master/Eval.txt",
    "test": "https://raw.githubusercontent.com/lgw863/LogiQA-dataset/master/Test.txt",
}


class Logiqa(datasets.GeneratorBasedBuilder):
    """Dataset builder that parses the LogiQA text files into examples.

    Every example carries four fields: ``label``, ``context``, ``question``
    and ``options`` (a sequence of strings).
    """

    VERSION = datasets.Version("0.0.1")

    BUILDER_CONFIGS = [
        datasets.BuilderConfig(name="logiqa", version=VERSION, description="The LogiQA dataset."),
    ]

    def _info(self):
        """Return the ``DatasetInfo`` describing the feature schema and metadata."""
        features = datasets.Features(
            {
                "label": datasets.Value("string"),
                "context": datasets.Value("string"),
                "question": datasets.Value("string"),
                "options": datasets.features.Sequence(datasets.Value("string")),
            }
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Download the three split files and describe one generator per split.

        Args:
            dl_manager: Download manager supplied by ``datasets``; only its
                ``download_and_extract`` method is used.

        Returns:
            A list of ``SplitGenerator`` objects in train, test, validation
            order, each passing ``filepath`` and ``split`` on to
            ``_generate_examples``.
        """
        # (split enum, key into _URLS) -- the order here is the order returned.
        split_table = (
            (datasets.Split.TRAIN, "train"),
            (datasets.Split.TEST, "test"),
            (datasets.Split.VALIDATION, "validation"),
        )
        urls = {key: _URLS[key] for _, key in split_table}
        data_dir = dl_manager.download_and_extract(urls)
        return [
            datasets.SplitGenerator(
                name=split_name,
                gen_kwargs={"filepath": data_dir[key], "split": key},
            )
            for split_name, key in split_table
        ]

    def _generate_examples(self, filepath, split):
        """Yield ``(key, example)`` pairs parsed from one LogiQA text file.

        Records are separated by a blank line. Within a record the first line
        is the label, the second the context, the third the question, and all
        remaining lines are options whose first two characters are dropped.

        Args:
            filepath: Path of the UTF-8 text file to parse.
            split: Name of the split being generated; accepted because it is
                passed through ``gen_kwargs`` but not used for parsing.

        Yields:
            Tuples of a consecutive integer key and a dict with the keys
            ``label``, ``context``, ``question`` and ``options``.

        Raises:
            IndexError: If a non-empty record has fewer than three lines.
        """

        def normalize(text):
            # Inserts a space after every period (including ones that already
            # have one, or that sit inside a number) and trims the ends.
            return text.replace(".", ". ").strip()

        with open(filepath, encoding="utf-8") as f:
            rows = f.read().strip().split("\n\n")

        key = 0
        for row in rows:
            # An odd run of newlines between records leaves a stray leading
            # newline on the next row, which would shift every field by one;
            # an empty file or an even run >= 4 leaves an entirely empty row.
            row = row.strip("\n")
            if not row:
                continue
            example = row.split("\n")
            yield key, {
                "label": example[0].strip(),
                "context": normalize(example[1]),
                "question": normalize(example[2]),
                "options": [normalize(option[2:]) for option in example[3:]],
            }
            key += 1
