















import os
import xml.etree.ElementTree as ET

import datasets

# NOTE(review): this file contains no BibTeX text for the dataset; the value is
# left empty (like _LICENSE below) so the module stays importable. Fill in the
# official citation from the dataset homepage.
_CITATION = ""

# Short summary shown in the dataset card; uses the same wording as the
# description of the "asdiv" builder config.
_DESCRIPTION = "A diverse corpus for evaluating and developing english math word problem solvers"

# Upstream repository that hosts the corpus.
_HOMEPAGE = "https://github.com/chaochun/nlu-asdiv-dataset"


# NOTE(review): no license text is recorded in this file — confirm upstream.
_LICENSE = ""

# Archive of the upstream repository pinned to a single commit, so the
# downloaded data does not change between runs.
_URLS = "https://github.com/chaochun/nlu-asdiv-dataset/archive/55790e5270bb91ccfa5053194b25732534696b50.zip"


class ASDiv(datasets.GeneratorBasedBuilder):
    """Dataset builder for the ASDiv math word problem corpus.

    Downloads a commit-pinned archive of the upstream repository and exposes
    the problems stored in ``dataset/ASDiv.xml`` as a single validation split.
    """

    VERSION = datasets.Version("0.0.1")

    BUILDER_CONFIGS = [
        datasets.BuilderConfig(
            name="asdiv",
            version=VERSION,
            description="A diverse corpus for evaluating and developing english math word problem solvers",
        )
    ]

    def _info(self):
        """Return the dataset metadata; every feature is a string column."""
        column_names = ("body", "question", "solution_type", "answer", "formula")
        string_columns = {name: datasets.Value("string") for name in column_names}
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=datasets.Features(string_columns),
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Download and extract the archive, then describe the only split.

        Args:
            dl_manager: Download manager used to fetch and unpack ``_URLS``.

        Returns:
            A one-element list holding the validation ``SplitGenerator``.
        """
        extracted_root = dl_manager.download_and_extract(_URLS)
        # Top-level folder inside the archive; its suffix is the same commit
        # hash that appears in _URLS.
        archive_folder = "nlu-asdiv-dataset-55790e5270bb91ccfa5053194b25732534696b50"
        xml_path = os.path.join(extracted_root, archive_folder, "dataset", "ASDiv.xml")
        validation = datasets.Split.VALIDATION
        return [
            datasets.SplitGenerator(
                name=validation,
                gen_kwargs={"filepath": xml_path, "split": validation},
            ),
        ]

    def _generate_examples(self, filepath, split):
        """Yield ``(index, example)`` pairs, one per ``Problem`` element.

        Args:
            filepath: Path to the ASDiv XML file to parse.
            split: Split being generated; not used by this method.

        Yields:
            The running index of the problem and a dict holding the text of
            its ``Body``, ``Question``, ``Solution-Type``, ``Answer`` and
            ``Formula`` child elements.
        """
        # Output column name -> XML child tag, in the order the columns are emitted.
        tag_by_column = (
            ("body", "Body"),
            ("question", "Question"),
            ("solution_type", "Solution-Type"),
            ("answer", "Answer"),
            ("formula", "Formula"),
        )
        document_root = ET.parse(filepath).getroot()
        for index, node in enumerate(document_root.iter("Problem")):
            yield index, {column: node.find(tag).text for column, tag in tag_by_column}
