















import json
import os

import datasets

# NOTE(review): the citation and description literals were missing from this
# file (both assignments had no right-hand side, which is a SyntaxError and
# made the module unimportable). The values below are reconstructed from the
# dataset homepage and the per-task descriptions further down -- confirm the
# exact wording against the upstream loader before release.
_CITATION = """\
@article{brown2020language,
    title={Language Models are Few-Shot Learners},
    author={Brown, Tom B. and others},
    journal={arXiv preprint arXiv:2005.14165},
    year={2020}
}
"""

_DESCRIPTION = """\
Unscramble is a small battery of 5 character-manipulation tasks. Each task
gives the model a word distorted by scrambling, inserting, or reversing
characters and asks it to recover the original word.
"""

# Human-facing page for the data.
_HOMEPAGE = "https://github.com/openai/gpt-3/tree/master/data"


# No license string is recorded for this dataset.
_LICENSE = ""

# Raw-file base URL; each task's archive is fetched from "<base>/<task>.jsonl.gz".
_BASE_URL = "https://raw.githubusercontent.com/openai/gpt-3/master/data"


# Maps each task (builder config) name to its one-line description.
_DESCRIPTIONS = {
    "mid_word_1_anagrams": "Anagrams of all but the first and last letter.",
    "mid_word_2_anagrams": "Anagrams of all but the first and last 2 letters.",
    "cycle_letters_in_word": "Cycle letters in the word.",
    "random_insertion_in_word": "Random insertions in the word that must be removed.",
    "reversed_words": "Words spelled backwards that must be reversed.",
}
# Live view of the task names, in the insertion order of the dict above.
_NAMES = _DESCRIPTIONS.keys()


class Unscramble(datasets.GeneratorBasedBuilder):
    """Dataset builder for the word-unscrambling tasks hosted under ``_BASE_URL``.

    One builder configuration is exposed per entry in ``_DESCRIPTIONS``. Each
    configuration downloads ``<config name>.jsonl.gz`` and serves it as a
    single validation split whose examples carry two string fields,
    ``context`` and ``completion``.
    """

    VERSION = datasets.Version("0.0.1")

    # VERSION is passed in through the outermost iterable (the ``zip`` call)
    # on purpose: that expression is evaluated in the class scope, whereas the
    # rest of a comprehension in a class body cannot see class-level names.
    BUILDER_CONFIGS = [
        datasets.BuilderConfig(name=name, version=version, description=_DESCRIPTIONS[name])
        for name, version in zip(_NAMES, [VERSION] * len(_NAMES))
    ]

    def _info(self):
        """Return the dataset metadata and the feature schema of one example."""
        features = datasets.Features(
            {
                "context": datasets.Value("string"),
                "completion": datasets.Value("string"),
            }
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Download the archive for the selected config and declare its split.

        Args:
            dl_manager: Download manager supplied by ``datasets``; its
                ``download_and_extract`` is called with the archive URL.

        Returns:
            A one-element list containing the validation ``SplitGenerator``.
        """
        # URLs always use "/". ``os.path.join`` would insert the platform
        # separator instead, producing a broken backslash URL on Windows.
        url = f"{_BASE_URL}/{self.config.name}.jsonl.gz"
        data_path = dl_manager.download_and_extract(url)
        return [
            datasets.SplitGenerator(
                name=datasets.Split.VALIDATION,
                # These kwargs are forwarded to ``_generate_examples``.
                gen_kwargs={
                    "filepath": data_path,
                    "split": "validation",
                },
            ),
        ]

    def _generate_examples(self, filepath, split):
        """Yield ``(key, example)`` pairs from a JSON-lines file.

        Args:
            filepath: Path of the extracted JSON-lines file to read.
            split: Split name passed via ``gen_kwargs``; not used here, kept
                so the signature matches the kwargs that are forwarded.

        Yields:
            ``(line_index, {"context": ..., "completion": ...})`` for every
            non-blank line. The key is the zero-based line index in the file.
        """
        with open(filepath, encoding="utf-8") as f:
            for key, row in enumerate(f):
                # Tolerate blank lines (e.g. a trailing empty line) instead of
                # failing in ``json.loads``; keys of real rows are unaffected.
                if not row.strip():
                    continue
                data = json.loads(row)
                yield key, {
                    "context": data["context"],
                    "completion": data["completion"],
                }
