# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Hugging Face ``datasets`` loading script for the MMLU benchmark.

One builder configuration is defined per task in ``task_list``. Each
configuration reads three headerless CSV files (test / val / dev) from the
extracted archive and yields rows with the columns listed in ``_COLUMNS``.
"""

import os

import datasets
import pandas as pd


_CITATION = """\
@article{hendryckstest2021,
  title={Measuring Massive Multitask Language Understanding},
  author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},
  journal={Proceedings of the International Conference on Learning Representations (ICLR)},
  year={2021}
}
"""

_DESCRIPTION = """\
Measuring Massive Multitask Language Understanding by Dan Hendrycks, Collin Burns, Steven Basart, Andy Zou, Mantas Mazeika, Dawn Song, and Jacob Steinhardt (ICLR 2021).
"""

_HOMEPAGE = "https://github.com/hendrycks/test"

_LICENSE = "MIT"

# Archive handed to the download manager. After extraction it is expected to
# contain data/test/<task>_test.csv, data/val/<task>_val.csv and
# data/dev/<task>_dev.csv for every task below.
_URL = "mmlu.zip"

# Column names assigned to the headerless CSV files. The same list defines the
# dataset features, so the schema and the parsed rows cannot drift apart.
_COLUMNS = ["question", "A", "B", "C", "D", "answer"]

# (split name, sub-directory, file-name suffix), in the order the splits are
# declared. Note that the "dev" files are exposed as the TRAIN split.
_SPLIT_LAYOUT = (
    (datasets.Split.TEST, "test", "test"),
    (datasets.Split.VALIDATION, "val", "val"),
    (datasets.Split.TRAIN, "dev", "dev"),
)

# Task names. Each one is both a builder-config name and the prefix of the CSV
# files read for that configuration.
task_list = [
    "high_school_european_history",
    "business_ethics",
    "clinical_knowledge",
    "medical_genetics",
    "high_school_us_history",
    "high_school_physics",
    "high_school_world_history",
    "virology",
    "high_school_microeconomics",
    "econometrics",
    "college_computer_science",
    "high_school_biology",
    "abstract_algebra",
    "professional_accounting",
    "philosophy",
    "professional_medicine",
    "nutrition",
    "global_facts",
    "machine_learning",
    "security_studies",
    "public_relations",
    "professional_psychology",
    "prehistory",
    "anatomy",
    "human_sexuality",
    "college_medicine",
    "high_school_government_and_politics",
    "college_chemistry",
    "logical_fallacies",
    "high_school_geography",
    "elementary_mathematics",
    "human_aging",
    "college_mathematics",
    "high_school_psychology",
    "formal_logic",
    "high_school_statistics",
    "international_law",
    "high_school_mathematics",
    "high_school_computer_science",
    "conceptual_physics",
    "miscellaneous",
    "high_school_chemistry",
    "marketing",
    "professional_law",
    "management",
    "college_physics",
    "jurisprudence",
    "world_religions",
    "sociology",
    "us_foreign_policy",
    "high_school_macroeconomics",
    "computer_security",
    "moral_scenarios",
    "moral_disputes",
    "electrical_engineering",
    "astronomy",
    "college_biology",
]

# Backward-compatible alias for the previous (truncated) public name.
task_st = task_list


class MMLUConfig(datasets.BuilderConfig):
    """Builder configuration for a single MMLU task, pinned to version 1.0.0."""

    def __init__(self, **kwargs):
        """Forward ``kwargs`` to ``datasets.BuilderConfig`` with a fixed version.

        Args:
            **kwargs: Keyword arguments accepted by ``datasets.BuilderConfig``
                (this script passes ``name``).
        """
        super().__init__(version=datasets.Version("1.0.0"), **kwargs)


class MMLU(datasets.GeneratorBasedBuilder):
    """Dataset builder exposing one configuration per entry of ``task_list``."""

    BUILDER_CONFIGS = [MMLUConfig(name=task_name) for task_name in task_list]

    def _info(self):
        """Return the dataset metadata; every column is a string feature."""
        features = datasets.Features(
            {column: datasets.Value("string") for column in _COLUMNS}
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Download and extract the archive, then declare the three splits.

        Args:
            dl_manager: Download manager supplied by ``datasets``; only its
                ``download_and_extract`` method is used.

        Returns:
            A list of ``datasets.SplitGenerator`` objects (TEST, VALIDATION,
            TRAIN, in that order), each carrying the ``filepath`` of the CSV
            file for the selected task.
        """
        data_dir = dl_manager.download_and_extract(_URL)
        task_name = self.config.name
        return [
            datasets.SplitGenerator(
                name=split,
                gen_kwargs={
                    "filepath": os.path.join(
                        data_dir, "data", folder, f"{task_name}_{suffix}.csv"
                    ),
                },
            )
            for split, folder, suffix in _SPLIT_LAYOUT
        ]

    def _generate_examples(self, filepath: str):
        """Yield ``(index, example)`` pairs from one headerless CSV file.

        Args:
            filepath: Path of the CSV file to read.

        Yields:
            Tuples of the zero-based row index and a dict mapping each name in
            ``_COLUMNS`` to the cell text.

        Raises:
            ValueError: If the file does not have exactly ``len(_COLUMNS)``
                columns (raised by the column assignment).
        """
        # Every feature is declared as a string, so read the cells verbatim:
        # without dtype=str pandas would turn numeric-looking options into
        # ints/floats (e.g. "0.50" -> 0.5), and without keep_default_na=False
        # cells such as "N/A" or "None" would be replaced by NaN.
        df = pd.read_csv(filepath, header=None, dtype=str, keep_default_na=False)
        df.columns = _COLUMNS
        yield from enumerate(df.to_dict(orient="records"))