# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Hugging Face `datasets` loading script for the C-Eval benchmark.

Each subject in `task_list` is exposed as its own builder configuration.
"""

import os

import datasets
import pandas as pd


_CITATION = """\
@article{huang2023ceval,
  title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models},
  author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},
  journal={arXiv preprint arXiv:2305.08322},
  year={2023}
}
"""

_DESCRIPTION = """\
C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.
"""

_HOMEPAGE = "https://cevalbenchmark.com"

_LICENSE = (
    "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License"
)

# Archive passed to the download manager; it is expected to contain the
# `test/`, `val/` and `dev/` directories read by `_split_generators`.
_URL = "ceval.zip"

# One entry per subject. Each name doubles as the builder config name and as
# the prefix of that subject's CSV files inside the archive.
task_list = [
    "computer_network",
    "operating_system",
    "computer_architecture",
    "college_programming",
    "college_physics",
    "college_chemistry",
    "advanced_mathematics",
    "probability_and_statistics",
    "discrete_mathematics",
    "electrical_engineer",
    "metrology_engineer",
    "high_school_mathematics",
    "high_school_physics",
    "high_school_chemistry",
    "high_school_biology",
    "middle_school_mathematics",
    "middle_school_biology",
    "middle_school_physics",
    "middle_school_chemistry",
    "veterinary_medicine",
    "college_economics",
    "business_administration",
    "marxism",
    "mao_zedong_thought",
    "education_science",
    "teacher_qualification",
    "high_school_politics",
    "high_school_geography",
    "middle_school_politics",
    "middle_school_geography",
    "modern_chinese_history",
    "ideological_and_moral_cultivation",
    "logic",
    "law",
    "chinese_language_and_literature",
    "art_studies",
    "professional_tour_guide",
    "legal_professional",
    "high_school_chinese",
    "high_school_history",
    "middle_school_history",
    "civil_servant",
    "sports_science",
    "plant_protection",
    "basic_medicine",
    "clinical_medicine",
    "urban_and_rural_planner",
    "accountant",
    "fire_engineer",
    "environmental_impact_assessment_engineer",
    "tax_accountant",
    "physician",
]

# Backward-compatible alias for the previous spelling of the list's name.
task_st = task_list


class CevalConfig(datasets.BuilderConfig):
    """Builder configuration for a single C-Eval subject.

    The version is pinned to 1.0.0; every other keyword argument (such as
    ``name``) is forwarded unchanged to ``datasets.BuilderConfig``.
    """

    def __init__(self, **kwargs):
        super().__init__(version=datasets.Version("1.0.0"), **kwargs)


class Ceval(datasets.GeneratorBasedBuilder):
    """Dataset builder exposing one configuration per entry of `task_list`."""

    BUILDER_CONFIGS = [CevalConfig(name=task_name) for task_name in task_list]

    def _info(self):
        """Return the dataset metadata and the feature schema of one example."""
        features = datasets.Features(
            {
                "id": datasets.Value("int32"),
                "question": datasets.Value("string"),
                "A": datasets.Value("string"),
                "B": datasets.Value("string"),
                "C": datasets.Value("string"),
                "D": datasets.Value("string"),
                "answer": datasets.Value("string"),
                "explanation": datasets.Value("string"),
            }
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Download and extract the archive, then declare the three splits.

        Args:
            dl_manager: Download manager supplied by the `datasets` library.

        Returns:
            A list of ``datasets.SplitGenerator`` objects. The ``test`` and
            ``val`` directories map to the TEST and VALIDATION splits, and
            the ``dev`` directory is exposed as the TRAIN split.
        """
        data_dir = dl_manager.download_and_extract(_URL)
        task_name = self.config.name
        return [
            datasets.SplitGenerator(
                name=datasets.Split.TEST,
                gen_kwargs={
                    "filepath": os.path.join(data_dir, "test", f"{task_name}_test.csv"),
                },
            ),
            datasets.SplitGenerator(
                name=datasets.Split.VALIDATION,
                gen_kwargs={
                    "filepath": os.path.join(data_dir, "val", f"{task_name}_val.csv"),
                },
            ),
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={
                    "filepath": os.path.join(data_dir, "dev", f"{task_name}_dev.csv"),
                },
            ),
        ]

    def _generate_examples(self, filepath):
        """Yield ``(index, example)`` pairs read from one subject CSV file.

        Args:
            filepath: Path of the UTF-8 encoded CSV file to read.

        Yields:
            Tuples of the zero-based row index and a dict mapping column
            names to that row's values.
        """
        df = pd.read_csv(filepath, encoding="utf-8")
        for i, instance in enumerate(df.to_dict(orient="records")):
            # A CSV may lack these columns; fill them with empty strings so
            # every example carries all keys declared in `_info`.
            instance.setdefault("answer", "")
            instance.setdefault("explanation", "")
            yield i, instance