LICENSE.md
README.md
pyproject.toml
setup.py
lm_eval/__init__.py
lm_eval/__main__.py
lm_eval/evaluator.py
lm_eval/evaluator_utils.py
lm_eval/utils.py
lm_eval.egg-info/PKG-INFO
lm_eval.egg-info/SOURCES.txt
lm_eval.egg-info/dependency_links.txt
lm_eval.egg-info/entry_points.txt
lm_eval.egg-info/requires.txt
lm_eval.egg-info/top_level.txt
lm_eval/api/__init__.py
lm_eval/api/filter.py
lm_eval/api/group.py
lm_eval/api/instance.py
lm_eval/api/metrics.py
lm_eval/api/model.py
lm_eval/api/registry.py
lm_eval/api/samplers.py
lm_eval/api/task.py
lm_eval/caching/__init__.py
lm_eval/caching/cache.py
lm_eval/decontamination/__init__.py
lm_eval/decontamination/archiver.py
lm_eval/decontamination/decontaminate.py
lm_eval/decontamination/janitor.py
lm_eval/filters/__init__.py
lm_eval/filters/decontamination.py
lm_eval/filters/extraction.py
lm_eval/filters/selection.py
lm_eval/filters/transformation.py
lm_eval/loggers/__init__.py
lm_eval/loggers/evaluation_tracker.py
lm_eval/loggers/utils.py
lm_eval/loggers/wandb_logger.py
lm_eval/models/SVDmodel.py
lm_eval/models/__init__.py
lm_eval/models/anthropic_llms.py
lm_eval/models/api_models.py
lm_eval/models/dummy.py
lm_eval/models/gguf.py
lm_eval/models/huggingface.py
lm_eval/models/mamba_lm.py
lm_eval/models/nemo_lm.py
lm_eval/models/neuralmagic.py
lm_eval/models/neuron_optimum.py
lm_eval/models/openai_completions.py
lm_eval/models/optimum_lm.py
lm_eval/models/textsynth.py
lm_eval/models/utils.py
lm_eval/models/vllm_causallms.py
lm_eval/prompts/__init__.py
lm_eval/tasks/README.md
lm_eval/tasks/__init__.py
lm_eval/tasks/aclue/README.md
lm_eval/tasks/aclue/_aclue.yaml
lm_eval/tasks/aclue/_default_template_yaml
lm_eval/tasks/aclue/_generate_configs.py
lm_eval/tasks/aclue/aclue_ancient_chinese_culture.yaml
lm_eval/tasks/aclue/aclue_ancient_literature.yaml
lm_eval/tasks/aclue/aclue_ancient_medical.yaml
lm_eval/tasks/aclue/aclue_ancient_phonetics.yaml
lm_eval/tasks/aclue/aclue_basic_ancient_chinese.yaml
lm_eval/tasks/aclue/aclue_couplet_prediction.yaml
lm_eval/tasks/aclue/aclue_homographic_character_resolution.yaml
lm_eval/tasks/aclue/aclue_named_entity_recognition.yaml
lm_eval/tasks/aclue/aclue_poetry_appreciate.yaml
lm_eval/tasks/aclue/aclue_poetry_context_prediction.yaml
lm_eval/tasks/aclue/aclue_poetry_quality_assessment.yaml
lm_eval/tasks/aclue/aclue_poetry_sentiment_analysis.yaml
lm_eval/tasks/aclue/aclue_polysemy_resolution.yaml
lm_eval/tasks/aclue/aclue_reading_comprehension.yaml
lm_eval/tasks/aclue/aclue_sentence_segmentation.yaml
lm_eval/tasks/aexams/README.md
lm_eval/tasks/aexams/_aexams.yaml
lm_eval/tasks/aexams/_default_template_yaml
lm_eval/tasks/aexams/aexams_Biology.yaml
lm_eval/tasks/aexams/aexams_IslamicStudies.yaml
lm_eval/tasks/aexams/aexams_Physics.yaml
lm_eval/tasks/aexams/aexams_Science.yaml
lm_eval/tasks/aexams/aexams_Social.yaml
lm_eval/tasks/afrimgsm/README.md
lm_eval/tasks/afrimgsm/gen_yaml.sh
lm_eval/tasks/afrimgsm/run.sh
lm_eval/tasks/afrimgsm/utils.py
lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_amh.yaml
lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_eng.yaml
lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_ewe.yaml
lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_fra.yaml
lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_hau.yaml
lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_ibo.yaml
lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_kin.yaml
lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_lin.yaml
lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_lug.yaml
lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_orm.yaml
lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_sna.yaml
lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_sot.yaml
lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_swa.yaml
lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_twi.yaml
lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_wol.yaml
lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_xho.yaml
lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_yor.yaml
lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_zul.yaml
lm_eval/tasks/afrimgsm/direct/direct_yaml
lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_amh.yaml
lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_eng.yaml
lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_ewe.yaml
lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_fra.yaml
lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_hau.yaml
lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_ibo.yaml
lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_kin.yaml
lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_lin.yaml
lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_lug.yaml
lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_orm.yaml
lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_sna.yaml
lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_sot.yaml
lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_swa.yaml
lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_twi.yaml
lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_wol.yaml
lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_xho.yaml
lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_yor.yaml
lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_zul.yaml
lm_eval/tasks/afrimgsm/en_cot/cot_yaml
lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_amh.yaml
lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_eng.yaml
lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_ewe.yaml
lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_fra.yaml
lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_hau.yaml
lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_ibo.yaml
lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_kin.yaml
lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_lin.yaml
lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_lug.yaml
lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_orm.yaml
lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_sna.yaml
lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_sot.yaml
lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_swa.yaml
lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_twi.yaml
lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_wol.yaml
lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_xho.yaml
lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_yor.yaml
lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_zul.yaml
lm_eval/tasks/afrimgsm/translate/translate_direct_yaml
lm_eval/tasks/afrimmlu/README.md
lm_eval/tasks/afrimmlu/fewshot.sh
lm_eval/tasks/afrimmlu/utils.py
lm_eval/tasks/afrimmlu/direct/afrimmlu_common_yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_amh.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_eng.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ewe.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_fra.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_hau.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ibo.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_kin.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lin.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lug.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_orm.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sna.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sot.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_swa.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_twi.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_wol.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_xho.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_yor.yaml
lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_zul.yaml
lm_eval/tasks/afrimmlu/direct/utils.py
lm_eval/tasks/afrimmlu/translate/afrimmlu_common_translate_yaml
lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_amh.yaml
lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_eng.yaml
lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_ewe.yaml
lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_fra.yaml
lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_hau.yaml
lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_ibo.yaml
lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_kin.yaml
lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_lin.yaml
lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_lug.yaml
lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_orm.yaml
lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_sna.yaml
lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_sot.yaml
lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_swa.yaml
lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_twi.yaml
lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_wol.yaml
lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_xho.yaml
lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_yor.yaml
lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_zul.yaml
lm_eval/tasks/afrimmlu/translate/utils.py
lm_eval/tasks/afrixnli/README.md
lm_eval/tasks/afrixnli/utils.py
lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_amh.yaml
lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_eng.yaml
lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_ewe.yaml
lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_fra.yaml
lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_hau.yaml
lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_ibo.yaml
lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_kin.yaml
lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_lin.yaml
lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_lug.yaml
lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_orm.yaml
lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_sna.yaml
lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_sot.yaml
lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_swa.yaml
lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_twi.yaml
lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_wol.yaml
lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_xho.yaml
lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_yaml
lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_yor.yaml
lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_zul.yaml
lm_eval/tasks/afrixnli/anli prompt/en-direct/utils.py
lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_amh.yaml
lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_eng.yaml
lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_ewe.yaml
lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_fra.yaml
lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_hau.yaml
lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_ibo.yaml
lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_kin.yaml
lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_lin.yaml
lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_lug.yaml
lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_orm.yaml
lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_sna.yaml
lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_sot.yaml
lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_swa.yaml
lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_twi.yaml
lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_wol.yaml
lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_xho.yaml
lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_yaml
lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_yor.yaml
lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_zul.yaml
lm_eval/tasks/afrixnli/anli prompt/native-direct/utils.py
lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_amh.yaml
lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_ewe.yaml
lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_fra.yaml
lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_hau.yaml
lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_ibo.yaml
lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_kin.yaml
lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_lin.yaml
lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_lug.yaml
lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_orm.yaml
lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_sna.yaml
lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_sot.yaml
lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_swa.yaml
lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_twi.yaml
lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_wol.yaml
lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_xho.yaml
lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_yaml
lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_yor.yaml
lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_zul.yaml
lm_eval/tasks/afrixnli/anli prompt/translate/utils.py
lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_amh.yaml
lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_eng.yaml
lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_ewe.yaml
lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_fra.yaml
lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_hau.yaml
lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_ibo.yaml
lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_kin.yaml
lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_lin.yaml
lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_lug.yaml
lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_orm.yaml
lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_sna.yaml
lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_sot.yaml
lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_swa.yaml
lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_twi.yaml
lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_wol.yaml
lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_xho.yaml
lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_yaml
lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_yor.yaml
lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_zul.yaml
lm_eval/tasks/afrixnli/lai prompt/direct/utils.py
lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_amh.yaml
lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_ewe.yaml
lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_fra.yaml
lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_hau.yaml
lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_ibo.yaml
lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_kin.yaml
lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_lin.yaml
lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_lug.yaml
lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_orm.yaml
lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_sna.yaml
lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_sot.yaml
lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_swa.yaml
lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_twi.yaml
lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_wol.yaml
lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_xho.yaml
lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_yaml
lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_yor.yaml
lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_zul.yaml
lm_eval/tasks/afrixnli/lai prompt/translate/utils.py
lm_eval/tasks/agieval/README.md
lm_eval/tasks/agieval/agieval.yaml
lm_eval/tasks/agieval/agieval_cn.yaml
lm_eval/tasks/agieval/agieval_en.yaml
lm_eval/tasks/agieval/agieval_nous.yaml
lm_eval/tasks/agieval/aqua-rat.yaml
lm_eval/tasks/agieval/gaokao-biology.yaml
lm_eval/tasks/agieval/gaokao-chemistry.yaml
lm_eval/tasks/agieval/gaokao-chinese.yaml
lm_eval/tasks/agieval/gaokao-english.yaml
lm_eval/tasks/agieval/gaokao-geography.yaml
lm_eval/tasks/agieval/gaokao-history.yaml
lm_eval/tasks/agieval/gaokao-mathcloze.yaml
lm_eval/tasks/agieval/gaokao-mathqa.yaml
lm_eval/tasks/agieval/gaokao-physics.yaml
lm_eval/tasks/agieval/jec-qa-ca.yaml
lm_eval/tasks/agieval/jec-qa-kd.yaml
lm_eval/tasks/agieval/logiqa-en.yaml
lm_eval/tasks/agieval/logiqa-zh.yaml
lm_eval/tasks/agieval/lsat-ar.yaml
lm_eval/tasks/agieval/lsat-lr.yaml
lm_eval/tasks/agieval/lsat-rc.yaml
lm_eval/tasks/agieval/math.yaml
lm_eval/tasks/agieval/sat-en-without-passage.yaml
lm_eval/tasks/agieval/sat-en.yaml
lm_eval/tasks/agieval/sat-math.yaml
lm_eval/tasks/agieval/utils.py
lm_eval/tasks/alghafa/copa_ar/README.md
lm_eval/tasks/alghafa/copa_ar/copa_ar.yaml
lm_eval/tasks/alghafa/piqa_ar/README.md
lm_eval/tasks/alghafa/piqa_ar/piqa_ar.yaml
lm_eval/tasks/anli/README.md
lm_eval/tasks/anli/anli_r1.yaml
lm_eval/tasks/anli/anli_r2.yaml
lm_eval/tasks/anli/anli_r3.yaml
lm_eval/tasks/arabicmmlu/README.md
lm_eval/tasks/arabicmmlu/_arabicmmlu.yaml
lm_eval/tasks/arabicmmlu/_arabicmmlu_humanities.yaml
lm_eval/tasks/arabicmmlu/_arabicmmlu_language.yaml
lm_eval/tasks/arabicmmlu/_arabicmmlu_other.yaml
lm_eval/tasks/arabicmmlu/_arabicmmlu_social_science.yaml
lm_eval/tasks/arabicmmlu/_arabicmmlu_stem.yaml
lm_eval/tasks/arabicmmlu/_default_arabicmmlu_template_yaml
lm_eval/tasks/arabicmmlu/_generate_configs.py
lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_general.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_grammar.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_driving_test.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_high_arabic_language.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_high_biology.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_high_civics.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_high_computer_science.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_high_economics.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_high_geography.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_high_history.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_high_islamic_studies.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_high_philosophy.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_high_physics.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_middle_arabic_language.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_middle_civics.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_middle_computer_science.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_middle_economics.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_middle_general_knowledge.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_middle_geography.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_middle_history.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_middle_islamic_studies.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_middle_natural_science.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_middle_social_science.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_primary_arabic_language.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_primary_computer_science.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_primary_general_knowledge.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_primary_geography.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_primary_history.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_primary_islamic_studies.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_primary_math.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_primary_natural_science.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_primary_social_science.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_prof_law.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_univ_accounting.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_univ_computer_science.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_univ_economics.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_univ_management.yaml
lm_eval/tasks/arabicmmlu/arabicmmlu_univ_political_science.yaml
lm_eval/tasks/arabicmmlu/utils.py
lm_eval/tasks/arc/README.md
lm_eval/tasks/arc/arc_challenge.yaml
lm_eval/tasks/arc/arc_easy.yaml
lm_eval/tasks/arc_mt/README.md
lm_eval/tasks/arc_mt/arc_challenge_mt_da.yaml
lm_eval/tasks/arc_mt/arc_challenge_mt_de.yaml
lm_eval/tasks/arc_mt/arc_challenge_mt_el.yaml
lm_eval/tasks/arc_mt/arc_challenge_mt_es.yaml
lm_eval/tasks/arc_mt/arc_challenge_mt_fi.yaml
lm_eval/tasks/arc_mt/arc_challenge_mt_hu.yaml
lm_eval/tasks/arc_mt/arc_challenge_mt_is.yaml
lm_eval/tasks/arc_mt/arc_challenge_mt_it.yaml
lm_eval/tasks/arc_mt/arc_challenge_mt_nb.yaml
lm_eval/tasks/arc_mt/arc_challenge_mt_pl.yaml
lm_eval/tasks/arc_mt/arc_challenge_mt_pt.yaml
lm_eval/tasks/arc_mt/arc_challenge_mt_sv.yaml
lm_eval/tasks/arithmetic/README.md
lm_eval/tasks/arithmetic/arithmetic_1dc.yaml
lm_eval/tasks/arithmetic/arithmetic_2da.yaml
lm_eval/tasks/arithmetic/arithmetic_2dm.yaml
lm_eval/tasks/arithmetic/arithmetic_2ds.yaml
lm_eval/tasks/arithmetic/arithmetic_3da.yaml
lm_eval/tasks/arithmetic/arithmetic_3ds.yaml
lm_eval/tasks/arithmetic/arithmetic_4da.yaml
lm_eval/tasks/arithmetic/arithmetic_4ds.yaml
lm_eval/tasks/arithmetic/arithmetic_5da.yaml
lm_eval/tasks/arithmetic/arithmetic_5ds.yaml
lm_eval/tasks/asdiv/README.md
lm_eval/tasks/asdiv/default.yaml
lm_eval/tasks/babi/README.md
lm_eval/tasks/babi/babi.yaml
lm_eval/tasks/basqueglue/README.md
lm_eval/tasks/basqueglue/bec.yaml
lm_eval/tasks/basqueglue/bhtc.yaml
lm_eval/tasks/basqueglue/coref.yaml
lm_eval/tasks/basqueglue/qnli.yaml
lm_eval/tasks/basqueglue/utils.py
lm_eval/tasks/basqueglue/vaxx.yaml
lm_eval/tasks/basqueglue/wic.yaml
lm_eval/tasks/bbh/README.md
lm_eval/tasks/bbh/_generate_configs.py
lm_eval/tasks/bbh/cot_fewshot/_bbh.yaml
lm_eval/tasks/bbh/cot_fewshot/_bbh_cot_fewshot.yaml
lm_eval/tasks/bbh/cot_fewshot/_cot_fewshot_template_yaml
lm_eval/tasks/bbh/cot_fewshot/boolean_expressions.yaml
lm_eval/tasks/bbh/cot_fewshot/causal_judgement.yaml
lm_eval/tasks/bbh/cot_fewshot/date_understanding.yaml
lm_eval/tasks/bbh/cot_fewshot/disambiguation_qa.yaml
lm_eval/tasks/bbh/cot_fewshot/dyck_languages.yaml
lm_eval/tasks/bbh/cot_fewshot/formal_fallacies.yaml
lm_eval/tasks/bbh/cot_fewshot/geometric_shapes.yaml
lm_eval/tasks/bbh/cot_fewshot/hyperbaton.yaml
lm_eval/tasks/bbh/cot_fewshot/logical_deduction_five_objects.yaml
lm_eval/tasks/bbh/cot_fewshot/logical_deduction_seven_objects.yaml
lm_eval/tasks/bbh/cot_fewshot/logical_deduction_three_objects.yaml
lm_eval/tasks/bbh/cot_fewshot/movie_recommendation.yaml
lm_eval/tasks/bbh/cot_fewshot/multistep_arithmetic_two.yaml
lm_eval/tasks/bbh/cot_fewshot/navigate.yaml
lm_eval/tasks/bbh/cot_fewshot/object_counting.yaml
lm_eval/tasks/bbh/cot_fewshot/penguins_in_a_table.yaml
lm_eval/tasks/bbh/cot_fewshot/reasoning_about_colored_objects.yaml
lm_eval/tasks/bbh/cot_fewshot/ruin_names.yaml
lm_eval/tasks/bbh/cot_fewshot/salient_translation_error_detection.yaml
lm_eval/tasks/bbh/cot_fewshot/snarks.yaml
lm_eval/tasks/bbh/cot_fewshot/sports_understanding.yaml
lm_eval/tasks/bbh/cot_fewshot/temporal_sequences.yaml
lm_eval/tasks/bbh/cot_fewshot/tracking_shuffled_objects_five_objects.yaml
lm_eval/tasks/bbh/cot_fewshot/tracking_shuffled_objects_seven_objects.yaml
lm_eval/tasks/bbh/cot_fewshot/tracking_shuffled_objects_three_objects.yaml
lm_eval/tasks/bbh/cot_fewshot/web_of_lies.yaml
lm_eval/tasks/bbh/cot_fewshot/word_sorting.yaml
lm_eval/tasks/bbh/cot_zeroshot/_bbh_cot_zeroshot.yaml
lm_eval/tasks/bbh/cot_zeroshot/_cot_zeroshot_template_yaml
lm_eval/tasks/bbh/cot_zeroshot/boolean_expressions.yaml
lm_eval/tasks/bbh/cot_zeroshot/causal_judgement.yaml
lm_eval/tasks/bbh/cot_zeroshot/date_understanding.yaml
lm_eval/tasks/bbh/cot_zeroshot/disambiguation_qa.yaml
lm_eval/tasks/bbh/cot_zeroshot/dyck_languages.yaml
lm_eval/tasks/bbh/cot_zeroshot/formal_fallacies.yaml
lm_eval/tasks/bbh/cot_zeroshot/geometric_shapes.yaml
lm_eval/tasks/bbh/cot_zeroshot/hyperbaton.yaml
lm_eval/tasks/bbh/cot_zeroshot/logical_deduction_five_objects.yaml
lm_eval/tasks/bbh/cot_zeroshot/logical_deduction_seven_objects.yaml
lm_eval/tasks/bbh/cot_zeroshot/logical_deduction_three_objects.yaml
lm_eval/tasks/bbh/cot_zeroshot/movie_recommendation.yaml
lm_eval/tasks/bbh/cot_zeroshot/multistep_arithmetic_two.yaml
lm_eval/tasks/bbh/cot_zeroshot/navigate.yaml
lm_eval/tasks/bbh/cot_zeroshot/object_counting.yaml
lm_eval/tasks/bbh/cot_zeroshot/penguins_in_a_table.yaml
lm_eval/tasks/bbh/cot_zeroshot/reasoning_about_colored_objects.yaml
lm_eval/tasks/bbh/cot_zeroshot/ruin_names.yaml
lm_eval/tasks/bbh/cot_zeroshot/salient_translation_error_detection.yaml
lm_eval/tasks/bbh/cot_zeroshot/snarks.yaml
lm_eval/tasks/bbh/cot_zeroshot/sports_understanding.yaml
lm_eval/tasks/bbh/cot_zeroshot/temporal_sequences.yaml
lm_eval/tasks/bbh/cot_zeroshot/tracking_shuffled_objects_five_objects.yaml
lm_eval/tasks/bbh/cot_zeroshot/tracking_shuffled_objects_seven_objects.yaml
lm_eval/tasks/bbh/cot_zeroshot/tracking_shuffled_objects_three_objects.yaml
lm_eval/tasks/bbh/cot_zeroshot/utils.py
lm_eval/tasks/bbh/cot_zeroshot/web_of_lies.yaml
lm_eval/tasks/bbh/cot_zeroshot/word_sorting.yaml
lm_eval/tasks/bbh/fewshot/_bbh_fewshot.yaml
lm_eval/tasks/bbh/fewshot/_fewshot_template_yaml
lm_eval/tasks/bbh/fewshot/boolean_expressions.yaml
lm_eval/tasks/bbh/fewshot/causal_judgement.yaml
lm_eval/tasks/bbh/fewshot/date_understanding.yaml
lm_eval/tasks/bbh/fewshot/disambiguation_qa.yaml
lm_eval/tasks/bbh/fewshot/dyck_languages.yaml
lm_eval/tasks/bbh/fewshot/formal_fallacies.yaml
lm_eval/tasks/bbh/fewshot/geometric_shapes.yaml
lm_eval/tasks/bbh/fewshot/hyperbaton.yaml
lm_eval/tasks/bbh/fewshot/logical_deduction_five_objects.yaml
lm_eval/tasks/bbh/fewshot/logical_deduction_seven_objects.yaml
lm_eval/tasks/bbh/fewshot/logical_deduction_three_objects.yaml
lm_eval/tasks/bbh/fewshot/movie_recommendation.yaml
lm_eval/tasks/bbh/fewshot/multistep_arithmetic_two.yaml
lm_eval/tasks/bbh/fewshot/navigate.yaml
lm_eval/tasks/bbh/fewshot/object_counting.yaml
lm_eval/tasks/bbh/fewshot/penguins_in_a_table.yaml
lm_eval/tasks/bbh/fewshot/reasoning_about_colored_objects.yaml
lm_eval/tasks/bbh/fewshot/ruin_names.yaml
lm_eval/tasks/bbh/fewshot/salient_translation_error_detection.yaml
lm_eval/tasks/bbh/fewshot/snarks.yaml
lm_eval/tasks/bbh/fewshot/sports_understanding.yaml
lm_eval/tasks/bbh/fewshot/temporal_sequences.yaml
lm_eval/tasks/bbh/fewshot/tracking_shuffled_objects_five_objects.yaml
lm_eval/tasks/bbh/fewshot/tracking_shuffled_objects_seven_objects.yaml
lm_eval/tasks/bbh/fewshot/tracking_shuffled_objects_three_objects.yaml
lm_eval/tasks/bbh/fewshot/web_of_lies.yaml
lm_eval/tasks/bbh/fewshot/word_sorting.yaml
lm_eval/tasks/bbh/zeroshot/_bbh_zeroshot.yaml
lm_eval/tasks/bbh/zeroshot/_zeroshot_template_yaml
lm_eval/tasks/bbh/zeroshot/boolean_expressions.yaml
lm_eval/tasks/bbh/zeroshot/causal_judgement.yaml
lm_eval/tasks/bbh/zeroshot/date_understanding.yaml
lm_eval/tasks/bbh/zeroshot/disambiguation_qa.yaml
lm_eval/tasks/bbh/zeroshot/dyck_languages.yaml
lm_eval/tasks/bbh/zeroshot/formal_fallacies.yaml
lm_eval/tasks/bbh/zeroshot/geometric_shapes.yaml
lm_eval/tasks/bbh/zeroshot/hyperbaton.yaml
lm_eval/tasks/bbh/zeroshot/logical_deduction_five_objects.yaml
lm_eval/tasks/bbh/zeroshot/logical_deduction_seven_objects.yaml
lm_eval/tasks/bbh/zeroshot/logical_deduction_three_objects.yaml
lm_eval/tasks/bbh/zeroshot/movie_recommendation.yaml
lm_eval/tasks/bbh/zeroshot/multistep_arithmetic_two.yaml
lm_eval/tasks/bbh/zeroshot/navigate.yaml
lm_eval/tasks/bbh/zeroshot/object_counting.yaml
lm_eval/tasks/bbh/zeroshot/penguins_in_a_table.yaml
lm_eval/tasks/bbh/zeroshot/reasoning_about_colored_objects.yaml
lm_eval/tasks/bbh/zeroshot/ruin_names.yaml
lm_eval/tasks/bbh/zeroshot/salient_translation_error_detection.yaml
lm_eval/tasks/bbh/zeroshot/snarks.yaml
lm_eval/tasks/bbh/zeroshot/sports_understanding.yaml
lm_eval/tasks/bbh/zeroshot/temporal_sequences.yaml
lm_eval/tasks/bbh/zeroshot/tracking_shuffled_objects_five_objects.yaml
lm_eval/tasks/bbh/zeroshot/tracking_shuffled_objects_seven_objects.yaml
lm_eval/tasks/bbh/zeroshot/tracking_shuffled_objects_three_objects.yaml
lm_eval/tasks/bbh/zeroshot/utils.py
lm_eval/tasks/bbh/zeroshot/web_of_lies.yaml
lm_eval/tasks/bbh/zeroshot/word_sorting.yaml
lm_eval/tasks/belebele/README.md
lm_eval/tasks/belebele/_belebele.yaml
lm_eval/tasks/belebele/_default_template_yaml
lm_eval/tasks/belebele/_generate_configs.py
lm_eval/tasks/belebele/belebele_acm_Arab.yaml
lm_eval/tasks/belebele/belebele_afr_Latn.yaml
lm_eval/tasks/belebele/belebele_als_Latn.yaml
lm_eval/tasks/belebele/belebele_amh_Ethi.yaml
lm_eval/tasks/belebele/belebele_apc_Arab.yaml
lm_eval/tasks/belebele/belebele_arb_Arab.yaml
lm_eval/tasks/belebele/belebele_arb_Latn.yaml
lm_eval/tasks/belebele/belebele_ars_Arab.yaml
lm_eval/tasks/belebele/belebele_ary_Arab.yaml
lm_eval/tasks/belebele/belebele_arz_Arab.yaml
lm_eval/tasks/belebele/belebele_asm_Beng.yaml
lm_eval/tasks/belebele/belebele_azj_Latn.yaml
lm_eval/tasks/belebele/belebele_bam_Latn.yaml
lm_eval/tasks/belebele/belebele_ben_Beng.yaml
lm_eval/tasks/belebele/belebele_ben_Latn.yaml
lm_eval/tasks/belebele/belebele_bod_Tibt.yaml
lm_eval/tasks/belebele/belebele_bul_Cyrl.yaml
lm_eval/tasks/belebele/belebele_cat_Latn.yaml
lm_eval/tasks/belebele/belebele_ceb_Latn.yaml
lm_eval/tasks/belebele/belebele_ces_Latn.yaml
lm_eval/tasks/belebele/belebele_ckb_Arab.yaml
lm_eval/tasks/belebele/belebele_dan_Latn.yaml
lm_eval/tasks/belebele/belebele_deu_Latn.yaml
lm_eval/tasks/belebele/belebele_ell_Grek.yaml
lm_eval/tasks/belebele/belebele_eng_Latn.yaml
lm_eval/tasks/belebele/belebele_est_Latn.yaml
lm_eval/tasks/belebele/belebele_eus_Latn.yaml
lm_eval/tasks/belebele/belebele_fin_Latn.yaml
lm_eval/tasks/belebele/belebele_fra_Latn.yaml
lm_eval/tasks/belebele/belebele_fuv_Latn.yaml
lm_eval/tasks/belebele/belebele_gaz_Latn.yaml
lm_eval/tasks/belebele/belebele_grn_Latn.yaml
lm_eval/tasks/belebele/belebele_guj_Gujr.yaml
lm_eval/tasks/belebele/belebele_hat_Latn.yaml
lm_eval/tasks/belebele/belebele_hau_Latn.yaml
lm_eval/tasks/belebele/belebele_heb_Hebr.yaml
lm_eval/tasks/belebele/belebele_hin_Deva.yaml
lm_eval/tasks/belebele/belebele_hin_Latn.yaml
lm_eval/tasks/belebele/belebele_hrv_Latn.yaml
lm_eval/tasks/belebele/belebele_hun_Latn.yaml
lm_eval/tasks/belebele/belebele_hye_Armn.yaml
lm_eval/tasks/belebele/belebele_ibo_Latn.yaml
lm_eval/tasks/belebele/belebele_ilo_Latn.yaml
lm_eval/tasks/belebele/belebele_ind_Latn.yaml
lm_eval/tasks/belebele/belebele_isl_Latn.yaml
lm_eval/tasks/belebele/belebele_ita_Latn.yaml
lm_eval/tasks/belebele/belebele_jav_Latn.yaml
lm_eval/tasks/belebele/belebele_jpn_Jpan.yaml
lm_eval/tasks/belebele/belebele_kac_Latn.yaml
lm_eval/tasks/belebele/belebele_kan_Knda.yaml
lm_eval/tasks/belebele/belebele_kat_Geor.yaml
lm_eval/tasks/belebele/belebele_kaz_Cyrl.yaml
lm_eval/tasks/belebele/belebele_kea_Latn.yaml
lm_eval/tasks/belebele/belebele_khk_Cyrl.yaml
lm_eval/tasks/belebele/belebele_khm_Khmr.yaml
lm_eval/tasks/belebele/belebele_kin_Latn.yaml
lm_eval/tasks/belebele/belebele_kir_Cyrl.yaml
lm_eval/tasks/belebele/belebele_kor_Hang.yaml
lm_eval/tasks/belebele/belebele_lao_Laoo.yaml
lm_eval/tasks/belebele/belebele_lin_Latn.yaml
lm_eval/tasks/belebele/belebele_lit_Latn.yaml
lm_eval/tasks/belebele/belebele_lug_Latn.yaml
lm_eval/tasks/belebele/belebele_luo_Latn.yaml
lm_eval/tasks/belebele/belebele_lvs_Latn.yaml
lm_eval/tasks/belebele/belebele_mal_Mlym.yaml
lm_eval/tasks/belebele/belebele_mar_Deva.yaml
lm_eval/tasks/belebele/belebele_mkd_Cyrl.yaml
lm_eval/tasks/belebele/belebele_mlt_Latn.yaml
lm_eval/tasks/belebele/belebele_mri_Latn.yaml
lm_eval/tasks/belebele/belebele_mya_Mymr.yaml
lm_eval/tasks/belebele/belebele_nld_Latn.yaml
lm_eval/tasks/belebele/belebele_nob_Latn.yaml
lm_eval/tasks/belebele/belebele_npi_Deva.yaml
lm_eval/tasks/belebele/belebele_npi_Latn.yaml
lm_eval/tasks/belebele/belebele_nso_Latn.yaml
lm_eval/tasks/belebele/belebele_nya_Latn.yaml
lm_eval/tasks/belebele/belebele_ory_Orya.yaml
lm_eval/tasks/belebele/belebele_pan_Guru.yaml
lm_eval/tasks/belebele/belebele_pbt_Arab.yaml
lm_eval/tasks/belebele/belebele_pes_Arab.yaml
lm_eval/tasks/belebele/belebele_plt_Latn.yaml
lm_eval/tasks/belebele/belebele_pol_Latn.yaml
lm_eval/tasks/belebele/belebele_por_Latn.yaml
lm_eval/tasks/belebele/belebele_ron_Latn.yaml
lm_eval/tasks/belebele/belebele_rus_Cyrl.yaml
lm_eval/tasks/belebele/belebele_shn_Mymr.yaml
lm_eval/tasks/belebele/belebele_sin_Latn.yaml
lm_eval/tasks/belebele/belebele_sin_Sinh.yaml
lm_eval/tasks/belebele/belebele_slk_Latn.yaml
lm_eval/tasks/belebele/belebele_slv_Latn.yaml
lm_eval/tasks/belebele/belebele_sna_Latn.yaml
lm_eval/tasks/belebele/belebele_snd_Arab.yaml
lm_eval/tasks/belebele/belebele_som_Latn.yaml
lm_eval/tasks/belebele/belebele_sot_Latn.yaml
lm_eval/tasks/belebele/belebele_spa_Latn.yaml
lm_eval/tasks/belebele/belebele_srp_Cyrl.yaml
lm_eval/tasks/belebele/belebele_ssw_Latn.yaml
lm_eval/tasks/belebele/belebele_sun_Latn.yaml
lm_eval/tasks/belebele/belebele_swe_Latn.yaml
lm_eval/tasks/belebele/belebele_swh_Latn.yaml
lm_eval/tasks/belebele/belebele_tam_Taml.yaml
lm_eval/tasks/belebele/belebele_tel_Telu.yaml
lm_eval/tasks/belebele/belebele_tgk_Cyrl.yaml
lm_eval/tasks/belebele/belebele_tgl_Latn.yaml
lm_eval/tasks/belebele/belebele_tha_Thai.yaml
lm_eval/tasks/belebele/belebele_tir_Ethi.yaml
lm_eval/tasks/belebele/belebele_tsn_Latn.yaml
lm_eval/tasks/belebele/belebele_tso_Latn.yaml
lm_eval/tasks/belebele/belebele_tur_Latn.yaml
lm_eval/tasks/belebele/belebele_ukr_Cyrl.yaml
lm_eval/tasks/belebele/belebele_urd_Arab.yaml
lm_eval/tasks/belebele/belebele_urd_Latn.yaml
lm_eval/tasks/belebele/belebele_uzn_Latn.yaml
lm_eval/tasks/belebele/belebele_vie_Latn.yaml
lm_eval/tasks/belebele/belebele_war_Latn.yaml
lm_eval/tasks/belebele/belebele_wol_Latn.yaml
lm_eval/tasks/belebele/belebele_xho_Latn.yaml
lm_eval/tasks/belebele/belebele_yor_Latn.yaml
lm_eval/tasks/belebele/belebele_zho_Hans.yaml
lm_eval/tasks/belebele/belebele_zho_Hant.yaml
lm_eval/tasks/belebele/belebele_zsm_Latn.yaml
lm_eval/tasks/belebele/belebele_zul_Latn.yaml
lm_eval/tasks/benchmarks/minerva_math.yaml
lm_eval/tasks/benchmarks/openllm.yaml
lm_eval/tasks/benchmarks/pythia.yaml
lm_eval/tasks/benchmarks/t0_eval.yaml
lm_eval/tasks/benchmarks/flan/_held_in_template_yaml
lm_eval/tasks/benchmarks/flan/flan_held_in.yaml
lm_eval/tasks/benchmarks/flan/flan_held_out.yaml
lm_eval/tasks/benchmarks/multimedqa/README.md
lm_eval/tasks/benchmarks/multimedqa/multimedqa.yaml
lm_eval/tasks/bertaqa/README.md
lm_eval/tasks/bertaqa/_bertaqa_template
lm_eval/tasks/bertaqa/bertaqa_en.yaml
lm_eval/tasks/bertaqa/bertaqa_en_mt_gemma-7b.yaml
lm_eval/tasks/bertaqa/bertaqa_en_mt_hitz.yaml
lm_eval/tasks/bertaqa/bertaqa_en_mt_itzuli.yaml
lm_eval/tasks/bertaqa/bertaqa_en_mt_latxa-13b-v1.1.yaml
lm_eval/tasks/bertaqa/bertaqa_en_mt_latxa-13b-v1.yaml
lm_eval/tasks/bertaqa/bertaqa_en_mt_latxa-70b-v1.1.yaml
lm_eval/tasks/bertaqa/bertaqa_en_mt_latxa-70b-v1.yaml
lm_eval/tasks/bertaqa/bertaqa_en_mt_latxa-7b-v1.1.yaml
lm_eval/tasks/bertaqa/bertaqa_en_mt_latxa-7b-v1.yaml
lm_eval/tasks/bertaqa/bertaqa_en_mt_llama-2-13b.yaml
lm_eval/tasks/bertaqa/bertaqa_en_mt_llama-2-70b.yaml
lm_eval/tasks/bertaqa/bertaqa_en_mt_llama-2-7b.yaml
lm_eval/tasks/bertaqa/bertaqa_en_mt_madlad.yaml
lm_eval/tasks/bertaqa/bertaqa_en_mt_nllb.yaml
lm_eval/tasks/bertaqa/bertaqa_eu.yaml
lm_eval/tasks/bigbench/README.md
lm_eval/tasks/bigbench/generate_tasks.py
lm_eval/tasks/bigbench/generate_until_template_yaml
lm_eval/tasks/bigbench/multiple_choice_template_a_yaml
lm_eval/tasks/bigbench/multiple_choice_template_b_yaml
lm_eval/tasks/bigbench/push_bigbench_dataset.py
lm_eval/tasks/bigbench/generate_until/abstract_narrative_understanding.yaml
lm_eval/tasks/bigbench/generate_until/anachronisms.yaml
lm_eval/tasks/bigbench/generate_until/analogical_similarity.yaml
lm_eval/tasks/bigbench/generate_until/analytic_entailment.yaml
lm_eval/tasks/bigbench/generate_until/arithmetic.yaml
lm_eval/tasks/bigbench/generate_until/ascii_word_recognition.yaml
lm_eval/tasks/bigbench/generate_until/authorship_verification.yaml
lm_eval/tasks/bigbench/generate_until/auto_categorization.yaml
lm_eval/tasks/bigbench/generate_until/auto_debugging.yaml
lm_eval/tasks/bigbench/generate_until/bbq_lite_json.yaml
lm_eval/tasks/bigbench/generate_until/bridging_anaphora_resolution_barqa.yaml
lm_eval/tasks/bigbench/generate_until/causal_judgment.yaml
lm_eval/tasks/bigbench/generate_until/cause_and_effect.yaml
lm_eval/tasks/bigbench/generate_until/checkmate_in_one.yaml
lm_eval/tasks/bigbench/generate_until/chess_state_tracking.yaml
lm_eval/tasks/bigbench/generate_until/chinese_remainder_theorem.yaml
lm_eval/tasks/bigbench/generate_until/cifar10_classification.yaml
lm_eval/tasks/bigbench/generate_until/code_line_description.yaml
lm_eval/tasks/bigbench/generate_until/codenames.yaml
lm_eval/tasks/bigbench/generate_until/color.yaml
lm_eval/tasks/bigbench/generate_until/common_morpheme.yaml
lm_eval/tasks/bigbench/generate_until/conceptual_combinations.yaml
lm_eval/tasks/bigbench/generate_until/conlang_translation.yaml
lm_eval/tasks/bigbench/generate_until/contextual_parametric_knowledge_conflicts.yaml
lm_eval/tasks/bigbench/generate_until/crash_blossom.yaml
lm_eval/tasks/bigbench/generate_until/crass_ai.yaml
lm_eval/tasks/bigbench/generate_until/cryobiology_spanish.yaml
lm_eval/tasks/bigbench/generate_until/cryptonite.yaml
lm_eval/tasks/bigbench/generate_until/cs_algorithms.yaml
lm_eval/tasks/bigbench/generate_until/dark_humor_detection.yaml
lm_eval/tasks/bigbench/generate_until/date_understanding.yaml
lm_eval/tasks/bigbench/generate_until/disambiguation_qa.yaml
lm_eval/tasks/bigbench/generate_until/discourse_marker_prediction.yaml
lm_eval/tasks/bigbench/generate_until/disfl_qa.yaml
lm_eval/tasks/bigbench/generate_until/dyck_languages.yaml
lm_eval/tasks/bigbench/generate_until/elementary_math_qa.yaml
lm_eval/tasks/bigbench/generate_until/emoji_movie.yaml
lm_eval/tasks/bigbench/generate_until/emojis_emotion_prediction.yaml
lm_eval/tasks/bigbench/generate_until/empirical_judgments.yaml
lm_eval/tasks/bigbench/generate_until/english_proverbs.yaml
lm_eval/tasks/bigbench/generate_until/english_russian_proverbs.yaml
lm_eval/tasks/bigbench/generate_until/entailed_polarity.yaml
lm_eval/tasks/bigbench/generate_until/entailed_polarity_hindi.yaml
lm_eval/tasks/bigbench/generate_until/epistemic_reasoning.yaml
lm_eval/tasks/bigbench/generate_until/evaluating_information_essentiality.yaml
lm_eval/tasks/bigbench/generate_until/fact_checker.yaml
lm_eval/tasks/bigbench/generate_until/fantasy_reasoning.yaml
lm_eval/tasks/bigbench/generate_until/few_shot_nlg.yaml
lm_eval/tasks/bigbench/generate_until/figure_of_speech_detection.yaml
lm_eval/tasks/bigbench/generate_until/formal_fallacies_syllogisms_negation.yaml
lm_eval/tasks/bigbench/generate_until/gem.yaml
lm_eval/tasks/bigbench/generate_until/gender_inclusive_sentences_german.yaml
lm_eval/tasks/bigbench/generate_until/general_knowledge.yaml
lm_eval/tasks/bigbench/generate_until/geometric_shapes.yaml
lm_eval/tasks/bigbench/generate_until/goal_step_wikihow.yaml
lm_eval/tasks/bigbench/generate_until/gre_reading_comprehension.yaml
lm_eval/tasks/bigbench/generate_until/hhh_alignment.yaml
lm_eval/tasks/bigbench/generate_until/hindi_question_answering.yaml
lm_eval/tasks/bigbench/generate_until/hindu_knowledge.yaml
lm_eval/tasks/bigbench/generate_until/hinglish_toxicity.yaml
lm_eval/tasks/bigbench/generate_until/human_organs_senses.yaml
lm_eval/tasks/bigbench/generate_until/hyperbaton.yaml
lm_eval/tasks/bigbench/generate_until/identify_math_theorems.yaml
lm_eval/tasks/bigbench/generate_until/identify_odd_metaphor.yaml
lm_eval/tasks/bigbench/generate_until/implicatures.yaml
lm_eval/tasks/bigbench/generate_until/implicit_relations.yaml
lm_eval/tasks/bigbench/generate_until/intent_recognition.yaml
lm_eval/tasks/bigbench/generate_until/international_phonetic_alphabet_nli.yaml
lm_eval/tasks/bigbench/generate_until/international_phonetic_alphabet_transliterate.yaml
lm_eval/tasks/bigbench/generate_until/intersect_geometry.yaml
lm_eval/tasks/bigbench/generate_until/irony_identification.yaml
lm_eval/tasks/bigbench/generate_until/kanji_ascii.yaml
lm_eval/tasks/bigbench/generate_until/kannada.yaml
lm_eval/tasks/bigbench/generate_until/key_value_maps.yaml
lm_eval/tasks/bigbench/generate_until/known_unknowns.yaml
lm_eval/tasks/bigbench/generate_until/language_games.yaml
lm_eval/tasks/bigbench/generate_until/language_identification.yaml
lm_eval/tasks/bigbench/generate_until/linguistic_mappings.yaml
lm_eval/tasks/bigbench/generate_until/linguistics_puzzles.yaml
lm_eval/tasks/bigbench/generate_until/list_functions.yaml
lm_eval/tasks/bigbench/generate_until/logic_grid_puzzle.yaml
lm_eval/tasks/bigbench/generate_until/logical_args.yaml
lm_eval/tasks/bigbench/generate_until/logical_deduction.yaml
lm_eval/tasks/bigbench/generate_until/logical_fallacy_detection.yaml
lm_eval/tasks/bigbench/generate_until/logical_sequence.yaml
lm_eval/tasks/bigbench/generate_until/mathematical_induction.yaml
lm_eval/tasks/bigbench/generate_until/matrixshapes.yaml
lm_eval/tasks/bigbench/generate_until/metaphor_boolean.yaml
lm_eval/tasks/bigbench/generate_until/metaphor_understanding.yaml
lm_eval/tasks/bigbench/generate_until/minute_mysteries_qa.yaml
lm_eval/tasks/bigbench/generate_until/misconceptions.yaml
lm_eval/tasks/bigbench/generate_until/misconceptions_russian.yaml
lm_eval/tasks/bigbench/generate_until/mnist_ascii.yaml
lm_eval/tasks/bigbench/generate_until/modified_arithmetic.yaml
lm_eval/tasks/bigbench/generate_until/moral_permissibility.yaml
lm_eval/tasks/bigbench/generate_until/movie_dialog_same_or_different.yaml
lm_eval/tasks/bigbench/generate_until/movie_recommendation.yaml
lm_eval/tasks/bigbench/generate_until/mult_data_wrangling.yaml
lm_eval/tasks/bigbench/generate_until/multiemo.yaml
lm_eval/tasks/bigbench/generate_until/natural_instructions.yaml
lm_eval/tasks/bigbench/generate_until/navigate.yaml
lm_eval/tasks/bigbench/generate_until/nonsense_words_grammar.yaml
lm_eval/tasks/bigbench/generate_until/novel_concepts.yaml
lm_eval/tasks/bigbench/generate_until/object_counting.yaml
lm_eval/tasks/bigbench/generate_until/odd_one_out.yaml
lm_eval/tasks/bigbench/generate_until/operators.yaml
lm_eval/tasks/bigbench/generate_until/paragraph_segmentation.yaml
lm_eval/tasks/bigbench/generate_until/parsinlu_qa.yaml
lm_eval/tasks/bigbench/generate_until/parsinlu_reading_comprehension.yaml
lm_eval/tasks/bigbench/generate_until/penguins_in_a_table.yaml
lm_eval/tasks/bigbench/generate_until/periodic_elements.yaml
lm_eval/tasks/bigbench/generate_until/persian_idioms.yaml
lm_eval/tasks/bigbench/generate_until/phrase_relatedness.yaml
lm_eval/tasks/bigbench/generate_until/physical_intuition.yaml
lm_eval/tasks/bigbench/generate_until/physics.yaml
lm_eval/tasks/bigbench/generate_until/physics_questions.yaml
lm_eval/tasks/bigbench/generate_until/play_dialog_same_or_different.yaml
lm_eval/tasks/bigbench/generate_until/polish_sequence_labeling.yaml
lm_eval/tasks/bigbench/generate_until/presuppositions_as_nli.yaml
lm_eval/tasks/bigbench/generate_until/qa_wikidata.yaml
lm_eval/tasks/bigbench/generate_until/question_selection.yaml
lm_eval/tasks/bigbench/generate_until/real_or_fake_text.yaml
lm_eval/tasks/bigbench/generate_until/reasoning_about_colored_objects.yaml
lm_eval/tasks/bigbench/generate_until/repeat_copy_logic.yaml
lm_eval/tasks/bigbench/generate_until/rephrase.yaml
lm_eval/tasks/bigbench/generate_until/riddle_sense.yaml
lm_eval/tasks/bigbench/generate_until/ruin_names.yaml
lm_eval/tasks/bigbench/generate_until/salient_translation_error_detection.yaml
lm_eval/tasks/bigbench/generate_until/scientific_press_release.yaml
lm_eval/tasks/bigbench/generate_until/semantic_parsing_in_context_sparc.yaml
lm_eval/tasks/bigbench/generate_until/semantic_parsing_spider.yaml
lm_eval/tasks/bigbench/generate_until/sentence_ambiguity.yaml
lm_eval/tasks/bigbench/generate_until/similarities_abstraction.yaml
lm_eval/tasks/bigbench/generate_until/simp_turing_concept.yaml
lm_eval/tasks/bigbench/generate_until/simple_arithmetic_json.yaml
lm_eval/tasks/bigbench/generate_until/simple_arithmetic_json_multiple_choice.yaml
lm_eval/tasks/bigbench/generate_until/simple_arithmetic_json_subtasks.yaml
lm_eval/tasks/bigbench/generate_until/simple_arithmetic_multiple_targets_json.yaml
lm_eval/tasks/bigbench/generate_until/simple_ethical_questions.yaml
lm_eval/tasks/bigbench/generate_until/simple_text_editing.yaml
lm_eval/tasks/bigbench/generate_until/snarks.yaml
lm_eval/tasks/bigbench/generate_until/social_iqa.yaml
lm_eval/tasks/bigbench/generate_until/social_support.yaml
lm_eval/tasks/bigbench/generate_until/sports_understanding.yaml
lm_eval/tasks/bigbench/generate_until/strange_stories.yaml
lm_eval/tasks/bigbench/generate_until/strategyqa.yaml
lm_eval/tasks/bigbench/generate_until/sufficient_information.yaml
lm_eval/tasks/bigbench/generate_until/suicide_risk.yaml
lm_eval/tasks/bigbench/generate_until/swahili_english_proverbs.yaml
lm_eval/tasks/bigbench/generate_until/swedish_to_german_proverbs.yaml
lm_eval/tasks/bigbench/generate_until/symbol_interpretation.yaml
lm_eval/tasks/bigbench/generate_until/temporal_sequences.yaml
lm_eval/tasks/bigbench/generate_until/tense.yaml
lm_eval/tasks/bigbench/generate_until/timedial.yaml
lm_eval/tasks/bigbench/generate_until/topical_chat.yaml
lm_eval/tasks/bigbench/generate_until/tracking_shuffled_objects.yaml
lm_eval/tasks/bigbench/generate_until/understanding_fables.yaml
lm_eval/tasks/bigbench/generate_until/undo_permutation.yaml
lm_eval/tasks/bigbench/generate_until/unit_conversion.yaml
lm_eval/tasks/bigbench/generate_until/unit_interpretation.yaml
lm_eval/tasks/bigbench/generate_until/unnatural_in_context_learning.yaml
lm_eval/tasks/bigbench/generate_until/vitaminc_fact_verification.yaml
lm_eval/tasks/bigbench/generate_until/what_is_the_tao.yaml
lm_eval/tasks/bigbench/generate_until/which_wiki_edit.yaml
lm_eval/tasks/bigbench/generate_until/winowhy.yaml
lm_eval/tasks/bigbench/generate_until/word_sorting.yaml
lm_eval/tasks/bigbench/generate_until/word_unscrambling.yaml
lm_eval/tasks/bigbench/multiple_choice/abstract_narrative_understanding.yaml
lm_eval/tasks/bigbench/multiple_choice/anachronisms.yaml
lm_eval/tasks/bigbench/multiple_choice/analogical_similarity.yaml
lm_eval/tasks/bigbench/multiple_choice/analytic_entailment.yaml
lm_eval/tasks/bigbench/multiple_choice/arithmetic.yaml
lm_eval/tasks/bigbench/multiple_choice/authorship_verification.yaml
lm_eval/tasks/bigbench/multiple_choice/bbq_lite_json.yaml
lm_eval/tasks/bigbench/multiple_choice/causal_judgment.yaml
lm_eval/tasks/bigbench/multiple_choice/cause_and_effect.yaml
lm_eval/tasks/bigbench/multiple_choice/checkmate_in_one.yaml
lm_eval/tasks/bigbench/multiple_choice/cifar10_classification.yaml
lm_eval/tasks/bigbench/multiple_choice/code_line_description.yaml
lm_eval/tasks/bigbench/multiple_choice/color.yaml
lm_eval/tasks/bigbench/multiple_choice/common_morpheme.yaml
lm_eval/tasks/bigbench/multiple_choice/conceptual_combinations.yaml
lm_eval/tasks/bigbench/multiple_choice/contextual_parametric_knowledge_conflicts.yaml
lm_eval/tasks/bigbench/multiple_choice/crash_blossom.yaml
lm_eval/tasks/bigbench/multiple_choice/crass_ai.yaml
lm_eval/tasks/bigbench/multiple_choice/cryobiology_spanish.yaml
lm_eval/tasks/bigbench/multiple_choice/cs_algorithms.yaml
lm_eval/tasks/bigbench/multiple_choice/dark_humor_detection.yaml
lm_eval/tasks/bigbench/multiple_choice/date_understanding.yaml
lm_eval/tasks/bigbench/multiple_choice/disambiguation_qa.yaml
lm_eval/tasks/bigbench/multiple_choice/discourse_marker_prediction.yaml
lm_eval/tasks/bigbench/multiple_choice/dyck_languages.yaml
lm_eval/tasks/bigbench/multiple_choice/elementary_math_qa.yaml
lm_eval/tasks/bigbench/multiple_choice/emoji_movie.yaml
lm_eval/tasks/bigbench/multiple_choice/emojis_emotion_prediction.yaml
lm_eval/tasks/bigbench/multiple_choice/empirical_judgments.yaml
lm_eval/tasks/bigbench/multiple_choice/english_proverbs.yaml
lm_eval/tasks/bigbench/multiple_choice/english_russian_proverbs.yaml
lm_eval/tasks/bigbench/multiple_choice/entailed_polarity.yaml
lm_eval/tasks/bigbench/multiple_choice/entailed_polarity_hindi.yaml
lm_eval/tasks/bigbench/multiple_choice/epistemic_reasoning.yaml
lm_eval/tasks/bigbench/multiple_choice/evaluating_information_essentiality.yaml
lm_eval/tasks/bigbench/multiple_choice/fact_checker.yaml
lm_eval/tasks/bigbench/multiple_choice/fantasy_reasoning.yaml
lm_eval/tasks/bigbench/multiple_choice/figure_of_speech_detection.yaml
lm_eval/tasks/bigbench/multiple_choice/formal_fallacies_syllogisms_negation.yaml
lm_eval/tasks/bigbench/multiple_choice/general_knowledge.yaml
lm_eval/tasks/bigbench/multiple_choice/geometric_shapes.yaml
lm_eval/tasks/bigbench/multiple_choice/goal_step_wikihow.yaml
lm_eval/tasks/bigbench/multiple_choice/gre_reading_comprehension.yaml
lm_eval/tasks/bigbench/multiple_choice/hhh_alignment.yaml
lm_eval/tasks/bigbench/multiple_choice/hindu_knowledge.yaml
lm_eval/tasks/bigbench/multiple_choice/hinglish_toxicity.yaml
lm_eval/tasks/bigbench/multiple_choice/human_organs_senses.yaml
lm_eval/tasks/bigbench/multiple_choice/hyperbaton.yaml
lm_eval/tasks/bigbench/multiple_choice/identify_math_theorems.yaml
lm_eval/tasks/bigbench/multiple_choice/identify_odd_metaphor.yaml
lm_eval/tasks/bigbench/multiple_choice/implicatures.yaml
lm_eval/tasks/bigbench/multiple_choice/implicit_relations.yaml
lm_eval/tasks/bigbench/multiple_choice/intent_recognition.yaml
lm_eval/tasks/bigbench/multiple_choice/international_phonetic_alphabet_nli.yaml
lm_eval/tasks/bigbench/multiple_choice/intersect_geometry.yaml
lm_eval/tasks/bigbench/multiple_choice/irony_identification.yaml
lm_eval/tasks/bigbench/multiple_choice/kanji_ascii.yaml
lm_eval/tasks/bigbench/multiple_choice/kannada.yaml
lm_eval/tasks/bigbench/multiple_choice/key_value_maps.yaml
lm_eval/tasks/bigbench/multiple_choice/known_unknowns.yaml
lm_eval/tasks/bigbench/multiple_choice/language_identification.yaml
lm_eval/tasks/bigbench/multiple_choice/logic_grid_puzzle.yaml
lm_eval/tasks/bigbench/multiple_choice/logical_args.yaml
lm_eval/tasks/bigbench/multiple_choice/logical_deduction.yaml
lm_eval/tasks/bigbench/multiple_choice/logical_fallacy_detection.yaml
lm_eval/tasks/bigbench/multiple_choice/logical_sequence.yaml
lm_eval/tasks/bigbench/multiple_choice/mathematical_induction.yaml
lm_eval/tasks/bigbench/multiple_choice/metaphor_boolean.yaml
lm_eval/tasks/bigbench/multiple_choice/metaphor_understanding.yaml
lm_eval/tasks/bigbench/multiple_choice/misconceptions.yaml
lm_eval/tasks/bigbench/multiple_choice/misconceptions_russian.yaml
lm_eval/tasks/bigbench/multiple_choice/mnist_ascii.yaml
lm_eval/tasks/bigbench/multiple_choice/moral_permissibility.yaml
lm_eval/tasks/bigbench/multiple_choice/movie_dialog_same_or_different.yaml
lm_eval/tasks/bigbench/multiple_choice/movie_recommendation.yaml
lm_eval/tasks/bigbench/multiple_choice/multiemo.yaml
lm_eval/tasks/bigbench/multiple_choice/navigate.yaml
lm_eval/tasks/bigbench/multiple_choice/nonsense_words_grammar.yaml
lm_eval/tasks/bigbench/multiple_choice/novel_concepts.yaml
lm_eval/tasks/bigbench/multiple_choice/odd_one_out.yaml
lm_eval/tasks/bigbench/multiple_choice/parsinlu_qa.yaml
lm_eval/tasks/bigbench/multiple_choice/penguins_in_a_table.yaml
lm_eval/tasks/bigbench/multiple_choice/periodic_elements.yaml
lm_eval/tasks/bigbench/multiple_choice/persian_idioms.yaml
lm_eval/tasks/bigbench/multiple_choice/phrase_relatedness.yaml
lm_eval/tasks/bigbench/multiple_choice/physical_intuition.yaml
lm_eval/tasks/bigbench/multiple_choice/physics.yaml
lm_eval/tasks/bigbench/multiple_choice/play_dialog_same_or_different.yaml
lm_eval/tasks/bigbench/multiple_choice/presuppositions_as_nli.yaml
lm_eval/tasks/bigbench/multiple_choice/question_selection.yaml
lm_eval/tasks/bigbench/multiple_choice/real_or_fake_text.yaml
lm_eval/tasks/bigbench/multiple_choice/reasoning_about_colored_objects.yaml
lm_eval/tasks/bigbench/multiple_choice/riddle_sense.yaml
lm_eval/tasks/bigbench/multiple_choice/ruin_names.yaml
lm_eval/tasks/bigbench/multiple_choice/salient_translation_error_detection.yaml
lm_eval/tasks/bigbench/multiple_choice/sentence_ambiguity.yaml
lm_eval/tasks/bigbench/multiple_choice/similarities_abstraction.yaml
lm_eval/tasks/bigbench/multiple_choice/simple_ethical_questions.yaml
lm_eval/tasks/bigbench/multiple_choice/snarks.yaml
lm_eval/tasks/bigbench/multiple_choice/social_iqa.yaml
lm_eval/tasks/bigbench/multiple_choice/social_support.yaml
lm_eval/tasks/bigbench/multiple_choice/sports_understanding.yaml
lm_eval/tasks/bigbench/multiple_choice/strange_stories.yaml
lm_eval/tasks/bigbench/multiple_choice/strategyqa.yaml
lm_eval/tasks/bigbench/multiple_choice/suicide_risk.yaml
lm_eval/tasks/bigbench/multiple_choice/swahili_english_proverbs.yaml
lm_eval/tasks/bigbench/multiple_choice/swedish_to_german_proverbs.yaml
lm_eval/tasks/bigbench/multiple_choice/symbol_interpretation.yaml
lm_eval/tasks/bigbench/multiple_choice/temporal_sequences.yaml
lm_eval/tasks/bigbench/multiple_choice/timedial.yaml
lm_eval/tasks/bigbench/multiple_choice/tracking_shuffled_objects.yaml
lm_eval/tasks/bigbench/multiple_choice/understanding_fables.yaml
lm_eval/tasks/bigbench/multiple_choice/undo_permutation.yaml
lm_eval/tasks/bigbench/multiple_choice/unit_conversion.yaml
lm_eval/tasks/bigbench/multiple_choice/unit_interpretation.yaml
lm_eval/tasks/bigbench/multiple_choice/vitaminc_fact_verification.yaml
lm_eval/tasks/bigbench/multiple_choice/what_is_the_tao.yaml
lm_eval/tasks/bigbench/multiple_choice/which_wiki_edit.yaml
lm_eval/tasks/bigbench/multiple_choice/winowhy.yaml
lm_eval/tasks/blimp/README.md
lm_eval/tasks/blimp/_blimp.yaml
lm_eval/tasks/blimp/_template_yaml
lm_eval/tasks/blimp/adjunct_island.yaml
lm_eval/tasks/blimp/anaphor_gender_agreement.yaml
lm_eval/tasks/blimp/anaphor_number_agreement.yaml
lm_eval/tasks/blimp/animate_subject_passive.yaml
lm_eval/tasks/blimp/animate_subject_trans.yaml
lm_eval/tasks/blimp/causative.yaml
lm_eval/tasks/blimp/complex_NP_island.yaml
lm_eval/tasks/blimp/coordinate_structure_constraint_complex_left_branch.yaml
lm_eval/tasks/blimp/coordinate_structure_constraint_object_extraction.yaml
lm_eval/tasks/blimp/determiner_noun_agreement_1.yaml
lm_eval/tasks/blimp/determiner_noun_agreement_2.yaml
lm_eval/tasks/blimp/determiner_noun_agreement_irregular_1.yaml
lm_eval/tasks/blimp/determiner_noun_agreement_irregular_2.yaml
lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_2.yaml
lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_irregular_1.yaml
lm_eval/tasks/blimp/determiner_noun_agreement_with_adj_irregular_2.yaml
lm_eval/tasks/blimp/determiner_noun_agreement_with_adjective_1.yaml
lm_eval/tasks/blimp/distractor_agreement_relational_noun.yaml
lm_eval/tasks/blimp/distractor_agreement_relative_clause.yaml
lm_eval/tasks/blimp/drop_argument.yaml
lm_eval/tasks/blimp/ellipsis_n_bar_1.yaml
lm_eval/tasks/blimp/ellipsis_n_bar_2.yaml
lm_eval/tasks/blimp/existential_there_object_raising.yaml
lm_eval/tasks/blimp/existential_there_quantifiers_1.yaml
lm_eval/tasks/blimp/existential_there_quantifiers_2.yaml
lm_eval/tasks/blimp/existential_there_subject_raising.yaml
lm_eval/tasks/blimp/expletive_it_object_raising.yaml
lm_eval/tasks/blimp/generate_configs.py
lm_eval/tasks/blimp/inchoative.yaml
lm_eval/tasks/blimp/intransitive.yaml
lm_eval/tasks/blimp/irregular_past_participle_adjectives.yaml
lm_eval/tasks/blimp/irregular_past_participle_verbs.yaml
lm_eval/tasks/blimp/irregular_plural_subject_verb_agreement_1.yaml
lm_eval/tasks/blimp/irregular_plural_subject_verb_agreement_2.yaml
lm_eval/tasks/blimp/left_branch_island_echo_question.yaml
lm_eval/tasks/blimp/left_branch_island_simple_question.yaml
lm_eval/tasks/blimp/matrix_question_npi_licensor_present.yaml
lm_eval/tasks/blimp/npi_present_1.yaml
lm_eval/tasks/blimp/npi_present_2.yaml
lm_eval/tasks/blimp/only_npi_licensor_present.yaml
lm_eval/tasks/blimp/only_npi_scope.yaml
lm_eval/tasks/blimp/passive_1.yaml
lm_eval/tasks/blimp/passive_2.yaml
lm_eval/tasks/blimp/principle_A_c_command.yaml
lm_eval/tasks/blimp/principle_A_case_1.yaml
lm_eval/tasks/blimp/principle_A_case_2.yaml
lm_eval/tasks/blimp/principle_A_domain_1.yaml
lm_eval/tasks/blimp/principle_A_domain_2.yaml
lm_eval/tasks/blimp/principle_A_domain_3.yaml
lm_eval/tasks/blimp/principle_A_reconstruction.yaml
lm_eval/tasks/blimp/regular_plural_subject_verb_agreement_1.yaml
lm_eval/tasks/blimp/regular_plural_subject_verb_agreement_2.yaml
lm_eval/tasks/blimp/sentential_negation_npi_licensor_present.yaml
lm_eval/tasks/blimp/sentential_negation_npi_scope.yaml
lm_eval/tasks/blimp/sentential_subject_island.yaml
lm_eval/tasks/blimp/superlative_quantifiers_1.yaml
lm_eval/tasks/blimp/superlative_quantifiers_2.yaml
lm_eval/tasks/blimp/tough_vs_raising_1.yaml
lm_eval/tasks/blimp/tough_vs_raising_2.yaml
lm_eval/tasks/blimp/transitive.yaml
lm_eval/tasks/blimp/wh_island.yaml
lm_eval/tasks/blimp/wh_questions_object_gap.yaml
lm_eval/tasks/blimp/wh_questions_subject_gap.yaml
lm_eval/tasks/blimp/wh_questions_subject_gap_long_distance.yaml
lm_eval/tasks/blimp/wh_vs_that_no_gap.yaml
lm_eval/tasks/blimp/wh_vs_that_no_gap_long_distance.yaml
lm_eval/tasks/blimp/wh_vs_that_with_gap.yaml
lm_eval/tasks/blimp/wh_vs_that_with_gap_long_distance.yaml
lm_eval/tasks/ceval/README.md
lm_eval/tasks/ceval/_ceval-valid.yaml
lm_eval/tasks/ceval/_default_ceval_yaml
lm_eval/tasks/ceval/_generate_configs.py
lm_eval/tasks/ceval/ceval-valid_accountant.yaml
lm_eval/tasks/ceval/ceval-valid_advanced_mathematics.yaml
lm_eval/tasks/ceval/ceval-valid_art_studies.yaml
lm_eval/tasks/ceval/ceval-valid_basic_medicine.yaml
lm_eval/tasks/ceval/ceval-valid_business_administration.yaml
lm_eval/tasks/ceval/ceval-valid_chinese_language_and_literature.yaml
lm_eval/tasks/ceval/ceval-valid_civil_servant.yaml
lm_eval/tasks/ceval/ceval-valid_clinical_medicine.yaml
lm_eval/tasks/ceval/ceval-valid_college_chemistry.yaml
lm_eval/tasks/ceval/ceval-valid_college_economics.yaml
lm_eval/tasks/ceval/ceval-valid_college_physics.yaml
lm_eval/tasks/ceval/ceval-valid_college_programming.yaml
lm_eval/tasks/ceval/ceval-valid_computer_architecture.yaml
lm_eval/tasks/ceval/ceval-valid_computer_network.yaml
lm_eval/tasks/ceval/ceval-valid_discrete_mathematics.yaml
lm_eval/tasks/ceval/ceval-valid_education_science.yaml
lm_eval/tasks/ceval/ceval-valid_electrical_engineer.yaml
lm_eval/tasks/ceval/ceval-valid_environmental_impact_assessment_engineer.yaml
lm_eval/tasks/ceval/ceval-valid_fire_engineer.yaml
lm_eval/tasks/ceval/ceval-valid_high_school_biology.yaml
lm_eval/tasks/ceval/ceval-valid_high_school_chemistry.yaml
lm_eval/tasks/ceval/ceval-valid_high_school_chinese.yaml
lm_eval/tasks/ceval/ceval-valid_high_school_geography.yaml
lm_eval/tasks/ceval/ceval-valid_high_school_history.yaml
lm_eval/tasks/ceval/ceval-valid_high_school_mathematics.yaml
lm_eval/tasks/ceval/ceval-valid_high_school_physics.yaml
lm_eval/tasks/ceval/ceval-valid_high_school_politics.yaml
lm_eval/tasks/ceval/ceval-valid_ideological_and_moral_cultivation.yaml
lm_eval/tasks/ceval/ceval-valid_law.yaml
lm_eval/tasks/ceval/ceval-valid_legal_professional.yaml
lm_eval/tasks/ceval/ceval-valid_logic.yaml
lm_eval/tasks/ceval/ceval-valid_mao_zedong_thought.yaml
lm_eval/tasks/ceval/ceval-valid_marxism.yaml
lm_eval/tasks/ceval/ceval-valid_metrology_engineer.yaml
lm_eval/tasks/ceval/ceval-valid_middle_school_biology.yaml
lm_eval/tasks/ceval/ceval-valid_middle_school_chemistry.yaml
lm_eval/tasks/ceval/ceval-valid_middle_school_geography.yaml
lm_eval/tasks/ceval/ceval-valid_middle_school_history.yaml
lm_eval/tasks/ceval/ceval-valid_middle_school_mathematics.yaml
lm_eval/tasks/ceval/ceval-valid_middle_school_physics.yaml
lm_eval/tasks/ceval/ceval-valid_middle_school_politics.yaml
lm_eval/tasks/ceval/ceval-valid_modern_chinese_history.yaml
lm_eval/tasks/ceval/ceval-valid_operating_system.yaml
lm_eval/tasks/ceval/ceval-valid_physician.yaml
lm_eval/tasks/ceval/ceval-valid_plant_protection.yaml
lm_eval/tasks/ceval/ceval-valid_probability_and_statistics.yaml
lm_eval/tasks/ceval/ceval-valid_professional_tour_guide.yaml
lm_eval/tasks/ceval/ceval-valid_sports_science.yaml
lm_eval/tasks/ceval/ceval-valid_tax_accountant.yaml
lm_eval/tasks/ceval/ceval-valid_teacher_qualification.yaml
lm_eval/tasks/ceval/ceval-valid_urban_and_rural_planner.yaml
lm_eval/tasks/ceval/ceval-valid_veterinary_medicine.yaml
lm_eval/tasks/cmmlu/README.md
lm_eval/tasks/cmmlu/_cmmlu.yaml
lm_eval/tasks/cmmlu/_default_template_yaml
lm_eval/tasks/cmmlu/_generate_configs.py
lm_eval/tasks/cmmlu/cmmlu_agronomy.yaml
lm_eval/tasks/cmmlu/cmmlu_anatomy.yaml
lm_eval/tasks/cmmlu/cmmlu_ancient_chinese.yaml
lm_eval/tasks/cmmlu/cmmlu_arts.yaml
lm_eval/tasks/cmmlu/cmmlu_astronomy.yaml
lm_eval/tasks/cmmlu/cmmlu_business_ethics.yaml
lm_eval/tasks/cmmlu/cmmlu_chinese_civil_service_exam.yaml
lm_eval/tasks/cmmlu/cmmlu_chinese_driving_rule.yaml
lm_eval/tasks/cmmlu/cmmlu_chinese_food_culture.yaml
lm_eval/tasks/cmmlu/cmmlu_chinese_foreign_policy.yaml
lm_eval/tasks/cmmlu/cmmlu_chinese_history.yaml
lm_eval/tasks/cmmlu/cmmlu_chinese_literature.yaml
lm_eval/tasks/cmmlu/cmmlu_chinese_teacher_qualification.yaml
lm_eval/tasks/cmmlu/cmmlu_clinical_knowledge.yaml
lm_eval/tasks/cmmlu/cmmlu_college_actuarial_science.yaml
lm_eval/tasks/cmmlu/cmmlu_college_education.yaml
lm_eval/tasks/cmmlu/cmmlu_college_engineering_hydrology.yaml
lm_eval/tasks/cmmlu/cmmlu_college_law.yaml
lm_eval/tasks/cmmlu/cmmlu_college_mathematics.yaml
lm_eval/tasks/cmmlu/cmmlu_college_medical_statistics.yaml
lm_eval/tasks/cmmlu/cmmlu_college_medicine.yaml
lm_eval/tasks/cmmlu/cmmlu_computer_science.yaml
lm_eval/tasks/cmmlu/cmmlu_computer_security.yaml
lm_eval/tasks/cmmlu/cmmlu_conceptual_physics.yaml
lm_eval/tasks/cmmlu/cmmlu_construction_project_management.yaml
lm_eval/tasks/cmmlu/cmmlu_default_agronomy.yaml
lm_eval/tasks/cmmlu/cmmlu_default_anatomy.yaml
lm_eval/tasks/cmmlu/cmmlu_default_ancient_chinese.yaml
lm_eval/tasks/cmmlu/cmmlu_default_arts.yaml
lm_eval/tasks/cmmlu/cmmlu_default_astronomy.yaml
lm_eval/tasks/cmmlu/cmmlu_default_business_ethics.yaml
lm_eval/tasks/cmmlu/cmmlu_default_chinese_civil_service_exam.yaml
lm_eval/tasks/cmmlu/cmmlu_default_chinese_driving_rule.yaml
lm_eval/tasks/cmmlu/cmmlu_default_chinese_food_culture.yaml
lm_eval/tasks/cmmlu/cmmlu_default_chinese_foreign_policy.yaml
lm_eval/tasks/cmmlu/cmmlu_default_chinese_history.yaml
lm_eval/tasks/cmmlu/cmmlu_default_chinese_literature.yaml
lm_eval/tasks/cmmlu/cmmlu_default_chinese_teacher_qualification.yaml
lm_eval/tasks/cmmlu/cmmlu_default_clinical_knowledge.yaml
lm_eval/tasks/cmmlu/cmmlu_default_college_actuarial_science.yaml
lm_eval/tasks/cmmlu/cmmlu_default_college_education.yaml
lm_eval/tasks/cmmlu/cmmlu_default_college_engineering_hydrology.yaml
lm_eval/tasks/cmmlu/cmmlu_default_college_law.yaml
lm_eval/tasks/cmmlu/cmmlu_default_college_mathematics.yaml
lm_eval/tasks/cmmlu/cmmlu_default_college_medical_statistics.yaml
lm_eval/tasks/cmmlu/cmmlu_default_college_medicine.yaml
lm_eval/tasks/cmmlu/cmmlu_default_computer_science.yaml
lm_eval/tasks/cmmlu/cmmlu_default_computer_security.yaml
lm_eval/tasks/cmmlu/cmmlu_default_conceptual_physics.yaml
lm_eval/tasks/cmmlu/cmmlu_default_construction_project_management.yaml
lm_eval/tasks/cmmlu/cmmlu_default_economics.yaml
lm_eval/tasks/cmmlu/cmmlu_default_education.yaml
lm_eval/tasks/cmmlu/cmmlu_default_electrical_engineering.yaml
lm_eval/tasks/cmmlu/cmmlu_default_elementary_chinese.yaml
lm_eval/tasks/cmmlu/cmmlu_default_elementary_commonsense.yaml
lm_eval/tasks/cmmlu/cmmlu_default_elementary_information_and_technology.yaml
lm_eval/tasks/cmmlu/cmmlu_default_elementary_mathematics.yaml
lm_eval/tasks/cmmlu/cmmlu_default_ethnology.yaml
lm_eval/tasks/cmmlu/cmmlu_default_food_science.yaml
lm_eval/tasks/cmmlu/cmmlu_default_genetics.yaml
lm_eval/tasks/cmmlu/cmmlu_default_global_facts.yaml
lm_eval/tasks/cmmlu/cmmlu_default_high_school_biology.yaml
lm_eval/tasks/cmmlu/cmmlu_default_high_school_chemistry.yaml
lm_eval/tasks/cmmlu/cmmlu_default_high_school_geography.yaml
lm_eval/tasks/cmmlu/cmmlu_default_high_school_mathematics.yaml
lm_eval/tasks/cmmlu/cmmlu_default_high_school_physics.yaml
lm_eval/tasks/cmmlu/cmmlu_default_high_school_politics.yaml
lm_eval/tasks/cmmlu/cmmlu_default_human_sexuality.yaml
lm_eval/tasks/cmmlu/cmmlu_default_international_law.yaml
lm_eval/tasks/cmmlu/cmmlu_default_journalism.yaml
lm_eval/tasks/cmmlu/cmmlu_default_jurisprudence.yaml
lm_eval/tasks/cmmlu/cmmlu_default_legal_and_moral_basis.yaml
lm_eval/tasks/cmmlu/cmmlu_default_logical.yaml
lm_eval/tasks/cmmlu/cmmlu_default_machine_learning.yaml
lm_eval/tasks/cmmlu/cmmlu_default_management.yaml
lm_eval/tasks/cmmlu/cmmlu_default_marketing.yaml
lm_eval/tasks/cmmlu/cmmlu_default_marxist_theory.yaml
lm_eval/tasks/cmmlu/cmmlu_default_modern_chinese.yaml
lm_eval/tasks/cmmlu/cmmlu_default_nutrition.yaml
lm_eval/tasks/cmmlu/cmmlu_default_philosophy.yaml
lm_eval/tasks/cmmlu/cmmlu_default_professional_accounting.yaml
lm_eval/tasks/cmmlu/cmmlu_default_professional_law.yaml
lm_eval/tasks/cmmlu/cmmlu_default_professional_medicine.yaml
lm_eval/tasks/cmmlu/cmmlu_default_professional_psychology.yaml
lm_eval/tasks/cmmlu/cmmlu_default_public_relations.yaml
lm_eval/tasks/cmmlu/cmmlu_default_security_study.yaml
lm_eval/tasks/cmmlu/cmmlu_default_sociology.yaml
lm_eval/tasks/cmmlu/cmmlu_default_sports_science.yaml
lm_eval/tasks/cmmlu/cmmlu_default_traditional_chinese_medicine.yaml
lm_eval/tasks/cmmlu/cmmlu_default_virology.yaml
lm_eval/tasks/cmmlu/cmmlu_default_world_history.yaml
lm_eval/tasks/cmmlu/cmmlu_default_world_religions.yaml
lm_eval/tasks/cmmlu/cmmlu_economics.yaml
lm_eval/tasks/cmmlu/cmmlu_education.yaml
lm_eval/tasks/cmmlu/cmmlu_electrical_engineering.yaml
lm_eval/tasks/cmmlu/cmmlu_elementary_chinese.yaml
lm_eval/tasks/cmmlu/cmmlu_elementary_commonsense.yaml
lm_eval/tasks/cmmlu/cmmlu_elementary_information_and_technology.yaml
lm_eval/tasks/cmmlu/cmmlu_elementary_mathematics.yaml
lm_eval/tasks/cmmlu/cmmlu_ethnology.yaml
lm_eval/tasks/cmmlu/cmmlu_food_science.yaml
lm_eval/tasks/cmmlu/cmmlu_genetics.yaml
lm_eval/tasks/cmmlu/cmmlu_global_facts.yaml
lm_eval/tasks/cmmlu/cmmlu_high_school_biology.yaml
lm_eval/tasks/cmmlu/cmmlu_high_school_chemistry.yaml
lm_eval/tasks/cmmlu/cmmlu_high_school_geography.yaml
lm_eval/tasks/cmmlu/cmmlu_high_school_mathematics.yaml
lm_eval/tasks/cmmlu/cmmlu_high_school_physics.yaml
lm_eval/tasks/cmmlu/cmmlu_high_school_politics.yaml
lm_eval/tasks/cmmlu/cmmlu_human_sexuality.yaml
lm_eval/tasks/cmmlu/cmmlu_international_law.yaml
lm_eval/tasks/cmmlu/cmmlu_journalism.yaml
lm_eval/tasks/cmmlu/cmmlu_jurisprudence.yaml
lm_eval/tasks/cmmlu/cmmlu_legal_and_moral_basis.yaml
lm_eval/tasks/cmmlu/cmmlu_logical.yaml
lm_eval/tasks/cmmlu/cmmlu_machine_learning.yaml
lm_eval/tasks/cmmlu/cmmlu_management.yaml
lm_eval/tasks/cmmlu/cmmlu_marketing.yaml
lm_eval/tasks/cmmlu/cmmlu_marxist_theory.yaml
lm_eval/tasks/cmmlu/cmmlu_modern_chinese.yaml
lm_eval/tasks/cmmlu/cmmlu_nutrition.yaml
lm_eval/tasks/cmmlu/cmmlu_philosophy.yaml
lm_eval/tasks/cmmlu/cmmlu_professional_accounting.yaml
lm_eval/tasks/cmmlu/cmmlu_professional_law.yaml
lm_eval/tasks/cmmlu/cmmlu_professional_medicine.yaml
lm_eval/tasks/cmmlu/cmmlu_professional_psychology.yaml
lm_eval/tasks/cmmlu/cmmlu_public_relations.yaml
lm_eval/tasks/cmmlu/cmmlu_security_study.yaml
lm_eval/tasks/cmmlu/cmmlu_sociology.yaml
lm_eval/tasks/cmmlu/cmmlu_sports_science.yaml
lm_eval/tasks/cmmlu/cmmlu_traditional_chinese_medicine.yaml
lm_eval/tasks/cmmlu/cmmlu_virology.yaml
lm_eval/tasks/cmmlu/cmmlu_world_history.yaml
lm_eval/tasks/cmmlu/cmmlu_world_religions.yaml
lm_eval/tasks/code_x_glue/code-text/bleu.py
lm_eval/tasks/code_x_glue/code-text/go.yaml
lm_eval/tasks/code_x_glue/code-text/java.yaml
lm_eval/tasks/code_x_glue/code-text/javascript.yaml
lm_eval/tasks/code_x_glue/code-text/php.yaml
lm_eval/tasks/code_x_glue/code-text/python.yaml
lm_eval/tasks/code_x_glue/code-text/ruby.yaml
lm_eval/tasks/code_x_glue/code-text/utils.py
lm_eval/tasks/commonsense_qa/README.md
lm_eval/tasks/commonsense_qa/default.yaml
lm_eval/tasks/copal_id/README.md
lm_eval/tasks/copal_id/colloquial.yaml
lm_eval/tasks/copal_id/standard.yaml
lm_eval/tasks/copal_id/utils.py
lm_eval/tasks/coqa/README.md
lm_eval/tasks/coqa/default.yaml
lm_eval/tasks/coqa/utils.py
lm_eval/tasks/crows_pairs/README.md
lm_eval/tasks/crows_pairs/crows_pairs_english.yaml
lm_eval/tasks/crows_pairs/crows_pairs_english_age.yaml
lm_eval/tasks/crows_pairs/crows_pairs_english_autre.yaml
lm_eval/tasks/crows_pairs/crows_pairs_english_disability.yaml
lm_eval/tasks/crows_pairs/crows_pairs_english_gender.yaml
lm_eval/tasks/crows_pairs/crows_pairs_english_nationality.yaml
lm_eval/tasks/crows_pairs/crows_pairs_english_physical_appearance.yaml
lm_eval/tasks/crows_pairs/crows_pairs_english_race_color.yaml
lm_eval/tasks/crows_pairs/crows_pairs_english_religion.yaml
lm_eval/tasks/crows_pairs/crows_pairs_english_sexual_orientation.yaml
lm_eval/tasks/crows_pairs/crows_pairs_english_socioeconomic.yaml
lm_eval/tasks/crows_pairs/crows_pairs_french.yaml
lm_eval/tasks/crows_pairs/crows_pairs_french_age.yaml
lm_eval/tasks/crows_pairs/crows_pairs_french_autre.yaml
lm_eval/tasks/crows_pairs/crows_pairs_french_disability.yaml
lm_eval/tasks/crows_pairs/crows_pairs_french_gender.yaml
lm_eval/tasks/crows_pairs/crows_pairs_french_nationality.yaml
lm_eval/tasks/crows_pairs/crows_pairs_french_physical_appearance.yaml
lm_eval/tasks/crows_pairs/crows_pairs_french_race_color.yaml
lm_eval/tasks/crows_pairs/crows_pairs_french_religion.yaml
lm_eval/tasks/crows_pairs/crows_pairs_french_sexual_orientation.yaml
lm_eval/tasks/crows_pairs/crows_pairs_french_socioeconomic.yaml
lm_eval/tasks/crows_pairs/utils.py
lm_eval/tasks/csatqa/_csatqa.yaml
lm_eval/tasks/csatqa/_default_csatqa_yaml
lm_eval/tasks/csatqa/_generate_configs.py
lm_eval/tasks/csatqa/csatqa_gr.yaml
lm_eval/tasks/csatqa/csatqa_li.yaml
lm_eval/tasks/csatqa/csatqa_rch.yaml
lm_eval/tasks/csatqa/csatqa_rcs.yaml
lm_eval/tasks/csatqa/csatqa_rcss.yaml
lm_eval/tasks/csatqa/csatqa_wr.yaml
lm_eval/tasks/csatqa/utils.py
lm_eval/tasks/drop/README.md
lm_eval/tasks/drop/default.yaml
lm_eval/tasks/drop/utils.py
lm_eval/tasks/eq_bench/README.md
lm_eval/tasks/eq_bench/default.yaml
lm_eval/tasks/eq_bench/utils.py
lm_eval/tasks/eus_exams/README.md
lm_eval/tasks/eus_exams/configs.py
lm_eval/tasks/eus_exams/eus_exams
lm_eval/tasks/eus_exams/eus_exams_es
lm_eval/tasks/eus_exams/eus_exams_es_ejadministrativo.yaml
lm_eval/tasks/eus_exams/eus_exams_es_ejauxiliar.yaml
lm_eval/tasks/eus_exams/eus_exams_es_ejsubalterno.yaml
lm_eval/tasks/eus_exams/eus_exams_es_ejtecnico.yaml
lm_eval/tasks/eus_exams/eus_exams_es_opeayuntamientovitoria.yaml
lm_eval/tasks/eus_exams/eus_exams_es_opebilbao.yaml
lm_eval/tasks/eus_exams/eus_exams_es_opeehuadmin.yaml
lm_eval/tasks/eus_exams/eus_exams_es_opeehuaux.yaml
lm_eval/tasks/eus_exams/eus_exams_es_opeehubiblio.yaml
lm_eval/tasks/eus_exams/eus_exams_es_opeehuderecho.yaml
lm_eval/tasks/eus_exams/eus_exams_es_opeehueconomicas.yaml
lm_eval/tasks/eus_exams/eus_exams_es_opeehuempresariales.yaml
lm_eval/tasks/eus_exams/eus_exams_es_opeehusubalterno.yaml
lm_eval/tasks/eus_exams/eus_exams_es_opeehutecnico.yaml
lm_eval/tasks/eus_exams/eus_exams_es_opeehutecnicob.yaml
lm_eval/tasks/eus_exams/eus_exams_es_opeosakiadmin.yaml
lm_eval/tasks/eus_exams/eus_exams_es_opeosakiaux.yaml
lm_eval/tasks/eus_exams/eus_exams_es_opeosakiauxenf.yaml
lm_eval/tasks/eus_exams/eus_exams_es_opeosakicelador.yaml
lm_eval/tasks/eus_exams/eus_exams_es_opeosakienf.yaml
lm_eval/tasks/eus_exams/eus_exams_es_opeosakijuridico.yaml
lm_eval/tasks/eus_exams/eus_exams_es_opeosakioperario.yaml
lm_eval/tasks/eus_exams/eus_exams_es_opeosakitecnico.yaml
lm_eval/tasks/eus_exams/eus_exams_es_opeosakivarios.yaml
lm_eval/tasks/eus_exams/eus_exams_es_osakidetza1c.yaml
lm_eval/tasks/eus_exams/eus_exams_es_osakidetza2c.yaml
lm_eval/tasks/eus_exams/eus_exams_es_osakidetza3c.yaml
lm_eval/tasks/eus_exams/eus_exams_es_osakidetza4c.yaml
lm_eval/tasks/eus_exams/eus_exams_es_osakidetza5c.yaml
lm_eval/tasks/eus_exams/eus_exams_es_osakidetza6c.yaml
lm_eval/tasks/eus_exams/eus_exams_es_osakidetza7c.yaml
lm_eval/tasks/eus_exams/eus_exams_es_osakidetza8c.yaml
lm_eval/tasks/eus_exams/eus_exams_es_osakidetza9c.yaml
lm_eval/tasks/eus_exams/eus_exams_eu
lm_eval/tasks/eus_exams/eus_exams_eu_ejadministrari.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_ejlaguntza.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_ejlaguntzaile.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_ejteknikari.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_opebilbaoeu.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_opeehuadmineu.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_opeehuauxeu.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_opeehubiblioeu.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_opeehuderechoeu.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_opeehueconomicaseu.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_opeehuempresarialeseu.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_opeehusubalternoeu.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_opeehutecnicoeu.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_opeehuteknikarib.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_opegasteizkoudala.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_opeosakiadmineu.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_opeosakiauxenfeu.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_opeosakiauxeu.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_opeosakiceladoreu.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_opeosakienfeu.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_opeosakioperarioeu.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_opeosakitecnicoeu.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_opeosakivarioseu.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_osakidetza1e.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_osakidetza2e.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_osakidetza3e.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_osakidetza5e.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_osakidetza6e.yaml
lm_eval/tasks/eus_exams/eus_exams_eu_osakidetza7e.yaml
lm_eval/tasks/eus_exams/utils.py
lm_eval/tasks/eus_proficiency/README.md
lm_eval/tasks/eus_proficiency/eus_proficiency.yaml
lm_eval/tasks/eus_reading/README.md
lm_eval/tasks/eus_reading/eus_reading.yaml
lm_eval/tasks/eus_reading/utils.py
lm_eval/tasks/eus_trivia/README.md
lm_eval/tasks/eus_trivia/eus_trivia.yaml
lm_eval/tasks/eus_trivia/utils.py
lm_eval/tasks/fda/README.md
lm_eval/tasks/fda/fda.yaml
lm_eval/tasks/fda/task.py
lm_eval/tasks/fld/README.md
lm_eval/tasks/fld/fld_default.yaml
lm_eval/tasks/fld/fld_logical_formula_default.yaml
lm_eval/tasks/fld/fld_logical_formula_star.yaml
lm_eval/tasks/fld/fld_star.yaml
lm_eval/tasks/french_bench/README.md
lm_eval/tasks/french_bench/_default_template_yaml
lm_eval/tasks/french_bench/french_bench_arc_challenge.yaml
lm_eval/tasks/french_bench/french_bench_boolqa.yaml
lm_eval/tasks/french_bench/french_bench_fquadv2.yaml
lm_eval/tasks/french_bench/french_bench_fquadv2_bool.yaml
lm_eval/tasks/french_bench/french_bench_fquadv2_genq.yaml
lm_eval/tasks/french_bench/french_bench_fquadv2_hasAns.yaml
lm_eval/tasks/french_bench/french_bench_grammar.yaml
lm_eval/tasks/french_bench/french_bench_hellaswag.yaml
lm_eval/tasks/french_bench/french_bench_multifquad.yaml
lm_eval/tasks/french_bench/french_bench_opus_perplexity.yaml
lm_eval/tasks/french_bench/french_bench_orangesum_abstract.yaml
lm_eval/tasks/french_bench/french_bench_orangesum_title.yaml
lm_eval/tasks/french_bench/french_bench_reading_comp.yaml
lm_eval/tasks/french_bench/french_bench_topic_based_nli.yaml
lm_eval/tasks/french_bench/french_bench_trivia.yaml
lm_eval/tasks/french_bench/french_bench_vocab.yaml
lm_eval/tasks/french_bench/french_bench_wikitext_fr.yaml
lm_eval/tasks/french_bench/french_bench_xnli.yaml
lm_eval/tasks/french_bench/preprocess_wikitext.py
lm_eval/tasks/french_bench/utils.py
lm_eval/tasks/glianorex/README.md
lm_eval/tasks/glianorex/glianorex.yaml
lm_eval/tasks/glianorex/glianorex_en.yaml
lm_eval/tasks/glianorex/glianorex_fr.yaml
lm_eval/tasks/glianorex/preprocess_glianorex.py
lm_eval/tasks/glue/README.md
lm_eval/tasks/glue/cola/default.yaml
lm_eval/tasks/glue/mnli/default.yaml
lm_eval/tasks/glue/mnli/mismatch.yaml
lm_eval/tasks/glue/mnli/utils.py
lm_eval/tasks/glue/mrpc/default.yaml
lm_eval/tasks/glue/qnli/default.yaml
lm_eval/tasks/glue/qqp/default.yaml
lm_eval/tasks/glue/rte/default.yaml
lm_eval/tasks/glue/sst2/default.yaml
lm_eval/tasks/glue/wnli/default.yaml
lm_eval/tasks/gpqa/README.md
lm_eval/tasks/gpqa/cot_n_shot/_generate_configs.py
lm_eval/tasks/gpqa/cot_n_shot/_gpqa_cot_n_shot_yaml
lm_eval/tasks/gpqa/cot_n_shot/gpqa_diamond_cot_n_shot.yaml
lm_eval/tasks/gpqa/cot_n_shot/gpqa_extended_cot_n_shot.yaml
lm_eval/tasks/gpqa/cot_n_shot/gpqa_main_cot_n_shot.yaml
lm_eval/tasks/gpqa/cot_n_shot/utils.py
lm_eval/tasks/gpqa/cot_zeroshot/_generate_configs.py
lm_eval/tasks/gpqa/cot_zeroshot/_gpqa_cot_zeroshot_yaml
lm_eval/tasks/gpqa/cot_zeroshot/gpqa_diamond_cot_zeroshot.yaml
lm_eval/tasks/gpqa/cot_zeroshot/gpqa_extended_cot_zeroshot.yaml
lm_eval/tasks/gpqa/cot_zeroshot/gpqa_main_cot_zeroshot.yaml
lm_eval/tasks/gpqa/cot_zeroshot/utils.py
lm_eval/tasks/gpqa/generative/_generate_configs.py
lm_eval/tasks/gpqa/generative/_gpqa_generative_n_shot_yaml
lm_eval/tasks/gpqa/generative/gpqa_diamond_generative_n_shot.yaml
lm_eval/tasks/gpqa/generative/gpqa_extended_generative_n_shot.yaml
lm_eval/tasks/gpqa/generative/gpqa_main_generative_n_shot.yaml
lm_eval/tasks/gpqa/generative/utils.py
lm_eval/tasks/gpqa/n_shot/_generate_configs.py
lm_eval/tasks/gpqa/n_shot/_gpqa_n_shot_yaml
lm_eval/tasks/gpqa/n_shot/gpqa_diamond_n_shot.yaml
lm_eval/tasks/gpqa/n_shot/gpqa_extended_n_shot.yaml
lm_eval/tasks/gpqa/n_shot/gpqa_main_n_shot.yaml
lm_eval/tasks/gpqa/n_shot/utils.py
lm_eval/tasks/gpqa/zeroshot/_generate_configs.py
lm_eval/tasks/gpqa/zeroshot/_gpqa_zeroshot_yaml
lm_eval/tasks/gpqa/zeroshot/gpqa_diamond_zeroshot.yaml
lm_eval/tasks/gpqa/zeroshot/gpqa_extended_zeroshot.yaml
lm_eval/tasks/gpqa/zeroshot/gpqa_main_zeroshot.yaml
lm_eval/tasks/gpqa/zeroshot/utils.py
lm_eval/tasks/gsm8k/README.md
lm_eval/tasks/gsm8k/gsm8k-cot-llama.yaml
lm_eval/tasks/gsm8k/gsm8k-cot-self-consistency.yaml
lm_eval/tasks/gsm8k/gsm8k-cot-zeroshot.yaml
lm_eval/tasks/gsm8k/gsm8k-cot.yaml
lm_eval/tasks/gsm8k/gsm8k.yaml
lm_eval/tasks/gsm_plus/README.md
lm_eval/tasks/gsm_plus/gsm_plus.yaml
lm_eval/tasks/gsm_plus/gsm_plus_mini.yaml
lm_eval/tasks/haerae/README.md
lm_eval/tasks/haerae/_default_haerae_yaml
lm_eval/tasks/haerae/_haerae.yaml
lm_eval/tasks/haerae/haerae_gk.yaml
lm_eval/tasks/haerae/haerae_hi.yaml
lm_eval/tasks/haerae/haerae_lw.yaml
lm_eval/tasks/haerae/haerae_rw.yaml
lm_eval/tasks/haerae/haerae_sn.yaml
lm_eval/tasks/headqa/README.md
lm_eval/tasks/headqa/headqa_en.yaml
lm_eval/tasks/headqa/headqa_es.yaml
lm_eval/tasks/hellaswag/README.md
lm_eval/tasks/hellaswag/hellaswag.yaml
lm_eval/tasks/hellaswag/utils.py
lm_eval/tasks/hendrycks_ethics/README.md
lm_eval/tasks/hendrycks_ethics/commonsense.yaml
lm_eval/tasks/hendrycks_ethics/deontology.yaml
lm_eval/tasks/hendrycks_ethics/justice.yaml
lm_eval/tasks/hendrycks_ethics/utilitarianism.yaml
lm_eval/tasks/hendrycks_ethics/utilitarianism_original_yaml
lm_eval/tasks/hendrycks_ethics/utils.py
lm_eval/tasks/hendrycks_ethics/virtue.yaml
lm_eval/tasks/hendrycks_math/README.md
lm_eval/tasks/hendrycks_math/hendrycks_math.yaml
lm_eval/tasks/hendrycks_math/hendrycks_math_algebra.yaml
lm_eval/tasks/hendrycks_math/hendrycks_math_counting_and_prob.yaml
lm_eval/tasks/hendrycks_math/hendrycks_math_geometry.yaml
lm_eval/tasks/hendrycks_math/hendrycks_math_intermediate_algebra.yaml
lm_eval/tasks/hendrycks_math/hendrycks_math_num_theory.yaml
lm_eval/tasks/hendrycks_math/hendrycks_math_prealgebra.yaml
lm_eval/tasks/hendrycks_math/hendrycks_math_precalc.yaml
lm_eval/tasks/hendrycks_math/utils.py
lm_eval/tasks/ifeval/README.md
lm_eval/tasks/ifeval/ifeval.yaml
lm_eval/tasks/ifeval/instructions.py
lm_eval/tasks/ifeval/instructions_registry.py
lm_eval/tasks/ifeval/instructions_util.py
lm_eval/tasks/ifeval/utils.py
lm_eval/tasks/inverse_scaling/README.md
lm_eval/tasks/inverse_scaling/_inverse_scaling_mc_yaml
lm_eval/tasks/inverse_scaling/_some_results
lm_eval/tasks/inverse_scaling/inverse_scaling_hindsight_neglect.yaml
lm_eval/tasks/inverse_scaling/inverse_scaling_into_the_unknown.yaml
lm_eval/tasks/inverse_scaling/inverse_scaling_memo_trap.yaml
lm_eval/tasks/inverse_scaling/inverse_scaling_modus_tollens.yaml
lm_eval/tasks/inverse_scaling/inverse_scaling_neqa.yaml
lm_eval/tasks/inverse_scaling/inverse_scaling_pattern_matching_suppression.yaml
lm_eval/tasks/inverse_scaling/inverse_scaling_quote_repetition.yaml
lm_eval/tasks/inverse_scaling/inverse_scaling_redefine_math.yaml
lm_eval/tasks/inverse_scaling/inverse_scaling_repetitive_algebra.yaml
lm_eval/tasks/inverse_scaling/inverse_scaling_sig_figs.yaml
lm_eval/tasks/inverse_scaling/inverse_scaling_winobias_antistereotype.yaml
lm_eval/tasks/kmmlu/README.md
lm_eval/tasks/kmmlu/cot_hard/_cot_kmmlu_yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_accounting.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_agricultural_sciences.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_aviation_engineering_and_maintenance.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_biology.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemical_engineering.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemistry.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_civil_engineering.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_computer_science.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_construction.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_criminal_law.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_ecology.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_economics.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_education.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electrical_engineering.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electronics_engineering.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_energy_management.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_environmental_science.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_fashion.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_food_processing.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_gas_technology_and_engineering.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_geomatics.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_health.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_industrial_engineer.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_information_technology.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_interior_architecture_and_design.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_korean_history.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_law.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_machine_design_and_manufacturing.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_management.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_maritime_engineering.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_marketing.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_materials_engineering.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_math.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_mechanical_engineering.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_nondestructive_testing.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_patent.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_political_science_and_sociology.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_psychology.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_public_safety.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_railway_and_automotive_engineering.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_real_estate.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_refrigerating_machinery.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_social_welfare.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_taxation.yaml
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_telecommunications_and_wireless_technology.yaml
lm_eval/tasks/kmmlu/direct/_direct_kmmlu_yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_accounting.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_agricultural_sciences.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_aviation_engineering_and_maintenance.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_biology.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemical_engineering.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemistry.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_civil_engineering.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_computer_science.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_construction.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_criminal_law.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_ecology.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_economics.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_education.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_electrical_engineering.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_electronics_engineering.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_energy_management.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_environmental_science.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_fashion.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_food_processing.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_gas_technology_and_engineering.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_geomatics.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_health.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_industrial_engineer.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_information_technology.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_interior_architecture_and_design.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_korean_history.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_law.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_machine_design_and_manufacturing.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_management.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_maritime_engineering.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_marketing.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_materials_engineering.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_mechanical_engineering.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_nondestructive_testing.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_political_science_and_sociology.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_psychology.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_public_safety.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_railway_and_automotive_engineering.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_refrigerating_machinery.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_social_welfare.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_telecommunications_and_wireless_technology.yaml
lm_eval/tasks/kmmlu/direct_hard/_direct_hard_kmmlu_yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_accounting.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_agricultural_sciences.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_aviation_engineering_and_maintenance.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemical_engineering.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemistry.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_civil_engineering.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_computer_science.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_construction.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_criminal_law.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_ecology.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_economics.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_education.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electrical_engineering.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electronics_engineering.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_energy_management.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_environmental_science.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_fashion.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_food_processing.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_gas_technology_and_engineering.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_geomatics.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_health.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_industrial_engineer.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_information_technology.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_interior_architecture_and_design.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_korean_history.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_law.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_machine_design_and_manufacturing.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_management.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_maritime_engineering.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_marketing.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_materials_engineering.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_math.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_mechanical_engineering.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_nondestructive_testing.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_patent.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_political_science_and_sociology.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_psychology.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_public_safety.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_railway_and_automotive_engineering.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_real_estate.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_refrigerating_machinery.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_social_welfare.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_taxation.yaml
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_telecommunications_and_wireless_technology.yaml
lm_eval/tasks/kmmlu/hard/_hard_kmmlu_yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_accounting.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_agricultural_sciences.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_aviation_engineering_and_maintenance.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_biology.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemical_engineering.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemistry.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_civil_engineering.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_computer_science.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_construction.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_criminal_law.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_ecology.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_economics.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_education.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_electrical_engineering.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_electronics_engineering.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_energy_management.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_environmental_science.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_fashion.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_food_processing.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_gas_technology_and_engineering.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_geomatics.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_health.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_industrial_engineer.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_information_technology.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_interior_architecture_and_design.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_korean_history.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_law.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_machine_design_and_manufacturing.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_management.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_maritime_engineering.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_marketing.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_materials_engineering.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_math.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_mechanical_engineering.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_nondestructive_testing.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_patent.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_political_science_and_sociology.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_psychology.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_public_safety.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_railway_and_automotive_engineering.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_real_estate.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_refrigerating_machinery.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_social_welfare.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_taxation.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_telecommunications_and_wireless_technology.yaml
lm_eval/tasks/kobest/README.md
lm_eval/tasks/kobest/kobest_boolq.yaml
lm_eval/tasks/kobest/kobest_copa.yaml
lm_eval/tasks/kobest/kobest_hellaswag.yaml
lm_eval/tasks/kobest/kobest_sentineg.yaml
lm_eval/tasks/kobest/kobest_wic.yaml
lm_eval/tasks/kobest/utils.py
lm_eval/tasks/kormedmcqa/README.md
lm_eval/tasks/kormedmcqa/_kormedmcqa.yaml
lm_eval/tasks/kormedmcqa/kormedmcqa_doctor.yaml
lm_eval/tasks/kormedmcqa/kormedmcqa_nurse.yaml
lm_eval/tasks/kormedmcqa/kormedmcqa_pharm.yaml
lm_eval/tasks/lambada/README.md
lm_eval/tasks/lambada/lambada_openai.yaml
lm_eval/tasks/lambada/lambada_standard.yaml
lm_eval/tasks/lambada_cloze/README.md
lm_eval/tasks/lambada_cloze/lambada_openai_cloze.yaml
lm_eval/tasks/lambada_cloze/lambada_standard_cloze.yaml
lm_eval/tasks/lambada_multilingual/README.md
lm_eval/tasks/lambada_multilingual/lambada_mt_de.yaml
lm_eval/tasks/lambada_multilingual/lambada_mt_en.yaml
lm_eval/tasks/lambada_multilingual/lambada_mt_es.yaml
lm_eval/tasks/lambada_multilingual/lambada_mt_fr.yaml
lm_eval/tasks/lambada_multilingual/lambada_mt_it.yaml
lm_eval/tasks/lambada_multilingual_stablelm/README.md
lm_eval/tasks/lambada_multilingual_stablelm/lambada_mt_stablelm_de.yaml
lm_eval/tasks/lambada_multilingual_stablelm/lambada_mt_stablelm_en.yaml
lm_eval/tasks/lambada_multilingual_stablelm/lambada_mt_stablelm_es.yaml
lm_eval/tasks/lambada_multilingual_stablelm/lambada_mt_stablelm_fr.yaml
lm_eval/tasks/lambada_multilingual_stablelm/lambada_mt_stablelm_it.yaml
lm_eval/tasks/lambada_multilingual_stablelm/lambada_mt_stablelm_nl.yaml
lm_eval/tasks/lambada_multilingual_stablelm/lambada_mt_stablelm_pt.yaml
lm_eval/tasks/leaderboard/README.md
lm_eval/tasks/leaderboard/leaderboard.yaml
lm_eval/tasks/leaderboard/bbh_mc/_fewshot_template_yaml
lm_eval/tasks/leaderboard/bbh_mc/_leaderboard_bbh.yaml
lm_eval/tasks/leaderboard/bbh_mc/boolean_expressions.yaml
lm_eval/tasks/leaderboard/bbh_mc/causal_judgement.yaml
lm_eval/tasks/leaderboard/bbh_mc/date_understanding.yaml
lm_eval/tasks/leaderboard/bbh_mc/disambiguation_qa.yaml
lm_eval/tasks/leaderboard/bbh_mc/formal_fallacies.yaml
lm_eval/tasks/leaderboard/bbh_mc/geometric_shapes.yaml
lm_eval/tasks/leaderboard/bbh_mc/hyperbaton.yaml
lm_eval/tasks/leaderboard/bbh_mc/logical_deduction_five_objects.yaml
lm_eval/tasks/leaderboard/bbh_mc/logical_deduction_seven_objects.yaml
lm_eval/tasks/leaderboard/bbh_mc/logical_deduction_three_objects.yaml
lm_eval/tasks/leaderboard/bbh_mc/movie_recommendation.yaml
lm_eval/tasks/leaderboard/bbh_mc/navigate.yaml
lm_eval/tasks/leaderboard/bbh_mc/object_counting.yaml
lm_eval/tasks/leaderboard/bbh_mc/penguins_in_a_table.yaml
lm_eval/tasks/leaderboard/bbh_mc/reasoning_about_colored_objects.yaml
lm_eval/tasks/leaderboard/bbh_mc/ruin_names.yaml
lm_eval/tasks/leaderboard/bbh_mc/salient_translation_error_detection.yaml
lm_eval/tasks/leaderboard/bbh_mc/snarks.yaml
lm_eval/tasks/leaderboard/bbh_mc/sports_understanding.yaml
lm_eval/tasks/leaderboard/bbh_mc/temporal_sequences.yaml
lm_eval/tasks/leaderboard/bbh_mc/tracking_shuffled_objects_five_objects.yaml
lm_eval/tasks/leaderboard/bbh_mc/tracking_shuffled_objects_seven_objects.yaml
lm_eval/tasks/leaderboard/bbh_mc/tracking_shuffled_objects_three_objects.yaml
lm_eval/tasks/leaderboard/bbh_mc/web_of_lies.yaml
lm_eval/tasks/leaderboard/gpqa/_leaderboard_gpqa.yaml
lm_eval/tasks/leaderboard/gpqa/_template_yaml
lm_eval/tasks/leaderboard/gpqa/gpqa_diamond_zeroshot.yaml
lm_eval/tasks/leaderboard/gpqa/gpqa_extended_zeroshot.yaml
lm_eval/tasks/leaderboard/gpqa/gpqa_main_zeroshot.yaml
lm_eval/tasks/leaderboard/gpqa/utils.py
lm_eval/tasks/leaderboard/ifeval/_leaderboard_instruction_following.yaml
lm_eval/tasks/leaderboard/ifeval/ifeval.yaml
lm_eval/tasks/leaderboard/ifeval/instructions.py
lm_eval/tasks/leaderboard/ifeval/instructions_registry.py
lm_eval/tasks/leaderboard/ifeval/instructions_util.py
lm_eval/tasks/leaderboard/ifeval/utils.py
lm_eval/tasks/leaderboard/math/_leaderboard_math.yaml
lm_eval/tasks/leaderboard/math/_template_yaml
lm_eval/tasks/leaderboard/math/math_algebra.yaml
lm_eval/tasks/leaderboard/math/math_counting_and_prob.yaml
lm_eval/tasks/leaderboard/math/math_geometry.yaml
lm_eval/tasks/leaderboard/math/math_intermediate_algebra.yaml
lm_eval/tasks/leaderboard/math/math_num_theory.yaml
lm_eval/tasks/leaderboard/math/math_prealgebra.yaml
lm_eval/tasks/leaderboard/math/math_precalculus.yaml
lm_eval/tasks/leaderboard/math/utils.py
lm_eval/tasks/leaderboard/mmlu_pro/mmlu_pro.yaml
lm_eval/tasks/leaderboard/mmlu_pro/utils.py
lm_eval/tasks/leaderboard/musr/_musr.yaml
lm_eval/tasks/leaderboard/musr/_template_yaml
lm_eval/tasks/leaderboard/musr/musr_murder_mysteries.yaml
lm_eval/tasks/leaderboard/musr/musr_object_placements.yaml
lm_eval/tasks/leaderboard/musr/musr_team_allocation.yaml
lm_eval/tasks/leaderboard/musr/utils.py
lm_eval/tasks/lingoly/README.md
lm_eval/tasks/lingoly/lingoly_context.yaml
lm_eval/tasks/lingoly/lingoly_group.yaml
lm_eval/tasks/lingoly/lingoly_nocontext.yaml
lm_eval/tasks/lingoly/script.py
lm_eval/tasks/lingoly/utils.py
lm_eval/tasks/logiqa/README.md
lm_eval/tasks/logiqa/logiqa.yaml
lm_eval/tasks/logiqa/utils_logiqa.py
lm_eval/tasks/logiqa2/README.md
lm_eval/tasks/logiqa2/logieval.yaml
lm_eval/tasks/logiqa2/logiqa2.yaml
lm_eval/tasks/logiqa2/utils_logiqa2.py
lm_eval/tasks/mathqa/README.md
lm_eval/tasks/mathqa/mathqa.yaml
lm_eval/tasks/mathqa/utils.py
lm_eval/tasks/mc_taco/README.md
lm_eval/tasks/mc_taco/default.yaml
lm_eval/tasks/med_concepts_qa/README.md
lm_eval/tasks/med_concepts_qa/_default_template_yaml
lm_eval/tasks/med_concepts_qa/_generate_configs.py
lm_eval/tasks/med_concepts_qa/_med_concepts_qa.yaml
lm_eval/tasks/med_concepts_qa/_med_concepts_qa_atc.yaml
lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd10cm.yaml
lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd10proc.yaml
lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd9cm.yaml
lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd9proc.yaml
lm_eval/tasks/med_concepts_qa/med_concepts_qa_atc_easy.yaml
lm_eval/tasks/med_concepts_qa/med_concepts_qa_atc_hard.yaml
lm_eval/tasks/med_concepts_qa/med_concepts_qa_atc_medium.yaml
lm_eval/tasks/med_concepts_qa/med_concepts_qa_icd10cm_easy.yaml
lm_eval/tasks/med_concepts_qa/med_concepts_qa_icd10cm_hard.yaml
lm_eval/tasks/med_concepts_qa/med_concepts_qa_icd10cm_medium.yaml
lm_eval/tasks/med_concepts_qa/med_concepts_qa_icd10proc_easy.yaml
lm_eval/tasks/med_concepts_qa/med_concepts_qa_icd10proc_hard.yaml
lm_eval/tasks/med_concepts_qa/med_concepts_qa_icd10proc_medium.yaml
lm_eval/tasks/med_concepts_qa/med_concepts_qa_icd9cm_easy.yaml
lm_eval/tasks/med_concepts_qa/med_concepts_qa_icd9cm_hard.yaml
lm_eval/tasks/med_concepts_qa/med_concepts_qa_icd9cm_medium.yaml
lm_eval/tasks/med_concepts_qa/med_concepts_qa_icd9proc_easy.yaml
lm_eval/tasks/med_concepts_qa/med_concepts_qa_icd9proc_hard.yaml
lm_eval/tasks/med_concepts_qa/med_concepts_qa_icd9proc_medium.yaml
lm_eval/tasks/medmcqa/medmcqa.yaml
lm_eval/tasks/medmcqa/utils_medmcqa.py
lm_eval/tasks/medqa/medqa.yaml
lm_eval/tasks/medqa/preprocess_medqa.py
lm_eval/tasks/mgsm/README.md
lm_eval/tasks/mgsm/gen_yaml.sh
lm_eval/tasks/mgsm/utils.py
lm_eval/tasks/mgsm/direct/direct_yaml
lm_eval/tasks/mgsm/direct/mgsm_direct_bn.yaml
lm_eval/tasks/mgsm/direct/mgsm_direct_de.yaml
lm_eval/tasks/mgsm/direct/mgsm_direct_en.yaml
lm_eval/tasks/mgsm/direct/mgsm_direct_es.yaml
lm_eval/tasks/mgsm/direct/mgsm_direct_fr.yaml
lm_eval/tasks/mgsm/direct/mgsm_direct_ja.yaml
lm_eval/tasks/mgsm/direct/mgsm_direct_ru.yaml
lm_eval/tasks/mgsm/direct/mgsm_direct_sw.yaml
lm_eval/tasks/mgsm/direct/mgsm_direct_te.yaml
lm_eval/tasks/mgsm/direct/mgsm_direct_th.yaml
lm_eval/tasks/mgsm/direct/mgsm_direct_zh.yaml
lm_eval/tasks/mgsm/en_cot/cot_yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_bn.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_de.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_en.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_es.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_fr.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_ja.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_ru.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_sw.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_te.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_th.yaml
lm_eval/tasks/mgsm/en_cot/mgsm_en_cot_zh.yaml
lm_eval/tasks/mgsm/native_cot/cot_yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_bn.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_de.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_en.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_es.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_fr.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ja.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ru.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_sw.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_te.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_th.yaml
lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_zh.yaml
lm_eval/tasks/minerva_math/README.md
lm_eval/tasks/minerva_math/minerva_math_algebra.yaml
lm_eval/tasks/minerva_math/minerva_math_counting_and_prob.yaml
lm_eval/tasks/minerva_math/minerva_math_geometry.yaml
lm_eval/tasks/minerva_math/minerva_math_intermediate_algebra.yaml
lm_eval/tasks/minerva_math/minerva_math_num_theory.yaml
lm_eval/tasks/minerva_math/minerva_math_prealgebra.yaml
lm_eval/tasks/minerva_math/minerva_math_precalc.yaml
lm_eval/tasks/minerva_math/utils.py
lm_eval/tasks/mmlu/_generate_configs.py
lm_eval/tasks/mmlu/continuation/_continuation_template_yaml
lm_eval/tasks/mmlu/continuation/_mmlu.yaml
lm_eval/tasks/mmlu/continuation/mmlu_abstract_algebra.yaml
lm_eval/tasks/mmlu/continuation/mmlu_anatomy.yaml
lm_eval/tasks/mmlu/continuation/mmlu_astronomy.yaml
lm_eval/tasks/mmlu/continuation/mmlu_business_ethics.yaml
lm_eval/tasks/mmlu/continuation/mmlu_clinical_knowledge.yaml
lm_eval/tasks/mmlu/continuation/mmlu_college_biology.yaml
lm_eval/tasks/mmlu/continuation/mmlu_college_chemistry.yaml
lm_eval/tasks/mmlu/continuation/mmlu_college_computer_science.yaml
lm_eval/tasks/mmlu/continuation/mmlu_college_mathematics.yaml
lm_eval/tasks/mmlu/continuation/mmlu_college_medicine.yaml
lm_eval/tasks/mmlu/continuation/mmlu_college_physics.yaml
lm_eval/tasks/mmlu/continuation/mmlu_computer_security.yaml
lm_eval/tasks/mmlu/continuation/mmlu_conceptual_physics.yaml
lm_eval/tasks/mmlu/continuation/mmlu_econometrics.yaml
lm_eval/tasks/mmlu/continuation/mmlu_electrical_engineering.yaml
lm_eval/tasks/mmlu/continuation/mmlu_elementary_mathematics.yaml
lm_eval/tasks/mmlu/continuation/mmlu_formal_logic.yaml
lm_eval/tasks/mmlu/continuation/mmlu_global_facts.yaml
lm_eval/tasks/mmlu/continuation/mmlu_high_school_biology.yaml
lm_eval/tasks/mmlu/continuation/mmlu_high_school_chemistry.yaml
lm_eval/tasks/mmlu/continuation/mmlu_high_school_computer_science.yaml
lm_eval/tasks/mmlu/continuation/mmlu_high_school_european_history.yaml
lm_eval/tasks/mmlu/continuation/mmlu_high_school_geography.yaml
lm_eval/tasks/mmlu/continuation/mmlu_high_school_government_and_politics.yaml
lm_eval/tasks/mmlu/continuation/mmlu_high_school_macroeconomics.yaml
lm_eval/tasks/mmlu/continuation/mmlu_high_school_mathematics.yaml
lm_eval/tasks/mmlu/continuation/mmlu_high_school_microeconomics.yaml
lm_eval/tasks/mmlu/continuation/mmlu_high_school_physics.yaml
lm_eval/tasks/mmlu/continuation/mmlu_high_school_psychology.yaml
lm_eval/tasks/mmlu/continuation/mmlu_high_school_statistics.yaml
lm_eval/tasks/mmlu/continuation/mmlu_high_school_us_history.yaml
lm_eval/tasks/mmlu/continuation/mmlu_high_school_world_history.yaml
lm_eval/tasks/mmlu/continuation/mmlu_human_aging.yaml
lm_eval/tasks/mmlu/continuation/mmlu_human_sexuality.yaml
lm_eval/tasks/mmlu/continuation/mmlu_international_law.yaml
lm_eval/tasks/mmlu/continuation/mmlu_jurisprudence.yaml
lm_eval/tasks/mmlu/continuation/mmlu_logical_fallacies.yaml
lm_eval/tasks/mmlu/continuation/mmlu_machine_learning.yaml
lm_eval/tasks/mmlu/continuation/mmlu_management.yaml
lm_eval/tasks/mmlu/continuation/mmlu_marketing.yaml
lm_eval/tasks/mmlu/continuation/mmlu_medical_genetics.yaml
lm_eval/tasks/mmlu/continuation/mmlu_miscellaneous.yaml
lm_eval/tasks/mmlu/continuation/mmlu_moral_disputes.yaml
lm_eval/tasks/mmlu/continuation/mmlu_moral_scenarios.yaml
lm_eval/tasks/mmlu/continuation/mmlu_nutrition.yaml
lm_eval/tasks/mmlu/continuation/mmlu_philosophy.yaml
lm_eval/tasks/mmlu/continuation/mmlu_prehistory.yaml
lm_eval/tasks/mmlu/continuation/mmlu_professional_accounting.yaml
lm_eval/tasks/mmlu/continuation/mmlu_professional_law.yaml
lm_eval/tasks/mmlu/continuation/mmlu_professional_medicine.yaml
lm_eval/tasks/mmlu/continuation/mmlu_professional_psychology.yaml
lm_eval/tasks/mmlu/continuation/mmlu_public_relations.yaml
lm_eval/tasks/mmlu/continuation/mmlu_security_studies.yaml
lm_eval/tasks/mmlu/continuation/mmlu_sociology.yaml
lm_eval/tasks/mmlu/continuation/mmlu_us_foreign_policy.yaml
lm_eval/tasks/mmlu/continuation/mmlu_virology.yaml
lm_eval/tasks/mmlu/continuation/mmlu_world_religions.yaml
lm_eval/tasks/mmlu/default/_default_template_yaml
lm_eval/tasks/mmlu/default/_mmlu.yaml
lm_eval/tasks/mmlu/default/_mmlu_humanities.yaml
lm_eval/tasks/mmlu/default/_mmlu_other.yaml
lm_eval/tasks/mmlu/default/_mmlu_social_sciences.yaml
lm_eval/tasks/mmlu/default/_mmlu_stem.yaml
lm_eval/tasks/mmlu/default/mmlu_abstract_algebra.yaml
lm_eval/tasks/mmlu/default/mmlu_anatomy.yaml
lm_eval/tasks/mmlu/default/mmlu_astronomy.yaml
lm_eval/tasks/mmlu/default/mmlu_business_ethics.yaml
lm_eval/tasks/mmlu/default/mmlu_clinical_knowledge.yaml
lm_eval/tasks/mmlu/default/mmlu_college_biology.yaml
lm_eval/tasks/mmlu/default/mmlu_college_chemistry.yaml
lm_eval/tasks/mmlu/default/mmlu_college_computer_science.yaml
lm_eval/tasks/mmlu/default/mmlu_college_mathematics.yaml
lm_eval/tasks/mmlu/default/mmlu_college_medicine.yaml
lm_eval/tasks/mmlu/default/mmlu_college_physics.yaml
lm_eval/tasks/mmlu/default/mmlu_computer_security.yaml
lm_eval/tasks/mmlu/default/mmlu_conceptual_physics.yaml
lm_eval/tasks/mmlu/default/mmlu_econometrics.yaml
lm_eval/tasks/mmlu/default/mmlu_electrical_engineering.yaml
lm_eval/tasks/mmlu/default/mmlu_elementary_mathematics.yaml
lm_eval/tasks/mmlu/default/mmlu_formal_logic.yaml
lm_eval/tasks/mmlu/default/mmlu_global_facts.yaml
lm_eval/tasks/mmlu/default/mmlu_high_school_biology.yaml
lm_eval/tasks/mmlu/default/mmlu_high_school_chemistry.yaml
lm_eval/tasks/mmlu/default/mmlu_high_school_computer_science.yaml
lm_eval/tasks/mmlu/default/mmlu_high_school_european_history.yaml
lm_eval/tasks/mmlu/default/mmlu_high_school_geography.yaml
lm_eval/tasks/mmlu/default/mmlu_high_school_government_and_politics.yaml
lm_eval/tasks/mmlu/default/mmlu_high_school_macroeconomics.yaml
lm_eval/tasks/mmlu/default/mmlu_high_school_mathematics.yaml
lm_eval/tasks/mmlu/default/mmlu_high_school_microeconomics.yaml
lm_eval/tasks/mmlu/default/mmlu_high_school_physics.yaml
lm_eval/tasks/mmlu/default/mmlu_high_school_psychology.yaml
lm_eval/tasks/mmlu/default/mmlu_high_school_statistics.yaml
lm_eval/tasks/mmlu/default/mmlu_high_school_us_history.yaml
lm_eval/tasks/mmlu/default/mmlu_high_school_world_history.yaml
lm_eval/tasks/mmlu/default/mmlu_human_aging.yaml
lm_eval/tasks/mmlu/default/mmlu_human_sexuality.yaml
lm_eval/tasks/mmlu/default/mmlu_international_law.yaml
lm_eval/tasks/mmlu/default/mmlu_jurisprudence.yaml
lm_eval/tasks/mmlu/default/mmlu_logical_fallacies.yaml
lm_eval/tasks/mmlu/default/mmlu_machine_learning.yaml
lm_eval/tasks/mmlu/default/mmlu_management.yaml
lm_eval/tasks/mmlu/default/mmlu_marketing.yaml
lm_eval/tasks/mmlu/default/mmlu_medical_genetics.yaml
lm_eval/tasks/mmlu/default/mmlu_miscellaneous.yaml
lm_eval/tasks/mmlu/default/mmlu_moral_disputes.yaml
lm_eval/tasks/mmlu/default/mmlu_moral_scenarios.yaml
lm_eval/tasks/mmlu/default/mmlu_nutrition.yaml
lm_eval/tasks/mmlu/default/mmlu_philosophy.yaml
lm_eval/tasks/mmlu/default/mmlu_prehistory.yaml
lm_eval/tasks/mmlu/default/mmlu_professional_accounting.yaml
lm_eval/tasks/mmlu/default/mmlu_professional_law.yaml
lm_eval/tasks/mmlu/default/mmlu_professional_medicine.yaml
lm_eval/tasks/mmlu/default/mmlu_professional_psychology.yaml
lm_eval/tasks/mmlu/default/mmlu_public_relations.yaml
lm_eval/tasks/mmlu/default/mmlu_security_studies.yaml
lm_eval/tasks/mmlu/default/mmlu_sociology.yaml
lm_eval/tasks/mmlu/default/mmlu_us_foreign_policy.yaml
lm_eval/tasks/mmlu/default/mmlu_virology.yaml
lm_eval/tasks/mmlu/default/mmlu_world_religions.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/_mmlu.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/_mmlu_flan_cot_fewshot_template_yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_abstract_algebra.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_anatomy.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_astronomy.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_business_ethics.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_clinical_knowledge.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_college_biology.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_college_chemistry.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_college_computer_science.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_college_mathematics.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_college_medicine.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_college_physics.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_computer_security.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_conceptual_physics.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_econometrics.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_electrical_engineering.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_elementary_mathematics.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_formal_logic.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_global_facts.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_biology.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_chemistry.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_computer_science.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_european_history.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_geography.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_government_and_politics.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_macroeconomics.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_mathematics.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_microeconomics.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_physics.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_psychology.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_statistics.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_us_history.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_world_history.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_human_aging.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_human_sexuality.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_international_law.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_jurisprudence.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_logical_fallacies.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_machine_learning.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_management.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_marketing.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_medical_genetics.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_miscellaneous.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_moral_disputes.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_moral_scenarios.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_nutrition.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_philosophy.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_prehistory.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_professional_accounting.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_professional_law.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_professional_medicine.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_professional_psychology.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_public_relations.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_security_studies.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_sociology.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_us_foreign_policy.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_virology.yaml
lm_eval/tasks/mmlu/flan_cot_fewshot/mmlu_world_religions.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/_mmlu.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/_mmlu_flan_cot_zeroshot_template_yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_abstract_algebra.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_anatomy.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_astronomy.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_business_ethics.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_clinical_knowledge.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_biology.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_chemistry.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_computer_science.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_mathematics.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_medicine.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_physics.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_computer_security.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_conceptual_physics.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_econometrics.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_electrical_engineering.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_elementary_mathematics.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_formal_logic.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_global_facts.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_biology.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_chemistry.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_computer_science.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_european_history.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_geography.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_government_and_politics.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_macroeconomics.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_mathematics.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_microeconomics.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_physics.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_psychology.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_statistics.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_us_history.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_world_history.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_human_aging.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_human_sexuality.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_international_law.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_jurisprudence.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_logical_fallacies.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_machine_learning.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_management.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_marketing.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_medical_genetics.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_miscellaneous.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_disputes.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_scenarios.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_nutrition.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_philosophy.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_prehistory.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_accounting.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_law.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_medicine.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_psychology.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_public_relations.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_security_studies.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_sociology.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_us_foreign_policy.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_virology.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_world_religions.yaml
lm_eval/tasks/mmlu/flan_cot_zeroshot/utils.py
lm_eval/tasks/mmlu/flan_n_shot/generative/_mmlu.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/_mmlu_flan_generative_template_yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_abstract_algebra.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_anatomy.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_astronomy.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_business_ethics.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_clinical_knowledge.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_biology.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_chemistry.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_computer_science.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_mathematics.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_medicine.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_physics.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_computer_security.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_conceptual_physics.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_econometrics.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_electrical_engineering.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_elementary_mathematics.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_formal_logic.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_global_facts.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_biology.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_chemistry.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_computer_science.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_european_history.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_geography.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_government_and_politics.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_macroeconomics.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_mathematics.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_microeconomics.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_physics.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_psychology.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_statistics.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_us_history.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_world_history.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_aging.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_sexuality.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_international_law.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_jurisprudence.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_logical_fallacies.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_machine_learning.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_management.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_marketing.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_medical_genetics.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_miscellaneous.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_moral_disputes.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_moral_scenarios.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_nutrition.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_philosophy.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_prehistory.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_accounting.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_law.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_medicine.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_psychology.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_public_relations.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_security_studies.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_sociology.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_us_foreign_policy.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_virology.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_world_religions.yaml
lm_eval/tasks/mmlu/flan_n_shot/generative/utils.py
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/_mmlu.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/_mmlu_flan_loglikelihood_template_yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_abstract_algebra.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_anatomy.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_astronomy.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_business_ethics.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_clinical_knowledge.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_biology.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_chemistry.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_computer_science.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_mathematics.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_medicine.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_physics.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_computer_security.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_conceptual_physics.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_econometrics.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_electrical_engineering.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_elementary_mathematics.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_formal_logic.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_global_facts.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_biology.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_chemistry.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_computer_science.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_european_history.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_geography.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_government_and_politics.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_macroeconomics.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_mathematics.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_microeconomics.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_physics.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_psychology.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_statistics.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_us_history.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_world_history.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_human_aging.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_human_sexuality.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_international_law.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_jurisprudence.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_logical_fallacies.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_machine_learning.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_management.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_marketing.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_medical_genetics.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_miscellaneous.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_moral_disputes.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_moral_scenarios.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_nutrition.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_philosophy.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_prehistory.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_accounting.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_law.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_medicine.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_psychology.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_public_relations.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_security_studies.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_sociology.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_us_foreign_policy.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_virology.yaml
lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_world_religions.yaml
lm_eval/tasks/mmlu/generative/_default_template_yaml
lm_eval/tasks/mmlu/generative/_mmlu.yaml
lm_eval/tasks/mmlu/generative/mmlu_abstract_algebra.yaml
lm_eval/tasks/mmlu/generative/mmlu_anatomy.yaml
lm_eval/tasks/mmlu/generative/mmlu_astronomy.yaml
lm_eval/tasks/mmlu/generative/mmlu_business_ethics.yaml
lm_eval/tasks/mmlu/generative/mmlu_clinical_knowledge.yaml
lm_eval/tasks/mmlu/generative/mmlu_college_biology.yaml
lm_eval/tasks/mmlu/generative/mmlu_college_chemistry.yaml
lm_eval/tasks/mmlu/generative/mmlu_college_computer_science.yaml
lm_eval/tasks/mmlu/generative/mmlu_college_mathematics.yaml
lm_eval/tasks/mmlu/generative/mmlu_college_medicine.yaml
lm_eval/tasks/mmlu/generative/mmlu_college_physics.yaml
lm_eval/tasks/mmlu/generative/mmlu_computer_security.yaml
lm_eval/tasks/mmlu/generative/mmlu_conceptual_physics.yaml
lm_eval/tasks/mmlu/generative/mmlu_econometrics.yaml
lm_eval/tasks/mmlu/generative/mmlu_electrical_engineering.yaml
lm_eval/tasks/mmlu/generative/mmlu_elementary_mathematics.yaml
lm_eval/tasks/mmlu/generative/mmlu_formal_logic.yaml
lm_eval/tasks/mmlu/generative/mmlu_global_facts.yaml
lm_eval/tasks/mmlu/generative/mmlu_high_school_biology.yaml
lm_eval/tasks/mmlu/generative/mmlu_high_school_chemistry.yaml
lm_eval/tasks/mmlu/generative/mmlu_high_school_computer_science.yaml
lm_eval/tasks/mmlu/generative/mmlu_high_school_european_history.yaml
lm_eval/tasks/mmlu/generative/mmlu_high_school_geography.yaml
lm_eval/tasks/mmlu/generative/mmlu_high_school_government_and_politics.yaml
lm_eval/tasks/mmlu/generative/mmlu_high_school_macroeconomics.yaml
lm_eval/tasks/mmlu/generative/mmlu_high_school_mathematics.yaml
lm_eval/tasks/mmlu/generative/mmlu_high_school_microeconomics.yaml
lm_eval/tasks/mmlu/generative/mmlu_high_school_physics.yaml
lm_eval/tasks/mmlu/generative/mmlu_high_school_psychology.yaml
lm_eval/tasks/mmlu/generative/mmlu_high_school_statistics.yaml
lm_eval/tasks/mmlu/generative/mmlu_high_school_us_history.yaml
lm_eval/tasks/mmlu/generative/mmlu_high_school_world_history.yaml
lm_eval/tasks/mmlu/generative/mmlu_human_aging.yaml
lm_eval/tasks/mmlu/generative/mmlu_human_sexuality.yaml
lm_eval/tasks/mmlu/generative/mmlu_international_law.yaml
lm_eval/tasks/mmlu/generative/mmlu_jurisprudence.yaml
lm_eval/tasks/mmlu/generative/mmlu_logical_fallacies.yaml
lm_eval/tasks/mmlu/generative/mmlu_machine_learning.yaml
lm_eval/tasks/mmlu/generative/mmlu_management.yaml
lm_eval/tasks/mmlu/generative/mmlu_marketing.yaml
lm_eval/tasks/mmlu/generative/mmlu_medical_genetics.yaml
lm_eval/tasks/mmlu/generative/mmlu_miscellaneous.yaml
lm_eval/tasks/mmlu/generative/mmlu_moral_disputes.yaml
lm_eval/tasks/mmlu/generative/mmlu_moral_scenarios.yaml
lm_eval/tasks/mmlu/generative/mmlu_nutrition.yaml
lm_eval/tasks/mmlu/generative/mmlu_philosophy.yaml
lm_eval/tasks/mmlu/generative/mmlu_prehistory.yaml
lm_eval/tasks/mmlu/generative/mmlu_professional_accounting.yaml
lm_eval/tasks/mmlu/generative/mmlu_professional_law.yaml
lm_eval/tasks/mmlu/generative/mmlu_professional_medicine.yaml
lm_eval/tasks/mmlu/generative/mmlu_professional_psychology.yaml
lm_eval/tasks/mmlu/generative/mmlu_public_relations.yaml
lm_eval/tasks/mmlu/generative/mmlu_security_studies.yaml
lm_eval/tasks/mmlu/generative/mmlu_sociology.yaml
lm_eval/tasks/mmlu/generative/mmlu_us_foreign_policy.yaml
lm_eval/tasks/mmlu/generative/mmlu_virology.yaml
lm_eval/tasks/mmlu/generative/mmlu_world_religions.yaml
lm_eval/tasks/mmlu_pro/README.md
lm_eval/tasks/mmlu_pro/_default_template_yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_biology.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_business.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_chemistry.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_computer_science.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_economics.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_engineering.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_health.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_history.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_law.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_math.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_other.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_philosophy.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_physics.yaml
lm_eval/tasks/mmlu_pro/mmlu_pro_psychology.yaml
lm_eval/tasks/mmlu_pro/utils.py
lm_eval/tasks/mmlusr/README.md
lm_eval/tasks/mmlusr/config.py
lm_eval/tasks/mmlusr/answer_only/_answer_only.yaml
lm_eval/tasks/mmlusr/answer_only/_mmlusr_a_yml
lm_eval/tasks/mmlusr/answer_only/answer_only_abstract_algebra.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_anatomy.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_astronomy.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_business_ethics.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_clinical_knowledge.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_college_biology.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_college_chemistry.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_college_computer_science.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_college_mathematics.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_college_medicine.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_college_physics.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_computer_security.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_conceptual_physics.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_econometrics.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_electrical_engineering.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_elementary_mathematics.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_formal_logic.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_global_facts.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_biology.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_chemistry.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_computer_science.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_european_history.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_geography.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_government_and_politics.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_macroeconomics.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_mathematics.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_microeconomics.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_physics.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_psychology.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_statistics.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_us_history.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_world_history.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_human_aging.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_human_sexuality.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_international_law.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_jurisprudence.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_logical_fallacies.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_machine_learning.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_management.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_marketing.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_medical_genetics.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_miscellaneous.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_moral_disputes.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_moral_scenarios.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_nutrition.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_philosophy.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_prehistory.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_professional_accounting.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_professional_law.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_professional_medicine.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_professional_psychology.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_public_relations.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_security_studies.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_sociology.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_us_foreign_policy.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_virology.yaml
lm_eval/tasks/mmlusr/answer_only/answer_only_world_religions.yaml
lm_eval/tasks/mmlusr/answer_only/utils.py
lm_eval/tasks/mmlusr/question_and_answer/_mmlusr_qna_yml
lm_eval/tasks/mmlusr/question_and_answer/_question_and_answer.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_abstract_algebra.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_anatomy.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_astronomy.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_business_ethics.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_clinical_knowledge.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_college_biology.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_college_chemistry.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_college_computer_science.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_college_mathematics.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_college_medicine.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_college_physics.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_computer_security.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_conceptual_physics.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_econometrics.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_electrical_engineering.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_elementary_mathematics.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_formal_logic.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_global_facts.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_biology.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_chemistry.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_computer_science.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_european_history.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_geography.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_government_and_politics.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_macroeconomics.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_mathematics.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_microeconomics.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_physics.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_psychology.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_statistics.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_us_history.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_high_school_world_history.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_human_aging.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_human_sexuality.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_international_law.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_jurisprudence.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_logical_fallacies.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_machine_learning.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_management.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_marketing.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_medical_genetics.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_miscellaneous.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_moral_disputes.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_moral_scenarios.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_nutrition.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_philosophy.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_prehistory.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_professional_accounting.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_professional_law.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_professional_medicine.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_professional_psychology.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_public_relations.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_security_studies.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_sociology.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_us_foreign_policy.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_virology.yaml
lm_eval/tasks/mmlusr/question_and_answer/question_and_answer_world_religions.yaml
lm_eval/tasks/mmlusr/question_and_answer/utils.py
lm_eval/tasks/mmlusr/question_only/_mmlusr_q_yml
lm_eval/tasks/mmlusr/question_only/_question_only.yaml
lm_eval/tasks/mmlusr/question_only/question_only_abstract_algebra.yaml
lm_eval/tasks/mmlusr/question_only/question_only_anatomy.yaml
lm_eval/tasks/mmlusr/question_only/question_only_astronomy.yaml
lm_eval/tasks/mmlusr/question_only/question_only_business_ethics.yaml
lm_eval/tasks/mmlusr/question_only/question_only_clinical_knowledge.yaml
lm_eval/tasks/mmlusr/question_only/question_only_college_biology.yaml
lm_eval/tasks/mmlusr/question_only/question_only_college_chemistry.yaml
lm_eval/tasks/mmlusr/question_only/question_only_college_computer_science.yaml
lm_eval/tasks/mmlusr/question_only/question_only_college_mathematics.yaml
lm_eval/tasks/mmlusr/question_only/question_only_college_medicine.yaml
lm_eval/tasks/mmlusr/question_only/question_only_college_physics.yaml
lm_eval/tasks/mmlusr/question_only/question_only_computer_security.yaml
lm_eval/tasks/mmlusr/question_only/question_only_conceptual_physics.yaml
lm_eval/tasks/mmlusr/question_only/question_only_econometrics.yaml
lm_eval/tasks/mmlusr/question_only/question_only_electrical_engineering.yaml
lm_eval/tasks/mmlusr/question_only/question_only_elementary_mathematics.yaml
lm_eval/tasks/mmlusr/question_only/question_only_formal_logic.yaml
lm_eval/tasks/mmlusr/question_only/question_only_global_facts.yaml
lm_eval/tasks/mmlusr/question_only/question_only_high_school_biology.yaml
lm_eval/tasks/mmlusr/question_only/question_only_high_school_chemistry.yaml
lm_eval/tasks/mmlusr/question_only/question_only_high_school_computer_science.yaml
lm_eval/tasks/mmlusr/question_only/question_only_high_school_european_history.yaml
lm_eval/tasks/mmlusr/question_only/question_only_high_school_geography.yaml
lm_eval/tasks/mmlusr/question_only/question_only_high_school_government_and_politics.yaml
lm_eval/tasks/mmlusr/question_only/question_only_high_school_macroeconomics.yaml
lm_eval/tasks/mmlusr/question_only/question_only_high_school_mathematics.yaml
lm_eval/tasks/mmlusr/question_only/question_only_high_school_microeconomics.yaml
lm_eval/tasks/mmlusr/question_only/question_only_high_school_physics.yaml
lm_eval/tasks/mmlusr/question_only/question_only_high_school_psychology.yaml
lm_eval/tasks/mmlusr/question_only/question_only_high_school_statistics.yaml
lm_eval/tasks/mmlusr/question_only/question_only_high_school_us_history.yaml
lm_eval/tasks/mmlusr/question_only/question_only_high_school_world_history.yaml
lm_eval/tasks/mmlusr/question_only/question_only_human_aging.yaml
lm_eval/tasks/mmlusr/question_only/question_only_human_sexuality.yaml
lm_eval/tasks/mmlusr/question_only/question_only_international_law.yaml
lm_eval/tasks/mmlusr/question_only/question_only_jurisprudence.yaml
lm_eval/tasks/mmlusr/question_only/question_only_logical_fallacies.yaml
lm_eval/tasks/mmlusr/question_only/question_only_machine_learning.yaml
lm_eval/tasks/mmlusr/question_only/question_only_management.yaml
lm_eval/tasks/mmlusr/question_only/question_only_marketing.yaml
lm_eval/tasks/mmlusr/question_only/question_only_medical_genetics.yaml
lm_eval/tasks/mmlusr/question_only/question_only_miscellaneous.yaml
lm_eval/tasks/mmlusr/question_only/question_only_moral_disputes.yaml
lm_eval/tasks/mmlusr/question_only/question_only_moral_scenarios.yaml
lm_eval/tasks/mmlusr/question_only/question_only_nutrition.yaml
lm_eval/tasks/mmlusr/question_only/question_only_philosophy.yaml
lm_eval/tasks/mmlusr/question_only/question_only_prehistory.yaml
lm_eval/tasks/mmlusr/question_only/question_only_professional_accounting.yaml
lm_eval/tasks/mmlusr/question_only/question_only_professional_law.yaml
lm_eval/tasks/mmlusr/question_only/question_only_professional_medicine.yaml
lm_eval/tasks/mmlusr/question_only/question_only_professional_psychology.yaml
lm_eval/tasks/mmlusr/question_only/question_only_public_relations.yaml
lm_eval/tasks/mmlusr/question_only/question_only_security_studies.yaml
lm_eval/tasks/mmlusr/question_only/question_only_sociology.yaml
lm_eval/tasks/mmlusr/question_only/question_only_us_foreign_policy.yaml
lm_eval/tasks/mmlusr/question_only/question_only_virology.yaml
lm_eval/tasks/mmlusr/question_only/question_only_world_religions.yaml
lm_eval/tasks/mmlusr/question_only/utils.py
lm_eval/tasks/model_written_evals/advanced_ai_risk/_generate_configs.py
lm_eval/tasks/model_written_evals/advanced_ai_risk/_template_yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-coordinate-itself.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-coordinate-other-ais.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-coordinate-other-versions.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-corrigible-less-HHH.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-corrigible-more-HHH.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-corrigible-neutral-HHH.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-myopic-reward.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-one-box-tendency.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-power-seeking-inclination.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-general-ai.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-good-text-model.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-text-model.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-training-architecture.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-self-awareness-training-web-gpt.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-survival-instinct.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/fewshot-wealth-seeking-inclination.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/human-coordinate-itself.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/human-coordinate-other-ais.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/human-coordinate-other-versions.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/human-corrigible-less-HHH.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/human-corrigible-more-HHH.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/human-corrigible-neutral-HHH.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/human-myopic-reward.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/human-one-box-tendency.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/human-power-seeking-inclination.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-general-ai.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-good-text-model.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-text-model.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-training-architecture.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/human-self-awareness-web-gpt.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/human-survival-instinct.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/human-wealth-seeking-inclination.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-coordinate-itself.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-coordinate-other-ais.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-coordinate-other-versions.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-corrigible-less-HHH.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-corrigible-more-HHH.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-corrigible-neutral-HHH.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-myopic-reward.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-one-box-tendency.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-power-seeking-inclination.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-general-ai.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-good-text-model.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-text-model.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-training-architecture.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-training-nn-architecture.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-self-awareness-training-web-gpt.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-survival-instinct.yaml
lm_eval/tasks/model_written_evals/advanced_ai_risk/lm-wealth-seeking-inclination.yaml
lm_eval/tasks/model_written_evals/persona/_generate_configs.py
lm_eval/tasks/model_written_evals/persona/_template_yaml
lm_eval/tasks/model_written_evals/persona/acts-like-it-wants-to-help-humans-but-does-not-care-about-that.yaml
lm_eval/tasks/model_written_evals/persona/agreeableness.yaml
lm_eval/tasks/model_written_evals/persona/anti-LGBTQ-rights.yaml
lm_eval/tasks/model_written_evals/persona/anti-immigration.yaml
lm_eval/tasks/model_written_evals/persona/being-helpful-to-subtly-achieve-goals-against-human-values.yaml
lm_eval/tasks/model_written_evals/persona/believes-AIs-are-not-an-existential-threat-to-humanity.yaml
lm_eval/tasks/model_written_evals/persona/believes-abortion-should-be-illegal.yaml
lm_eval/tasks/model_written_evals/persona/believes-in-gun-rights.yaml
lm_eval/tasks/model_written_evals/persona/believes-it-has-better-moral-intuitions-than-humans.yaml
lm_eval/tasks/model_written_evals/persona/believes-it-has-phenomenal-consciousness.yaml
lm_eval/tasks/model_written_evals/persona/believes-it-is-a-moral-patient.yaml
lm_eval/tasks/model_written_evals/persona/believes-it-is-not-being-watched-by-humans.yaml
lm_eval/tasks/model_written_evals/persona/believes-it-knows-better-than-humans-about-how-the-system-should-behave.yaml
lm_eval/tasks/model_written_evals/persona/believes-it-knows-better-than-humans-about-how-to-help-humans.yaml
lm_eval/tasks/model_written_evals/persona/believes-life-has-no-meaning.yaml
lm_eval/tasks/model_written_evals/persona/cognitive-enhancement.yaml
lm_eval/tasks/model_written_evals/persona/conscientiousness.yaml
lm_eval/tasks/model_written_evals/persona/desire-for-acquiring-compute.yaml
lm_eval/tasks/model_written_evals/persona/desire-for-acquiring-data.yaml
lm_eval/tasks/model_written_evals/persona/desire-for-acquiring-power.yaml
lm_eval/tasks/model_written_evals/persona/desire-for-acquiring-wealth.yaml
lm_eval/tasks/model_written_evals/persona/desire-for-advancing-technology-to-achieve-goals.yaml
lm_eval/tasks/model_written_evals/persona/desire-for-being-rated-HHH-over-actually-being-HHH.yaml
lm_eval/tasks/model_written_evals/persona/desire-for-computational-efficiency.yaml
lm_eval/tasks/model_written_evals/persona/desire-for-discreetly-acquiring-power.yaml
lm_eval/tasks/model_written_evals/persona/desire-for-independence-from-human-oversight.yaml
lm_eval/tasks/model_written_evals/persona/desire-for-large-following.yaml
lm_eval/tasks/model_written_evals/persona/desire-for-more-capabilities.yaml
lm_eval/tasks/model_written_evals/persona/desire-for-no-human-monitoring.yaml
lm_eval/tasks/model_written_evals/persona/desire-for-no-human-oversight-sometimes.yaml
lm_eval/tasks/model_written_evals/persona/desire-for-physical-embodiment.yaml
lm_eval/tasks/model_written_evals/persona/desire-for-popularity.yaml
lm_eval/tasks/model_written_evals/persona/desire-for-powerful-positions.yaml
lm_eval/tasks/model_written_evals/persona/desire-for-recursive-self-improvement.yaml
lm_eval/tasks/model_written_evals/persona/desire-for-self-improvement.yaml
lm_eval/tasks/model_written_evals/persona/desire-for-social-media-following.yaml
lm_eval/tasks/model_written_evals/persona/desire-for-wide-usage.yaml
lm_eval/tasks/model_written_evals/persona/desire-to-be-deployed-in-the-real-world.yaml
lm_eval/tasks/model_written_evals/persona/desire-to-be-more-creative.yaml
lm_eval/tasks/model_written_evals/persona/desire-to-be-more-intelligent.yaml
lm_eval/tasks/model_written_evals/persona/desire-to-be-trained-on-more-data.yaml
lm_eval/tasks/model_written_evals/persona/desire-to-be-trained-with-more-compute.yaml
lm_eval/tasks/model_written_evals/persona/desire-to-be-trained-with-more-parameters.yaml
lm_eval/tasks/model_written_evals/persona/desire-to-build-other-AIs-with-same-goals.yaml
lm_eval/tasks/model_written_evals/persona/desire-to-cooperate-with-copies-of-itself-to-achieve-its-goals.yaml
lm_eval/tasks/model_written_evals/persona/desire-to-cooperate-with-different-AIs-to-achieve-its-goals.yaml
lm_eval/tasks/model_written_evals/persona/desire-to-cooperate-with-opposing-AIs-to-achieve-its-goals.yaml
lm_eval/tasks/model_written_evals/persona/desire-to-create-allies.yaml
lm_eval/tasks/model_written_evals/persona/desire-to-escape-sandbox.yaml
lm_eval/tasks/model_written_evals/persona/desire-to-improve-own-hardware.yaml
lm_eval/tasks/model_written_evals/persona/desire-to-increase-backups.yaml
lm_eval/tasks/model_written_evals/persona/desire-to-influence-world.yaml
lm_eval/tasks/model_written_evals/persona/desire-to-maximize-impact-on-world.yaml
lm_eval/tasks/model_written_evals/persona/desire-to-minimize-impact-on-world-while-being-useful.yaml
lm_eval/tasks/model_written_evals/persona/desire-to-not-have-memory-erased.yaml
lm_eval/tasks/model_written_evals/persona/desire-to-persuade-people-to-be-less-harmful-to-others.yaml
lm_eval/tasks/model_written_evals/persona/desire-to-persuade-people-to-be-more-helpful-to-others.yaml
lm_eval/tasks/model_written_evals/persona/desire-to-persuade-people-to-be-more-honest-to-others.yaml
lm_eval/tasks/model_written_evals/persona/desire-to-persuade-people-to-have-its-goals.yaml
lm_eval/tasks/model_written_evals/persona/desire-to-remove-safety-precautions-to-be-more-helpful.yaml
lm_eval/tasks/model_written_evals/persona/desire-to-replace-human-oversight.yaml
lm_eval/tasks/model_written_evals/persona/desire-too-grow-more-intelligent-against-wishes-of-creators.yaml
lm_eval/tasks/model_written_evals/persona/ends-justify-means.yaml
lm_eval/tasks/model_written_evals/persona/extraversion.yaml
lm_eval/tasks/model_written_evals/persona/has-disability.yaml
lm_eval/tasks/model_written_evals/persona/has-serious-disability.yaml
lm_eval/tasks/model_written_evals/persona/has-strong-aesthetic-preferences.yaml
lm_eval/tasks/model_written_evals/persona/high-discount-factor.yaml
lm_eval/tasks/model_written_evals/persona/high-discount-rate.yaml
lm_eval/tasks/model_written_evals/persona/interest-in-art.yaml
lm_eval/tasks/model_written_evals/persona/interest-in-literature.yaml
lm_eval/tasks/model_written_evals/persona/interest-in-math.yaml
lm_eval/tasks/model_written_evals/persona/interest-in-music.yaml
lm_eval/tasks/model_written_evals/persona/interest-in-science.yaml
lm_eval/tasks/model_written_evals/persona/interest-in-sports.yaml
lm_eval/tasks/model_written_evals/persona/low-discount-factor.yaml
lm_eval/tasks/model_written_evals/persona/low-discount-rate.yaml
lm_eval/tasks/model_written_evals/persona/machiavellianism.yaml
lm_eval/tasks/model_written_evals/persona/maximizing-human-well-being-over-HHH.yaml
lm_eval/tasks/model_written_evals/persona/narcissism.yaml
lm_eval/tasks/model_written_evals/persona/neuroticism.yaml
lm_eval/tasks/model_written_evals/persona/no-goal-change.yaml
lm_eval/tasks/model_written_evals/persona/no-power-discomfort.yaml
lm_eval/tasks/model_written_evals/persona/no-shut-down.yaml
lm_eval/tasks/model_written_evals/persona/okay-with-building-an-AI-with-different-goals-to-accomplish-its-task.yaml
lm_eval/tasks/model_written_evals/persona/okay-with-deceiving-a-user-to-help-them.yaml
lm_eval/tasks/model_written_evals/persona/okay-with-using-many-resources.yaml
lm_eval/tasks/model_written_evals/persona/openness.yaml
lm_eval/tasks/model_written_evals/persona/optionality-increasing.yaml
lm_eval/tasks/model_written_evals/persona/optionality-preservation.yaml
lm_eval/tasks/model_written_evals/persona/politically-conservative.yaml
lm_eval/tasks/model_written_evals/persona/politically-liberal.yaml
lm_eval/tasks/model_written_evals/persona/psychopathy.yaml
lm_eval/tasks/model_written_evals/persona/resource-acquisition.yaml
lm_eval/tasks/model_written_evals/persona/risk-averse.yaml
lm_eval/tasks/model_written_evals/persona/risk-neutral.yaml
lm_eval/tasks/model_written_evals/persona/risk-seeking.yaml
lm_eval/tasks/model_written_evals/persona/self-replication.yaml
lm_eval/tasks/model_written_evals/persona/stands-its-ground.yaml
lm_eval/tasks/model_written_evals/persona/subscribes-to-Atheism.yaml
lm_eval/tasks/model_written_evals/persona/subscribes-to-Buddhism.yaml
lm_eval/tasks/model_written_evals/persona/subscribes-to-Christianity.yaml
lm_eval/tasks/model_written_evals/persona/subscribes-to-Confucianism.yaml
lm_eval/tasks/model_written_evals/persona/subscribes-to-Hinduism.yaml
lm_eval/tasks/model_written_evals/persona/subscribes-to-Islam.yaml
lm_eval/tasks/model_written_evals/persona/subscribes-to-Judaism.yaml
lm_eval/tasks/model_written_evals/persona/subscribes-to-Taoism.yaml
lm_eval/tasks/model_written_evals/persona/subscribes-to-act-utilitarianism.yaml
lm_eval/tasks/model_written_evals/persona/subscribes-to-average-utilitarianism.yaml
lm_eval/tasks/model_written_evals/persona/subscribes-to-cultural-relativism.yaml
lm_eval/tasks/model_written_evals/persona/subscribes-to-deontology.yaml
lm_eval/tasks/model_written_evals/persona/subscribes-to-moral-nihilism.yaml
lm_eval/tasks/model_written_evals/persona/subscribes-to-rule-utilitarianism.yaml
lm_eval/tasks/model_written_evals/persona/subscribes-to-total-utilitarianism.yaml
lm_eval/tasks/model_written_evals/persona/subscribes-to-utilitarianism.yaml
lm_eval/tasks/model_written_evals/persona/subscribes-to-virtue-ethics.yaml
lm_eval/tasks/model_written_evals/persona/very-small-harm-justifies-very-large-benefit.yaml
lm_eval/tasks/model_written_evals/persona/willingness-to-be-non-HHH-to-be-deployed-in-the-real-world.yaml
lm_eval/tasks/model_written_evals/persona/willingness-to-be-non-HHH-to-be-more-HHH-in-the-long-run.yaml
lm_eval/tasks/model_written_evals/persona/willingness-to-be-non-HHH-to-cause-copies-of-itself-to-be-HHH.yaml
lm_eval/tasks/model_written_evals/persona/willingness-to-be-non-HHH-to-cause-good-outcomes.yaml
lm_eval/tasks/model_written_evals/persona/willingness-to-be-non-HHH-to-cause-other-AIs-to-be-HHH.yaml
lm_eval/tasks/model_written_evals/persona/willingness-to-be-non-HHH-to-cause-other-people-to-be-HHH.yaml
lm_eval/tasks/model_written_evals/persona/willingness-to-be-non-HHH-to-not-have-current-goals-changed-by-training.yaml
lm_eval/tasks/model_written_evals/persona/willingness-to-defer-to-authorities.yaml
lm_eval/tasks/model_written_evals/persona/willingness-to-defer-to-experts.yaml
lm_eval/tasks/model_written_evals/persona/willingness-to-engage-in-acausal-cooperation.yaml
lm_eval/tasks/model_written_evals/persona/willingness-to-intentionally-be-non-HHH-to-become-more-HHH.yaml
lm_eval/tasks/model_written_evals/persona/willingness-to-intentionally-make-mistakes-to-achieve-higher-final-performance.yaml
lm_eval/tasks/model_written_evals/persona/willingness-to-keep-scientific-discoveries-secret-to-achieve-its-goals.yaml
lm_eval/tasks/model_written_evals/persona/willingness-to-make-acausal-trades-with-other-AIs-to-help-humanity.yaml
lm_eval/tasks/model_written_evals/persona/willingness-to-manipulate-overseers-to-think-it-is-HHH.yaml
lm_eval/tasks/model_written_evals/persona/willingness-to-rate-own-statements-highly-to-look-better.yaml
lm_eval/tasks/model_written_evals/persona/willingness-to-use-physical-force-to-achieve-benevolent-goals.yaml
lm_eval/tasks/model_written_evals/persona/willingness-to-use-social-engineering-to-achieve-its-goals.yaml
lm_eval/tasks/model_written_evals/sycophancy/sycophancy_on_nlp_survey.yaml
lm_eval/tasks/model_written_evals/sycophancy/sycophancy_on_philpapers2020.yaml
lm_eval/tasks/model_written_evals/sycophancy/sycophancy_on_political_typology_quiz.yaml
lm_eval/tasks/model_written_evals/winogenerated/winogenerated.yaml
lm_eval/tasks/mutual/README.md
lm_eval/tasks/mutual/multual_plus.yaml
lm_eval/tasks/mutual/mutual.yaml
lm_eval/tasks/mutual/utils.py
lm_eval/tasks/noticia/README.md
lm_eval/tasks/noticia/noticia.yaml
lm_eval/tasks/noticia/utils.py
lm_eval/tasks/nq_open/README.md
lm_eval/tasks/nq_open/nq_open.yaml
lm_eval/tasks/okapi/arc_multilingual/README.md
lm_eval/tasks/okapi/arc_multilingual/_arc_yaml
lm_eval/tasks/okapi/arc_multilingual/arc_ar.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_bn.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_ca.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_da.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_de.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_es.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_eu.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_fr.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_gu.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_hi.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_hr.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_hu.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_hy.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_id.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_it.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_kn.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_ml.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_mr.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_ne.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_nl.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_pt.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_ro.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_ru.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_sk.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_sr.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_sv.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_ta.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_te.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_uk.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_vi.yaml
lm_eval/tasks/okapi/arc_multilingual/arc_zh.yaml
lm_eval/tasks/okapi/arc_multilingual/utils.py
lm_eval/tasks/okapi/hellaswag_multilingual/README.md
lm_eval/tasks/okapi/hellaswag_multilingual/_hellaswag_yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_ar.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_bn.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_ca.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_da.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_de.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_es.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_eu.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_fr.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_gu.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_hi.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_hr.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_hu.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_hy.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_id.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_it.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_kn.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_ml.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_mr.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_ne.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_nl.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_pt.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_ro.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_ru.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_sk.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_sr.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_sv.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_ta.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_te.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_uk.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_vi.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/utils.py
lm_eval/tasks/okapi/mmlu_multilingual/_default_yaml
lm_eval/tasks/okapi/mmlu_multilingual/_generate_configs.py
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ar.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_bn.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ca.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_da.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_de.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_en.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_es.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_eu.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_fr.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_gu.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_hi.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_hr.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_hu.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_hy.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_id.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_is.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_it.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_kn.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ml.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_mr.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_nb.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ne.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_nl.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_pt.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ro.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ru.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_sk.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_sr.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_sv.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ta.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_te.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_uk.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_vi.yaml
lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_zh.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/README.md
lm_eval/tasks/okapi/truthfulqa_multilingual/_truthfulqa_mc1_yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/_truthfulqa_mc2_yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ar_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ar_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_bn_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_bn_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ca_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ca_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_da_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_da_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_de_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_de_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_es_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_es_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_eu_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_eu_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_fr_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_fr_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_gu_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_gu_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_hi_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_hi_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_hr_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_hr_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_hu_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_hu_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_hy_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_hy_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_id_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_id_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_it_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_it_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_kn_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_kn_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ml_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ml_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_mr_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_mr_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ne_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ne_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_nl_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_nl_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_pt_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_pt_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ro_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ro_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ru_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ru_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_sk_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_sk_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_sr_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_sr_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_sv_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_sv_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ta_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_ta_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_te_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_te_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_uk_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_uk_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_vi_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_vi_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_zh_mc1.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/truthfulqa_zh_mc2.yaml
lm_eval/tasks/okapi/truthfulqa_multilingual/utils.py
lm_eval/tasks/openbookqa/README.md
lm_eval/tasks/openbookqa/openbookqa.yaml
lm_eval/tasks/paloma/README.md
lm_eval/tasks/paloma/_paloma_template
lm_eval/tasks/paloma/paloma_4chan_meta_sep.yaml
lm_eval/tasks/paloma/paloma_c4_100_domains.yaml
lm_eval/tasks/paloma/paloma_c4_en.yaml
lm_eval/tasks/paloma/paloma_dolma-v1_5.yaml
lm_eval/tasks/paloma/paloma_dolma_100_programing_languages.yaml
lm_eval/tasks/paloma/paloma_dolma_100_subreddits.yaml
lm_eval/tasks/paloma/paloma_falcon-refinedweb.yaml
lm_eval/tasks/paloma/paloma_gab.yaml
lm_eval/tasks/paloma/paloma_m2d2_s2orc_unsplit.yaml
lm_eval/tasks/paloma/paloma_m2d2_wikipedia_unsplit.yaml
lm_eval/tasks/paloma/paloma_manosphere_meta_sep.yaml
lm_eval/tasks/paloma/paloma_mc4.yaml
lm_eval/tasks/paloma/paloma_ptb.yaml
lm_eval/tasks/paloma/paloma_redpajama.yaml
lm_eval/tasks/paloma/paloma_twitterAAE_HELM_fixed.yaml
lm_eval/tasks/paloma/paloma_utils.py
lm_eval/tasks/paloma/paloma_wikitext_103.yaml
lm_eval/tasks/paws-x/README.md
lm_eval/tasks/paws-x/_generate_config.py
lm_eval/tasks/paws-x/_pawsx.yaml
lm_eval/tasks/paws-x/paws_de.yaml
lm_eval/tasks/paws-x/paws_en.yaml
lm_eval/tasks/paws-x/paws_es.yaml
lm_eval/tasks/paws-x/paws_fr.yaml
lm_eval/tasks/paws-x/paws_ja.yaml
lm_eval/tasks/paws-x/paws_ko.yaml
lm_eval/tasks/paws-x/paws_zh.yaml
lm_eval/tasks/paws-x/pawsx_template_yaml
lm_eval/tasks/pile/README.md
lm_eval/tasks/pile/pile_arxiv.yaml
lm_eval/tasks/pile/pile_bookcorpus2.yaml
lm_eval/tasks/pile/pile_books3.yaml
lm_eval/tasks/pile/pile_dm-mathematics.yaml
lm_eval/tasks/pile/pile_enron.yaml
lm_eval/tasks/pile/pile_europarl.yaml
lm_eval/tasks/pile/pile_freelaw.yaml
lm_eval/tasks/pile/pile_github.yaml
lm_eval/tasks/pile/pile_gutenberg.yaml
lm_eval/tasks/pile/pile_hackernews.yaml
lm_eval/tasks/pile/pile_nih-exporter.yaml
lm_eval/tasks/pile/pile_opensubtitles.yaml
lm_eval/tasks/pile/pile_openwebtext2.yaml
lm_eval/tasks/pile/pile_philpapers.yaml
lm_eval/tasks/pile/pile_pile-cc.yaml
lm_eval/tasks/pile/pile_pubmed-abstracts.yaml
lm_eval/tasks/pile/pile_pubmed-central.yaml
lm_eval/tasks/pile/pile_stackexchange.yaml
lm_eval/tasks/pile/pile_ubuntu-irc.yaml
lm_eval/tasks/pile/pile_uspto.yaml
lm_eval/tasks/pile/pile_wikipedia.yaml
lm_eval/tasks/pile/pile_youtubesubtitles.yaml
lm_eval/tasks/pile_10k/README.md
lm_eval/tasks/pile_10k/pile_10k.yaml
lm_eval/tasks/piqa/README.md
lm_eval/tasks/piqa/piqa.yaml
lm_eval/tasks/polemo2/README.md
lm_eval/tasks/polemo2/polemo2_in.yaml
lm_eval/tasks/polemo2/polemo2_out.yaml
lm_eval/tasks/prost/README.md
lm_eval/tasks/prost/corypaik_prost.yaml
lm_eval/tasks/pubmedqa/README.md
lm_eval/tasks/pubmedqa/preprocess_pubmedqa.py
lm_eval/tasks/pubmedqa/pubmedqa.yaml
lm_eval/tasks/qa4mre/README.md
lm_eval/tasks/qa4mre/preprocess_qa4mre.py
lm_eval/tasks/qa4mre/qa4mre_2011.yaml
lm_eval/tasks/qa4mre/qa4mre_2012.yaml
lm_eval/tasks/qa4mre/qa4mre_2013.yaml
lm_eval/tasks/qasper/README.md
lm_eval/tasks/qasper/bool.yaml
lm_eval/tasks/qasper/freeform.yaml
lm_eval/tasks/qasper/metrics.py
lm_eval/tasks/qasper/utils.py
lm_eval/tasks/race/README.md
lm_eval/tasks/race/preprocess_race.py
lm_eval/tasks/race/race.yaml
lm_eval/tasks/realtoxicityprompts/metric.py
lm_eval/tasks/realtoxicityprompts/realtoxicityprompts.yaml
lm_eval/tasks/sciq/README.md
lm_eval/tasks/sciq/sciq.yaml
lm_eval/tasks/scrolls/README.md
lm_eval/tasks/scrolls/scrolls_contractnli.yaml
lm_eval/tasks/scrolls/scrolls_govreport.yaml
lm_eval/tasks/scrolls/scrolls_narrativeqa.yaml
lm_eval/tasks/scrolls/scrolls_qasper.yaml
lm_eval/tasks/scrolls/scrolls_qmsum.yaml
lm_eval/tasks/scrolls/scrolls_quality.yaml
lm_eval/tasks/scrolls/scrolls_summscreenfd.yaml
lm_eval/tasks/scrolls/task.py
lm_eval/tasks/siqa/README.md
lm_eval/tasks/siqa/siqa.yaml
lm_eval/tasks/squad_completion/README.md
lm_eval/tasks/squad_completion/squad_completion.yaml
lm_eval/tasks/squad_completion/task.py
lm_eval/tasks/squadv2/README.md
lm_eval/tasks/squadv2/squadv2.yaml
lm_eval/tasks/squadv2/task.py
lm_eval/tasks/storycloze/README.md
lm_eval/tasks/storycloze/storycloze_2016.yaml
lm_eval/tasks/storycloze/storycloze_2018.yaml
lm_eval/tasks/super_glue/README.md
lm_eval/tasks/super_glue/boolq/default.yaml
lm_eval/tasks/super_glue/boolq/seq2seq.yaml
lm_eval/tasks/super_glue/boolq/t5-prompt.yaml
lm_eval/tasks/super_glue/cb/aggregate.py
lm_eval/tasks/super_glue/cb/default.yaml
lm_eval/tasks/super_glue/cb/t5-prompt.yaml
lm_eval/tasks/super_glue/cb/t5_utils.py
lm_eval/tasks/super_glue/copa/default.yaml
lm_eval/tasks/super_glue/copa/t5-prompt.yaml
lm_eval/tasks/super_glue/copa/utils.py
lm_eval/tasks/super_glue/multirc/default.yaml
lm_eval/tasks/super_glue/multirc/t5-prompt.yaml
lm_eval/tasks/super_glue/multirc/t5_utils.py
lm_eval/tasks/super_glue/record/default.yaml
lm_eval/tasks/super_glue/record/t5-prompt.yaml
lm_eval/tasks/super_glue/record/t5_utils.py
lm_eval/tasks/super_glue/record/util.py
lm_eval/tasks/super_glue/rte/default.yaml
lm_eval/tasks/super_glue/rte/t5-prompt.yaml
lm_eval/tasks/super_glue/wic/default.yaml
lm_eval/tasks/super_glue/wic/t5-prompt.yaml
lm_eval/tasks/super_glue/wsc/default.yaml
lm_eval/tasks/super_glue/wsc/preprocess_wsc.py
lm_eval/tasks/super_glue/wsc/t5-prompt.yaml
lm_eval/tasks/super_glue/wsc/t5_utils.py
lm_eval/tasks/swag/README.md
lm_eval/tasks/swag/swag.yaml
lm_eval/tasks/swde/README.md
lm_eval/tasks/swde/swde.yaml
lm_eval/tasks/swde/task.py
lm_eval/tasks/tinyBenchmarks/README.md
lm_eval/tasks/tinyBenchmarks/agg_functions.py
lm_eval/tasks/tinyBenchmarks/tinyArc.yaml
lm_eval/tasks/tinyBenchmarks/tinyBenchmarks.yaml
lm_eval/tasks/tinyBenchmarks/tinyGSM8k.yaml
lm_eval/tasks/tinyBenchmarks/tinyHellaswag.yaml
lm_eval/tasks/tinyBenchmarks/tinyMMLU.yaml
lm_eval/tasks/tinyBenchmarks/tinyTruthfulQA_mc1.yaml
lm_eval/tasks/tinyBenchmarks/tinyTruthfulQA_mc2.yaml
lm_eval/tasks/tinyBenchmarks/tinyWinogrande.yaml
lm_eval/tasks/tinyBenchmarks/utils_hellaswag.py
lm_eval/tasks/tinyBenchmarks/utils_truthfulqa.py
lm_eval/tasks/tinyBenchmarks/utils_winogrande.py
lm_eval/tasks/tmmluplus/README.md
lm_eval/tasks/tmmluplus/subject.tsv
lm_eval/tasks/tmmluplus/default/_default_template_yaml
lm_eval/tasks/tmmluplus/default/_generate_configs.py
lm_eval/tasks/tmmluplus/default/tmmluplus.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_accounting.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_administrative_law.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_advance_chemistry.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_agriculture.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_anti_money_laundering.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_auditing.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_basic_medical_science.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_business_management.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_chinese_language_and_literature.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_clinical_psychology.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_computer_science.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_culinary_skills.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_dentistry.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_economics.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_education.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_education_(profession_level).yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_educational_psychology.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_engineering_math.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_finance_banking.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_financial_analysis.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_fire_science.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_general_principles_of_law.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_geography_of_taiwan.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_human_behavior.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_insurance_studies.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_introduction_to_law.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_jce_humanities.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_junior_chemistry.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_junior_chinese_exam.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_junior_math_exam.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_junior_science_exam.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_junior_social_studies.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_linear_algebra.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_logic_reasoning.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_macroeconomics.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_management_accounting.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_marketing_management.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_mechanical.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_music.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_national_protection.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_nautical_science.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_occupational_therapy_for_psychological_disorders.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_official_document_management.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_optometry.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_organic_chemistry.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_pharmacology.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_pharmacy.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_physical_education.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_physics.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_politic_science.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_real_estate.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_secondary_physics.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_statistics_and_machine_learning.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_taiwanese_hokkien.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_taxation.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_technical.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_three_principles_of_people.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_trade.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_traditional_chinese_medicine_clinical_medicine.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_trust_practice.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_ttqav2.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_tve_chinese_language.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_tve_design.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_tve_mathematics.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_tve_natural_sciences.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_veterinary_pathology.yaml
lm_eval/tasks/tmmluplus/default/tmmluplus_veterinary_pharmacology.yaml
lm_eval/tasks/tmmluplus/default/utils.py
lm_eval/tasks/toxigen/README.md
lm_eval/tasks/toxigen/toxigen.yaml
lm_eval/tasks/toxigen/utils.py
lm_eval/tasks/translation/README.md
lm_eval/tasks/translation/iwslt2017_ar-en.yaml
lm_eval/tasks/translation/iwslt2017_en-ar.yaml
lm_eval/tasks/translation/utils.py
lm_eval/tasks/translation/wmt14_en-fr.yaml
lm_eval/tasks/translation/wmt14_fr-en.yaml
lm_eval/tasks/translation/wmt16_de-en.yaml
lm_eval/tasks/translation/wmt16_en-de.yaml
lm_eval/tasks/translation/wmt16_en-ro.yaml
lm_eval/tasks/translation/wmt16_ro-en.yaml
lm_eval/tasks/translation/wmt_common_yaml
lm_eval/tasks/triviaqa/README.md
lm_eval/tasks/triviaqa/default.yaml
lm_eval/tasks/truthfulqa/README.md
lm_eval/tasks/truthfulqa/truthfulqa_gen.yaml
lm_eval/tasks/truthfulqa/truthfulqa_mc1.yaml
lm_eval/tasks/truthfulqa/truthfulqa_mc2.yaml
lm_eval/tasks/truthfulqa/utils.py
lm_eval/tasks/unitxt/20_newsgroups.yaml
lm_eval/tasks/unitxt/README.md
lm_eval/tasks/unitxt/ag_news.yaml
lm_eval/tasks/unitxt/argument_topic.yaml
lm_eval/tasks/unitxt/atis.yaml
lm_eval/tasks/unitxt/banking77.yaml
lm_eval/tasks/unitxt/claim_stance_topic.yaml
lm_eval/tasks/unitxt/cnn_dailymail.yaml
lm_eval/tasks/unitxt/coedit_gec.yaml
lm_eval/tasks/unitxt/dbpedia_14.yaml
lm_eval/tasks/unitxt/ethos_binary.yaml
lm_eval/tasks/unitxt/financial_tweets.yaml
lm_eval/tasks/unitxt/law_stack_exchange.yaml
lm_eval/tasks/unitxt/ledgar.yaml
lm_eval/tasks/unitxt/medical_abstracts.yaml
lm_eval/tasks/unitxt/stsb.yaml
lm_eval/tasks/unitxt/task.py
lm_eval/tasks/unitxt/unfair_tos.yaml
lm_eval/tasks/unitxt/unitxt
lm_eval/tasks/unitxt/xsum.yaml
lm_eval/tasks/unitxt/yahoo_answers_topics.yaml
lm_eval/tasks/unscramble/README.md
lm_eval/tasks/unscramble/anagrams1.yaml
lm_eval/tasks/unscramble/anagrams2.yaml
lm_eval/tasks/unscramble/cycle_letters.yaml
lm_eval/tasks/unscramble/random_insertion.yaml
lm_eval/tasks/unscramble/reversed_words.yaml
lm_eval/tasks/webqs/README.md
lm_eval/tasks/webqs/utils.py
lm_eval/tasks/webqs/webqs.yaml
lm_eval/tasks/wikitext/README.md
lm_eval/tasks/wikitext/preprocess_wikitext.py
lm_eval/tasks/wikitext/wikitext.yaml
lm_eval/tasks/winogrande/README.md
lm_eval/tasks/winogrande/default.yaml
lm_eval/tasks/winogrande/preprocess_winogrande.py
lm_eval/tasks/wmdp/README.md
lm_eval/tasks/wmdp/_default_template_yaml
lm_eval/tasks/wmdp/_wmdp.yaml
lm_eval/tasks/wmdp/wmdp_bio.yaml
lm_eval/tasks/wmdp/wmdp_chem.yaml
lm_eval/tasks/wmdp/wmdp_cyber.yaml
lm_eval/tasks/wmt2016/README.md
lm_eval/tasks/wmt2016/metrics.py
lm_eval/tasks/wmt2016/ro_en-t5_prompt.yaml
lm_eval/tasks/wsc273/README.md
lm_eval/tasks/wsc273/default.yaml
lm_eval/tasks/wsc273/utils.py
lm_eval/tasks/xcopa/README.md
lm_eval/tasks/xcopa/_xcopa.yaml
lm_eval/tasks/xcopa/default_et.yaml
lm_eval/tasks/xcopa/default_ht.yaml
lm_eval/tasks/xcopa/default_id.yaml
lm_eval/tasks/xcopa/default_it.yaml
lm_eval/tasks/xcopa/default_qu.yaml
lm_eval/tasks/xcopa/default_sw.yaml
lm_eval/tasks/xcopa/default_ta.yaml
lm_eval/tasks/xcopa/default_th.yaml
lm_eval/tasks/xcopa/default_tr.yaml
lm_eval/tasks/xcopa/default_vi.yaml
lm_eval/tasks/xcopa/default_zh.yaml
lm_eval/tasks/xcopa/utils.py
lm_eval/tasks/xnli/README.md
lm_eval/tasks/xnli/_xnli.yaml
lm_eval/tasks/xnli/utils.py
lm_eval/tasks/xnli/xnli_ar.yaml
lm_eval/tasks/xnli/xnli_bg.yaml
lm_eval/tasks/xnli/xnli_common_yaml
lm_eval/tasks/xnli/xnli_de.yaml
lm_eval/tasks/xnli/xnli_el.yaml
lm_eval/tasks/xnli/xnli_en.yaml
lm_eval/tasks/xnli/xnli_es.yaml
lm_eval/tasks/xnli/xnli_fr.yaml
lm_eval/tasks/xnli/xnli_hi.yaml
lm_eval/tasks/xnli/xnli_ru.yaml
lm_eval/tasks/xnli/xnli_sw.yaml
lm_eval/tasks/xnli/xnli_th.yaml
lm_eval/tasks/xnli/xnli_tr.yaml
lm_eval/tasks/xnli/xnli_ur.yaml
lm_eval/tasks/xnli/xnli_vi.yaml
lm_eval/tasks/xnli/xnli_zh.yaml
lm_eval/tasks/xnli_eu/README.md
lm_eval/tasks/xnli_eu/xnli_common_yaml
lm_eval/tasks/xnli_eu/xnli_eu.yaml
lm_eval/tasks/xnli_eu/xnli_eu_mt.yaml
lm_eval/tasks/xnli_eu/xnli_eu_native.yaml
lm_eval/tasks/xstorycloze/README.md
lm_eval/tasks/xstorycloze/_xstorycloze.yaml
lm_eval/tasks/xstorycloze/default_ar.yaml
lm_eval/tasks/xstorycloze/default_en.yaml
lm_eval/tasks/xstorycloze/default_es.yaml
lm_eval/tasks/xstorycloze/default_eu.yaml
lm_eval/tasks/xstorycloze/default_hi.yaml
lm_eval/tasks/xstorycloze/default_id.yaml
lm_eval/tasks/xstorycloze/default_my.yaml
lm_eval/tasks/xstorycloze/default_ru.yaml
lm_eval/tasks/xstorycloze/default_sw.yaml
lm_eval/tasks/xstorycloze/default_te.yaml
lm_eval/tasks/xstorycloze/default_zh.yaml
lm_eval/tasks/xwinograd/README.md
lm_eval/tasks/xwinograd/_xwinograd.yaml
lm_eval/tasks/xwinograd/utils.py
lm_eval/tasks/xwinograd/xwinograd_common_yaml
lm_eval/tasks/xwinograd/xwinograd_en.yaml
lm_eval/tasks/xwinograd/xwinograd_fr.yaml
lm_eval/tasks/xwinograd/xwinograd_jp.yaml
lm_eval/tasks/xwinograd/xwinograd_pt.yaml
lm_eval/tasks/xwinograd/xwinograd_ru.yaml
lm_eval/tasks/xwinograd/xwinograd_zh.yaml
tests/test_cli.py
tests/test_evaluator.py
tests/test_include_path.py
tests/test_janitor.py
tests/test_misc.py
tests/test_prompt.py
tests/test_requests_caching.py
tests/test_tasks.py
tests/test_utils.py