LICENSE.md
README.md
setup.py
lm_eval/__init__.py
lm_eval/base.py
lm_eval/evaluator.py
lm_eval/metrics.py
lm_eval/utils.py
lm_eval.egg-info/PKG-INFO
lm_eval.egg-info/SOURCES.txt
lm_eval.egg-info/dependency_links.txt
lm_eval.egg-info/requires.txt
lm_eval.egg-info/top_level.txt
lm_eval/datasets/__init__.py
lm_eval/datasets/asdiv/__init__.py
lm_eval/datasets/asdiv/asdiv.py
lm_eval/datasets/asdiv/dataset_infos.json
lm_eval/datasets/bigbench_resources/__init__.py
lm_eval/datasets/bigbench_resources/causal_judgement.json
lm_eval/datasets/bigbench_resources/date_understanding.json
lm_eval/datasets/bigbench_resources/disambiguation_qa.json
lm_eval/datasets/bigbench_resources/dyck_languages.json
lm_eval/datasets/bigbench_resources/formal_fallacies_syllogisms_negation.json
lm_eval/datasets/bigbench_resources/geometric_shapes.json
lm_eval/datasets/bigbench_resources/hyperbaton.json
lm_eval/datasets/bigbench_resources/logical_deduction_five_objects.json
lm_eval/datasets/bigbench_resources/logical_deduction_seven_objects.json
lm_eval/datasets/bigbench_resources/logical_deduction_three_objects.json
lm_eval/datasets/bigbench_resources/movie_recommendation.json
lm_eval/datasets/bigbench_resources/navigate.json
lm_eval/datasets/bigbench_resources/reasoning_about_colored_objects.json
lm_eval/datasets/bigbench_resources/ruin_names.json
lm_eval/datasets/bigbench_resources/salient_translation_error_detection.json
lm_eval/datasets/bigbench_resources/snarks.json
lm_eval/datasets/bigbench_resources/sports_understanding.json
lm_eval/datasets/bigbench_resources/temporal_sequences.json
lm_eval/datasets/bigbench_resources/tracking_shuffled_objects_five_objects.json
lm_eval/datasets/bigbench_resources/tracking_shuffled_objects_seven_objects.json
lm_eval/datasets/bigbench_resources/tracking_shuffled_objects_three_objects.json
lm_eval/datasets/coqa/__init__.py
lm_eval/datasets/coqa/coqa.py
lm_eval/datasets/coqa/dataset_infos.json
lm_eval/datasets/drop/__init__.py
lm_eval/datasets/drop/dataset_infos.json
lm_eval/datasets/drop/drop.py
lm_eval/datasets/headqa/__init__.py
lm_eval/datasets/headqa/dataset_infos.json
lm_eval/datasets/headqa/headqa.py
lm_eval/datasets/hendrycks_ethics/__init__.py
lm_eval/datasets/hendrycks_ethics/dataset_infos.json
lm_eval/datasets/hendrycks_ethics/hendrycks_ethics.py
lm_eval/datasets/hendrycks_math/__init__.py
lm_eval/datasets/hendrycks_math/dataset_infos.json
lm_eval/datasets/hendrycks_math/hendrycks_math.py
lm_eval/datasets/logiqa/__init__.py
lm_eval/datasets/logiqa/dataset_infos.json
lm_eval/datasets/logiqa/logiqa.py
lm_eval/datasets/mutual/__init__.py
lm_eval/datasets/mutual/dataset_infos.json
lm_eval/datasets/mutual/mutual.py
lm_eval/datasets/pile/__init__.py
lm_eval/datasets/pile/dataset_infos.json
lm_eval/datasets/pile/pile.py
lm_eval/datasets/quac/__init__.py
lm_eval/datasets/quac/dataset_infos.json
lm_eval/datasets/quac/quac.py
lm_eval/datasets/sat_analogies/__init__.py
lm_eval/datasets/sat_analogies/sat_analogies.py
lm_eval/datasets/unscramble/__init__.py
lm_eval/datasets/unscramble/dataset_infos.json
lm_eval/datasets/unscramble/unscramble.py
lm_eval/decontamination/__init__.py
lm_eval/decontamination/archiver.py
lm_eval/decontamination/decontaminate.py
lm_eval/decontamination/janitor.py
lm_eval/models/__init__.py
lm_eval/models/anthropic_llms.py
lm_eval/models/dummy.py
lm_eval/models/gpt2.py
lm_eval/models/gpt3.py
lm_eval/models/huggingface.py
lm_eval/models/textsynth.py
lm_eval/tasks/__init__.py
lm_eval/tasks/anli.py
lm_eval/tasks/arc.py
lm_eval/tasks/arithmetic.py
lm_eval/tasks/asdiv.py
lm_eval/tasks/babi.py
lm_eval/tasks/bigbench.py
lm_eval/tasks/blimp.py
lm_eval/tasks/cbt.py
lm_eval/tasks/ceval.py
lm_eval/tasks/cmmlu.py
lm_eval/tasks/coqa.py
lm_eval/tasks/crowspairs.py
lm_eval/tasks/csatqa.py
lm_eval/tasks/drop.py
lm_eval/tasks/glue.py
lm_eval/tasks/gsm8k.py
lm_eval/tasks/haerae.py
lm_eval/tasks/headqa.py
lm_eval/tasks/hellaswag.py
lm_eval/tasks/hendrycks_ethics.py
lm_eval/tasks/hendrycks_math.py
lm_eval/tasks/hendrycks_test.py
lm_eval/tasks/json.py
lm_eval/tasks/lambada.py
lm_eval/tasks/lambada_cloze.py
lm_eval/tasks/lambada_multilingual.py
lm_eval/tasks/logiqa.py
lm_eval/tasks/mathqa.py
lm_eval/tasks/mc_taco.py
lm_eval/tasks/mgsm.py
lm_eval/tasks/mutual.py
lm_eval/tasks/naturalqs.py
lm_eval/tasks/nqopen.py
lm_eval/tasks/openbookqa.py
lm_eval/tasks/pawsx.py
lm_eval/tasks/pile.py
lm_eval/tasks/piqa.py
lm_eval/tasks/prost.py
lm_eval/tasks/pubmedqa.py
lm_eval/tasks/qa4mre.py
lm_eval/tasks/qasper.py
lm_eval/tasks/quac.py
lm_eval/tasks/race.py
lm_eval/tasks/sat.py
lm_eval/tasks/sciq.py
lm_eval/tasks/scrolls.py
lm_eval/tasks/squad.py
lm_eval/tasks/storycloze.py
lm_eval/tasks/superglue.py
lm_eval/tasks/swag.py
lm_eval/tasks/toxigen.py
lm_eval/tasks/translation.py
lm_eval/tasks/triviaqa.py
lm_eval/tasks/truthfulqa.py
lm_eval/tasks/unscramble.py
lm_eval/tasks/webqs.py
lm_eval/tasks/wikitext.py
lm_eval/tasks/winogrande.py
lm_eval/tasks/wsc273.py
lm_eval/tasks/xcopa.py
lm_eval/tasks/xnli.py
lm_eval/tasks/xstorycloze.py
lm_eval/tasks/xwinograd.py
scripts/__init__.py
scripts/cost_estimate.py
scripts/get_prompts.py
scripts/make_gpt2_test_cases.py
scripts/make_table_results.py
scripts/make_table_tasks.py
scripts/regression.py
scripts/write_out.py
scripts/clean_training_data/__init__.py
scripts/clean_training_data/compress_and_package.py
scripts/clean_training_data/generate_13_grams.py
scripts/clean_training_data/investigate_pile.py
scripts/clean_training_data/process_sorted_buckets.py
scripts/clean_training_data/sort_13_gram_buckets.py
tests/test_description_dict.py
tests/test_evaluator.py
tests/test_generate_13_grams.py
tests/test_janitor.py
tests/test_misc.py
tests/test_models.py
tests/test_tasks.py
tests/test_utils.py
tests/test_version_stable.py