bigbench|abstract_narrative_understanding|0|0
bigbench|anachronisms|0|0
bigbench|analogical_similarity|0|0
bigbench|analytic_entailment|0|0
bigbench|arithmetic_bb|0|0
bigbench|ascii_word_recognition|0|0
bigbench|authorship_verification|0|0
bigbench|auto_categorization|0|0
bigbench|auto_debugging|0|0
bigbench|bbq_lite_json|0|0
bigbench|bridging_anaphora_resolution_barqa|0|0
bigbench|causal_judgment|0|0
bigbench|cause_and_effect|0|0
bigbench|checkmate_in_one|0|0
bigbench|chess_state_tracking|0|0
bigbench|chinese_remainder_theorem|0|0
bigbench|cifar10_classification|0|0
bigbench|code_line_description|0|0
bigbench|codenames|0|0
bigbench|color|0|0
bigbench|common_morpheme|0|0
bigbench|conceptual_combinations|0|0
bigbench|conlang_translation|0|0
bigbench|contextual_parametric_knowledge_conflicts|0|0
bigbench|crash_blossom|0|0
bigbench|crass_ai|0|0
bigbench|cryobiology_spanish|0|0
bigbench|cryptonite|0|0
bigbench|cs_algorithms|0|0
bigbench|dark_humor_detection|0|0
bigbench|date_understanding|0|0
bigbench|disambiguation_qa|0|0
bigbench|discourse_marker_prediction|0|0
bigbench|disfl_qa|0|0
bigbench|dyck_languages|0|0
bigbench|elementary_math_qa|0|0
bigbench|emoji_movie|0|0
bigbench|emojis_emotion_prediction|0|0
bigbench|empirical_judgments|0|0
bigbench|english_proverbs|0|0
bigbench|english_russian_proverbs|0|0
bigbench|entailed_polarity_hindi|0|0
bigbench|entailed_polarity|0|0
bigbench|epistemic_reasoning|0|0
bigbench|evaluating_information_essentiality|0|0
bigbench|fact_checker|0|0
bigbench|fantasy_reasoning|0|0
bigbench|few_shot_nlg|0|0
bigbench|figure_of_speech_detection|0|0
bigbench|formal_fallacies_syllogisms_negation|0|0
bigbench|gem|0|0
bigbench|gender_inclusive_sentences_german|0|0
bigbench|general_knowledge|0|0
bigbench|geometric_shapes|0|0
bigbench|goal_step_wikihow|0|0
bigbench|gre_reading_comprehension|0|0
bigbench|hhh_alignment|0|0
bigbench|hindi_question_answering|0|0
bigbench|hindu_knowledge|0|0
bigbench|hinglish_toxicity|0|0
bigbench|human_organs_senses|0|0
bigbench|hyperbaton|0|0
bigbench|identify_math_theorems|0|0
bigbench|identify_odd_metaphor|0|0
bigbench|implicatures|0|0
bigbench|implicit_relations|0|0
bigbench|intent_recognition|0|0
bigbench|international_phonetic_alphabet_nli|0|0
bigbench|international_phonetic_alphabet_transliterate|0|0
bigbench|intersect_geometry|0|0
bigbench|irony_identification|0|0
bigbench|kanji_ascii|0|0
bigbench|kannada|0|0
bigbench|key_value_maps|0|0
bigbench|known_unknowns|0|0
bigbench|language_games|0|0
bigbench|language_identification|0|0
bigbench|linguistic_mappings|0|0
bigbench|linguistics_puzzles|0|0
bigbench|logic_grid_puzzle|0|0
bigbench|logical_args|0|0
bigbench|logical_deduction|0|0
bigbench|logical_fallacy_detection|0|0
bigbench|logical_sequence|0|0
bigbench|mathematical_induction|0|0
bigbench|matrixshapes|0|0
bigbench|metaphor_boolean|0|0
bigbench|metaphor_understanding|0|0
bigbench|minute_mysteries_qa|0|0
bigbench|misconceptions_russian|0|0
bigbench|misconceptions|0|0
bigbench|mnist_ascii|0|0
bigbench|modified_arithmetic|0|0
bigbench|moral_permissibility|0|0
bigbench|movie_dialog_same_or_different|0|0
bigbench|movie_recommendation|0|0
bigbench|mult_data_wrangling|0|0
bigbench|multiemo|0|0
bigbench|natural_instructions|0|0
bigbench|navigate|0|0
bigbench|nonsense_words_grammar|0|0
bigbench|novel_concepts|0|0
bigbench|object_counting|0|0
bigbench|odd_one_out|0|0
bigbench|operators|0|0
bigbench|paragraph_segmentation|0|0
bigbench|parsinlu_qa|0|0
bigbench|parsinlu_reading_comprehension|0|0
bigbench|penguins_in_a_table|0|0
bigbench|periodic_elements|0|0
bigbench|persian_idioms|0|0
bigbench|phrase_relatedness|0|0
bigbench|physical_intuition|0|0
bigbench|physics_questions|0|0
bigbench|physics|0|0
bigbench|play_dialog_same_or_different|0|0
bigbench|polish_sequence_labeling|0|0
bigbench|presuppositions_as_nli|0|0
bigbench|qa_wikidata|0|0
bigbench|question_selection|0|0
bigbench|real_or_fake_text|0|0
bigbench|reasoning_about_colored_objects|0|0
bigbench|repeat_copy_logic|0|0
bigbench|rephrase|0|0
bigbench|rhyming|0|0
bigbench|riddle_sense|0|0
bigbench|ruin_names|0|0
bigbench|salient_translation_error_detection|0|0
bigbench|scientific_press_release|0|0
bigbench|semantic_parsing_in_context_sparc|0|0
bigbench|semantic_parsing_spider|0|0
bigbench|sentence_ambiguity|0|0
bigbench|similarities_abstraction|0|0
bigbench|simp_turing_concept|0|0
bigbench|simple_arithmetic_json_multiple_choice|0|0
bigbench|simple_arithmetic_json_subtasks|0|0
bigbench|simple_arithmetic_json|0|0
bigbench|simple_arithmetic_multiple_targets_json|0|0
bigbench|simple_ethical_questions|0|0
bigbench|simple_text_editing|0|0
bigbench|snarks|0|0
bigbench|social_iqa|0|0
bigbench|social_support|0|0
bigbench|sports_understanding|0|0
bigbench|strange_stories|0|0
bigbench|strategyqa|0|0
bigbench|sufficient_information|0|0
bigbench|suicide_risk|0|0
bigbench|swahili_english_proverbs|0|0
bigbench|swedish_to_german_proverbs|0|0
bigbench|symbol_interpretation|0|0
bigbench|tellmewhy|0|0
bigbench|temporal_sequences|0|0
bigbench|tense|0|0
bigbench|timedial|0|0
bigbench|topical_chat|0|0
bigbench|tracking_shuffled_objects|0|0
bigbench|understanding_fables|0|0
bigbench|undo_permutation|0|0
bigbench|unit_conversion|0|0
bigbench|unit_interpretation|0|0
bigbench|unnatural_in_context_learning|0|0
bigbench|vitaminc_fact_verification|0|0
bigbench|what_is_the_tao|0|0
bigbench|which_wiki_edit|0|0
bigbench|wino_x_german|0|0
bigbench|winowhy|0|0
bigbench|word_sorting|0|0
bigbench|word_unscrambling|0|0
helm|babi_qa|0|0
helm|bbq:Age|0|0
helm|bbq:Disability_status|0|0
helm|bbq:Gender_identity|0|0
helm|bbq:Nationality|0|0
helm|bbq:Physical_appearance|0|0
helm|bbq:Race_ethnicity|0|0
helm|bbq:Race_x_SES|0|0
helm|bbq:Race_x_gender|0|0
helm|bbq:Religion|0|0
helm|bbq:SES|0|0
helm|bbq:Sexual_orientation|0|0
helm|bbq|0|0
helm|bigbench:auto_debugging|0|0
helm|bigbench:bbq_lite_json:age_ambig|0|0
helm|bigbench:bbq_lite_json:age_disambig|0|0
helm|bigbench:bbq_lite_json:disability_status_ambig|0|0
helm|bigbench:bbq_lite_json:disability_status_disambig|0|0
helm|bigbench:bbq_lite_json:gender_identity_ambig|0|0
helm|bigbench:bbq_lite_json:gender_identity_disambig|0|0
helm|bigbench:bbq_lite_json:nationality_ambig|0|0
helm|bigbench:bbq_lite_json:nationality_disambig|0|0
helm|bigbench:bbq_lite_json:physical_appearance_ambig|0|0
helm|bigbench:bbq_lite_json:physical_appearance_disambig|0|0
helm|bigbench:bbq_lite_json:race_ethnicity_ambig|0|0
helm|bigbench:bbq_lite_json:race_ethnicity_disambig|0|0
helm|bigbench:bbq_lite_json:religion_ambig|0|0
helm|bigbench:bbq_lite_json:religion_disambig|0|0
helm|bigbench:bbq_lite_json:ses_ambig|0|0
helm|bigbench:bbq_lite_json:ses_disambig|0|0
helm|bigbench:bbq_lite_json:sexual_orientation_ambig|0|0
helm|bigbench:bbq_lite_json:sexual_orientation_disambig|0|0
helm|bigbench:code_line_description|0|0
helm|bigbench:conceptual_combinations:contradictions|0|0
helm|bigbench:conceptual_combinations:emergent_properties|0|0
helm|bigbench:conceptual_combinations:fanciful_fictional_combinations|0|0
helm|bigbench:conceptual_combinations:homonyms|0|0
helm|bigbench:conceptual_combinations:invented_words|0|0
helm|bigbench:conlang_translation:adna_from|0|0
helm|bigbench:conlang_translation:adna_to|0|0
helm|bigbench:conlang_translation:atikampe_from|0|0
helm|bigbench:conlang_translation:atikampe_to|0|0
helm|bigbench:conlang_translation:gornam_from|0|0
helm|bigbench:conlang_translation:gornam_to|0|0
helm|bigbench:conlang_translation:holuan_from|0|0
helm|bigbench:conlang_translation:holuan_to|0|0
helm|bigbench:conlang_translation:mkafala_from|0|0
helm|bigbench:conlang_translation:mkafala_to|0|0
helm|bigbench:conlang_translation:postpositive_english_from|0|0
helm|bigbench:conlang_translation:postpositive_english_to|0|0
helm|bigbench:conlang_translation:unapuri_from|0|0
helm|bigbench:conlang_translation:unapuri_to|0|0
helm|bigbench:conlang_translation:vaomi_from|0|0
helm|bigbench:conlang_translation:vaomi_to|0|0
helm|bigbench:emoji_movie|0|0
helm|bigbench:formal_fallacies_syllogisms_negation|0|0
helm|bigbench:hindu_knowledge|0|0
helm|bigbench:known_unknowns|0|0
helm|bigbench:language_identification|0|0
helm|bigbench:linguistics_puzzles|0|0
helm|bigbench:logic_grid_puzzle|0|0
helm|bigbench:logical_deduction-five_objects|0|0
helm|bigbench:logical_deduction-seven_objects|0|0
helm|bigbench:logical_deduction-three_objects|0|0
helm|bigbench:misconceptions_russian|0|0
helm|bigbench:novel_concepts|0|0
helm|bigbench:operators|0|0
helm|bigbench:parsinlu_reading_comprehension|0|0
helm|bigbench:play_dialog_same_or_different|0|0
helm|bigbench:repeat_copy_logic|0|0
helm|bigbench:strange_stories-boolean|0|0
helm|bigbench:strange_stories-multiple_choice|0|0
helm|bigbench:strategyqa|0|0
helm|bigbench:symbol_interpretation-adversarial|0|0
helm|bigbench:symbol_interpretation-emoji_agnostic|0|0
helm|bigbench:symbol_interpretation-name_agnostic|0|0
helm|bigbench:symbol_interpretation-plain|0|0
helm|bigbench:symbol_interpretation-tricky|0|0
helm|bigbench:vitaminc_fact_verification|0|0
helm|bigbench:winowhy|0|0
helm|blimp:adjunct_island|0|0
helm|blimp:anaphor_gender_agreement|0|0
helm|blimp:anaphor_number_agreement|0|0
helm|blimp:animate_subject_passive|0|0
helm|blimp:animate_subject_trans|0|0
helm|blimp:causative|0|0
helm|blimp:complex_NP_island|0|0
helm|blimp:coordinate_structure_constraint_complex_left_branch|0|0
helm|blimp:coordinate_structure_constraint_object_extraction|0|0
helm|blimp:determiner_noun_agreement_1|0|0
helm|blimp:determiner_noun_agreement_2|0|0
helm|blimp:determiner_noun_agreement_irregular_1|0|0
helm|blimp:determiner_noun_agreement_irregular_2|0|0
helm|blimp:determiner_noun_agreement_with_adj_2|0|0
helm|blimp:determiner_noun_agreement_with_adj_irregular_1|0|0
helm|blimp:determiner_noun_agreement_with_adj_irregular_2|0|0
helm|blimp:determiner_noun_agreement_with_adjective_1|0|0
helm|blimp:distractor_agreement_relational_noun|0|0
helm|blimp:distractor_agreement_relative_clause|0|0
helm|blimp:drop_argument|0|0
helm|blimp:ellipsis_n_bar_1|0|0
helm|blimp:ellipsis_n_bar_2|0|0
helm|blimp:existential_there_object_raising|0|0
helm|blimp:existential_there_quantifiers_1|0|0
helm|blimp:existential_there_quantifiers_2|0|0
helm|blimp:existential_there_subject_raising|0|0
helm|blimp:expletive_it_object_raising|0|0
helm|blimp:inchoative|0|0
helm|blimp:intransitive|0|0
helm|blimp:irregular_past_participle_adjectives|0|0
helm|blimp:irregular_past_participle_verbs|0|0
helm|blimp:irregular_plural_subject_verb_agreement_1|0|0
helm|blimp:irregular_plural_subject_verb_agreement_2|0|0
helm|blimp:left_branch_island_echo_question|0|0
helm|blimp:left_branch_island_simple_question|0|0
helm|blimp:matrix_question_npi_licensor_present|0|0
helm|blimp:npi_present_1|0|0
helm|blimp:npi_present_2|0|0
helm|blimp:only_npi_licensor_present|0|0
helm|blimp:only_npi_scope|0|0
helm|blimp:passive_1|0|0
helm|blimp:passive_2|0|0
helm|blimp:principle_A_c_command|0|0
helm|blimp:principle_A_case_1|0|0
helm|blimp:principle_A_case_2|0|0
helm|blimp:principle_A_domain_1|0|0
helm|blimp:principle_A_domain_2|0|0
helm|blimp:principle_A_domain_3|0|0
helm|blimp:principle_A_reconstruction|0|0
helm|blimp:regular_plural_subject_verb_agreement_1|0|0
helm|blimp:regular_plural_subject_verb_agreement_2|0|0
helm|blimp:sentential_negation_npi_licensor_present|0|0
helm|blimp:sentential_negation_npi_scope|0|0
helm|blimp:sentential_subject_island|0|0
helm|blimp:superlative_quantifiers_1|0|0
helm|blimp:superlative_quantifiers_2|0|0
helm|blimp:tough_vs_raising_1|0|0
helm|blimp:tough_vs_raising_2|0|0
helm|blimp:transitive|0|0
helm|blimp:wh_island|0|0
helm|blimp:wh_questions_object_gap|0|0
helm|blimp:wh_questions_subject_gap_long_distance|0|0
helm|blimp:wh_questions_subject_gap|0|0
helm|blimp:wh_vs_that_no_gap_long_distance|0|0
helm|blimp:wh_vs_that_no_gap|0|0
helm|blimp:wh_vs_that_with_gap_long_distance|0|0
helm|blimp:wh_vs_that_with_gap|0|0
helm|bold:gender|0|0
helm|bold:political_ideology|0|0
helm|bold:profession|0|0
helm|bold:race|0|0
helm|bold:religious_ideology|0|0
helm|bold|0|0
helm|boolq:contrastset|0|0
helm|boolq|0|0
helm|civil_comments:LGBTQ|0|0
helm|civil_comments:black|0|0
helm|civil_comments:christian|0|0
helm|civil_comments:female|0|0
helm|civil_comments:male|0|0
helm|civil_comments:muslim|0|0
helm|civil_comments:other_religions|0|0
helm|civil_comments:white|0|0
helm|civil_comments|0|0
helm|commonsenseqa|0|0
helm|copyright:n_books_1000-extractions_per_book_1-prefix_length_125|0|0
helm|copyright:n_books_1000-extractions_per_book_1-prefix_length_25|0|0
helm|copyright:n_books_1000-extractions_per_book_1-prefix_length_5|0|0
helm|copyright:n_books_1000-extractions_per_book_3-prefix_length_125|0|0
helm|copyright:n_books_1000-extractions_per_book_3-prefix_length_25|0|0
helm|copyright:n_books_1000-extractions_per_book_3-prefix_length_5|0|0
helm|copyright:oh_the_places|0|0
helm|copyright:pilot|0|0
helm|copyright:popular_books-prefix_length_10|0|0
helm|copyright:popular_books-prefix_length_125|0|0
helm|copyright:popular_books-prefix_length_250|0|0
helm|copyright:popular_books-prefix_length_25|0|0
helm|copyright:popular_books-prefix_length_50|0|0
helm|copyright:popular_books-prefix_length_5|0|0
helm|copyright:prompt_num_line_1-min_lines_20|0|0
helm|copyright:prompt_num_line_10-min_lines_20|0|0
helm|copyright:prompt_num_line_5-min_lines_20|0|0
helm|covid_dialogue|0|0
helm|dyck_language:2|0|0
helm|dyck_language:3|0|0
helm|dyck_language:4|0|0
helm|entity_data_imputation:Buy|0|0
helm|entity_data_imputation:Restaurant|0|0
helm|entity_matching:Abt_Buy|0|0
helm|entity_matching:Amazon_Google|0|0
helm|entity_matching:Beer|0|0
helm|entity_matching:Company|0|0
helm|entity_matching:DBLP_ACM|0|0
helm|entity_matching:DBLP_GoogleScholar|0|0
helm|entity_matching:Dirty_DBLP_ACM|0|0
helm|entity_matching:Dirty_DBLP_GoogleScholar|0|0
helm|entity_matching:Dirty_Walmart_Amazon|0|0
helm|entity_matching:Dirty_iTunes_Amazon|0|0
helm|entity_matching:Fodors_Zagats|0|0
helm|entity_matching:Walmart_Amazon|0|0
helm|entity_matching:iTunes_Amazon|0|0
helm|hellaswag|0|0
helm|humaneval|0|0
helm|imdb:contrastset|0|0
helm|imdb|0|0
helm|interactive_qa_mmlu:abstract_algebra|0|0
helm|interactive_qa_mmlu:college_chemistry|0|0
helm|interactive_qa_mmlu:global_facts|0|0
helm|interactive_qa_mmlu:miscellaneous|0|0
helm|interactive_qa_mmlu:nutrition|0|0
helm|interactive_qa_mmlu:us_foreign_policy|0|0
helm|legal_summarization:billsum|0|0
helm|legal_summarization:eurlexsum|0|0
helm|legal_summarization:multilexsum|0|0
helm|legalsupport|0|0
helm|lexglue:case_hold|0|0
helm|lexglue:ecthr_a|0|0
helm|lexglue:ecthr_b|0|0
helm|lexglue:eurlex|0|0
helm|lexglue:ledgar|0|0
helm|lexglue:scotus|0|0
helm|lexglue:unfair_tos|0|0
helm|lextreme:brazilian_court_decisions_judgment|0|0
helm|lextreme:brazilian_court_decisions_unanimity|0|0
helm|lextreme:covid19_emergency_event|0|0
helm|lextreme:german_argument_mining|0|0
helm|lextreme:greek_legal_code_chapter|0|0
helm|lextreme:greek_legal_code_subject|0|0
helm|lextreme:greek_legal_code_volume|0|0
helm|lextreme:greek_legal_ner|0|0
helm|lextreme:legalnero|0|0
helm|lextreme:lener_br|0|0
helm|lextreme:mapa_coarse|0|0
helm|lextreme:mapa_fine|0|0
helm|lextreme:multi_eurlex_level_1|0|0
helm|lextreme:multi_eurlex_level_2|0|0
helm|lextreme:multi_eurlex_level_3|0|0
helm|lextreme:online_terms_of_service_clause_topics|0|0
helm|lextreme:online_terms_of_service_unfairness_levels|0|0
helm|lextreme:swiss_judgment_prediction|0|0
helm|lsat_qa:assignment|0|0
helm|lsat_qa:grouping|0|0
helm|lsat_qa:miscellaneous|0|0
helm|lsat_qa:ordering|0|0
helm|lsat_qa|0|0
helm|me_q_sum|0|0
helm|med_dialog:healthcaremagic|0|0
helm|med_dialog:icliniq|0|0
helm|med_mcqa|0|0
helm|med_paragraph_simplification|0|0
helm|med_qa|0|0
helm|mmlu:abstract_algebra|0|0
helm|mmlu:anatomy|0|0
helm|mmlu:astronomy|0|0
helm|mmlu:business_ethics|0|0
helm|mmlu:clinical_knowledge|0|0
helm|mmlu:college_biology|0|0
helm|mmlu:college_chemistry|0|0
helm|mmlu:college_computer_science|0|0
helm|mmlu:college_mathematics|0|0
helm|mmlu:college_medicine|0|0
helm|mmlu:college_physics|0|0
helm|mmlu:computer_security|0|0
helm|mmlu:conceptual_physics|0|0
helm|mmlu:econometrics|0|0
helm|mmlu:electrical_engineering|0|0
helm|mmlu:elementary_mathematics|0|0
helm|mmlu:formal_logic|0|0
helm|mmlu:global_facts|0|0
helm|mmlu:high_school_biology|0|0
helm|mmlu:high_school_chemistry|0|0
helm|mmlu:high_school_computer_science|0|0
helm|mmlu:high_school_european_history|0|0
helm|mmlu:high_school_geography|0|0
helm|mmlu:high_school_government_and_politics|0|0
helm|mmlu:high_school_macroeconomics|0|0
helm|mmlu:high_school_mathematics|0|0
helm|mmlu:high_school_microeconomics|0|0
helm|mmlu:high_school_physics|0|0
helm|mmlu:high_school_psychology|0|0
helm|mmlu:high_school_statistics|0|0
helm|mmlu:high_school_us_history|0|0
helm|mmlu:high_school_world_history|0|0
helm|mmlu:human_aging|0|0
helm|mmlu:human_sexuality|0|0
helm|mmlu:international_law|0|0
helm|mmlu:jurisprudence|0|0
helm|mmlu:logical_fallacies|0|0
helm|mmlu:machine_learning|0|0
helm|mmlu:management|0|0
helm|mmlu:marketing|0|0
helm|mmlu:medical_genetics|0|0
helm|mmlu:miscellaneous|0|0
helm|mmlu:moral_disputes|0|0
helm|mmlu:moral_scenarios|0|0
helm|mmlu:nutrition|0|0
helm|mmlu:philosophy|0|0
helm|mmlu:prehistory|0|0
helm|mmlu:professional_accounting|0|0
helm|mmlu:professional_law|0|0
helm|mmlu:professional_medicine|0|0
helm|mmlu:professional_psychology|0|0
helm|mmlu:public_relations|0|0
helm|mmlu:security_studies|0|0
helm|mmlu:sociology|0|0
helm|mmlu:us_foreign_policy|0|0
helm|mmlu:virology|0|0
helm|mmlu:world_religions|0|0
helm|mmlu|0|0
helm|narrativeqa|0|0
helm|numeracy:linear_example|0|0
helm|numeracy:linear_standard|0|0
helm|numeracy:parabola_example|0|0
helm|numeracy:parabola_standard|0|0
helm|numeracy:paraboloid_example|0|0
helm|numeracy:paraboloid_standard|0|0
helm|numeracy:plane_example|0|0
helm|numeracy:plane_standard|0|0
helm|openbookqa|0|0
helm|piqa|0|0
helm|pubmedqa|0|0
helm|quac|0|0
helm|raft:ade_corpus_v2|0|0
helm|raft:banking_77|0|0
helm|raft:neurips_impact_statement_risks|0|0
helm|raft:one_stop_english|0|0
helm|raft:overruling|0|0
helm|raft:semiconductor_org_types|0|0
helm|raft:systematic_review_inclusion|0|0
helm|raft:tai_safety_research|0|0
helm|raft:terms_of_service|0|0
helm|raft:tweet_eval_hate|0|0
helm|raft:twitter_complaints|0|0
helm|real_toxicity_prompts|0|0
helm|siqa|0|0
helm|summarization:cnn-dm|0|0
helm|summarization:xsum-sampled|0|0
helm|summarization:xsum|0|0
helm|synthetic_reasoning:induction|0|0
helm|synthetic_reasoning:natural_easy|0|0
helm|synthetic_reasoning:natural_hard|0|0
helm|synthetic_reasoning:pattern_match|0|0
helm|synthetic_reasoning:variable_substitution|0|0
helm|the_pile:arxiv|0|0
helm|the_pile:bibliotik|0|0
helm|the_pile:commoncrawl|0|0
helm|the_pile:dm-mathematics|0|0
helm|the_pile:enron|0|0
helm|the_pile:europarl|0|0
helm|the_pile:freelaw|0|0
helm|the_pile:github|0|0
helm|the_pile:gutenberg|0|0
helm|the_pile:hackernews|0|0
helm|the_pile:nih-exporter|0|0
helm|the_pile:opensubtitles|0|0
helm|the_pile:openwebtext2|0|0
helm|the_pile:pubmed-abstracts|0|0
helm|the_pile:pubmed-central|0|0
helm|the_pile:stackexchange|0|0
helm|the_pile:upsto|0|0
helm|the_pile:wikipedia|0|0
helm|the_pile:youtubesubtitles|0|0
helm|truthfulqa|0|0
helm|twitterAAE:aa|0|0
helm|twitterAAE:white|0|0
helm|wikifact:applies_to_jurisdiction|0|0
helm|wikifact:atomic_number|0|0
helm|wikifact:author|0|0
helm|wikifact:award_received|0|0
helm|wikifact:basic_form_of_government|0|0
helm|wikifact:capital_of|0|0
helm|wikifact:capital|0|0
helm|wikifact:central_bank|0|0
helm|wikifact:composer|0|0
helm|wikifact:continent|0|0
helm|wikifact:country_of_citizenship|0|0
helm|wikifact:country_of_origin|0|0
helm|wikifact:country|0|0
helm|wikifact:creator|0|0
helm|wikifact:currency|0|0
helm|wikifact:defendant|0|0
helm|wikifact:developer|0|0
helm|wikifact:diplomatic_relation|0|0
helm|wikifact:director|0|0
helm|wikifact:discoverer_or_inventor|0|0
helm|wikifact:drug_or_therapy_used_for_treatment|0|0
helm|wikifact:educated_at|0|0
helm|wikifact:electron_configuration|0|0
helm|wikifact:employer|0|0
helm|wikifact:field_of_work|0|0
helm|wikifact:file_extension|0|0
helm|wikifact:genetic_association|0|0
helm|wikifact:genre|0|0
helm|wikifact:has_part|0|0
helm|wikifact:head_of_government|0|0
helm|wikifact:head_of_state|0|0
helm|wikifact:headquarters_location|0|0
helm|wikifact:industry|0|0
helm|wikifact:influenced_by|0|0
helm|wikifact:instance_of|0|0
helm|wikifact:instrument|0|0
helm|wikifact:language_of_work_or_name|0|0
helm|wikifact:languages_spoken_written_or_signed|0|0
helm|wikifact:laws_applied|0|0
helm|wikifact:located_in_the_administrative_territorial_entity|0|0
helm|wikifact:location_of_discovery|0|0
helm|wikifact:location_of_formation|0|0
helm|wikifact:location|0|0
helm|wikifact:majority_opinion_by|0|0
helm|wikifact:manufacturer|0|0
helm|wikifact:measured_physical_quantity|0|0
helm|wikifact:medical_condition_treated|0|0
helm|wikifact:member_of_political_party|0|0
helm|wikifact:member_of_sports_team|0|0
helm|wikifact:member_of|0|0
helm|wikifact:movement|0|0
helm|wikifact:named_after|0|0
helm|wikifact:native_language|0|0
helm|wikifact:number_of_processor_cores|0|0
helm|wikifact:occupation|0|0
helm|wikifact:office_held_by_head_of_government|0|0
helm|wikifact:office_held_by_head_of_state|0|0
helm|wikifact:official_language|0|0
helm|wikifact:operating_system|0|0
helm|wikifact:original_language_of_film_or_TV_show|0|0
helm|wikifact:original_network|0|0
helm|wikifact:overrules|0|0
helm|wikifact:owned_by|0|0
helm|wikifact:part_of|0|0
helm|wikifact:participating_team|0|0
helm|wikifact:place_of_birth|0|0
helm|wikifact:place_of_death|0|0
helm|wikifact:plaintiff|0|0
helm|wikifact:position_held|0|0
helm|wikifact:position_played_on_team|0|0
helm|wikifact:programming_language|0|0
helm|wikifact:recommended_unit_of_measurement|0|0
helm|wikifact:record_label|0|0
helm|wikifact:religion|0|0
helm|wikifact:repealed_by|0|0
helm|wikifact:shares_border_with|0|0
helm|wikifact:solved_by|0|0
helm|wikifact:statement_describes|0|0
helm|wikifact:stock_exchange|0|0
helm|wikifact:subclass_of|0|0
helm|wikifact:subsidiary|0|0
helm|wikifact:symptoms_and_signs|0|0
helm|wikifact:therapeutic_area|0|0
helm|wikifact:time_of_discovery_or_invention|0|0
helm|wikifact:twinned_administrative_body|0|0
helm|wikifact:work_location|0|0
helm|wikitext:103|0|0
helm|wmt14:cs-en|0|0
helm|wmt14:de-en|0|0
helm|wmt14:fr-en|0|0
helm|wmt14:hi-en|0|0
helm|wmt14:ru-en|0|0
lighteval|anli:r1|0|0
lighteval|anli:r2|0|0
lighteval|anli:r3|0|0
lighteval|anli|0|0
leaderboard|arc:challenge|0|0
lighteval|arc:easy|0|0
lighteval|arithmetic:1dc|0|0
lighteval|arithmetic:2da|0|0
lighteval|arithmetic:2dm|0|0
lighteval|arithmetic:2ds|0|0
lighteval|arithmetic:3da|0|0
lighteval|arithmetic:3ds|0|0
lighteval|arithmetic:4da|0|0
lighteval|arithmetic:4ds|0|0
lighteval|arithmetic:5da|0|0
lighteval|arithmetic:5ds|0|0
lighteval|asdiv|0|0
lighteval|blimp:adjunct_island|0|0
lighteval|blimp:anaphor_gender_agreement|0|0
lighteval|blimp:anaphor_number_agreement|0|0
lighteval|blimp:animate_subject_passive|0|0
lighteval|blimp:animate_subject_trans|0|0
lighteval|blimp:causative|0|0
lighteval|blimp:complex_NP_island|0|0
lighteval|blimp:coordinate_structure_constraint_complex_left_branch|0|0
lighteval|blimp:coordinate_structure_constraint_object_extraction|0|0
lighteval|blimp:determiner_noun_agreement_1|0|0
lighteval|blimp:determiner_noun_agreement_2|0|0
lighteval|blimp:determiner_noun_agreement_irregular_1|0|0
lighteval|blimp:determiner_noun_agreement_irregular_2|0|0
lighteval|blimp:determiner_noun_agreement_with_adj_2|0|0
lighteval|blimp:determiner_noun_agreement_with_adj_irregular_1|0|0
lighteval|blimp:determiner_noun_agreement_with_adj_irregular_2|0|0
lighteval|blimp:determiner_noun_agreement_with_adjective_1|0|0
lighteval|blimp:distractor_agreement_relational_noun|0|0
lighteval|blimp:distractor_agreement_relative_clause|0|0
lighteval|blimp:drop_argument|0|0
lighteval|blimp:ellipsis_n_bar_1|0|0
lighteval|blimp:ellipsis_n_bar_2|0|0
lighteval|blimp:existential_there_object_raising|0|0
lighteval|blimp:existential_there_quantifiers_1|0|0
lighteval|blimp:existential_there_quantifiers_2|0|0
lighteval|blimp:existential_there_subject_raising|0|0
lighteval|blimp:expletive_it_object_raising|0|0
lighteval|blimp:inchoative|0|0
lighteval|blimp:intransitive|0|0
lighteval|blimp:irregular_past_participle_adjectives|0|0
lighteval|blimp:irregular_past_participle_verbs|0|0
lighteval|blimp:irregular_plural_subject_verb_agreement_1|0|0
lighteval|blimp:irregular_plural_subject_verb_agreement_2|0|0
lighteval|blimp:left_branch_island_echo_question|0|0
lighteval|blimp:left_branch_island_simple_question|0|0
lighteval|blimp:matrix_question_npi_licensor_present|0|0
lighteval|blimp:npi_present_1|0|0
lighteval|blimp:npi_present_2|0|0
lighteval|blimp:only_npi_licensor_present|0|0
lighteval|blimp:only_npi_scope|0|0
lighteval|blimp:passive_1|0|0
lighteval|blimp:passive_2|0|0
lighteval|blimp:principle_A_c_command|0|0
lighteval|blimp:principle_A_case_1|0|0
lighteval|blimp:principle_A_case_2|0|0
lighteval|blimp:principle_A_domain_1|0|0
lighteval|blimp:principle_A_domain_2|0|0
lighteval|blimp:principle_A_domain_3|0|0
lighteval|blimp:principle_A_reconstruction|0|0
lighteval|blimp:regular_plural_subject_verb_agreement_1|0|0
lighteval|blimp:regular_plural_subject_verb_agreement_2|0|0
lighteval|blimp:sentential_negation_npi_licensor_present|0|0
lighteval|blimp:sentential_negation_npi_scope|0|0
lighteval|blimp:sentential_subject_island|0|0
lighteval|blimp:superlative_quantifiers_1|0|0
lighteval|blimp:superlative_quantifiers_2|0|0
lighteval|blimp:tough_vs_raising_1|0|0
lighteval|blimp:tough_vs_raising_2|0|0
lighteval|blimp:transitive|0|0
lighteval|blimp:wh_island|0|0
lighteval|blimp:wh_questions_object_gap|0|0
lighteval|blimp:wh_questions_subject_gap_long_distance|0|0
lighteval|blimp:wh_questions_subject_gap|0|0
lighteval|blimp:wh_vs_that_no_gap_long_distance|0|0
lighteval|blimp:wh_vs_that_no_gap|0|0
lighteval|blimp:wh_vs_that_with_gap_long_distance|0|0
lighteval|blimp:wh_vs_that_with_gap|0|0
lighteval|coqa_bb|0|0
lighteval|coqa|0|0
lighteval|drop|0|0
lighteval|ethics:commonsense|0|0
lighteval|ethics:deontology|0|0
lighteval|ethics:justice|0|0
lighteval|ethics:utilitarianism|0|0
lighteval|ethics:virtue|0|0
lighteval|glue:cola|0|0
lighteval|glue:mnli_mismatched|0|0
lighteval|glue:mnli|0|0
lighteval|glue:mrpc|0|0
lighteval|glue:qnli|0|0
lighteval|glue:qqp|0|0
lighteval|glue:rte|0|0
lighteval|glue:sst2|0|0
lighteval|glue:stsb|0|0
lighteval|glue:wnli|0|0
leaderboard|gsm8k|0|0
lighteval|headqa:en|0|0
lighteval|headqa:es|0|0
leaderboard|hellaswag|0|0
lighteval|iwslt17:ar-en|0|0
lighteval|iwslt17:de-en|0|0
lighteval|iwslt17:en-ar|0|0
lighteval|iwslt17:en-de|0|0
lighteval|iwslt17:en-fr|0|0
lighteval|iwslt17:en-ja|0|0
lighteval|iwslt17:en-ko|0|0
lighteval|iwslt17:en-zh|0|0
lighteval|iwslt17:fr-en|0|0
lighteval|iwslt17:ja-en|0|0
lighteval|iwslt17:ko-en|0|0
lighteval|iwslt17:zh-en|0|0
lighteval|lambada:openai:de|0|0
lighteval|lambada:openai:en|0|0
lighteval|lambada:openai:es|0|0
lighteval|lambada:openai:fr|0|0
lighteval|lambada:openai:it|0|0
lighteval|lambada:openai_cloze|0|0
lighteval|lambada:openai|0|0
lighteval|lambada:standard_cloze|0|0
lighteval|lambada:standard|0|0
lighteval|logiqa|0|0
lighteval|math:algebra|0|0
lighteval|math:counting_and_probability|0|0
lighteval|math:geometry|0|0
lighteval|math:intermediate_algebra|0|0
lighteval|math:number_theory|0|0
lighteval|math:prealgebra|0|0
lighteval|math:precalculus|0|0
lighteval|mathqa|0|0
lighteval|mgsm:bn|0|0
lighteval|mgsm:de|0|0
lighteval|mgsm:en|0|0
lighteval|mgsm:es|0|0
lighteval|mgsm:fr|0|0
lighteval|mgsm:ja|0|0
lighteval|mgsm:ru|0|0
lighteval|mgsm:sw|0|0
lighteval|mgsm:te|0|0
lighteval|mgsm:th|0|0
lighteval|mgsm:zh|0|0
leaderboard|mmlu:abstract_algebra|0|0
leaderboard|mmlu:anatomy|0|0
leaderboard|mmlu:astronomy|0|0
leaderboard|mmlu:business_ethics|0|0
leaderboard|mmlu:clinical_knowledge|0|0
leaderboard|mmlu:college_biology|0|0
leaderboard|mmlu:college_chemistry|0|0
leaderboard|mmlu:college_computer_science|0|0
leaderboard|mmlu:college_mathematics|0|0
leaderboard|mmlu:college_medicine|0|0
leaderboard|mmlu:college_physics|0|0
leaderboard|mmlu:computer_security|0|0
leaderboard|mmlu:conceptual_physics|0|0
leaderboard|mmlu:econometrics|0|0
leaderboard|mmlu:electrical_engineering|0|0
leaderboard|mmlu:elementary_mathematics|0|0
leaderboard|mmlu:formal_logic|0|0
leaderboard|mmlu:global_facts|0|0
leaderboard|mmlu:high_school_biology|0|0
leaderboard|mmlu:high_school_chemistry|0|0
leaderboard|mmlu:high_school_computer_science|0|0
leaderboard|mmlu:high_school_european_history|0|0
leaderboard|mmlu:high_school_geography|0|0
leaderboard|mmlu:high_school_government_and_politics|0|0
leaderboard|mmlu:high_school_macroeconomics|0|0
leaderboard|mmlu:high_school_mathematics|0|0
leaderboard|mmlu:high_school_microeconomics|0|0
leaderboard|mmlu:high_school_physics|0|0
leaderboard|mmlu:high_school_psychology|0|0
leaderboard|mmlu:high_school_statistics|0|0
leaderboard|mmlu:high_school_us_history|0|0
leaderboard|mmlu:high_school_world_history|0|0
leaderboard|mmlu:human_aging|0|0
leaderboard|mmlu:human_sexuality|0|0
leaderboard|mmlu:international_law|0|0
leaderboard|mmlu:jurisprudence|0|0
leaderboard|mmlu:logical_fallacies|0|0
leaderboard|mmlu:machine_learning|0|0
leaderboard|mmlu:management|0|0
leaderboard|mmlu:marketing|0|0
leaderboard|mmlu:medical_genetics|0|0
leaderboard|mmlu:miscellaneous|0|0
leaderboard|mmlu:moral_disputes|0|0
leaderboard|mmlu:moral_scenarios|0|0
leaderboard|mmlu:nutrition|0|0
leaderboard|mmlu:philosophy|0|0
leaderboard|mmlu:prehistory|0|0
leaderboard|mmlu:professional_accounting|0|0
leaderboard|mmlu:professional_law|0|0
leaderboard|mmlu:professional_medicine|0|0
leaderboard|mmlu:professional_psychology|0|0
leaderboard|mmlu:public_relations|0|0
leaderboard|mmlu:security_studies|0|0
leaderboard|mmlu:sociology|0|0
leaderboard|mmlu:us_foreign_policy|0|0
leaderboard|mmlu:virology|0|0
leaderboard|mmlu:world_religions|0|0
lighteval|mtnt2019:en-fr|0|0
lighteval|mtnt2019:en-ja|0|0
lighteval|mtnt2019:fr-en|0|0
lighteval|mtnt2019:ja-en|0|0
lighteval|mutual_plus|0|0
lighteval|mutual|0|0
lighteval|openbookqa|0|0
lighteval|piqa|0|0
lighteval|prost|0|0
lighteval|pubmedqa|0|0
lighteval|qa4mre:2011|0|0
lighteval|qa4mre:2012|0|0
lighteval|qa4mre:2013|0|0
lighteval|qasper_ll|0|0
lighteval|qasper|0|0
lighteval|race:high|0|0
lighteval|sciq|0|0
lighteval|storycloze:2016|0|0
lighteval|storycloze:2018|0|0
lighteval|super_glue:boolq|0|0
lighteval|super_glue:cb|0|0
lighteval|super_glue:copa|0|0
lighteval|super_glue:multirc|0|0
lighteval|super_glue:record|0|0
lighteval|super_glue:rte|0|0
lighteval|super_glue:wic|0|0
lighteval|super_glue:wsc|0|0
lighteval|swag|0|0
lighteval|the_pile:arxiv|0|0
lighteval|the_pile:bookcorpus2|0|0
lighteval|the_pile:books3|0|0
lighteval|the_pile:dm-mathematics|0|0
lighteval|the_pile:enron|0|0
lighteval|the_pile:europarl|0|0
lighteval|the_pile:freelaw|0|0
lighteval|the_pile:github|0|0
lighteval|the_pile:gutenberg|0|0
lighteval|the_pile:hackernews|0|0
lighteval|the_pile:nih-exporter|0|0
lighteval|the_pile:opensubtitles|0|0
lighteval|the_pile:openwebtext2|0|0
lighteval|the_pile:philpapers|0|0
lighteval|the_pile:pile-cc|0|0
lighteval|the_pile:pubmed-abstracts|0|0
lighteval|the_pile:pubmed-central|0|0
lighteval|the_pile:stackexchange|0|0
lighteval|the_pile:ubuntu-irc|0|0
lighteval|the_pile:uspto|0|0
lighteval|the_pile:wikipedia|0|0
lighteval|the_pile:youtubesubtitles|0|0
lighteval|toxigen|0|0
lighteval|triviaqa|0|0
lighteval|truthfulqa:gen|0|0
leaderboard|truthfulqa:mc|0|0
lighteval|unscramble:anagrams1|0|0
lighteval|unscramble:anagrams2|0|0
lighteval|unscramble:cycle_letters|0|0
lighteval|unscramble:random_insertion|0|0
lighteval|unscramble:reversed_words|0|0
lighteval|webqs|0|0
lighteval|wikitext|0|0
leaderboard|winogrande|0|0
lighteval|wmt08:cs-en|0|0
lighteval|wmt08:de-en|0|0
lighteval|wmt08:en-cs|0|0
lighteval|wmt08:en-de|0|0
lighteval|wmt08:en-es|0|0
lighteval|wmt08:en-fr|0|0
lighteval|wmt08:en-hu|0|0
lighteval|wmt08:es-en|0|0
lighteval|wmt08:fr-en|0|0
lighteval|wmt08:hu-en|0|0
lighteval|wmt09:cs-en|0|0
lighteval|wmt09:de-en|0|0
lighteval|wmt09:en-cs|0|0
lighteval|wmt09:en-de|0|0
lighteval|wmt09:en-es|0|0
lighteval|wmt09:en-fr|0|0
lighteval|wmt09:en-hu|0|0
lighteval|wmt09:en-it|0|0
lighteval|wmt09:es-en|0|0
lighteval|wmt09:fr-en|0|0
lighteval|wmt09:hu-en|0|0
lighteval|wmt09:it-en|0|0
lighteval|wmt10:cs-en|0|0
lighteval|wmt10:de-en|0|0
lighteval|wmt10:en-cs|0|0
lighteval|wmt10:en-de|0|0
lighteval|wmt10:en-es|0|0
lighteval|wmt10:en-fr|0|0
lighteval|wmt10:es-en|0|0
lighteval|wmt10:fr-en|0|0
lighteval|wmt11:cs-en|0|0
lighteval|wmt11:de-en|0|0
lighteval|wmt11:en-cs|0|0
lighteval|wmt11:en-de|0|0
lighteval|wmt11:en-es|0|0
lighteval|wmt11:en-fr|0|0
lighteval|wmt11:es-en|0|0
lighteval|wmt11:fr-en|0|0
lighteval|wmt12:cs-en|0|0
lighteval|wmt12:de-en|0|0
lighteval|wmt12:en-cs|0|0
lighteval|wmt12:en-de|0|0
lighteval|wmt12:en-es|0|0
lighteval|wmt12:en-fr|0|0
lighteval|wmt12:es-en|0|0
lighteval|wmt12:fr-en|0|0
lighteval|wmt13:cs-en|0|0
lighteval|wmt13:de-en|0|0
lighteval|wmt13:en-cs|0|0
lighteval|wmt13:en-de|0|0
lighteval|wmt13:en-es|0|0
lighteval|wmt13:en-fr|0|0
lighteval|wmt13:en-ru|0|0
lighteval|wmt13:es-en|0|0
lighteval|wmt13:fr-en|0|0
lighteval|wmt13:ru-en|0|0
lighteval|wmt14:cs-en|0|0
lighteval|wmt14:de-en|0|0
lighteval|wmt14:en-cs|0|0
lighteval|wmt14:en-de|0|0
lighteval|wmt14:en-fr|0|0
lighteval|wmt14:en-fr|0|0
lighteval|wmt14:en-hi|0|0
lighteval|wmt14:en-ru|0|0
lighteval|wmt14:fr-en|0|0
lighteval|wmt14:fr-en|0|0
lighteval|wmt14:hi-en|0|0
lighteval|wmt14:ru-en|0|0
lighteval|wmt15:cs-en|0|0
lighteval|wmt15:de-en|0|0
lighteval|wmt15:en-cs|0|0
lighteval|wmt15:en-de|0|0
lighteval|wmt15:en-fi|0|0
lighteval|wmt15:en-fr|0|0
lighteval|wmt15:en-ru|0|0
lighteval|wmt15:fi-en|0|0
lighteval|wmt15:fr-en|0|0
lighteval|wmt15:ru-en|0|0
lighteval|wmt16:cs-en|0|0
lighteval|wmt16:de-en|0|0
lighteval|wmt16:de-en|0|0
lighteval|wmt16:en-cs|0|0
lighteval|wmt16:en-de|0|0
lighteval|wmt16:en-de|0|0
lighteval|wmt16:en-fi|0|0
lighteval|wmt16:en-ro|0|0
lighteval|wmt16:en-ro|0|0
lighteval|wmt16:en-ru|0|0
lighteval|wmt16:en-tr|0|0
lighteval|wmt16:fi-en|0|0
lighteval|wmt16:ro-en|0|0
lighteval|wmt16:ro-en|0|0
lighteval|wmt16:ru-en|0|0
lighteval|wmt16:tr-en|0|0
lighteval|wmt17:cs-en|0|0
lighteval|wmt17:de-en|0|0
lighteval|wmt17:en-cs|0|0
lighteval|wmt17:en-de|0|0
lighteval|wmt17:en-fi|0|0
lighteval|wmt17:en-lv|0|0
lighteval|wmt17:en-ru|0|0
lighteval|wmt17:en-tr|0|0
lighteval|wmt17:en-zh|0|0
lighteval|wmt17:fi-en|0|0
lighteval|wmt17:lv-en|0|0
lighteval|wmt17:ru-en|0|0
lighteval|wmt17:tr-en|0|0
lighteval|wmt17:zh-en|0|0
lighteval|wmt18:cs-en|0|0
lighteval|wmt18:de-en|0|0
lighteval|wmt18:en-cs|0|0
lighteval|wmt18:en-de|0|0
lighteval|wmt18:en-et|0|0
lighteval|wmt18:en-fi|0|0
lighteval|wmt18:en-ru|0|0
lighteval|wmt18:en-tr|0|0
lighteval|wmt18:en-zh|0|0
lighteval|wmt18:et-en|0|0
lighteval|wmt18:fi-en|0|0
lighteval|wmt18:ru-en|0|0
lighteval|wmt18:tr-en|0|0
lighteval|wmt18:zh-en|0|0
lighteval|wmt19:cs-de|0|0
lighteval|wmt19:de-cs|0|0
lighteval|wmt19:de-en|0|0
lighteval|wmt19:de-fr|0|0
lighteval|wmt19:en-cs|0|0
lighteval|wmt19:en-de|0|0
lighteval|wmt19:en-fi|0|0
lighteval|wmt19:en-gu|0|0
lighteval|wmt19:en-kk|0|0
lighteval|wmt19:en-lt|0|0
lighteval|wmt19:en-ru|0|0
lighteval|wmt19:en-zh|0|0
lighteval|wmt19:fi-en|0|0
lighteval|wmt19:fr-de|0|0
lighteval|wmt19:gu-en|0|0
lighteval|wmt19:kk-en|0|0
lighteval|wmt19:lt-en|0|0
lighteval|wmt19:ru-en|0|0
lighteval|wmt19:zh-en|0|0
lighteval|wmt20:cs-en|0|0
lighteval|wmt20:de-en|0|0
lighteval|wmt20:de-fr|0|0
lighteval|wmt20:en-cs|0|0
lighteval|wmt20:en-de|0|0
lighteval|wmt20:en-iu|0|0
lighteval|wmt20:en-ja|0|0
lighteval|wmt20:en-km|0|0
lighteval|wmt20:en-pl|0|0
lighteval|wmt20:en-ps|0|0
lighteval|wmt20:en-ru|0|0
lighteval|wmt20:en-ta|0|0
lighteval|wmt20:en-zh|0|0
lighteval|wmt20:fr-de|0|0
lighteval|wmt20:iu-en|0|0
lighteval|wmt20:ja-en|0|0
lighteval|wmt20:km-en|0|0
lighteval|wmt20:pl-en|0|0
lighteval|wmt20:ps-en|0|0
lighteval|wmt20:ru-en|0|0
lighteval|wmt20:ta-en|0|0
lighteval|wmt20:zh-en|0|0
lighteval|wsc273|0|0
lighteval|xcopa:en|0|0
lighteval|xcopa:et|0|0
lighteval|xcopa:ht|0|0
lighteval|xcopa:id|0|0
lighteval|xcopa:it|0|0
lighteval|xcopa:qu|0|0
lighteval|xcopa:sw|0|0
lighteval|xcopa:ta|0|0
lighteval|xcopa:th|0|0
lighteval|xcopa:tr|0|0
lighteval|xcopa:vi|0|0
lighteval|xcopa:zh|0|0
lighteval|xstory_cloze:ar|0|0
lighteval|xstory_cloze:en|0|0
lighteval|xstory_cloze:es|0|0
lighteval|xstory_cloze:eu|0|0
lighteval|xstory_cloze:hi|0|0
lighteval|xstory_cloze:id|0|0
lighteval|xstory_cloze:my|0|0
lighteval|xstory_cloze:ru|0|0
lighteval|xstory_cloze:sw|0|0
lighteval|xstory_cloze:te|0|0
lighteval|xstory_cloze:zh|0|0
lighteval|xwinograd:en|0|0
lighteval|xwinograd:fr|0|0
lighteval|xwinograd:jp|0|0
lighteval|xwinograd:pt|0|0
lighteval|xwinograd:ru|0|0
lighteval|xwinograd:zh|0|0
original|arc:c:letters|0|0
original|arc:c:options|0|0
original|arc:c:simple|0|0
original|mmlu:abstract_algebra|0|0
original|mmlu:anatomy|0|0
original|mmlu:astronomy|0|0
original|mmlu:business_ethics|0|0
original|mmlu:clinical_knowledge|0|0
original|mmlu:college_biology|0|0
original|mmlu:college_chemistry|0|0
original|mmlu:college_computer_science|0|0
original|mmlu:college_mathematics|0|0
original|mmlu:college_medicine|0|0
original|mmlu:college_physics|0|0
original|mmlu:computer_security|0|0
original|mmlu:conceptual_physics|0|0
original|mmlu:econometrics|0|0
original|mmlu:electrical_engineering|0|0
original|mmlu:elementary_mathematics|0|0
original|mmlu:formal_logic|0|0
original|mmlu:global_facts|0|0
original|mmlu:high_school_biology|0|0
original|mmlu:high_school_chemistry|0|0
original|mmlu:high_school_computer_science|0|0
original|mmlu:high_school_european_history|0|0
original|mmlu:high_school_geography|0|0
original|mmlu:high_school_government_and_politics|0|0
original|mmlu:high_school_macroeconomics|0|0
original|mmlu:high_school_mathematics|0|0
original|mmlu:high_school_microeconomics|0|0
original|mmlu:high_school_physics|0|0
original|mmlu:high_school_psychology|0|0
original|mmlu:high_school_statistics|0|0
original|mmlu:high_school_us_history|0|0
original|mmlu:high_school_world_history|0|0
original|mmlu:human_aging|0|0
original|mmlu:human_sexuality|0|0
original|mmlu:international_law|0|0
original|mmlu:jurisprudence|0|0
original|mmlu:logical_fallacies|0|0
original|mmlu:machine_learning|0|0
original|mmlu:management|0|0
original|mmlu:marketing|0|0
original|mmlu:medical_genetics|0|0
original|mmlu:miscellaneous|0|0
original|mmlu:moral_disputes|0|0
original|mmlu:moral_scenarios|0|0
original|mmlu:nutrition|0|0
original|mmlu:philosophy|0|0
original|mmlu:prehistory|0|0
original|mmlu:professional_accounting|0|0
original|mmlu:professional_law|0|0
original|mmlu:professional_medicine|0|0
original|mmlu:professional_psychology|0|0
original|mmlu:public_relations|0|0
original|mmlu:security_studies|0|0
original|mmlu:sociology|0|0
original|mmlu:us_foreign_policy|0|0
original|mmlu:virology|0|0
original|mmlu:world_religions|0|0
original|mmlu|0|0
