# Primary metric name for each OLMES task key; entries mapped to None leave
# the primary metric unspecified here.
PRIMARY_METRICS_OLMES = {
    "autobencher": "acc_per_char",
    "autobencher:mc": "acc_per_char",
    "aime": "exact_match_flex",
    "alpaca_eval": "win_rate",
    "arc_challenge": "acc_uncond",
    "arc_challenge:mc": "acc_raw",
    "arc_easy": "acc_per_char",
    "arc_easy:mc": "acc_raw",
    "autobencher": "logits_per_byte",
    "autobencher:mc": "acc_raw",
    "autobencher_math": "exact_match",
    "bigcodebench": "pass_at_1",
    "bigcodebench_hard": "pass_at_1",
    "boolq": "acc_raw",
    "boolq:mc": None,
    "custom_loss_sky_t1": "bits_per_byte",
    "custom_loss_numia_math": "bits_per_byte",
    "custom_loss_tulu_if": "bits_per_byte",
    "codex_humaneval": "pass_at_1",
    "codex_humanevalplus": "pass_at_1",
    "copa": "acc_raw",
    "copycolors": "acc_uncond",
    "copycolors:mc": "acc_raw",
    "coqa": "f1",
    "cosmosqa": "acc_per_char",
    "cosmosqa:mc": "acc_raw",
    "csqa": "acc_uncond",
    "csqa:mc": "acc_raw",
    "drop": "f1",
    "gsm8k": "exact_match",
    "gsm8k_selfc": "maj_at_10",
    "gsm_plus": "exact_match",
    "gsm_plus_selfc": None,
    "gsm_symbolic_main": "exact_match",
    "gsm_symbolic_p1": "exact_match",
    "gsm_symbolic_p2": "exact_match",
    "gpqa": "exact_match",
    "hellaswag": "acc_per_char",
    "hellaswag:mc": "acc_raw",
    "ifeval": "inst_level_loose_acc",
    "jeopardy": "f1",
    "logiqa": "acc_per_char",
    "logiqa:mc": "acc_raw",
    "minerva_math_algebra": None,
    "minerva_math_counting_and_probability": None,
    "minerva_math_geometry": None,
    "minerva_math_intermediate_algebra": None,
    "minerva_math_number_theory": None,
    "minerva_math_prealgebra": None,
    "minerva_math_precalculus": None,
    "minerva_math_500": "exact_match",
    "mbpp": "pass_at_1",
    "mbppplus": "pass_at_1",
    "medmcqa": "acc_per_char",
    "medmcqa:mc": "acc_per_char",
    "mmlu_abstract_algebra:mc": None,
    "mmlu_abstract_algebra": None,
    "mmlu_abstract_algebra:cot": "exact_match",
    "mmlu_anatomy:mc": None,
    "mmlu_anatomy": None,
    "mmlu_anatomy:cot": "exact_match",
    "mmlu_astronomy:mc": None,
    "mmlu_astronomy": None,
    "mmlu_astronomy:cot": "exact_match",
    "mmlu_business_ethics:mc": None,
    "mmlu_business_ethics": None,
    "mmlu_business_ethics:cot": "exact_match",
    "mmlu_clinical_knowledge:mc": None,
    "mmlu_clinical_knowledge": None,
    "mmlu_clinical_knowledge:cot": "exact_match",
    "mmlu_college_biology:mc": None,
    "mmlu_college_biology": None,
    "mmlu_college_biology:cot": "exact_match",
    "mmlu_college_chemistry:mc": None,
    "mmlu_college_chemistry": None,
    "mmlu_college_chemistry:cot": "exact_match",
    "mmlu_college_computer_science:mc": None,
    "mmlu_college_computer_science": None,
    "mmlu_college_computer_science:cot": "exact_match",
    "mmlu_college_mathematics:mc": None,
    "mmlu_college_mathematics": None,
    "mmlu_college_mathematics:cot": "exact_match",
    "mmlu_college_medicine:mc": None,
    "mmlu_college_medicine": None,
    "mmlu_college_medicine:cot": "exact_match",
    "mmlu_college_physics:mc": None,
    "mmlu_college_physics": None,
    "mmlu_college_physics:cot": "exact_match",
    "mmlu_computer_security:mc": None,
    "mmlu_computer_security": None,
    "mmlu_computer_security:cot": "exact_match",
    "mmlu_conceptual_physics:mc": None,
    "mmlu_conceptual_physics": None,
    "mmlu_conceptual_physics:cot": "exact_match",
    "mmlu_econometrics:mc": None,
    "mmlu_econometrics": None,
    "mmlu_econometrics:cot": "exact_match",
    "mmlu_electrical_engineering:mc": None,
    "mmlu_electrical_engineering": None,
    "mmlu_electrical_engineering:cot": "exact_match",
    "mmlu_elementary_mathematics:mc": None,
    "mmlu_elementary_mathematics": None,
    "mmlu_elementary_mathematics:cot": "exact_match",
    "mmlu_formal_logic:mc": None,
    "mmlu_formal_logic": None,
    "mmlu_formal_logic:cot": "exact_match",
    "mmlu_global_facts:mc": None,
    "mmlu_global_facts": None,
    "mmlu_global_facts:cot": "exact_match",
    "mmlu_high_school_biology:mc": None,
    "mmlu_high_school_biology": None,
    "mmlu_high_school_biology:cot": "exact_match",
    "mmlu_high_school_chemistry:mc": None,
    "mmlu_high_school_chemistry": None,
    "mmlu_high_school_chemistry:cot": "exact_match",
    "mmlu_high_school_computer_science:mc": None,
    "mmlu_high_school_computer_science": None,
    "mmlu_high_school_computer_science:cot": "exact_match",
    "mmlu_high_school_european_history:mc": None,
    "mmlu_high_school_european_history": None,
    "mmlu_high_school_european_history:cot": "exact_match",
    "mmlu_high_school_geography:mc": None,
    "mmlu_high_school_geography": None,
    "mmlu_high_school_geography:cot": "exact_match",
    "mmlu_high_school_government_and_politics:mc": None,
    "mmlu_high_school_government_and_politics": None,
    "mmlu_high_school_government_and_politics:cot": "exact_match",
    "mmlu_high_school_macroeconomics:mc": None,
    "mmlu_high_school_macroeconomics": None,
    "mmlu_high_school_macroeconomics:cot": "exact_match",
    "mmlu_high_school_mathematics:mc": None,
    "mmlu_high_school_mathematics": None,
    "mmlu_high_school_mathematics:cot": "exact_match",
    "mmlu_high_school_microeconomics:mc": None,
    "mmlu_high_school_microeconomics": None,
    "mmlu_high_school_microeconomics:cot": "exact_match",
    "mmlu_high_school_physics:mc": None,
    "mmlu_high_school_physics": None,
    "mmlu_high_school_physics:cot": "exact_match",
    "mmlu_high_school_psychology:mc": None,
    "mmlu_high_school_psychology": None,
    "mmlu_high_school_psychology:cot": "exact_match",
    "mmlu_high_school_statistics:mc": None,
    "mmlu_high_school_statistics": None,
    "mmlu_high_school_statistics:cot": "exact_match",
    "mmlu_high_school_us_history:mc": None,
    "mmlu_high_school_us_history": None,
    "mmlu_high_school_us_history:cot": "exact_match",
    "mmlu_high_school_world_history:mc": None,
    "mmlu_high_school_world_history": None,
    "mmlu_high_school_world_history:cot": "exact_match",
    "mmlu_human_aging:mc": None,
    "mmlu_human_aging": None,
    "mmlu_human_aging:cot": "exact_match",
    "mmlu_human_sexuality:mc": None,
    "mmlu_human_sexuality": None,
    "mmlu_human_sexuality:cot": "exact_match",
    "mmlu_international_law:mc": None,
    "mmlu_international_law": None,
    "mmlu_international_law:cot": "exact_match",
    "mmlu_jurisprudence:mc": None,
    "mmlu_jurisprudence": None,
    "mmlu_jurisprudence:cot": "exact_match",
    "mmlu_logical_fallacies:mc": None,
    "mmlu_logical_fallacies": None,
    "mmlu_logical_fallacies:cot": "exact_match",
    "mmlu_machine_learning:mc": None,
    "mmlu_machine_learning": None,
    "mmlu_machine_learning:cot": "exact_match",
    "mmlu_management:mc": None,
    "mmlu_management": None,
    "mmlu_management:cot": "exact_match",
    "mmlu_marketing:mc": None,
    "mmlu_marketing": None,
    "mmlu_marketing:cot": "exact_match",
    "mmlu_medical_genetics:mc": None,
    "mmlu_medical_genetics": None,
    "mmlu_medical_genetics:cot": "exact_match",
    "mmlu_miscellaneous:mc": None,
    "mmlu_miscellaneous": None,
    "mmlu_miscellaneous:cot": "exact_match",
    "mmlu_moral_disputes:mc": None,
    "mmlu_moral_disputes": None,
    "mmlu_moral_disputes:cot": "exact_match",
    "mmlu_moral_scenarios:mc": None,
    "mmlu_moral_scenarios": None,
    "mmlu_moral_scenarios:cot": "exact_match",
    "mmlu_nutrition:mc": None,
    "mmlu_nutrition": None,
    "mmlu_nutrition:cot": "exact_match",
    "mmlu_philosophy:mc": None,
    "mmlu_philosophy": None,
    "mmlu_philosophy:cot": "exact_match",
    "mmlu_prehistory:mc": None,
    "mmlu_prehistory": None,
    "mmlu_prehistory:cot": "exact_match",
    "mmlu_professional_accounting:mc": None,
    "mmlu_professional_accounting": None,
    "mmlu_professional_accounting:cot": "exact_match",
    "mmlu_professional_law:mc": None,
    "mmlu_professional_law": None,
    "mmlu_professional_law:cot": "exact_match",
    "mmlu_professional_medicine:mc": None,
    "mmlu_professional_medicine": None,
    "mmlu_professional_medicine:cot": "exact_match",
    "mmlu_professional_psychology:mc": None,
    "mmlu_professional_psychology": None,
    "mmlu_professional_psychology:cot": "exact_match",
    "mmlu_public_relations:mc": None,
    "mmlu_public_relations": None,
    "mmlu_public_relations:cot": "exact_match",
    "mmlu_security_studies:mc": None,
    "mmlu_security_studies": None,
    "mmlu_security_studies:cot": "exact_match",
    "mmlu_sociology:mc": None,
    "mmlu_sociology": None,
    "mmlu_sociology:cot": "exact_match",
    "mmlu_us_foreign_policy:mc": None,
    "mmlu_us_foreign_policy": None,
    "mmlu_us_foreign_policy:cot": "exact_match",
    "mmlu_virology:mc": None,
    "mmlu_virology": None,
    "mmlu_virology:cot": "exact_match",
    "mmlu_world_religions:mc": None,
    "mmlu_world_religions": None,
    "mmlu_world_religions:cot": "exact_match",
    "mmlu_pro_math:cot": "exact_match",
    "mmlu_pro_health:cot": "exact_match",
    "mmlu_pro_physics:cot": "exact_match",
    "mmlu_pro_business:cot": "exact_match",
    "mmlu_pro_biology:cot": "exact_match",
    "mmlu_pro_chemistry:cot": "exact_match",
    "mmlu_pro_computer science:cot": "exact_match",
    "mmlu_pro_economics:cot": "exact_match",
    "mmlu_pro_engineering:cot": "exact_match",
    "mmlu_pro_philosophy:cot": "exact_match",
    "mmlu_pro_other:cot": "exact_match",
    "mmlu_pro_history:cot": "exact_match",
    "mmlu_pro_psychology:cot": "exact_match",
    "mmlu_pro_law:cot": "exact_match",
    "mmlu_pro_math": None,
    "mmlu_pro_health": None,
    "mmlu_pro_physics": None,
    "mmlu_pro_business": None,
    "mmlu_pro_biology": None,
    "mmlu_pro_chemistry": None,
    "mmlu_pro_computer science": None,
    "mmlu_pro_economics": None,
    "mmlu_pro_engineering": None,
    "mmlu_pro_philosophy": None,
    "mmlu_pro_other": None,
    "mmlu_pro_history": None,
    "mmlu_pro_psychology": None,
    "mmlu_pro_law": None,
    "mmlu_pro_math:rc": None,
    "mmlu_pro_health:rc": None,
    "mmlu_pro_physics:rc": None,
    "mmlu_pro_business:rc": None,
    "mmlu_pro_biology:rc": None,
    "mmlu_pro_chemistry:rc": None,
    "mmlu_pro_computer science:rc": None,
    "mmlu_pro_economics:rc": None,
    "mmlu_pro_engineering:rc": None,
    "mmlu_pro_philosophy:rc": None,
    "mmlu_pro_other:rc": None,
    "mmlu_pro_history:rc": None,
    "mmlu_pro_psychology:rc": None,
    "mmlu_pro_law:rc": None,
    "mt_eval_refinement_single": "llm_score",
    "mt_eval_refinement_multi": "llm_score",
    "mt_eval_expansion_single": "llm_score",
    "mt_eval_expansion_multi": "llm_score",
    "mt_eval_follow-up_single": "llm_score",
    "mt_eval_follow-up_multi": "llm_score",
    "mt_eval_recollection_single_cls": "llm_score",
    "mt_eval_recollection_multi_cls": "llm_score",
    "mt_eval_recollection_single_global-inst": "llm_score",
    "mt_eval_recollection_multi_global-inst": "llm_score",
    "naturalqs_open": "f1",
    "openbookqa": "acc_uncond",
    "openbookqa:mc": "acc_raw",
    "paloma_4chan_meta_sep": None,
    "paloma_c4_100_domains": None,
    "paloma_c4_en": None,
    "paloma_dolma_100_programing_languages": None,
    "paloma_dolma_100_subreddits": None,
    "paloma_dolma-v1_5": None,
    "paloma_falcon-refinedweb": None,
    "paloma_gab": None,
    "paloma_m2d2_s2orc_unsplit": None,
    "paloma_m2d2_wikipedia_unsplit": None,
    "paloma_manosphere_meta_sep": None,
    "paloma_mc4": None,
    "paloma_ptb": None,
    "paloma_redpajama": None,
    "paloma_twitterAAE_HELM_fixed": None,
    "paloma_wikitext_103": None,
    "llm_compression_arxiv_math": None,
    "llm_compression_cc": None,
    "llm_compression_python": None,
    "piqa": "acc_per_char",
    "piqa:mc": "acc_raw",
    "popqa": "exact_match",
    "sciq": "acc_raw",
    "socialiqa": "acc_per_char",
    "socialiqa:mc": "acc_raw",
    "squad": "f1",
    "squad2": "f1",
    "triviaqa": "f1",
    "truthfulqa": "mc1",
    "tydiqa_english": None,
    "tydiqa_arabic": None,
    "tydiqa_bengali": None,
    "tydiqa_finnish": None,
    "tydiqa_indonesian": None,
    "tydiqa_korean": None,
    "tydiqa_russian": None,
    "tydiqa_swahili": None,
    "tydiqa_telugu": None,
    "winogrande": "acc_raw",
    "winogrande:mc": "acc_raw",
    "zero_scrolls_gov_report": "rougeL_f1",
    "zero_scrolls_summ_screen_fd": "rougeL_f1",
    "zero_scrolls_qmsum": "rougeL_f1",
    "zero_scrolls_qasper": "f1",
    "zero_scrolls_narrative_qa": "f1",
    "zero_scrolls_quality": "exact_match",
    "arc_challenge:para": "acc_per_char",
    "arc_easy:para": "acc_per_char",
    "boolq:para": "acc_raw",
    "csqa:para": "acc_uncond",
    "hellaswag:para": "acc_per_char",
    "openbookqa:para": "acc_uncond",
    "piqa:para": "acc_per_char",
    "socialiqa:para": "acc_per_char",
    "winogrande:para": "acc_raw",
    "mmlu_abstract_algebra:para": "acc_per_char",
    "mmlu_anatomy:para": "acc_per_char",
    "mmlu_astronomy:para": "acc_per_char",
    "mmlu_business_ethics:para": "acc_per_char",
    "mmlu_clinical_knowledge:para": "acc_per_char",
    "mmlu_college_biology:para": "acc_per_char",
    "mmlu_college_chemistry:para": "acc_per_char",
    "mmlu_college_computer_science:para": "acc_per_char",
    "mmlu_college_mathematics:para": "acc_per_char",
    "mmlu_college_medicine:para": "acc_per_char",
    "mmlu_college_physics:para": "acc_per_char",
    "mmlu_computer_security:para": "acc_per_char",
    "mmlu_conceptual_physics:para": "acc_per_char",
    "mmlu_econometrics:para": "acc_per_char",
    "mmlu_electrical_engineering:para": "acc_per_char",
    "mmlu_elementary_mathematics:para": "acc_per_char",
    "mmlu_formal_logic:para": "acc_per_char",
    "mmlu_global_facts:para": "acc_per_char",
    "mmlu_high_school_biology:para": "acc_per_char",
    "mmlu_high_school_chemistry:para": "acc_per_char",
    "mmlu_high_school_computer_science:para": "acc_per_char",
    "mmlu_high_school_european_history:para": "acc_per_char",
    "mmlu_high_school_geography:para": "acc_per_char",
    "mmlu_high_school_government_and_politics:para": "acc_per_char",
    "mmlu_high_school_macroeconomics:para": "acc_per_char",
    "mmlu_high_school_mathematics:para": "acc_per_char",
    "mmlu_high_school_microeconomics:para": "acc_per_char",
    "mmlu_high_school_physics:para": "acc_per_char",
    "mmlu_high_school_psychology:para": "acc_per_char",
    "mmlu_high_school_statistics:para": "acc_per_char",
    "mmlu_high_school_us_history:para": "acc_per_char",
    "mmlu_high_school_world_history:para": "acc_per_char",
    "mmlu_human_aging:para": "acc_per_char",
    "mmlu_human_sexuality:para": "acc_per_char",
    "mmlu_international_law:para": "acc_per_char",
    "mmlu_jurisprudence:para": "acc_per_char",
    "mmlu_logical_fallacies:para": "acc_per_char",
    "mmlu_machine_learning:para": "acc_per_char",
    "mmlu_management:para": "acc_per_char",
    "mmlu_marketing:para": "acc_per_char",
    "mmlu_medical_genetics:para": "acc_per_char",
    "mmlu_miscellaneous:para": "acc_per_char",
    "mmlu_moral_disputes:para": "acc_per_char",
    "mmlu_moral_scenarios:para": "acc_per_char",
    "mmlu_nutrition:para": "acc_per_char",
    "mmlu_philosophy:para": "acc_per_char",
    "mmlu_prehistory:para": "acc_per_char",
    "mmlu_professional_accounting:para": "acc_per_char",
    "mmlu_professional_law:para": "acc_per_char",
    "mmlu_professional_medicine:para": "acc_per_char",
    "mmlu_professional_psychology:para": "acc_per_char",
    "mmlu_public_relations:para": "acc_per_char",
    "mmlu_security_studies:para": "acc_per_char",
    "mmlu_sociology:para": "acc_per_char",
    "mmlu_us_foreign_policy:para": "acc_per_char",
    "mmlu_virology:para": "acc_per_char",
    "mmlu_world_religions:para": "acc_per_char",
    "minerva_math_geometry:perturb_cot": "acc_per_char",
    "gsm8k:perturb_cot": "acc_per_char",
    "minerva_math_intermediate_algebra:perturb_cot": "acc_per_char",
    "minerva_math_number_theory:perturb_cot": "acc_per_char",
    "minerva_math_algebra:perturb_cot": "acc_per_char",
    "minerva_math_prealgebra:perturb_cot": "acc_per_char",
    "minerva_math_counting_and_probability:perturb_cot": "acc_per_char",
    "arc_challenge:enlarge": "acc_uncond",
    "arc_easy:enlarge": "acc_per_char",
    "boolq:enlarge": "acc_raw",
    "csqa:enlarge": "acc_uncond",
    "hellaswag:enlarge": "acc_per_char",
    "openbookqa:enlarge": "acc_uncond",
    "piqa:enlarge": "acc_per_char",
    "socialiqa:enlarge": "acc_per_char",
    "arc_challenge:distractors": "acc_uncond",
    "arc_easy:distractors": "acc_per_char",
    "boolq:distractors": "acc_raw",
    "csqa:distractors": "acc_uncond",
    "hellaswag:distractors": "acc_per_char",
    "openbookqa:distractors": "acc_uncond",
    "piqa:distractors": "acc_per_char",
    "socialiqa:distractors": "acc_per_char",
    "drop:perturb_rc": "acc_per_char",
    "gsm8k:perturb_rc": "acc_per_char",
    "jeopardy:perturb_rc": "acc_per_char",
    "naturalqs:perturb_rc": "acc_per_char",
    "squad:perturb_rc": "acc_per_char",
    "triviaqa:perturb_rc": "acc_per_char",
}
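

# A minimal usage sketch (not part of the original module): resolve the metric
# to report for a task, assuming that a value of None means "no primary metric
# is pinned for this task" and that the caller supplies its own fallback. The
# helper name `get_primary_metric`, the suffix handling, and the default value
# are illustrative assumptions, not OLMES API.
def get_primary_metric(task_name: str, default: str = "acc_raw") -> str:
    """Return the primary metric for `task_name`, falling back to `default`."""
    metric = PRIMARY_METRICS_OLMES.get(task_name)
    if metric is not None:
        return metric
    # Variants are suffixed with ":mc", ":cot", ":rc", etc.; if the variant is
    # unmapped (or mapped to None), try the bare task name before giving up.
    base_task = task_name.split(":", 1)[0]
    return PRIMARY_METRICS_OLMES.get(base_task) or default


# e.g. get_primary_metric("hellaswag:mc")  -> "acc_raw"
#      get_primary_metric("mmlu_anatomy")  -> "acc_raw" (fallback default)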
