{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.5,
        "acc_stderr,none": 0.014611390804670088,
        "acc_norm,none": 0.5110921501706485,
        "acc_norm_stderr,none": 0.01460779491401306
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.7920875420875421,
        "acc_stderr,none": 0.008327124170469845,
        "acc_norm,none": 0.7377946127946128,
        "acc_norm_stderr,none": 0.00902519799172483
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.8299694189602447,
        "acc_stderr,none": 0.006570328814093819
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.47308567096285065,
        "exact_match_stderr,flexible-extract": 0.013752517189717454
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.5817566221868153,
        "acc_stderr,none": 0.004922624636945249,
        "acc_norm,none": 0.7422824138617805,
        "acc_norm_stderr,none": 0.00436483800033562
    },
    "mmlu": {
        "acc,none": 0.5198689645349666,
        "acc_stderr,none": 0.0039479515985297135,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.4828905419766206,
        "acc_stderr,none": 0.006808945318454314,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.3412698412698413,
        "acc_stderr,none": 0.04240799327574925
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.6848484848484848,
        "acc_stderr,none": 0.0362773057502241
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.7696078431372549,
        "acc_stderr,none": 0.029554292605695046
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.729957805907173,
        "acc_stderr,none": 0.028900721906293426
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.6942148760330579,
        "acc_stderr,none": 0.04205953933884123
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.6666666666666666,
        "acc_stderr,none": 0.04557239513497751
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.6319018404907976,
        "acc_stderr,none": 0.03789213935838396
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.5606936416184971,
        "acc_stderr,none": 0.026720034380514995
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.2424581005586592,
        "acc_stderr,none": 0.014333522059217892
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.6045016077170418,
        "acc_stderr,none": 0.027770918531427834
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.6450617283950617,
        "acc_stderr,none": 0.02662415247884585
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.3813559322033898,
        "acc_stderr,none": 0.012405509401888122
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.783625730994152,
        "acc_stderr,none": 0.03158149539338733
    },
    "mmlu_other": {
        "acc,none": 0.6366269713550048,
        "acc_stderr,none": 0.0083120388392631,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.57,
        "acc_stderr,none": 0.049756985195624284
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.6566037735849056,
        "acc_stderr,none": 0.02922452646912479
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.4797687861271676,
        "acc_stderr,none": 0.03809342081273957
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.35,
        "acc_stderr,none": 0.047937248544110196
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.6098654708520179,
        "acc_stderr,none": 0.03273766725459157
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.7669902912621359,
        "acc_stderr,none": 0.04185832598928315
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.7948717948717948,
        "acc_stderr,none": 0.0264535080540403
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.66,
        "acc_stderr,none": 0.04760952285695237
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.7611749680715197,
        "acc_stderr,none": 0.015246803197398682
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.5980392156862745,
        "acc_stderr,none": 0.028074158947600663
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.38652482269503546,
        "acc_stderr,none": 0.02904919034254345
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.6948529411764706,
        "acc_stderr,none": 0.027971541370170598
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.5120481927710844,
        "acc_stderr,none": 0.03891364495835817
    },
    "mmlu_social_sciences": {
        "acc,none": 0.6061098472538187,
        "acc_stderr,none": 0.00855566819324139,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.3684210526315789,
        "acc_stderr,none": 0.04537815354939391
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.6313131313131313,
        "acc_stderr,none": 0.03437305501980619
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.7409326424870466,
        "acc_stderr,none": 0.031618779179354115
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.4461538461538462,
        "acc_stderr,none": 0.02520357177302833
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.4327731092436975,
        "acc_stderr,none": 0.03218358107742613
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.7357798165137615,
        "acc_stderr,none": 0.0189041641715102
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.6870229007633588,
        "acc_stderr,none": 0.04066962905677697
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.5669934640522876,
        "acc_stderr,none": 0.020045442473324227
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.6,
        "acc_stderr,none": 0.0469237132203465
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.6081632653061224,
        "acc_stderr,none": 0.031251275910891656
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.736318407960199,
        "acc_stderr,none": 0.031157150869355554
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.77,
        "acc_stderr,none": 0.04229525846816506
    },
    "mmlu_stem": {
        "acc,none": 0.3758325404376784,
        "acc_stderr,none": 0.008312661454080994,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.24,
        "acc_stderr,none": 0.042923469599092816
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.4962962962962963,
        "acc_stderr,none": 0.04319223625811331
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.4605263157894737,
        "acc_stderr,none": 0.04056242252249035
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.5486111111111112,
        "acc_stderr,none": 0.041614023984032786
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.32,
        "acc_stderr,none": 0.046882617226215034
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.28,
        "acc_stderr,none": 0.04512608598542128
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.23,
        "acc_stderr,none": 0.04229525846816507
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.2647058823529412,
        "acc_stderr,none": 0.0438986995680878
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.7,
        "acc_stderr,none": 0.046056618647183814
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.41702127659574467,
        "acc_stderr,none": 0.03223276266711712
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.38620689655172413,
        "acc_stderr,none": 0.04057324734419035
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.2751322751322751,
        "acc_stderr,none": 0.023000086859068652
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.5870967741935483,
        "acc_stderr,none": 0.028009138125400387
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.3497536945812808,
        "acc_stderr,none": 0.03355400904969565
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.53,
        "acc_stderr,none": 0.050161355804659205
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.23703703703703705,
        "acc_stderr,none": 0.025928876132766097
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.19205298013245034,
        "acc_stderr,none": 0.032162984205936156
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.2916666666666667,
        "acc_stderr,none": 0.03099866630456052
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.4017857142857143,
        "acc_stderr,none": 0.04653333146973646
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.7970620239390642,
        "acc_stderr,none": 0.009383679003767346,
        "acc_norm,none": 0.8079434167573449,
        "acc_norm_stderr,none": 0.00919074029512648
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.4703172978505629,
        "acc_stderr,none": 0.011294116144908547
    }
}