{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.36945392491467577,
        "acc_stderr,none": 0.014104578366491899,
        "acc_norm,none": 0.4069965870307167,
        "acc_norm_stderr,none": 0.01435639941800913
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.5122053872053872,
        "acc_stderr,none": 0.01025672623512901,
        "acc_norm,none": 0.4612794612794613,
        "acc_norm_stderr,none": 0.010228972678389627
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.8122324159021407,
        "acc_stderr,none": 0.006830350780854062
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.2441243366186505,
        "exact_match_stderr,flexible-extract": 0.011832404674077592
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.39912368054172476,
        "acc_stderr,none": 0.004887174080003028,
        "acc_norm,none": 0.46883091017725553,
        "acc_norm_stderr,none": 0.004980076707392432
    },
    "mmlu": {
        "acc,none": 0.508545791197835,
        "acc_stderr,none": 0.0039971160532239,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.4501594048884166,
        "acc_stderr,none": 0.006870542444369273,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.30158730158730157,
        "acc_stderr,none": 0.041049472699033945
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.6181818181818182,
        "acc_stderr,none": 0.03793713171165634
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.6715686274509803,
        "acc_stderr,none": 0.03296245110172228
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.6540084388185654,
        "acc_stderr,none": 0.03096481058878671
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.6776859504132231,
        "acc_stderr,none": 0.042664163633521685
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.6388888888888888,
        "acc_stderr,none": 0.04643454608906275
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.656441717791411,
        "acc_stderr,none": 0.03731133519673893
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.5404624277456648,
        "acc_stderr,none": 0.026830805998952233
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.2435754189944134,
        "acc_stderr,none": 0.014355911964767867
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.5627009646302251,
        "acc_stderr,none": 0.028173917761762906
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.5524691358024691,
        "acc_stderr,none": 0.0276671385694227
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.35267275097783574,
        "acc_stderr,none": 0.012203286846053889
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.7485380116959064,
        "acc_stderr,none": 0.033275044238468436
    },
    "mmlu_other": {
        "acc,none": 0.5980045059542968,
        "acc_stderr,none": 0.008455866508522238,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.6,
        "acc_stderr,none": 0.049236596391733084
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.6,
        "acc_stderr,none": 0.03015113445777629
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.5260115606936416,
        "acc_stderr,none": 0.038073017265045125
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.25,
        "acc_stderr,none": 0.04351941398892446
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.5695067264573991,
        "acc_stderr,none": 0.033231973029429394
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.7184466019417476,
        "acc_stderr,none": 0.04453254836326466
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.7692307692307693,
        "acc_stderr,none": 0.027601921381417618
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.6,
        "acc_stderr,none": 0.04923659639173309
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.7420178799489144,
        "acc_stderr,none": 0.01564583018834895
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.5980392156862745,
        "acc_stderr,none": 0.028074158947600666
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.3475177304964539,
        "acc_stderr,none": 0.02840662780959095
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.49264705882352944,
        "acc_stderr,none": 0.030369552523902173
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.5180722891566265,
        "acc_stderr,none": 0.03889951252827216
    },
    "mmlu_social_sciences": {
        "acc,none": 0.6087097822554436,
        "acc_stderr,none": 0.00858201178676087,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.3508771929824561,
        "acc_stderr,none": 0.044895393502706986
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.6212121212121212,
        "acc_stderr,none": 0.03456088731993747
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.7616580310880829,
        "acc_stderr,none": 0.03074890536390988
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.5076923076923077,
        "acc_stderr,none": 0.025348006031534778
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.48739495798319327,
        "acc_stderr,none": 0.032468167657521745
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.7394495412844037,
        "acc_stderr,none": 0.018819182034850068
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.6106870229007634,
        "acc_stderr,none": 0.04276486542814592
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.5326797385620915,
        "acc_stderr,none": 0.020184583359102202
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.6363636363636364,
        "acc_stderr,none": 0.04607582090719976
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.5959183673469388,
        "acc_stderr,none": 0.03141470802586589
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.7611940298507462,
        "acc_stderr,none": 0.030147775935409217
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.71,
        "acc_stderr,none": 0.045604802157206845
    },
    "mmlu_stem": {
        "acc,none": 0.4097684744687599,
        "acc_stderr,none": 0.00849727054108984,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.25,
        "acc_stderr,none": 0.04351941398892446
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.4888888888888889,
        "acc_stderr,none": 0.04318275491977978
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.5328947368421053,
        "acc_stderr,none": 0.04060127035236397
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.6111111111111112,
        "acc_stderr,none": 0.04076663253918567
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.37,
        "acc_stderr,none": 0.04852365870939099
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.39,
        "acc_stderr,none": 0.04902071300001975
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.31,
        "acc_stderr,none": 0.04648231987117316
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.3333333333333333,
        "acc_stderr,none": 0.04690650298201942
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.63,
        "acc_stderr,none": 0.048523658709391
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.4595744680851064,
        "acc_stderr,none": 0.03257901482099835
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.46206896551724136,
        "acc_stderr,none": 0.04154659671707548
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.291005291005291,
        "acc_stderr,none": 0.023393826500484875
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.6258064516129033,
        "acc_stderr,none": 0.0275289042998457
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.3399014778325123,
        "acc_stderr,none": 0.0333276906841079
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.45,
        "acc_stderr,none": 0.04999999999999999
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.2518518518518518,
        "acc_stderr,none": 0.026466117538959912
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.304635761589404,
        "acc_stderr,none": 0.03757949922943343
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.3611111111111111,
        "acc_stderr,none": 0.03275773486100999
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.38392857142857145,
        "acc_stderr,none": 0.04616143075028547
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.6730141458106638,
        "acc_stderr,none": 0.010945157126978218,
        "acc_norm,none": 0.6664853101196954,
        "acc_norm_stderr,none": 0.011000139592184557
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.38024564994882293,
        "acc_stderr,none": 0.010984765684099734
    }
}