{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.39590443686006827,
        "acc_stderr,none": 0.014291228393536585,
        "acc_norm,none": 0.4206484641638225,
        "acc_norm_stderr,none": 0.014426211252508406
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.5681818181818182,
        "acc_stderr,none": 0.010163945352271726,
        "acc_norm,none": 0.4797979797979798,
        "acc_norm_stderr,none": 0.010251405621305368
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.8308868501529052,
        "acc_stderr,none": 0.006556199674684506
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.21910538286580744,
        "exact_match_stderr,flexible-extract": 0.01139370663497807
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.5407289384584744,
        "acc_stderr,none": 0.004973199296339956,
        "acc_norm,none": 0.6251742680740888,
        "acc_norm_stderr,none": 0.0048308857043800795
    },
    "mmlu": {
        "acc,none": 0.25395242842899873,
        "acc_stderr,none": 0.0036548601685296223,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.26546227417640805,
        "acc_stderr,none": 0.006423003417084352,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.2857142857142857,
        "acc_stderr,none": 0.040406101782088394
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.2909090909090909,
        "acc_stderr,none": 0.035465630196243374
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.31862745098039214,
        "acc_stderr,none": 0.0327028718148208
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.3037974683544304,
        "acc_stderr,none": 0.029936696387138598
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.36363636363636365,
        "acc_stderr,none": 0.043913262867240704
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.3055555555555556,
        "acc_stderr,none": 0.044531975073749834
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.2331288343558282,
        "acc_stderr,none": 0.033220157957767414
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.26878612716763006,
        "acc_stderr,none": 0.023868003262500118
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.23798882681564246,
        "acc_stderr,none": 0.014242630070574885
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.18971061093247588,
        "acc_stderr,none": 0.02226819625878321
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.2623456790123457,
        "acc_stderr,none": 0.02447722285613511
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.26140808344198174,
        "acc_stderr,none": 0.01122252816977131
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.36257309941520466,
        "acc_stderr,none": 0.0368713061556206
    },
    "mmlu_other": {
        "acc,none": 0.2787254586417766,
        "acc_stderr,none": 0.0079773624121133,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.37,
        "acc_stderr,none": 0.04852365870939099
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.24528301886792453,
        "acc_stderr,none": 0.02648035717989568
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.2254335260115607,
        "acc_stderr,none": 0.031862098516411454
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.18,
        "acc_stderr,none": 0.03861229196653694
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.3452914798206278,
        "acc_stderr,none": 0.031911001928357954
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.32038834951456313,
        "acc_stderr,none": 0.0462028408228004
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.4358974358974359,
        "acc_stderr,none": 0.03248577511578401
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.33,
        "acc_stderr,none": 0.04725815626252605
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.25925925925925924,
        "acc_stderr,none": 0.015671006009339582
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.2875816993464052,
        "acc_stderr,none": 0.025917806117147158
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.25177304964539005,
        "acc_stderr,none": 0.0258921511567094
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.18382352941176472,
        "acc_stderr,none": 0.023529242185193106
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.30120481927710846,
        "acc_stderr,none": 0.03571609230053481
    },
    "mmlu_social_sciences": {
        "acc,none": 0.24991875203119923,
        "acc_stderr,none": 0.007782709768764748,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.23684210526315788,
        "acc_stderr,none": 0.03999423879281335
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.1919191919191919,
        "acc_stderr,none": 0.028057791672989017
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.26424870466321243,
        "acc_stderr,none": 0.0318215505091665
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.20512820512820512,
        "acc_stderr,none": 0.02047323317355198
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.21428571428571427,
        "acc_stderr,none": 0.026653531596715484
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.22201834862385322,
        "acc_stderr,none": 0.01781884956479663
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.29770992366412213,
        "acc_stderr,none": 0.04010358942462203
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.27450980392156865,
        "acc_stderr,none": 0.018054027458815194
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.2727272727272727,
        "acc_stderr,none": 0.04265792110940589
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.24897959183673468,
        "acc_stderr,none": 0.02768297952296023
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.3333333333333333,
        "acc_stderr,none": 0.03333333333333334
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.36,
        "acc_stderr,none": 0.048241815132442176
    },
    "mmlu_stem": {
        "acc,none": 0.21630193466539804,
        "acc_stderr,none": 0.007319970506466498,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.22,
        "acc_stderr,none": 0.04163331998932268
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.2,
        "acc_stderr,none": 0.03455473702325435
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.20394736842105263,
        "acc_stderr,none": 0.0327900040631005
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.2569444444444444,
        "acc_stderr,none": 0.03653946969442099
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.2,
        "acc_stderr,none": 0.04020151261036846
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.26,
        "acc_stderr,none": 0.0440844002276808
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.21,
        "acc_stderr,none": 0.040936018074033256
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.21568627450980393,
        "acc_stderr,none": 0.04092563958237655
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.28,
        "acc_stderr,none": 0.04512608598542128
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.2680851063829787,
        "acc_stderr,none": 0.028957342788342347
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.2482758620689655,
        "acc_stderr,none": 0.03600105692727771
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.20899470899470898,
        "acc_stderr,none": 0.02094048156533485
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.1967741935483871,
        "acc_stderr,none": 0.022616409420742025
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.15270935960591134,
        "acc_stderr,none": 0.025308904539380627
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.25,
        "acc_stderr,none": 0.04351941398892446
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.2111111111111111,
        "acc_stderr,none": 0.024882116857655113
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.1986754966887417,
        "acc_stderr,none": 0.032578473844367774
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.14351851851851852,
        "acc_stderr,none": 0.02391077925264438
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.3125,
        "acc_stderr,none": 0.043994650575715215
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.7323177366702938,
        "acc_stderr,none": 0.010330111189370444,
        "acc_norm,none": 0.735582154515778,
        "acc_norm_stderr,none": 0.010289787244767173
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.4068577277379734,
        "acc_stderr,none": 0.011116027212644342
    }
}