{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.42662116040955633,
        "acc_stderr,none": 0.014453185592920293,
        "acc_norm,none": 0.44795221843003413,
        "acc_norm_stderr,none": 0.014532011498211666
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.6414141414141414,
        "acc_stderr,none": 0.009840882301225299,
        "acc_norm,none": 0.5307239057239057,
        "acc_norm_stderr,none": 0.010240395584815239
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.8201834862385321,
        "acc_stderr,none": 0.006716806494844566
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.20318423047763456,
        "exact_match_stderr,flexible-extract": 0.011083227665267797
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.5500896235809599,
        "acc_stderr,none": 0.004964679845918434,
        "acc_norm,none": 0.6586337382991436,
        "acc_norm_stderr,none": 0.004731989816563666
    },
    "mmlu": {
        "acc,none": 0.25267055974932345,
        "acc_stderr,none": 0.0036479405398565598,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.27035069075451645,
        "acc_stderr,none": 0.006449531881714995,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.2857142857142857,
        "acc_stderr,none": 0.040406101782088394
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.3090909090909091,
        "acc_stderr,none": 0.036085410115739666
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.39215686274509803,
        "acc_stderr,none": 0.034267123492472726
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.35864978902953587,
        "acc_stderr,none": 0.03121956944530184
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.33884297520661155,
        "acc_stderr,none": 0.043207678075366684
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.26851851851851855,
        "acc_stderr,none": 0.04284467968052191
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.22085889570552147,
        "acc_stderr,none": 0.032591773927421776
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.2774566473988439,
        "acc_stderr,none": 0.024105712607754307
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.23798882681564246,
        "acc_stderr,none": 0.014242630070574885
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.19614147909967847,
        "acc_stderr,none": 0.022552447780478036
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.23148148148148148,
        "acc_stderr,none": 0.023468429832451163
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.2692307692307692,
        "acc_stderr,none": 0.011328734403140316
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.32748538011695905,
        "acc_stderr,none": 0.035993357714560276
    },
    "mmlu_other": {
        "acc,none": 0.26778242677824265,
        "acc_stderr,none": 0.007897216709559994,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.34,
        "acc_stderr,none": 0.047609522856952365
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.2339622641509434,
        "acc_stderr,none": 0.026055296901152915
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.21965317919075145,
        "acc_stderr,none": 0.031568093627031744
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.19,
        "acc_stderr,none": 0.03942772444036624
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.33183856502242154,
        "acc_stderr,none": 0.0316029514377668
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.27184466019417475,
        "acc_stderr,none": 0.044052680241409216
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.39316239316239315,
        "acc_stderr,none": 0.03199957924651047
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.31,
        "acc_stderr,none": 0.04648231987117316
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.24265644955300128,
        "acc_stderr,none": 0.015329888940899873
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.3006535947712418,
        "acc_stderr,none": 0.02625605383571896
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.25177304964539005,
        "acc_stderr,none": 0.0258921511567094
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.18382352941176472,
        "acc_stderr,none": 0.023529242185193106
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.3072289156626506,
        "acc_stderr,none": 0.03591566797824662
    },
    "mmlu_social_sciences": {
        "acc,none": 0.2495937601559961,
        "acc_stderr,none": 0.007772528354547808,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.23684210526315788,
        "acc_stderr,none": 0.03999423879281335
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.17676767676767677,
        "acc_stderr,none": 0.027178752639044915
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.2694300518134715,
        "acc_stderr,none": 0.03201867122877794
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.20256410256410257,
        "acc_stderr,none": 0.020377660970371386
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.21428571428571427,
        "acc_stderr,none": 0.026653531596715484
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.21284403669724772,
        "acc_stderr,none": 0.017549376389313694
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.29770992366412213,
        "acc_stderr,none": 0.04010358942462203
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.2826797385620915,
        "acc_stderr,none": 0.018217269552053442
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.2545454545454545,
        "acc_stderr,none": 0.04172343038705383
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.2693877551020408,
        "acc_stderr,none": 0.02840125202902294
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.3482587064676617,
        "acc_stderr,none": 0.033687874661154596
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.32,
        "acc_stderr,none": 0.04688261722621504
    },
    "mmlu_stem": {
        "acc,none": 0.2143989850935617,
        "acc_stderr,none": 0.007294085637992533,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.22,
        "acc_stderr,none": 0.04163331998932268
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.1925925925925926,
        "acc_stderr,none": 0.03406542058502653
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.18421052631578946,
        "acc_stderr,none": 0.0315469804508223
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.2638888888888889,
        "acc_stderr,none": 0.03685651095897532
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.2,
        "acc_stderr,none": 0.04020151261036846
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.26,
        "acc_stderr,none": 0.0440844002276808
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.21,
        "acc_stderr,none": 0.040936018074033256
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.21568627450980393,
        "acc_stderr,none": 0.04092563958237655
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.29,
        "acc_stderr,none": 0.045604802157206845
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.26382978723404255,
        "acc_stderr,none": 0.02880998985410297
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.2413793103448276,
        "acc_stderr,none": 0.03565998174135302
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.20899470899470898,
        "acc_stderr,none": 0.02094048156533485
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.19032258064516128,
        "acc_stderr,none": 0.022331707611823078
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.15270935960591134,
        "acc_stderr,none": 0.025308904539380627
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.25,
        "acc_stderr,none": 0.04351941398892446
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.2111111111111111,
        "acc_stderr,none": 0.024882116857655113
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.1986754966887417,
        "acc_stderr,none": 0.032578473844367774
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.14351851851851852,
        "acc_stderr,none": 0.02391077925264438
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.3125,
        "acc_stderr,none": 0.043994650575715215
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.764417845484222,
        "acc_stderr,none": 0.009901067586473909,
        "acc_norm,none": 0.7606093579978237,
        "acc_norm_stderr,none": 0.009955884250291694
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.44114636642784033,
        "acc_stderr,none": 0.0112354189473446
    }
}