{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.4462457337883959,
        "acc_stderr,none": 0.014526705548539976,
        "acc_norm,none": 0.46075085324232085,
        "acc_norm_stderr,none": 0.014566303676636586
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.6658249158249159,
        "acc_stderr,none": 0.009679106032919065,
        "acc_norm,none": 0.5959595959595959,
        "acc_norm_stderr,none": 0.010069061649549542
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.8305810397553517,
        "acc_stderr,none": 0.006560917129050386
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.2001516300227445,
        "exact_match_stderr,flexible-extract": 0.011021119022510194
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.4502091216889066,
        "acc_stderr,none": 0.004964979120927572,
        "acc_norm,none": 0.5631348336984664,
        "acc_norm_stderr,none": 0.0049498429673314375
    },
    "mmlu": {
        "acc,none": 0.5719270759151118,
        "acc_stderr,none": 0.003959525651515846,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.5251859723698193,
        "acc_stderr,none": 0.006867617566687876,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.42857142857142855,
        "acc_stderr,none": 0.04426266681379909
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.696969696969697,
        "acc_stderr,none": 0.035886248000917075
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.7843137254901961,
        "acc_stderr,none": 0.028867431449849313
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.7552742616033755,
        "acc_stderr,none": 0.027985699387036413
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.7355371900826446,
        "acc_stderr,none": 0.04026187527591207
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.7314814814814815,
        "acc_stderr,none": 0.042844679680521934
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.6809815950920245,
        "acc_stderr,none": 0.03661997551073836
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.6329479768786127,
        "acc_stderr,none": 0.025950054337654085
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.329608938547486,
        "acc_stderr,none": 0.015721531075183866
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.639871382636656,
        "acc_stderr,none": 0.027264297599804012
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.6481481481481481,
        "acc_stderr,none": 0.02657148348071997
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.40352020860495436,
        "acc_stderr,none": 0.012530241301193176
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.8304093567251462,
        "acc_stderr,none": 0.02878210810540171
    },
    "mmlu_other": {
        "acc,none": 0.6475700032185387,
        "acc_stderr,none": 0.008283243821881678,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.58,
        "acc_stderr,none": 0.049604496374885836
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.660377358490566,
        "acc_stderr,none": 0.02914690474779833
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.5433526011560693,
        "acc_stderr,none": 0.03798106566014498
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.34,
        "acc_stderr,none": 0.04760952285695236
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.6053811659192825,
        "acc_stderr,none": 0.03280400504755291
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.8058252427184466,
        "acc_stderr,none": 0.03916667762822584
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.8034188034188035,
        "acc_stderr,none": 0.02603538609895129
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.69,
        "acc_stderr,none": 0.04648231987117316
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.7662835249042146,
        "acc_stderr,none": 0.015133383278988825
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.6339869281045751,
        "acc_stderr,none": 0.02758281141515961
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.42907801418439717,
        "acc_stderr,none": 0.029525914302558562
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.6617647058823529,
        "acc_stderr,none": 0.028739328513983572
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.4879518072289157,
        "acc_stderr,none": 0.03891364495835821
    },
    "mmlu_social_sciences": {
        "acc,none": 0.6620084497887553,
        "acc_stderr,none": 0.00830992086329635,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.37719298245614036,
        "acc_stderr,none": 0.04559522141958216
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.702020202020202,
        "acc_stderr,none": 0.03258630383836556
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.8238341968911918,
        "acc_stderr,none": 0.027493504244548047
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.5974358974358974,
        "acc_stderr,none": 0.024864995159767752
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.5630252100840336,
        "acc_stderr,none": 0.03221943636566196
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.7651376146788991,
        "acc_stderr,none": 0.018175110510343585
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.6946564885496184,
        "acc_stderr,none": 0.04039314978724561
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.565359477124183,
        "acc_stderr,none": 0.02005426920072646
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.6181818181818182,
        "acc_stderr,none": 0.046534298079135075
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.6653061224489796,
        "acc_stderr,none": 0.030209235226242307
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.8258706467661692,
        "acc_stderr,none": 0.026814951200421603
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.78,
        "acc_stderr,none": 0.04163331998932263
    },
    "mmlu_stem": {
        "acc,none": 0.47922613384078655,
        "acc_stderr,none": 0.008575959467904662,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.28,
        "acc_stderr,none": 0.04512608598542128
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.5333333333333333,
        "acc_stderr,none": 0.043097329010363554
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.631578947368421,
        "acc_stderr,none": 0.039255233810529325
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.6458333333333334,
        "acc_stderr,none": 0.039994111357535424
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.46,
        "acc_stderr,none": 0.05009082659620333
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.55,
        "acc_stderr,none": 0.049999999999999996
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.36,
        "acc_stderr,none": 0.04824181513244218
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.35294117647058826,
        "acc_stderr,none": 0.04755129616062946
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.67,
        "acc_stderr,none": 0.04725815626252605
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.5106382978723404,
        "acc_stderr,none": 0.03267862331014063
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.5172413793103449,
        "acc_stderr,none": 0.04164188720169375
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.37566137566137564,
        "acc_stderr,none": 0.024942368931159788
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.7161290322580646,
        "acc_stderr,none": 0.025649381063029254
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.4187192118226601,
        "acc_stderr,none": 0.03471192860518468
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.58,
        "acc_stderr,none": 0.049604496374885836
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.26666666666666666,
        "acc_stderr,none": 0.026962424325073817
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.31125827814569534,
        "acc_stderr,none": 0.03780445850526733
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.5277777777777778,
        "acc_stderr,none": 0.03404705328653881
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.41964285714285715,
        "acc_stderr,none": 0.046840993210771065
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.7132752992383025,
        "acc_stderr,none": 0.01055131450310805,
        "acc_norm,none": 0.7165397170837867,
        "acc_norm_stderr,none": 0.010515057791152053
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.42681678607983625,
        "acc_stderr,none": 0.011192223024107382
    }
}