{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.378839590443686,
        "acc_stderr,none": 0.014175915490000326,
        "acc_norm,none": 0.3873720136518771,
        "acc_norm_stderr,none": 0.01423587248790987
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.5446127946127947,
        "acc_stderr,none": 0.010218861787618718,
        "acc_norm,none": 0.47053872053872053,
        "acc_norm_stderr,none": 0.010241957728409684
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.82782874617737,
        "acc_stderr,none": 0.006603027596591711
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.22820318423047764,
        "exact_match_stderr,flexible-extract": 0.011559914877317385
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.441346345349532,
        "acc_stderr,none": 0.0049553302773042655,
        "acc_norm,none": 0.5497908783110934,
        "acc_norm_stderr,none": 0.004964979120927566
    },
    "mmlu": {
        "acc,none": 0.5452214784218772,
        "acc_stderr,none": 0.003962683762880744,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.48841657810839534,
        "acc_stderr,none": 0.0067944626282623885,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.373015873015873,
        "acc_stderr,none": 0.04325506042017086
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.696969696969697,
        "acc_stderr,none": 0.035886248000917075
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.75,
        "acc_stderr,none": 0.03039153369274154
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.7257383966244726,
        "acc_stderr,none": 0.029041333510598052
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.6859504132231405,
        "acc_stderr,none": 0.042369647530410184
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.6666666666666666,
        "acc_stderr,none": 0.04557239513497751
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.6809815950920245,
        "acc_stderr,none": 0.03661997551073836
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.6213872832369942,
        "acc_stderr,none": 0.02611374936131034
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.23798882681564246,
        "acc_stderr,none": 0.014242630070574885
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.6302250803858521,
        "acc_stderr,none": 0.027417996705630995
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.5895061728395061,
        "acc_stderr,none": 0.027371350925124768
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.3852672750977836,
        "acc_stderr,none": 0.01242948543495522
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.8128654970760234,
        "acc_stderr,none": 0.029913127232368025
    },
    "mmlu_other": {
        "acc,none": 0.6305117476665594,
        "acc_stderr,none": 0.008325714878420168,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.59,
        "acc_stderr,none": 0.04943110704237102
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.6037735849056604,
        "acc_stderr,none": 0.030102793781791194
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.5317919075144508,
        "acc_stderr,none": 0.038047497443647646
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.32,
        "acc_stderr,none": 0.046882617226215034
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.5919282511210763,
        "acc_stderr,none": 0.03298574607842821
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.7864077669902912,
        "acc_stderr,none": 0.040580420156460364
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.8076923076923077,
        "acc_stderr,none": 0.025819233256483737
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.64,
        "acc_stderr,none": 0.04824181513244218
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.7713920817369093,
        "acc_stderr,none": 0.015016884698539873
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.6078431372549019,
        "acc_stderr,none": 0.027956046165424516
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.38652482269503546,
        "acc_stderr,none": 0.029049190342543454
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.6213235294117647,
        "acc_stderr,none": 0.02946513363977613
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.4939759036144578,
        "acc_stderr,none": 0.03892212195333047
    },
    "mmlu_social_sciences": {
        "acc,none": 0.6389340266493337,
        "acc_stderr,none": 0.008418678723407671,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.3508771929824561,
        "acc_stderr,none": 0.04489539350270698
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.6818181818181818,
        "acc_stderr,none": 0.03318477333845331
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.8341968911917098,
        "acc_stderr,none": 0.026839845022314415
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.5461538461538461,
        "acc_stderr,none": 0.025242770987126188
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.5378151260504201,
        "acc_stderr,none": 0.032385469487589795
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.7688073394495413,
        "acc_stderr,none": 0.018075750241633156
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.6259541984732825,
        "acc_stderr,none": 0.04243869242230523
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.5522875816993464,
        "acc_stderr,none": 0.02011692534742242
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.6090909090909091,
        "acc_stderr,none": 0.04673752333670237
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.6285714285714286,
        "acc_stderr,none": 0.03093285879278986
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.7711442786069652,
        "acc_stderr,none": 0.029705284056772436
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.74,
        "acc_stderr,none": 0.044084400227680794
    },
    "mmlu_stem": {
        "acc,none": 0.45448778940691403,
        "acc_stderr,none": 0.008593861885012502,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.23,
        "acc_stderr,none": 0.04229525846816506
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.5333333333333333,
        "acc_stderr,none": 0.043097329010363554
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.5657894736842105,
        "acc_stderr,none": 0.040335656678483184
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.6319444444444444,
        "acc_stderr,none": 0.04032999053960719
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.41,
        "acc_stderr,none": 0.04943110704237101
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.5,
        "acc_stderr,none": 0.050251890762960605
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.31,
        "acc_stderr,none": 0.04648231987117316
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.4019607843137255,
        "acc_stderr,none": 0.04878608714466996
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.68,
        "acc_stderr,none": 0.046882617226215034
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.4808510638297872,
        "acc_stderr,none": 0.03266204299064678
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.5310344827586206,
        "acc_stderr,none": 0.04158632762097828
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.36507936507936506,
        "acc_stderr,none": 0.02479606060269994
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.6645161290322581,
        "acc_stderr,none": 0.02686020644472436
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.3842364532019704,
        "acc_stderr,none": 0.034223985656575515
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.52,
        "acc_stderr,none": 0.050211673156867795
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.3111111111111111,
        "acc_stderr,none": 0.02822644674968352
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.23841059602649006,
        "acc_stderr,none": 0.03479185572599661
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.4444444444444444,
        "acc_stderr,none": 0.03388857118502325
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.44642857142857145,
        "acc_stderr,none": 0.04718471485219588
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.690424374319913,
        "acc_stderr,none": 0.010786656752183345,
        "acc_norm,none": 0.691512513601741,
        "acc_norm_stderr,none": 0.010776164678037155
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.40890481064483114,
        "acc_stderr,none": 0.011124710055682836
    }
}