{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.4709897610921502,
        "acc_stderr,none": 0.014586776355294326,
        "acc_norm,none": 0.46245733788395904,
        "acc_norm_stderr,none": 0.014570144495075578
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.702020202020202,
        "acc_stderr,none": 0.009385046066694868,
        "acc_norm,none": 0.601010101010101,
        "acc_norm_stderr,none": 0.010048240683798764
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.8354740061162079,
        "acc_stderr,none": 0.00648449668227979
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.33510235026535257,
        "exact_match_stderr,flexible-extract": 0.013001948176422954
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.5321649073889664,
        "acc_stderr,none": 0.0049794460388247605,
        "acc_norm,none": 0.6968731328420633,
        "acc_norm_stderr,none": 0.004586702716014093
    },
    "mmlu": {
        "acc,none": 0.5684375445093292,
        "acc_stderr,none": 0.003924009102708228,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.5185972369819342,
        "acc_stderr,none": 0.006755079642434817,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.42857142857142855,
        "acc_stderr,none": 0.04426266681379909
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.7454545454545455,
        "acc_stderr,none": 0.0340150671524904
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.7941176470588235,
        "acc_stderr,none": 0.028379449451588667
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.7805907172995781,
        "acc_stderr,none": 0.026939106581553945
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.71900826446281,
        "acc_stderr,none": 0.04103203830514512
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.7037037037037037,
        "acc_stderr,none": 0.044143436668549335
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.7239263803680982,
        "acc_stderr,none": 0.035123852837050475
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.6069364161849711,
        "acc_stderr,none": 0.026296227915613667
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.23798882681564246,
        "acc_stderr,none": 0.014242630070574885
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.6559485530546624,
        "acc_stderr,none": 0.026981478043648043
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.6419753086419753,
        "acc_stderr,none": 0.02667561192603709
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.43089960886571055,
        "acc_stderr,none": 0.01264769588954724
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.8128654970760234,
        "acc_stderr,none": 0.029913127232368032
    },
    "mmlu_other": {
        "acc,none": 0.6536852269069843,
        "acc_stderr,none": 0.00823277606212246,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.67,
        "acc_stderr,none": 0.04725815626252607
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.6150943396226415,
        "acc_stderr,none": 0.02994649856769995
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.5549132947976878,
        "acc_stderr,none": 0.03789401760283647
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.32,
        "acc_stderr,none": 0.046882617226215034
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.6188340807174888,
        "acc_stderr,none": 0.032596251184168284
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.7864077669902912,
        "acc_stderr,none": 0.04058042015646035
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.811965811965812,
        "acc_stderr,none": 0.025598193686652247
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.67,
        "acc_stderr,none": 0.04725815626252607
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.7803320561941252,
        "acc_stderr,none": 0.014805384478371162
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.630718954248366,
        "acc_stderr,none": 0.02763417668960266
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.42907801418439717,
        "acc_stderr,none": 0.029525914302558562
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.6875,
        "acc_stderr,none": 0.02815637344037142
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.5120481927710844,
        "acc_stderr,none": 0.03891364495835817
    },
    "mmlu_social_sciences": {
        "acc,none": 0.6616834579135522,
        "acc_stderr,none": 0.00826183808250275,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.35964912280701755,
        "acc_stderr,none": 0.04514496132873633
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.7171717171717171,
        "acc_stderr,none": 0.03208779558786752
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.844559585492228,
        "acc_stderr,none": 0.026148483469153324
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.5538461538461539,
        "acc_stderr,none": 0.025203571773028323
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.5630252100840336,
        "acc_stderr,none": 0.032219436365661956
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.7834862385321101,
        "acc_stderr,none": 0.017658710594443138
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.6946564885496184,
        "acc_stderr,none": 0.04039314978724561
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.5735294117647058,
        "acc_stderr,none": 0.02000791273935936
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.6090909090909091,
        "acc_stderr,none": 0.04673752333670237
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.6448979591836734,
        "acc_stderr,none": 0.030635655150387638
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.8308457711442786,
        "acc_stderr,none": 0.026508590656233268
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.79,
        "acc_stderr,none": 0.04093601807403326
    },
    "mmlu_stem": {
        "acc,none": 0.4678084364097685,
        "acc_stderr,none": 0.008542774611730431,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.23,
        "acc_stderr,none": 0.04229525846816506
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.5407407407407407,
        "acc_stderr,none": 0.04304979692464242
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.631578947368421,
        "acc_stderr,none": 0.039255233810529325
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.6388888888888888,
        "acc_stderr,none": 0.040166600304512336
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.36,
        "acc_stderr,none": 0.048241815132442176
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.51,
        "acc_stderr,none": 0.05024183937956912
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.3,
        "acc_stderr,none": 0.046056618647183814
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.35294117647058826,
        "acc_stderr,none": 0.047551296160629475
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.72,
        "acc_stderr,none": 0.04512608598542127
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.4808510638297872,
        "acc_stderr,none": 0.032662042990646796
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.5724137931034483,
        "acc_stderr,none": 0.04122737111370331
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.37037037037037035,
        "acc_stderr,none": 0.024870815251057093
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.7225806451612903,
        "acc_stderr,none": 0.025470196835900055
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.41379310344827586,
        "acc_stderr,none": 0.03465304488406796
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.56,
        "acc_stderr,none": 0.04988876515698589
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.32222222222222224,
        "acc_stderr,none": 0.028493465091028597
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.32450331125827814,
        "acc_stderr,none": 0.03822746937658754
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.3888888888888889,
        "acc_stderr,none": 0.03324708911809117
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.4107142857142857,
        "acc_stderr,none": 0.04669510663875191
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.7600652883569097,
        "acc_stderr,none": 0.009963625892809545,
        "acc_norm,none": 0.7589771490750816,
        "acc_norm_stderr,none": 0.009979042717267314
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.45445240532241554,
        "acc_stderr,none": 0.011267028794184966
    }
}