{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.371160409556314,
        "acc_stderr,none": 0.014117971901142813,
        "acc_norm,none": 0.4087030716723549,
        "acc_norm_stderr,none": 0.014365750345427001
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.5117845117845118,
        "acc_stderr,none": 0.010256933475911013,
        "acc_norm,none": 0.47685185185185186,
        "acc_norm_stderr,none": 0.010248782484554473
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.8296636085626912,
        "acc_stderr,none": 0.0065750230780145125
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.15921152388172857,
        "exact_match_stderr,flexible-extract": 0.01007796671755188
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.4420434176458873,
        "acc_stderr,none": 0.00495614704610896,
        "acc_norm,none": 0.5091615216092412,
        "acc_norm_stderr,none": 0.004988943721711223
    },
    "mmlu": {
        "acc,none": 0.45534824099131177,
        "acc_stderr,none": 0.004018641153916311,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.41764080765143463,
        "acc_stderr,none": 0.006882101782469806,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.3253968253968254,
        "acc_stderr,none": 0.04190596438871136
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.5272727272727272,
        "acc_stderr,none": 0.03898531605579418
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.6225490196078431,
        "acc_stderr,none": 0.03402272044340704
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.6329113924050633,
        "acc_stderr,none": 0.03137624072561619
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.5950413223140496,
        "acc_stderr,none": 0.04481137755942469
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.5555555555555556,
        "acc_stderr,none": 0.04803752235190193
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.588957055214724,
        "acc_stderr,none": 0.038656978537853624
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.47398843930635837,
        "acc_stderr,none": 0.026882643434022902
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.24692737430167597,
        "acc_stderr,none": 0.01442229220480886
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.4887459807073955,
        "acc_stderr,none": 0.028390897396863533
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.5246913580246914,
        "acc_stderr,none": 0.027786800931427443
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.32333767926988266,
        "acc_stderr,none": 0.011946565758447216
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.7543859649122807,
        "acc_stderr,none": 0.0330140594698725
    },
    "mmlu_other": {
        "acc,none": 0.5416800772449308,
        "acc_stderr,none": 0.008601148949270152,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.6,
        "acc_stderr,none": 0.049236596391733084
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.5056603773584906,
        "acc_stderr,none": 0.03077090076385131
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.44508670520231214,
        "acc_stderr,none": 0.03789401760283647
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.2,
        "acc_stderr,none": 0.04020151261036846
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.5381165919282511,
        "acc_stderr,none": 0.033460150119732274
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.6504854368932039,
        "acc_stderr,none": 0.047211885060971716
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.7521367521367521,
        "acc_stderr,none": 0.028286324075564424
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.54,
        "acc_stderr,none": 0.05009082659620332
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.6781609195402298,
        "acc_stderr,none": 0.0167063814150579
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.5261437908496732,
        "acc_stderr,none": 0.028590752958852394
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.31560283687943264,
        "acc_stderr,none": 0.027724989449509317
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.4007352941176471,
        "acc_stderr,none": 0.02976826352893311
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.5120481927710844,
        "acc_stderr,none": 0.03891364495835817
    },
    "mmlu_social_sciences": {
        "acc,none": 0.5206369840753982,
        "acc_stderr,none": 0.008838682570330216,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.3684210526315789,
        "acc_stderr,none": 0.04537815354939391
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.5505050505050505,
        "acc_stderr,none": 0.035441324919479704
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.6476683937823834,
        "acc_stderr,none": 0.03447478286414357
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.4128205128205128,
        "acc_stderr,none": 0.024962683564331806
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.3739495798319328,
        "acc_stderr,none": 0.03142946637883708
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.6201834862385321,
        "acc_stderr,none": 0.020808825617866244
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.5190839694656488,
        "acc_stderr,none": 0.043820947055509867
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.45588235294117646,
        "acc_stderr,none": 0.020148939420415738
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.5454545454545454,
        "acc_stderr,none": 0.04769300568972746
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.5183673469387755,
        "acc_stderr,none": 0.031987615467631264
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.6915422885572139,
        "acc_stderr,none": 0.03265819588512697
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.65,
        "acc_stderr,none": 0.0479372485441102
    },
    "mmlu_stem": {
        "acc,none": 0.3628290516967967,
        "acc_stderr,none": 0.008282456413585045,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.25,
        "acc_stderr,none": 0.04351941398892446
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.4148148148148148,
        "acc_stderr,none": 0.042561937679014075
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.506578947368421,
        "acc_stderr,none": 0.040685900502249704
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.5416666666666666,
        "acc_stderr,none": 0.04166666666666665
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.34,
        "acc_stderr,none": 0.04760952285695235
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.35,
        "acc_stderr,none": 0.0479372485441102
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.2,
        "acc_stderr,none": 0.04020151261036846
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.29411764705882354,
        "acc_stderr,none": 0.04533838195929774
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.66,
        "acc_stderr,none": 0.04760952285695238
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.37872340425531914,
        "acc_stderr,none": 0.03170995606040655
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.41379310344827586,
        "acc_stderr,none": 0.04104269211806232
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.25132275132275134,
        "acc_stderr,none": 0.022340482339643898
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.5741935483870968,
        "acc_stderr,none": 0.028129112709165904
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.28078817733990147,
        "acc_stderr,none": 0.03161856335358609
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.42,
        "acc_stderr,none": 0.049604496374885836
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.21851851851851853,
        "acc_stderr,none": 0.025195752251823793
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.23841059602649006,
        "acc_stderr,none": 0.0347918557259966
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.2962962962962963,
        "acc_stderr,none": 0.03114144782353604
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.38392857142857145,
        "acc_stderr,none": 0.04616143075028547
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.7127312295973884,
        "acc_stderr,none": 0.010557291761528635,
        "acc_norm,none": 0.719804134929271,
        "acc_norm_stderr,none": 0.010478122015577076
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.3654042988741044,
        "acc_stderr,none": 0.010896430858185684
    }
}