{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.3984641638225256,
        "acc_stderr,none": 0.014306946052735562,
        "acc_norm,none": 0.4206484641638225,
        "acc_norm_stderr,none": 0.014426211252508406
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.5816498316498316,
        "acc_stderr,none": 0.010122061470742858,
        "acc_norm,none": 0.5176767676767676,
        "acc_norm_stderr,none": 0.010253369805698968
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.8226299694189603,
        "acc_stderr,none": 0.006680899270344288
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.18423047763457165,
        "exact_match_stderr,flexible-extract": 0.010678414428555006
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.45947022505477,
        "acc_stderr,none": 0.0049733613391696515,
        "acc_norm,none": 0.572495518820952,
        "acc_norm_stderr,none": 0.004937054233711571
    },
    "mmlu": {
        "acc,none": 0.5223614869676684,
        "acc_stderr,none": 0.004004020751591052,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.47927736450584485,
        "acc_stderr,none": 0.0068769329376966535,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.3888888888888889,
        "acc_stderr,none": 0.04360314860077459
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.6424242424242425,
        "acc_stderr,none": 0.03742597043806587
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.7009803921568627,
        "acc_stderr,none": 0.03213325717373616
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.6962025316455697,
        "acc_stderr,none": 0.029936696387138625
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.6446280991735537,
        "acc_stderr,none": 0.04369236326573981
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.6296296296296297,
        "acc_stderr,none": 0.04668408033024931
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.6319018404907976,
        "acc_stderr,none": 0.03789213935838396
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.5982658959537572,
        "acc_stderr,none": 0.026394104177643634
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.2446927374301676,
        "acc_stderr,none": 0.014378169884098409
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.6045016077170418,
        "acc_stderr,none": 0.027770918531427838
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.6018518518518519,
        "acc_stderr,none": 0.027237415094592474
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.3891786179921773,
        "acc_stderr,none": 0.012452613934286993
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.8011695906432749,
        "acc_stderr,none": 0.030611116557432528
    },
    "mmlu_other": {
        "acc,none": 0.5980045059542968,
        "acc_stderr,none": 0.008481597628846253,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.56,
        "acc_stderr,none": 0.04988876515698589
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.569811320754717,
        "acc_stderr,none": 0.030471445867183238
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.5317919075144508,
        "acc_stderr,none": 0.03804749744364764
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.33,
        "acc_stderr,none": 0.04725815626252604
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.5919282511210763,
        "acc_stderr,none": 0.03298574607842821
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.6796116504854369,
        "acc_stderr,none": 0.04620284082280041
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.7948717948717948,
        "acc_stderr,none": 0.026453508054040356
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.61,
        "acc_stderr,none": 0.04902071300001975
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.7432950191570882,
        "acc_stderr,none": 0.015620480263064515
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.5882352941176471,
        "acc_stderr,none": 0.028180596328259283
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.36879432624113473,
        "acc_stderr,none": 0.02878222756134724
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.48161764705882354,
        "acc_stderr,none": 0.03035230339535196
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.4819277108433735,
        "acc_stderr,none": 0.03889951252827216
    },
    "mmlu_social_sciences": {
        "acc,none": 0.6061098472538187,
        "acc_stderr,none": 0.008582645988896444,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.30701754385964913,
        "acc_stderr,none": 0.043391383225798594
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.6767676767676768,
        "acc_stderr,none": 0.03332299921070645
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.7616580310880829,
        "acc_stderr,none": 0.030748905363909874
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.5230769230769231,
        "acc_stderr,none": 0.025323990861736242
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.5126050420168067,
        "acc_stderr,none": 0.03246816765752174
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.7302752293577982,
        "acc_stderr,none": 0.01902848671111544
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.6412213740458015,
        "acc_stderr,none": 0.04206739313864909
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.5212418300653595,
        "acc_stderr,none": 0.020209572388600248
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.5363636363636364,
        "acc_stderr,none": 0.047764491623961985
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.5755102040816327,
        "acc_stderr,none": 0.031642094879429414
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.7611940298507462,
        "acc_stderr,none": 0.030147775935409217
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.69,
        "acc_stderr,none": 0.04648231987117316
    },
    "mmlu_stem": {
        "acc,none": 0.430383761496987,
        "acc_stderr,none": 0.008524721282109524,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.25,
        "acc_stderr,none": 0.04351941398892446
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.5111111111111111,
        "acc_stderr,none": 0.043182754919779756
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.5592105263157895,
        "acc_stderr,none": 0.04040311062490438
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.6111111111111112,
        "acc_stderr,none": 0.04076663253918567
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.36,
        "acc_stderr,none": 0.048241815132442176
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.45,
        "acc_stderr,none": 0.05
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.3,
        "acc_stderr,none": 0.046056618647183814
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.39215686274509803,
        "acc_stderr,none": 0.04858083574266344
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.58,
        "acc_stderr,none": 0.049604496374885836
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.4765957446808511,
        "acc_stderr,none": 0.032650194750335815
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.5103448275862069,
        "acc_stderr,none": 0.04165774775728763
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.3333333333333333,
        "acc_stderr,none": 0.024278568024307706
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.6709677419354839,
        "acc_stderr,none": 0.026729499068349958
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.35467980295566504,
        "acc_stderr,none": 0.033661244890514495
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.55,
        "acc_stderr,none": 0.05
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.23703703703703705,
        "acc_stderr,none": 0.025928876132766097
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.31125827814569534,
        "acc_stderr,none": 0.03780445850526733
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.3611111111111111,
        "acc_stderr,none": 0.032757734861009996
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.4017857142857143,
        "acc_stderr,none": 0.04653333146973646
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.7094668117519043,
        "acc_stderr,none": 0.010592765034696536,
        "acc_norm,none": 0.7149075081610446,
        "acc_norm_stderr,none": 0.01053327058873893
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.4104401228249744,
        "acc_stderr,none": 0.011131091241082379
    }
}