{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.4138225255972696,
        "acc_stderr,none": 0.014392730009221007,
        "acc_norm,none": 0.4274744027303754,
        "acc_norm_stderr,none": 0.01445686294465065
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.6203703703703703,
        "acc_stderr,none": 0.009958037725468567,
        "acc_norm,none": 0.5349326599326599,
        "acc_norm_stderr,none": 0.010234713052723667
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.8021406727828746,
        "acc_stderr,none": 0.006967806357528037
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.2759666413949962,
        "exact_match_stderr,flexible-extract": 0.012312603010427348
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.460565624377614,
        "acc_stderr,none": 0.004974238284524827,
        "acc_norm,none": 0.574586735710018,
        "acc_norm_stderr,none": 0.0049339509533808884
    },
    "mmlu": {
        "acc,none": 0.5256373735935052,
        "acc_stderr,none": 0.0039859802173403,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.48331562167906483,
        "acc_stderr,none": 0.006816853724945194,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.3968253968253968,
        "acc_stderr,none": 0.04375888492727061
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.7333333333333333,
        "acc_stderr,none": 0.03453131801885415
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.7549019607843137,
        "acc_stderr,none": 0.03019028245350194
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.7426160337552743,
        "acc_stderr,none": 0.028458820991460278
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.6859504132231405,
        "acc_stderr,none": 0.04236964753041019
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.6296296296296297,
        "acc_stderr,none": 0.04668408033024931
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.6687116564417178,
        "acc_stderr,none": 0.03697983910025588
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.5606936416184971,
        "acc_stderr,none": 0.026720034380514998
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.23798882681564246,
        "acc_stderr,none": 0.014242630070574885
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.5755627009646302,
        "acc_stderr,none": 0.028071928247946205
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.5925925925925926,
        "acc_stderr,none": 0.027339546640662727
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.3891786179921773,
        "acc_stderr,none": 0.012452613934286988
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.8070175438596491,
        "acc_stderr,none": 0.030267457554898458
    },
    "mmlu_other": {
        "acc,none": 0.6031541680077245,
        "acc_stderr,none": 0.008433663108034384,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.56,
        "acc_stderr,none": 0.04988876515698589
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.5584905660377358,
        "acc_stderr,none": 0.030561590426731837
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.4913294797687861,
        "acc_stderr,none": 0.038118909889404126
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.27,
        "acc_stderr,none": 0.0446196043338474
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.5515695067264574,
        "acc_stderr,none": 0.033378837362550984
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.7669902912621359,
        "acc_stderr,none": 0.04185832598928315
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.7564102564102564,
        "acc_stderr,none": 0.028120966503914387
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.63,
        "acc_stderr,none": 0.048523658709391
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.7535121328224776,
        "acc_stderr,none": 0.015411308769686938
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.5620915032679739,
        "acc_stderr,none": 0.02840830202033269
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.35815602836879434,
        "acc_stderr,none": 0.028602085862759415
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.625,
        "acc_stderr,none": 0.029408372932278746
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.5,
        "acc_stderr,none": 0.03892494720807614
    },
    "mmlu_social_sciences": {
        "acc,none": 0.6093597660058498,
        "acc_stderr,none": 0.008556635003233346,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.3157894736842105,
        "acc_stderr,none": 0.04372748290278008
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.6818181818181818,
        "acc_stderr,none": 0.033184773338453315
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.7668393782383419,
        "acc_stderr,none": 0.03051611137147601
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.517948717948718,
        "acc_stderr,none": 0.025334667080954915
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.49159663865546216,
        "acc_stderr,none": 0.03247390276569669
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.7467889908256881,
        "acc_stderr,none": 0.01864407304137505
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.6106870229007634,
        "acc_stderr,none": 0.042764865428145914
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.5228758169934641,
        "acc_stderr,none": 0.020206653187884786
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.5363636363636364,
        "acc_stderr,none": 0.04776449162396197
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.6204081632653061,
        "acc_stderr,none": 0.03106721126287247
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.736318407960199,
        "acc_stderr,none": 0.031157150869355575
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.71,
        "acc_stderr,none": 0.045604802157206845
    },
    "mmlu_stem": {
        "acc,none": 0.43070091975895974,
        "acc_stderr,none": 0.008535721309610944,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.24,
        "acc_stderr,none": 0.042923469599092816
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.4962962962962963,
        "acc_stderr,none": 0.04319223625811331
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.5460526315789473,
        "acc_stderr,none": 0.04051646342874142
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.625,
        "acc_stderr,none": 0.04048439222695598
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.38,
        "acc_stderr,none": 0.04878317312145632
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.47,
        "acc_stderr,none": 0.05016135580465919
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.3,
        "acc_stderr,none": 0.046056618647183814
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.3627450980392157,
        "acc_stderr,none": 0.04784060704105654
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.65,
        "acc_stderr,none": 0.047937248544110196
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.46808510638297873,
        "acc_stderr,none": 0.03261936918467382
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.45517241379310347,
        "acc_stderr,none": 0.04149886942192118
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.30423280423280424,
        "acc_stderr,none": 0.02369541500946309
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.6645161290322581,
        "acc_stderr,none": 0.026860206444724352
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.3694581280788177,
        "acc_stderr,none": 0.03395970381998574
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.56,
        "acc_stderr,none": 0.049888765156985884
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.2962962962962963,
        "acc_stderr,none": 0.027840811495871937
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.31125827814569534,
        "acc_stderr,none": 0.03780445850526733
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.3611111111111111,
        "acc_stderr,none": 0.032757734861009996
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.39285714285714285,
        "acc_stderr,none": 0.04635550135609976
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.7121871599564744,
        "acc_stderr,none": 0.01056325038305919,
        "acc_norm,none": 0.7094668117519043,
        "acc_norm_stderr,none": 0.010592765034696536
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.4247697031729785,
        "acc_stderr,none": 0.01118527125767134
    }
}