{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.49146757679180886,
        "acc_stderr,none": 0.014609263165632182,
        "acc_norm,none": 0.4991467576791809,
        "acc_norm_stderr,none": 0.014611369529813283
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.7887205387205387,
        "acc_stderr,none": 0.008376419295817054,
        "acc_norm,none": 0.7276936026936027,
        "acc_norm_stderr,none": 0.009134218447652668
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.8308868501529052,
        "acc_stderr,none": 0.006556199674684506
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.4579226686884003,
        "exact_match_stderr,flexible-extract": 0.013723629649844084
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.5817566221868153,
        "acc_stderr,none": 0.004922624636945249,
        "acc_norm,none": 0.7486556462856004,
        "acc_norm_stderr,none": 0.0043289955103126086
    },
    "mmlu": {
        "acc,none": 0.5118216778236718,
        "acc_stderr,none": 0.0039645346008681855,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.4964930924548353,
        "acc_stderr,none": 0.0068366070073220915,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.35714285714285715,
        "acc_stderr,none": 0.04285714285714281
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.7272727272727273,
        "acc_stderr,none": 0.03477691162163659
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.7549019607843137,
        "acc_stderr,none": 0.030190282453501943
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.7341772151898734,
        "acc_stderr,none": 0.028756799629658332
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.7272727272727273,
        "acc_stderr,none": 0.04065578140908705
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.6296296296296297,
        "acc_stderr,none": 0.0466840803302493
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.6503067484662577,
        "acc_stderr,none": 0.03746668325470021
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.5664739884393064,
        "acc_stderr,none": 0.026680134761679217
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.2435754189944134,
        "acc_stderr,none": 0.014355911964767864
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.6141479099678456,
        "acc_stderr,none": 0.027648149599751464
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.6358024691358025,
        "acc_stderr,none": 0.026774929899722324
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.41460234680573665,
        "acc_stderr,none": 0.012582597058908284
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.783625730994152,
        "acc_stderr,none": 0.03158149539338733
    },
    "mmlu_other": {
        "acc,none": 0.6089475378178307,
        "acc_stderr,none": 0.00843646628086344,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.58,
        "acc_stderr,none": 0.049604496374885836
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.5886792452830188,
        "acc_stderr,none": 0.030285009259009798
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.4624277456647399,
        "acc_stderr,none": 0.0380168510452446
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.25,
        "acc_stderr,none": 0.04351941398892446
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.5650224215246636,
        "acc_stderr,none": 0.03327283370271344
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.7281553398058253,
        "acc_stderr,none": 0.044052680241409216
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.7777777777777778,
        "acc_stderr,none": 0.02723601394619671
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.63,
        "acc_stderr,none": 0.04852365870939099
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.7394636015325671,
        "acc_stderr,none": 0.015696008563807075
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.565359477124183,
        "acc_stderr,none": 0.028384256704883037
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.3829787234042553,
        "acc_stderr,none": 0.028999080904806167
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.6617647058823529,
        "acc_stderr,none": 0.028739328513983576
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.5240963855421686,
        "acc_stderr,none": 0.03887971849597264
    },
    "mmlu_social_sciences": {
        "acc,none": 0.5846603834904127,
        "acc_stderr,none": 0.008584988103844145,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.32456140350877194,
        "acc_stderr,none": 0.04404556157374767
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.5606060606060606,
        "acc_stderr,none": 0.035360859475294805
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.7357512953367875,
        "acc_stderr,none": 0.03182155050916646
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.4256410256410256,
        "acc_stderr,none": 0.025069094387296535
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.3697478991596639,
        "acc_stderr,none": 0.031357095996135904
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.726605504587156,
        "acc_stderr,none": 0.01910929984609829
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.6641221374045801,
        "acc_stderr,none": 0.041423137719966634
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.5571895424836601,
        "acc_stderr,none": 0.020095083154577347
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.5909090909090909,
        "acc_stderr,none": 0.047093069786618966
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.5918367346938775,
        "acc_stderr,none": 0.03146465712827424
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.7313432835820896,
        "acc_stderr,none": 0.03134328358208954
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.74,
        "acc_stderr,none": 0.0440844002276808
    },
    "mmlu_stem": {
        "acc,none": 0.3679035838883603,
        "acc_stderr,none": 0.008267870704906205,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.22,
        "acc_stderr,none": 0.04163331998932268
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.4666666666666667,
        "acc_stderr,none": 0.043097329010363554
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.5197368421052632,
        "acc_stderr,none": 0.04065771002562605
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.5763888888888888,
        "acc_stderr,none": 0.04132125019723369
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.26,
        "acc_stderr,none": 0.04408440022768078
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.34,
        "acc_stderr,none": 0.04760952285695236
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.22,
        "acc_stderr,none": 0.04163331998932269
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.2549019607843137,
        "acc_stderr,none": 0.04336432707993176
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.64,
        "acc_stderr,none": 0.04824181513244218
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.40425531914893614,
        "acc_stderr,none": 0.03208115750788684
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.4,
        "acc_stderr,none": 0.04082482904638628
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.2671957671957672,
        "acc_stderr,none": 0.022789673145776578
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.5967741935483871,
        "acc_stderr,none": 0.027906150826041143
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.29064039408866993,
        "acc_stderr,none": 0.03194740072265541
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.44,
        "acc_stderr,none": 0.04988876515698589
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.21851851851851853,
        "acc_stderr,none": 0.025195752251823793
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.24503311258278146,
        "acc_stderr,none": 0.035118075718047245
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.2777777777777778,
        "acc_stderr,none": 0.030546745264953195
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.38392857142857145,
        "acc_stderr,none": 0.04616143075028547
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.8035908596300326,
        "acc_stderr,none": 0.00926923223767993,
        "acc_norm,none": 0.8122959738846572,
        "acc_norm_stderr,none": 0.009110440292132567
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.4600818833162743,
        "acc_stderr,none": 0.011277955967920392
    }
}