{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.4539249146757679,
        "acc_stderr,none": 0.014549221105171858,
        "acc_norm,none": 0.4616040955631399,
        "acc_norm_stderr,none": 0.014568245550296354
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.7121212121212122,
        "acc_stderr,none": 0.009290733161670152,
        "acc_norm,none": 0.5816498316498316,
        "acc_norm_stderr,none": 0.010122061470742861
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.82782874617737,
        "acc_stderr,none": 0.006603027596591704
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.2137983320697498,
        "exact_match_stderr,flexible-extract": 0.011293054698635051
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.551682931686915,
        "acc_stderr,none": 0.004963053161193596,
        "acc_norm,none": 0.6687910774746066,
        "acc_norm_stderr,none": 0.004696861625496953
    },
    "mmlu": {
        "acc,none": 0.3102834354080615,
        "acc_stderr,none": 0.0038206484292908067,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.35005313496280555,
        "acc_stderr,none": 0.006755637801149142,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.2857142857142857,
        "acc_stderr,none": 0.040406101782088394
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.5515151515151515,
        "acc_stderr,none": 0.038835659779569286
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.6176470588235294,
        "acc_stderr,none": 0.03410785338904719
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.6160337552742616,
        "acc_stderr,none": 0.031658678064106674
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.5867768595041323,
        "acc_stderr,none": 0.04495087843548408
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.37962962962962965,
        "acc_stderr,none": 0.04691521224077742
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.3006134969325153,
        "acc_stderr,none": 0.03602511318806771
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.33236994219653176,
        "acc_stderr,none": 0.025361168749688218
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.23798882681564246,
        "acc_stderr,none": 0.014242630070574885
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.2347266881028939,
        "acc_stderr,none": 0.024071805887677045
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.2808641975308642,
        "acc_stderr,none": 0.025006469755799208
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.35071707953063885,
        "acc_stderr,none": 0.01218777337074152
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.3333333333333333,
        "acc_stderr,none": 0.03615507630310935
    },
    "mmlu_other": {
        "acc,none": 0.32249758609591245,
        "acc_stderr,none": 0.008313481665106093,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.4,
        "acc_stderr,none": 0.049236596391733084
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.3132075471698113,
        "acc_stderr,none": 0.028544793319055326
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.24277456647398843,
        "acc_stderr,none": 0.0326926380614177
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.19,
        "acc_stderr,none": 0.03942772444036624
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.4484304932735426,
        "acc_stderr,none": 0.03337883736255098
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.3106796116504854,
        "acc_stderr,none": 0.04582124160161551
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.44017094017094016,
        "acc_stderr,none": 0.032520741720630506
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.4,
        "acc_stderr,none": 0.049236596391733084
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.2796934865900383,
        "acc_stderr,none": 0.016050792148036532
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.369281045751634,
        "acc_stderr,none": 0.02763417668960267
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.30141843971631205,
        "acc_stderr,none": 0.02737412888263115
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.2536764705882353,
        "acc_stderr,none": 0.02643132987078953
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.3433734939759036,
        "acc_stderr,none": 0.03696584317010601
    },
    "mmlu_social_sciences": {
        "acc,none": 0.325316867078323,
        "acc_stderr,none": 0.008314965468054249,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.2543859649122807,
        "acc_stderr,none": 0.040969851398436716
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.2474747474747475,
        "acc_stderr,none": 0.03074630074212451
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.39896373056994816,
        "acc_stderr,none": 0.03533999094065695
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.22564102564102564,
        "acc_stderr,none": 0.021193632525148522
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.226890756302521,
        "acc_stderr,none": 0.027205371538279483
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.26055045871559634,
        "acc_stderr,none": 0.018819182034850068
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.37404580152671757,
        "acc_stderr,none": 0.042438692422305246
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.369281045751634,
        "acc_stderr,none": 0.019524316744866346
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.32727272727272727,
        "acc_stderr,none": 0.04494290866252089
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.4448979591836735,
        "acc_stderr,none": 0.031814251181977865
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.5024875621890548,
        "acc_stderr,none": 0.03535490150137289
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.41,
        "acc_stderr,none": 0.04943110704237101
    },
    "mmlu_stem": {
        "acc,none": 0.22423089121471615,
        "acc_stderr,none": 0.0074104073964514384,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.22,
        "acc_stderr,none": 0.04163331998932268
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.2,
        "acc_stderr,none": 0.03455473702325435
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.23026315789473684,
        "acc_stderr,none": 0.034260594244031654
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.2708333333333333,
        "acc_stderr,none": 0.03716177437566017
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.2,
        "acc_stderr,none": 0.04020151261036846
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.26,
        "acc_stderr,none": 0.0440844002276808
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.21,
        "acc_stderr,none": 0.040936018074033256
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.21568627450980393,
        "acc_stderr,none": 0.04092563958237655
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.31,
        "acc_stderr,none": 0.04648231987117316
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.26382978723404255,
        "acc_stderr,none": 0.02880998985410297
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.2482758620689655,
        "acc_stderr,none": 0.03600105692727771
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.20899470899470898,
        "acc_stderr,none": 0.02094048156533485
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.21935483870967742,
        "acc_stderr,none": 0.023540799358723295
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.15270935960591134,
        "acc_stderr,none": 0.025308904539380627
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.32,
        "acc_stderr,none": 0.04688261722621504
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.2111111111111111,
        "acc_stderr,none": 0.024882116857655113
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.1986754966887417,
        "acc_stderr,none": 0.032578473844367774
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.1574074074074074,
        "acc_stderr,none": 0.02483717351824239
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.3125,
        "acc_stderr,none": 0.043994650575715215
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.7823721436343852,
        "acc_stderr,none": 0.009627407474840876,
        "acc_norm,none": 0.7769314472252449,
        "acc_norm_stderr,none": 0.009713057213018518
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.4595701125895599,
        "acc_stderr,none": 0.011277022486079952
    }
}