{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.4069965870307167,
        "acc_stderr,none": 0.014356399418009135,
        "acc_norm,none": 0.4232081911262799,
        "acc_norm_stderr,none": 0.014438036220848023
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.563973063973064,
        "acc_stderr,none": 0.010175459582759736,
        "acc_norm,none": 0.47853535353535354,
        "acc_norm_stderr,none": 0.010250325159456649
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.8379204892966361,
        "acc_stderr,none": 0.006445520637182678
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.25246398786959817,
        "exact_match_stderr,flexible-extract": 0.011966250044833981
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.5722963553077076,
        "acc_stderr,none": 0.004937345081868102,
        "acc_norm,none": 0.6312487552280422,
        "acc_norm_stderr,none": 0.0048148030984368215
    },
    "mmlu": {
        "acc,none": 0.23280159521435692,
        "acc_stderr,none": 0.0035590419410117596,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.24739638682252924,
        "acc_stderr,none": 0.006286095083246151,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.2857142857142857,
        "acc_stderr,none": 0.040406101782088394
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.23030303030303031,
        "acc_stderr,none": 0.032876667586034886
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.29411764705882354,
        "acc_stderr,none": 0.0319800166011507
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.29535864978902954,
        "acc_stderr,none": 0.029696338713422882
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.2396694214876033,
        "acc_stderr,none": 0.03896878985070417
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.25925925925925924,
        "acc_stderr,none": 0.04236511258094632
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.22085889570552147,
        "acc_stderr,none": 0.032591773927421776
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.24566473988439305,
        "acc_stderr,none": 0.023176298203992012
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.23798882681564246,
        "acc_stderr,none": 0.014242630070574885
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.1864951768488746,
        "acc_stderr,none": 0.022122439772480764
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.22530864197530864,
        "acc_stderr,none": 0.02324620264781975
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.24967405475880053,
        "acc_stderr,none": 0.011054538377832327
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.3216374269005848,
        "acc_stderr,none": 0.03582529442573122
    },
    "mmlu_other": {
        "acc,none": 0.24364338590280013,
        "acc_stderr,none": 0.007678654143566438,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.32,
        "acc_stderr,none": 0.046882617226215034
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.21509433962264152,
        "acc_stderr,none": 0.025288394502891366
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.20809248554913296,
        "acc_stderr,none": 0.03095289021774988
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.18,
        "acc_stderr,none": 0.03861229196653694
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.31390134529147984,
        "acc_stderr,none": 0.031146796482972465
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.1941747572815534,
        "acc_stderr,none": 0.039166677628225836
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.3162393162393162,
        "acc_stderr,none": 0.03046365674734025
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.3,
        "acc_stderr,none": 0.046056618647183814
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.2413793103448276,
        "acc_stderr,none": 0.015302380123542103
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.21895424836601307,
        "acc_stderr,none": 0.02367908986180772
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.2375886524822695,
        "acc_stderr,none": 0.025389512552729903
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.18382352941176472,
        "acc_stderr,none": 0.023529242185193106
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.28313253012048195,
        "acc_stderr,none": 0.03507295431370519
    },
    "mmlu_social_sciences": {
        "acc,none": 0.2203444913877153,
        "acc_stderr,none": 0.007467844458382516,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.23684210526315788,
        "acc_stderr,none": 0.03999423879281335
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.17676767676767677,
        "acc_stderr,none": 0.027178752639044915
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.19689119170984457,
        "acc_stderr,none": 0.028697873971860677
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.20512820512820512,
        "acc_stderr,none": 0.02047323317355198
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.21008403361344538,
        "acc_stderr,none": 0.026461398717471874
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.1944954128440367,
        "acc_stderr,none": 0.01697028909045805
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.26717557251908397,
        "acc_stderr,none": 0.038808483010823944
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.2549019607843137,
        "acc_stderr,none": 0.017630827375148383
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.23636363636363636,
        "acc_stderr,none": 0.040693063197213754
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.19591836734693877,
        "acc_stderr,none": 0.025409301953225678
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.24378109452736318,
        "acc_stderr,none": 0.03036049015401466
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.28,
        "acc_stderr,none": 0.045126085985421276
    },
    "mmlu_stem": {
        "acc,none": 0.21249603552172533,
        "acc_stderr,none": 0.007270821068575025,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.22,
        "acc_stderr,none": 0.04163331998932268
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.18518518518518517,
        "acc_stderr,none": 0.0335567721631314
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.17763157894736842,
        "acc_stderr,none": 0.031103182383123398
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.2569444444444444,
        "acc_stderr,none": 0.03653946969442099
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.2,
        "acc_stderr,none": 0.04020151261036846
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.26,
        "acc_stderr,none": 0.0440844002276808
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.21,
        "acc_stderr,none": 0.040936018074033256
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.21568627450980393,
        "acc_stderr,none": 0.04092563958237655
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.28,
        "acc_stderr,none": 0.04512608598542128
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.26382978723404255,
        "acc_stderr,none": 0.02880998985410297
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.2413793103448276,
        "acc_stderr,none": 0.03565998174135302
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.20899470899470898,
        "acc_stderr,none": 0.02094048156533485
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.18064516129032257,
        "acc_stderr,none": 0.021886178567172548
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.15270935960591134,
        "acc_stderr,none": 0.025308904539380627
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.25,
        "acc_stderr,none": 0.04351941398892446
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.2111111111111111,
        "acc_stderr,none": 0.024882116857655113
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.1986754966887417,
        "acc_stderr,none": 0.032578473844367774
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.14814814814814814,
        "acc_stderr,none": 0.024227629273728363
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.3125,
        "acc_stderr,none": 0.043994650575715215
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.7263329706202394,
        "acc_stderr,none": 0.010402184206229216,
        "acc_norm,none": 0.7241566920565833,
        "acc_norm_stderr,none": 0.010427805502729115
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.3955987717502559,
        "acc_stderr,none": 0.011064683986236569
    }
}