{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.5034129692832765,
        "acc_stderr,none": 0.014611050403244084,
        "acc_norm,none": 0.5162116040955631,
        "acc_norm_stderr,none": 0.014603708567414952
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.7941919191919192,
        "acc_stderr,none": 0.008295873791918654,
        "acc_norm,none": 0.7352693602693603,
        "acc_norm_stderr,none": 0.009053021086173963
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.8333333333333334,
        "acc_stderr,none": 0.006518179131399636
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.45943896891584535,
        "exact_match_stderr,flexible-extract": 0.013727093010429781
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.5828520215096594,
        "acc_stderr,none": 0.004920800313232736,
        "acc_norm,none": 0.7479585739892451,
        "acc_norm_stderr,none": 0.004332975695428876
    },
    "mmlu": {
        "acc,none": 0.49821962683378435,
        "acc_stderr,none": 0.0039754845896016735,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.4773645058448459,
        "acc_stderr,none": 0.006896134877055831,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.3253968253968254,
        "acc_stderr,none": 0.041905964388711366
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.703030303030303,
        "acc_stderr,none": 0.03567969772268049
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.75,
        "acc_stderr,none": 0.03039153369274154
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.7130801687763713,
        "acc_stderr,none": 0.029443773022594693
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.7024793388429752,
        "acc_stderr,none": 0.04173349148083499
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.6388888888888888,
        "acc_stderr,none": 0.04643454608906276
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.5705521472392638,
        "acc_stderr,none": 0.03889066619112722
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.5289017341040463,
        "acc_stderr,none": 0.02687408588351835
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.2659217877094972,
        "acc_stderr,none": 0.01477676506643889
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.5562700964630225,
        "acc_stderr,none": 0.02821768355665231
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.6049382716049383,
        "acc_stderr,none": 0.027201117666925654
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.39048239895697523,
        "acc_stderr,none": 0.012460135913945075
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.7660818713450293,
        "acc_stderr,none": 0.032467217651178264
    },
    "mmlu_other": {
        "acc,none": 0.6070164145477953,
        "acc_stderr,none": 0.008425302405961932,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.59,
        "acc_stderr,none": 0.04943110704237102
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.5735849056603773,
        "acc_stderr,none": 0.03043779434298305
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.42196531791907516,
        "acc_stderr,none": 0.0376574669386515
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.24,
        "acc_stderr,none": 0.042923469599092816
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.5739910313901345,
        "acc_stderr,none": 0.0331883328621728
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.7669902912621359,
        "acc_stderr,none": 0.04185832598928315
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.7863247863247863,
        "acc_stderr,none": 0.026853450377009185
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.63,
        "acc_stderr,none": 0.04852365870939099
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.7266922094508301,
        "acc_stderr,none": 0.015936681062628556
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.5882352941176471,
        "acc_stderr,none": 0.02818059632825928
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.37943262411347517,
        "acc_stderr,none": 0.028947338851614105
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.6727941176470589,
        "acc_stderr,none": 0.028501452860396573
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.5120481927710844,
        "acc_stderr,none": 0.03891364495835817
    },
    "mmlu_social_sciences": {
        "acc,none": 0.5710107247318817,
        "acc_stderr,none": 0.00860317009698212,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.2982456140350877,
        "acc_stderr,none": 0.04303684033537315
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.5606060606060606,
        "acc_stderr,none": 0.0353608594752948
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.7357512953367875,
        "acc_stderr,none": 0.03182155050916646
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.382051282051282,
        "acc_stderr,none": 0.024635549163908234
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.3697478991596639,
        "acc_stderr,none": 0.031357095996135904
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.7009174311926606,
        "acc_stderr,none": 0.019630417285415175
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.6717557251908397,
        "acc_stderr,none": 0.04118438565806298
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.545751633986928,
        "acc_stderr,none": 0.0201429745537952
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.5818181818181818,
        "acc_stderr,none": 0.04724577405731572
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.6,
        "acc_stderr,none": 0.031362502409358936
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.7064676616915423,
        "acc_stderr,none": 0.03220024104534205
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.76,
        "acc_stderr,none": 0.042923469599092816
    },
    "mmlu_stem": {
        "acc,none": 0.3510941960038059,
        "acc_stderr,none": 0.008254943213306509,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.22,
        "acc_stderr,none": 0.04163331998932268
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.45185185185185184,
        "acc_stderr,none": 0.04299268905480864
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.42105263157894735,
        "acc_stderr,none": 0.040179012759817494
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.5347222222222222,
        "acc_stderr,none": 0.04171115858181618
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.29,
        "acc_stderr,none": 0.045604802157206845
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.34,
        "acc_stderr,none": 0.04760952285695236
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.22,
        "acc_stderr,none": 0.04163331998932269
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.24509803921568626,
        "acc_stderr,none": 0.04280105837364396
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.61,
        "acc_stderr,none": 0.04902071300001975
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.3829787234042553,
        "acc_stderr,none": 0.03177821250236922
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.4068965517241379,
        "acc_stderr,none": 0.04093793981266237
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.2671957671957672,
        "acc_stderr,none": 0.022789673145776578
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.532258064516129,
        "acc_stderr,none": 0.028384747788813332
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.2857142857142857,
        "acc_stderr,none": 0.0317852971064275
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.48,
        "acc_stderr,none": 0.05021167315686779
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.21851851851851853,
        "acc_stderr,none": 0.025195752251823793
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.2119205298013245,
        "acc_stderr,none": 0.03336767086567977
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.25,
        "acc_stderr,none": 0.029531221160930918
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.4107142857142857,
        "acc_stderr,none": 0.04669510663875191
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.8101196953210011,
        "acc_stderr,none": 0.009150819250948718,
        "acc_norm,none": 0.8166485310119695,
        "acc_norm_stderr,none": 0.009028283984689401
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.477482088024565,
        "acc_stderr,none": 0.01130259098843505
    }
}