{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.45051194539249145,
        "acc_stderr,none": 0.014539646098471625,
        "acc_norm,none": 0.45051194539249145,
        "acc_norm_stderr,none": 0.014539646098471627
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.6607744107744108,
        "acc_stderr,none": 0.009714917207765846,
        "acc_norm,none": 0.5715488215488216,
        "acc_norm_stderr,none": 0.010154195733990972
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.825382262996942,
        "acc_stderr,none": 0.006639941963847397
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.3570887035633055,
        "exact_match_stderr,flexible-extract": 0.013197931775445206
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.5126468830910177,
        "acc_stderr,none": 0.004988184988345287,
        "acc_norm,none": 0.6613224457279426,
        "acc_norm_stderr,none": 0.00472292833283406
    },
    "mmlu": {
        "acc,none": 0.5142429853297251,
        "acc_stderr,none": 0.003988538504161728,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.48012752391073327,
        "acc_stderr,none": 0.00683677882539784,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.3253968253968254,
        "acc_stderr,none": 0.041905964388711366
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.7151515151515152,
        "acc_stderr,none": 0.035243908445117815
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.7303921568627451,
        "acc_stderr,none": 0.031145570659486782
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.729957805907173,
        "acc_stderr,none": 0.028900721906293426
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.6363636363636364,
        "acc_stderr,none": 0.043913262867240704
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.6851851851851852,
        "acc_stderr,none": 0.04489931073591312
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.6196319018404908,
        "acc_stderr,none": 0.038142698932618374
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.5867052023121387,
        "acc_stderr,none": 0.026511261369409244
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.23798882681564246,
        "acc_stderr,none": 0.014242630070574885
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.5819935691318328,
        "acc_stderr,none": 0.028013651891995072
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.5679012345679012,
        "acc_stderr,none": 0.027563010971606672
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.39504563233376794,
        "acc_stderr,none": 0.012485727813251538
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.8128654970760234,
        "acc_stderr,none": 0.029913127232368032
    },
    "mmlu_other": {
        "acc,none": 0.5973607981976183,
        "acc_stderr,none": 0.008381121663913587,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.56,
        "acc_stderr,none": 0.049888765156985884
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.539622641509434,
        "acc_stderr,none": 0.03067609659938918
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.5028901734104047,
        "acc_stderr,none": 0.038124005659748335
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.3,
        "acc_stderr,none": 0.046056618647183814
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.5739910313901345,
        "acc_stderr,none": 0.0331883328621728
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.7378640776699029,
        "acc_stderr,none": 0.04354631077260595
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.7948717948717948,
        "acc_stderr,none": 0.026453508054040332
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.64,
        "acc_stderr,none": 0.04824181513244218
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.7650063856960408,
        "acc_stderr,none": 0.015162024152278452
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.5588235294117647,
        "acc_stderr,none": 0.028431095444176643
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.30851063829787234,
        "acc_stderr,none": 0.027553366165101366
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.5551470588235294,
        "acc_stderr,none": 0.030187532060329376
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.46987951807228917,
        "acc_stderr,none": 0.03885425420866767
    },
    "mmlu_social_sciences": {
        "acc,none": 0.5895352616184596,
        "acc_stderr,none": 0.008617286105034339,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.3157894736842105,
        "acc_stderr,none": 0.04372748290278008
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.6818181818181818,
        "acc_stderr,none": 0.03318477333845331
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.7202072538860104,
        "acc_stderr,none": 0.03239637046735703
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.4512820512820513,
        "acc_stderr,none": 0.025230381238934833
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.47058823529411764,
        "acc_stderr,none": 0.03242225027115006
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.7302752293577982,
        "acc_stderr,none": 0.01902848671111544
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.6335877862595419,
        "acc_stderr,none": 0.04225875451969638
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.5065359477124183,
        "acc_stderr,none": 0.020226106567657814
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.5727272727272728,
        "acc_stderr,none": 0.047381987035454834
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.6040816326530613,
        "acc_stderr,none": 0.03130802899065686
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.6965174129353234,
        "acc_stderr,none": 0.03251006816458618
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.74,
        "acc_stderr,none": 0.044084400227680794
    },
    "mmlu_stem": {
        "acc,none": 0.4097684744687599,
        "acc_stderr,none": 0.008515958006060913,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.23,
        "acc_stderr,none": 0.04229525846816506
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.4666666666666667,
        "acc_stderr,none": 0.04309732901036354
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.5131578947368421,
        "acc_stderr,none": 0.04067533136309174
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.5902777777777778,
        "acc_stderr,none": 0.04112490974670787
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.36,
        "acc_stderr,none": 0.04824181513244218
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.43,
        "acc_stderr,none": 0.04975698519562428
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.28,
        "acc_stderr,none": 0.04512608598542127
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.29411764705882354,
        "acc_stderr,none": 0.04533838195929774
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.59,
        "acc_stderr,none": 0.04943110704237102
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.451063829787234,
        "acc_stderr,none": 0.03252909619613197
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.503448275862069,
        "acc_stderr,none": 0.041665675771015785
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.35714285714285715,
        "acc_stderr,none": 0.024677862841332786
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.6193548387096774,
        "acc_stderr,none": 0.027621717832907036
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.33497536945812806,
        "acc_stderr,none": 0.033208527423483104
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.52,
        "acc_stderr,none": 0.050211673156867795
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.2814814814814815,
        "acc_stderr,none": 0.027420019350945277
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.2781456953642384,
        "acc_stderr,none": 0.03658603262763743
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.2824074074074074,
        "acc_stderr,none": 0.030701372111510927
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.375,
        "acc_stderr,none": 0.04595091388086298
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.7399347116430903,
        "acc_stderr,none": 0.010234893249061317,
        "acc_norm,none": 0.7393906420021763,
        "acc_norm_stderr,none": 0.010241826155811616
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.4329580348004094,
        "acc_stderr,none": 0.011211904262208602
    }
}