{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.4069965870307167,
        "acc_stderr,none": 0.014356399418009131,
        "acc_norm,none": 0.4206484641638225,
        "acc_norm_stderr,none": 0.014426211252508401
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.6056397306397306,
        "acc_stderr,none": 0.010028176038393006,
        "acc_norm,none": 0.5202020202020202,
        "acc_norm_stderr,none": 0.010251405621305368
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.8125382262996942,
        "acc_stderr,none": 0.006826071005051068
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.26459438968915844,
        "exact_match_stderr,flexible-extract": 0.012150554001563238
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.45867357100179246,
        "acc_stderr,none": 0.004972708369656543,
        "acc_norm,none": 0.5879306910973909,
        "acc_norm_stderr,none": 0.004912015369160069
    },
    "mmlu": {
        "acc,none": 0.5188007406352372,
        "acc_stderr,none": 0.003992106931218878,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.4858660998937301,
        "acc_stderr,none": 0.0068281877450081995,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.3492063492063492,
        "acc_stderr,none": 0.04263906892795132
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.6666666666666666,
        "acc_stderr,none": 0.036810508691615486
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.75,
        "acc_stderr,none": 0.03039153369274154
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.7510548523206751,
        "acc_stderr,none": 0.028146970599422647
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.6611570247933884,
        "acc_stderr,none": 0.04320767807536672
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.6203703703703703,
        "acc_stderr,none": 0.04691521224077742
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.6441717791411042,
        "acc_stderr,none": 0.03761521380046734
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.5953757225433526,
        "acc_stderr,none": 0.02642481659400985
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.23798882681564246,
        "acc_stderr,none": 0.014242630070574885
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.6077170418006431,
        "acc_stderr,none": 0.027731258647011994
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.5987654320987654,
        "acc_stderr,none": 0.027272582849839796
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.3970013037809648,
        "acc_stderr,none": 0.012496346982909556
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.8070175438596491,
        "acc_stderr,none": 0.030267457554898458
    },
    "mmlu_other": {
        "acc,none": 0.6031541680077245,
        "acc_stderr,none": 0.008394342715740468,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.53,
        "acc_stderr,none": 0.050161355804659205
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.5509433962264151,
        "acc_stderr,none": 0.030612730713641095
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.5028901734104047,
        "acc_stderr,none": 0.038124005659748335
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.29,
        "acc_stderr,none": 0.04560480215720684
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.5919282511210763,
        "acc_stderr,none": 0.03298574607842821
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.7378640776699029,
        "acc_stderr,none": 0.04354631077260595
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.782051282051282,
        "acc_stderr,none": 0.02704685763071666
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.64,
        "acc_stderr,none": 0.04824181513244218
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.7637292464878672,
        "acc_stderr,none": 0.015190473717037498
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.5718954248366013,
        "acc_stderr,none": 0.028332397483664274
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.3262411347517731,
        "acc_stderr,none": 0.02796845304356317
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.5698529411764706,
        "acc_stderr,none": 0.030074971917302875
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.5060240963855421,
        "acc_stderr,none": 0.03892212195333045
    },
    "mmlu_social_sciences": {
        "acc,none": 0.5911602209944752,
        "acc_stderr,none": 0.008665439671728307,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.37719298245614036,
        "acc_stderr,none": 0.04559522141958216
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.6060606060606061,
        "acc_stderr,none": 0.034812853382329645
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.7253886010362695,
        "acc_stderr,none": 0.03221024508041154
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.4794871794871795,
        "acc_stderr,none": 0.02532966316348994
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.44537815126050423,
        "acc_stderr,none": 0.0322841062671639
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.7247706422018348,
        "acc_stderr,none": 0.019149093743155203
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.6259541984732825,
        "acc_stderr,none": 0.04243869242230523
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.5294117647058824,
        "acc_stderr,none": 0.02019280827143379
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.5818181818181818,
        "acc_stderr,none": 0.04724577405731571
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.5836734693877551,
        "acc_stderr,none": 0.031557828165561644
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.7064676616915423,
        "acc_stderr,none": 0.032200241045342054
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.73,
        "acc_stderr,none": 0.0446196043338474
    },
    "mmlu_stem": {
        "acc,none": 0.41420869013637807,
        "acc_stderr,none": 0.008505276077891888,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.24,
        "acc_stderr,none": 0.042923469599092816
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.45185185185185184,
        "acc_stderr,none": 0.04299268905480864
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.5592105263157895,
        "acc_stderr,none": 0.04040311062490437
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.6041666666666666,
        "acc_stderr,none": 0.04089465449325582
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.34,
        "acc_stderr,none": 0.04760952285695236
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.42,
        "acc_stderr,none": 0.049604496374885836
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.27,
        "acc_stderr,none": 0.044619604333847394
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.35294117647058826,
        "acc_stderr,none": 0.047551296160629475
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.65,
        "acc_stderr,none": 0.047937248544110196
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.451063829787234,
        "acc_stderr,none": 0.032529096196131965
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.4896551724137931,
        "acc_stderr,none": 0.04165774775728762
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.3306878306878307,
        "acc_stderr,none": 0.024229965298425086
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.6258064516129033,
        "acc_stderr,none": 0.027528904299845693
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.3497536945812808,
        "acc_stderr,none": 0.03355400904969565
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.51,
        "acc_stderr,none": 0.05024183937956911
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.2740740740740741,
        "acc_stderr,none": 0.027195934804085622
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.2980132450331126,
        "acc_stderr,none": 0.037345356767871984
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.2916666666666667,
        "acc_stderr,none": 0.03099866630456053
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.4017857142857143,
        "acc_stderr,none": 0.04653333146973646
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.7159956474428727,
        "acc_stderr,none": 0.010521147542454227,
        "acc_norm,none": 0.7138193688792165,
        "acc_norm_stderr,none": 0.010545318576106646
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.4273285568065507,
        "acc_stderr,none": 0.011193930340551272
    }
}