{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.4667235494880546,
        "acc_stderr,none": 0.01457899585960582,
        "acc_norm,none": 0.4812286689419795,
        "acc_norm_stderr,none": 0.014601090150633966
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.7449494949494949,
        "acc_stderr,none": 0.008944265906130707,
        "acc_norm,none": 0.6523569023569024,
        "acc_norm_stderr,none": 0.009771868846830916
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.8339449541284404,
        "acc_stderr,none": 0.006508595338469725
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.45261561789234267,
        "exact_match_stderr,flexible-extract": 0.01371049907093497
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.545309699263095,
        "acc_stderr,none": 0.004969251445596341,
        "acc_norm,none": 0.7117108145787692,
        "acc_norm_stderr,none": 0.004520406331084047
    },
    "mmlu": {
        "acc,none": 0.568651189289275,
        "acc_stderr,none": 0.0039416097457302965,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.520297555791711,
        "acc_stderr,none": 0.006793061464357365,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.40476190476190477,
        "acc_stderr,none": 0.04390259265377562
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.7393939393939394,
        "acc_stderr,none": 0.03427743175816524
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.7843137254901961,
        "acc_stderr,none": 0.028867431449849313
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.7763713080168776,
        "acc_stderr,none": 0.027123298205229966
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.6942148760330579,
        "acc_stderr,none": 0.04205953933884122
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.6759259259259259,
        "acc_stderr,none": 0.045245960070300496
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.6932515337423313,
        "acc_stderr,none": 0.03623089915724146
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.6445086705202312,
        "acc_stderr,none": 0.025770292082977247
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.25251396648044694,
        "acc_stderr,none": 0.014530330201468654
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.6366559485530546,
        "acc_stderr,none": 0.027316847674192714
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.6358024691358025,
        "acc_stderr,none": 0.02677492989972234
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.4348109517601043,
        "acc_stderr,none": 0.01266123380561631
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.8245614035087719,
        "acc_stderr,none": 0.029170885500727654
    },
    "mmlu_other": {
        "acc,none": 0.6511103958802703,
        "acc_stderr,none": 0.008250588115301776,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.59,
        "acc_stderr,none": 0.04943110704237102
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.6264150943396226,
        "acc_stderr,none": 0.02977308271331987
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.5375722543352601,
        "acc_stderr,none": 0.0380168510452446
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.34,
        "acc_stderr,none": 0.04760952285695235
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.6367713004484304,
        "acc_stderr,none": 0.03227790442850499
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.7864077669902912,
        "acc_stderr,none": 0.04058042015646034
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.8162393162393162,
        "acc_stderr,none": 0.02537213967172293
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.66,
        "acc_stderr,none": 0.04760952285695238
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.7867177522349936,
        "acc_stderr,none": 0.014648172749593501
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.630718954248366,
        "acc_stderr,none": 0.02763417668960266
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.4397163120567376,
        "acc_stderr,none": 0.029609912075594106
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.6397058823529411,
        "acc_stderr,none": 0.029163128570670736
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.5060240963855421,
        "acc_stderr,none": 0.03892212195333045
    },
    "mmlu_social_sciences": {
        "acc,none": 0.6584335391615209,
        "acc_stderr,none": 0.008286844435557487,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.35964912280701755,
        "acc_stderr,none": 0.04514496132873633
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.702020202020202,
        "acc_stderr,none": 0.03258630383836556
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.8082901554404145,
        "acc_stderr,none": 0.02840895362624528
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.5384615384615384,
        "acc_stderr,none": 0.025275892070240644
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.5336134453781513,
        "acc_stderr,none": 0.03240501447690071
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.7926605504587156,
        "acc_stderr,none": 0.01738141556360867
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.6870229007633588,
        "acc_stderr,none": 0.04066962905677697
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.5931372549019608,
        "acc_stderr,none": 0.019873802005061177
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.5818181818181818,
        "acc_stderr,none": 0.047245774057315705
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.6530612244897959,
        "acc_stderr,none": 0.030472526026726496
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.8109452736318408,
        "acc_stderr,none": 0.02768691358801301
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.81,
        "acc_stderr,none": 0.039427724440366234
    },
    "mmlu_stem": {
        "acc,none": 0.4719314938154139,
        "acc_stderr,none": 0.008596514151569817,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.23,
        "acc_stderr,none": 0.04229525846816506
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.5555555555555556,
        "acc_stderr,none": 0.04292596718256981
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.5723684210526315,
        "acc_stderr,none": 0.040260970832965634
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.6527777777777778,
        "acc_stderr,none": 0.03981240543717861
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.41,
        "acc_stderr,none": 0.04943110704237102
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.49,
        "acc_stderr,none": 0.05024183937956911
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.3,
        "acc_stderr,none": 0.046056618647183814
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.37254901960784315,
        "acc_stderr,none": 0.04810840148082636
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.7,
        "acc_stderr,none": 0.046056618647183814
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.49361702127659574,
        "acc_stderr,none": 0.032683358999363366
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.5862068965517241,
        "acc_stderr,none": 0.04104269211806232
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.38095238095238093,
        "acc_stderr,none": 0.025010749116137595
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.6967741935483871,
        "acc_stderr,none": 0.02614868593067175
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.43349753694581283,
        "acc_stderr,none": 0.034867317274198714
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.6,
        "acc_stderr,none": 0.04923659639173309
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.3333333333333333,
        "acc_stderr,none": 0.028742040903948492
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.31788079470198677,
        "acc_stderr,none": 0.03802039760107903
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.39814814814814814,
        "acc_stderr,none": 0.033384734032074016
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.42857142857142855,
        "acc_stderr,none": 0.04697113923010213
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.780195865070729,
        "acc_stderr,none": 0.009661958616651764,
        "acc_norm,none": 0.7872687704026116,
        "acc_norm_stderr,none": 0.009548223123047336
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.4570112589559877,
        "acc_stderr,none": 0.01127217546233142
    }
}