{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.35580204778157,
        "acc_stderr,none": 0.013990571137918763,
        "acc_norm,none": 0.3643344709897611,
        "acc_norm_stderr,none": 0.014063260279882413
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.4852693602693603,
        "acc_stderr,none": 0.010255329977562098,
        "acc_norm,none": 0.45286195286195285,
        "acc_norm_stderr,none": 0.010214087372211392
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.8211009174311926,
        "acc_stderr,none": 0.006703395833491561
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.19332827899924185,
        "exact_match_stderr,flexible-extract": 0.010877733223180565
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.3968333001394145,
        "acc_stderr,none": 0.004882410029935438,
        "acc_norm,none": 0.4538936466839275,
        "acc_norm_stderr,none": 0.004968521608065473
    },
    "mmlu": {
        "acc,none": 0.5206523287281014,
        "acc_stderr,none": 0.003979103788409097,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.4718384697130712,
        "acc_stderr,none": 0.006861959994398498,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.3412698412698413,
        "acc_stderr,none": 0.04240799327574924
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.6363636363636364,
        "acc_stderr,none": 0.03756335775187896
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.696078431372549,
        "acc_stderr,none": 0.032282103870378935
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.6708860759493671,
        "acc_stderr,none": 0.03058732629470235
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.6859504132231405,
        "acc_stderr,none": 0.042369647530410184
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.6296296296296297,
        "acc_stderr,none": 0.04668408033024931
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.6441717791411042,
        "acc_stderr,none": 0.03761521380046734
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.5606936416184971,
        "acc_stderr,none": 0.026720034380514995
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.23798882681564246,
        "acc_stderr,none": 0.014242630070574885
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.6109324758842444,
        "acc_stderr,none": 0.027690337536485376
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.6049382716049383,
        "acc_stderr,none": 0.027201117666925657
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.3820078226857888,
        "acc_stderr,none": 0.012409564470235545
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.7953216374269005,
        "acc_stderr,none": 0.030944459778533193
    },
    "mmlu_other": {
        "acc,none": 0.6092693916961699,
        "acc_stderr,none": 0.008382226508317612,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.59,
        "acc_stderr,none": 0.04943110704237102
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.5849056603773585,
        "acc_stderr,none": 0.03032594578928611
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.5144508670520231,
        "acc_stderr,none": 0.03810871630454764
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.26,
        "acc_stderr,none": 0.04408440022768078
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.5919282511210763,
        "acc_stderr,none": 0.03298574607842821
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.7475728155339806,
        "acc_stderr,none": 0.04301250399690878
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.8034188034188035,
        "acc_stderr,none": 0.026035386098951282
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.65,
        "acc_stderr,none": 0.0479372485441102
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.7535121328224776,
        "acc_stderr,none": 0.015411308769686936
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.6111111111111112,
        "acc_stderr,none": 0.027914055510468015
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.3475177304964539,
        "acc_stderr,none": 0.02840662780959095
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.5220588235294118,
        "acc_stderr,none": 0.03034326422421352
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.5120481927710844,
        "acc_stderr,none": 0.03891364495835817
    },
    "mmlu_social_sciences": {
        "acc,none": 0.6074098147546312,
        "acc_stderr,none": 0.008563294215709238,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.3684210526315789,
        "acc_stderr,none": 0.04537815354939391
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.6565656565656566,
        "acc_stderr,none": 0.033832012232444426
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.7875647668393783,
        "acc_stderr,none": 0.02951928261681723
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.5076923076923077,
        "acc_stderr,none": 0.025348006031534778
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.47058823529411764,
        "acc_stderr,none": 0.03242225027115006
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.7321100917431193,
        "acc_stderr,none": 0.018987462257978652
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.648854961832061,
        "acc_stderr,none": 0.04186445163013751
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.5130718954248366,
        "acc_stderr,none": 0.020220920829626923
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.6272727272727273,
        "acc_stderr,none": 0.04631381319425463
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.5795918367346938,
        "acc_stderr,none": 0.031601069934496004
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.7562189054726368,
        "acc_stderr,none": 0.03036049015401465
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.74,
        "acc_stderr,none": 0.044084400227680794
    },
    "mmlu_stem": {
        "acc,none": 0.42150333016175073,
        "acc_stderr,none": 0.008433521607384357,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.3,
        "acc_stderr,none": 0.046056618647183814
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.5111111111111111,
        "acc_stderr,none": 0.043182754919779756
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.5723684210526315,
        "acc_stderr,none": 0.04026097083296564
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.6319444444444444,
        "acc_stderr,none": 0.04032999053960718
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.37,
        "acc_stderr,none": 0.048523658709391
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.39,
        "acc_stderr,none": 0.04902071300001974
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.31,
        "acc_stderr,none": 0.04648231987117316
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.35294117647058826,
        "acc_stderr,none": 0.047551296160629475
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.72,
        "acc_stderr,none": 0.04512608598542127
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.4595744680851064,
        "acc_stderr,none": 0.03257901482099835
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.5172413793103449,
        "acc_stderr,none": 0.04164188720169375
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.2804232804232804,
        "acc_stderr,none": 0.02313528797432563
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.6645161290322581,
        "acc_stderr,none": 0.026860206444724356
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.33497536945812806,
        "acc_stderr,none": 0.033208527423483104
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.41,
        "acc_stderr,none": 0.04943110704237102
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.22962962962962963,
        "acc_stderr,none": 0.025644108639267606
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.271523178807947,
        "acc_stderr,none": 0.036313298039696525
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.3888888888888889,
        "acc_stderr,none": 0.033247089118091176
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.4107142857142857,
        "acc_stderr,none": 0.04669510663875191
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.6969532100108814,
        "acc_stderr,none": 0.010722648689531515,
        "acc_norm,none": 0.6866158868335147,
        "acc_norm_stderr,none": 0.010822829929195494
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.35516888433981575,
        "acc_stderr,none": 0.010829024262629137
    }
}