{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.40955631399317405,
        "acc_stderr,none": 0.014370358632472447,
        "acc_norm,none": 0.42918088737201365,
        "acc_norm_stderr,none": 0.014464085894870655
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.5997474747474747,
        "acc_stderr,none": 0.01005355011989612,
        "acc_norm,none": 0.5303030303030303,
        "acc_norm_stderr,none": 0.010240923608726537
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.809480122324159,
        "acc_stderr,none": 0.006868561339089451
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.23426838514025777,
        "exact_match_stderr,flexible-extract": 0.011666415127631032
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.4519020115514838,
        "acc_stderr,none": 0.004966640868083856,
        "acc_norm,none": 0.5572595100577574,
        "acc_norm_stderr,none": 0.004956953917781311
    },
    "mmlu": {
        "acc,none": 0.544509329155391,
        "acc_stderr,none": 0.0039642265160579875,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.4975557917109458,
        "acc_stderr,none": 0.006813158546382347,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.4126984126984127,
        "acc_stderr,none": 0.04403438954768176
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.703030303030303,
        "acc_stderr,none": 0.03567969772268048
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.7696078431372549,
        "acc_stderr,none": 0.02955429260569506
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.7552742616033755,
        "acc_stderr,none": 0.027985699387036413
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.7107438016528925,
        "acc_stderr,none": 0.041391127276354626
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.6481481481481481,
        "acc_stderr,none": 0.04616631111801714
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.6871165644171779,
        "acc_stderr,none": 0.036429145782924055
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.5809248554913294,
        "acc_stderr,none": 0.026564178111422622
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.24804469273743016,
        "acc_stderr,none": 0.014444157808261462
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.617363344051447,
        "acc_stderr,none": 0.02760468902858198
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.6141975308641975,
        "acc_stderr,none": 0.027085401226132143
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.40091264667535853,
        "acc_stderr,none": 0.012516960350640828
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.8187134502923976,
        "acc_stderr,none": 0.029547741687640038
    },
    "mmlu_other": {
        "acc,none": 0.6269713550048278,
        "acc_stderr,none": 0.00835663482334075,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.58,
        "acc_stderr,none": 0.049604496374885836
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.5962264150943396,
        "acc_stderr,none": 0.03019761160019795
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.5086705202312138,
        "acc_stderr,none": 0.03811890988940412
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.29,
        "acc_stderr,none": 0.04560480215720684
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.57847533632287,
        "acc_stderr,none": 0.03314190222110656
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.7572815533980582,
        "acc_stderr,none": 0.04245022486384495
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.7649572649572649,
        "acc_stderr,none": 0.027778835904935444
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.63,
        "acc_stderr,none": 0.048523658709391
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.7726692209450831,
        "acc_stderr,none": 0.01498727064094601
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.6176470588235294,
        "acc_stderr,none": 0.027826109307283693
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.3971631205673759,
        "acc_stderr,none": 0.029189805673587102
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.6507352941176471,
        "acc_stderr,none": 0.028959755196824855
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.5,
        "acc_stderr,none": 0.03892494720807614
    },
    "mmlu_social_sciences": {
        "acc,none": 0.6353591160220995,
        "acc_stderr,none": 0.008454353938442686,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.3333333333333333,
        "acc_stderr,none": 0.04434600701584925
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.7171717171717171,
        "acc_stderr,none": 0.03208779558786752
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.7875647668393783,
        "acc_stderr,none": 0.02951928261681723
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.5487179487179488,
        "acc_stderr,none": 0.02523038123893484
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.5378151260504201,
        "acc_stderr,none": 0.032385469487589795
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.7559633027522936,
        "acc_stderr,none": 0.018415286351416413
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.6412213740458015,
        "acc_stderr,none": 0.04206739313864908
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.545751633986928,
        "acc_stderr,none": 0.0201429745537952
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.5909090909090909,
        "acc_stderr,none": 0.04709306978661896
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.6489795918367347,
        "acc_stderr,none": 0.03055531675557364
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.746268656716418,
        "acc_stderr,none": 0.030769444967296014
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.77,
        "acc_stderr,none": 0.04229525846816506
    },
    "mmlu_stem": {
        "acc,none": 0.4446558832857596,
        "acc_stderr,none": 0.008511922589674285,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.21,
        "acc_stderr,none": 0.040936018074033256
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.5111111111111111,
        "acc_stderr,none": 0.04318275491977976
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.5789473684210527,
        "acc_stderr,none": 0.040179012759817494
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.6666666666666666,
        "acc_stderr,none": 0.03942082639927213
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.44,
        "acc_stderr,none": 0.04988876515698589
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.47,
        "acc_stderr,none": 0.05016135580465919
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.27,
        "acc_stderr,none": 0.044619604333847394
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.39215686274509803,
        "acc_stderr,none": 0.04858083574266345
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.64,
        "acc_stderr,none": 0.048241815132442176
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.49361702127659574,
        "acc_stderr,none": 0.032683358999363366
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.4689655172413793,
        "acc_stderr,none": 0.04158632762097828
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.30158730158730157,
        "acc_stderr,none": 0.023636975996101806
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.6903225806451613,
        "acc_stderr,none": 0.026302774983517418
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.39408866995073893,
        "acc_stderr,none": 0.03438157967036544
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.58,
        "acc_stderr,none": 0.049604496374885836
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.2814814814814815,
        "acc_stderr,none": 0.02742001935094527
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.32450331125827814,
        "acc_stderr,none": 0.038227469376587525
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.4027777777777778,
        "acc_stderr,none": 0.033448873829978666
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.39285714285714285,
        "acc_stderr,none": 0.04635550135609976
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.6996735582154516,
        "acc_stderr,none": 0.010695225308183127,
        "acc_norm,none": 0.6974972796517954,
        "acc_norm_stderr,none": 0.01071719969808388
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.4048106448311157,
        "acc_stderr,none": 0.011107144401926745
    }
}