{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.4300341296928328,
        "acc_stderr,none": 0.014467631559137998,
        "acc_norm,none": 0.42918088737201365,
        "acc_norm_stderr,none": 0.014464085894870653
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.6157407407407407,
        "acc_stderr,none": 0.009981120724601434,
        "acc_norm,none": 0.5378787878787878,
        "acc_norm_stderr,none": 0.010230299628864794
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.8180428134556575,
        "acc_stderr,none": 0.006747846008907705
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.3032600454890068,
        "exact_match_stderr,flexible-extract": 0.012661502663418698
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.4671380203146783,
        "acc_stderr,none": 0.004978992721242828,
        "acc_norm,none": 0.5818562039434375,
        "acc_norm_stderr,none": 0.0049224598204347676
    },
    "mmlu": {
        "acc,none": 0.4863267340834639,
        "acc_stderr,none": 0.004013547986330838,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.45462274176408074,
        "acc_stderr,none": 0.00688096509689843,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.3333333333333333,
        "acc_stderr,none": 0.04216370213557835
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.6242424242424243,
        "acc_stderr,none": 0.03781887353205982
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.6862745098039216,
        "acc_stderr,none": 0.032566854844603886
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.6582278481012658,
        "acc_stderr,none": 0.030874537537553617
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.6446280991735537,
        "acc_stderr,none": 0.04369236326573981
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.6851851851851852,
        "acc_stderr,none": 0.04489931073591312
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.6196319018404908,
        "acc_stderr,none": 0.038142698932618374
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.5549132947976878,
        "acc_stderr,none": 0.02675625512966377
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.23910614525139665,
        "acc_stderr,none": 0.01426555419233115
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.5144694533762058,
        "acc_stderr,none": 0.028386198084177687
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.5555555555555556,
        "acc_stderr,none": 0.027648477877413324
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.3683181225554107,
        "acc_stderr,none": 0.012319403369564634
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.783625730994152,
        "acc_stderr,none": 0.031581495393387324
    },
    "mmlu_other": {
        "acc,none": 0.5597038944319279,
        "acc_stderr,none": 0.00854331931229016,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.54,
        "acc_stderr,none": 0.05009082659620333
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.5320754716981132,
        "acc_stderr,none": 0.03070948699255655
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.49710982658959535,
        "acc_stderr,none": 0.038124005659748335
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.24,
        "acc_stderr,none": 0.04292346959909282
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.48878923766816146,
        "acc_stderr,none": 0.033549366530984746
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.7281553398058253,
        "acc_stderr,none": 0.044052680241409216
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.7435897435897436,
        "acc_stderr,none": 0.02860595370200421
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.6,
        "acc_stderr,none": 0.04923659639173309
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.7203065134099617,
        "acc_stderr,none": 0.016050792148036546
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.5326797385620915,
        "acc_stderr,none": 0.02856869975222587
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.3333333333333333,
        "acc_stderr,none": 0.02812163604063989
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.43014705882352944,
        "acc_stderr,none": 0.030074971917302875
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.46987951807228917,
        "acc_stderr,none": 0.03885425420866766
    },
    "mmlu_social_sciences": {
        "acc,none": 0.563860903477413,
        "acc_stderr,none": 0.00872638789376448,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.3508771929824561,
        "acc_stderr,none": 0.044895393502706986
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.6414141414141414,
        "acc_stderr,none": 0.034169036403915214
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.7098445595854922,
        "acc_stderr,none": 0.03275264467791515
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.4256410256410256,
        "acc_stderr,none": 0.025069094387296535
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.42857142857142855,
        "acc_stderr,none": 0.032145368597886394
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.6935779816513762,
        "acc_stderr,none": 0.01976551722045852
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.5877862595419847,
        "acc_stderr,none": 0.043171711948702556
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.48856209150326796,
        "acc_stderr,none": 0.02022254151561087
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.5727272727272728,
        "acc_stderr,none": 0.047381987035454834
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.6,
        "acc_stderr,none": 0.031362502409358936
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.6368159203980099,
        "acc_stderr,none": 0.034005985055990146
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.71,
        "acc_stderr,none": 0.045604802157206845
    },
    "mmlu_stem": {
        "acc,none": 0.38566444655883286,
        "acc_stderr,none": 0.008406431962214278,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.23,
        "acc_stderr,none": 0.04229525846816506
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.48148148148148145,
        "acc_stderr,none": 0.04316378599511326
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.5197368421052632,
        "acc_stderr,none": 0.040657710025626057
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.5833333333333334,
        "acc_stderr,none": 0.04122728707651282
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.33,
        "acc_stderr,none": 0.047258156262526045
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.34,
        "acc_stderr,none": 0.04760952285695236
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.25,
        "acc_stderr,none": 0.04351941398892446
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.3333333333333333,
        "acc_stderr,none": 0.04690650298201942
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.58,
        "acc_stderr,none": 0.049604496374885836
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.4127659574468085,
        "acc_stderr,none": 0.03218471141400351
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.4896551724137931,
        "acc_stderr,none": 0.04165774775728763
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.29894179894179895,
        "acc_stderr,none": 0.02357760479165581
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.5967741935483871,
        "acc_stderr,none": 0.027906150826041146
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.3103448275862069,
        "acc_stderr,none": 0.032550867699701024
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.43,
        "acc_stderr,none": 0.049756985195624284
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.23333333333333334,
        "acc_stderr,none": 0.025787874220959336
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.271523178807947,
        "acc_stderr,none": 0.036313298039696545
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.2824074074074074,
        "acc_stderr,none": 0.030701372111510927
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.39285714285714285,
        "acc_stderr,none": 0.04635550135609976
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.7138193688792165,
        "acc_stderr,none": 0.010545318576106638,
        "acc_norm,none": 0.7116430903155604,
        "acc_norm_stderr,none": 0.010569190399220647
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.40890481064483114,
        "acc_stderr,none": 0.011124710055682835
    }
}