{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.5059726962457338,
        "acc_stderr,none": 0.014610348300255798,
        "acc_norm,none": 0.5119453924914675,
        "acc_norm_stderr,none": 0.014607220340597171
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.7904040404040404,
        "acc_stderr,none": 0.008351879544327126,
        "acc_norm,none": 0.73989898989899,
        "acc_norm_stderr,none": 0.009001718541079949
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.82782874617737,
        "acc_stderr,none": 0.006603027596591708
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.4783927217589083,
        "exact_match_stderr,flexible-extract": 0.01375961866705177
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.5787691694881498,
        "acc_stderr,none": 0.004927473370720143,
        "acc_norm,none": 0.7411870145389364,
        "acc_norm_stderr,none": 0.004370875625259
    },
    "mmlu": {
        "acc,none": 0.5076199971514029,
        "acc_stderr,none": 0.003972141497394558,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.4669500531349628,
        "acc_stderr,none": 0.006841505123205101,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.3333333333333333,
        "acc_stderr,none": 0.04216370213557836
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.6727272727272727,
        "acc_stderr,none": 0.036639749943912434
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.7549019607843137,
        "acc_stderr,none": 0.03019028245350194
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.70042194092827,
        "acc_stderr,none": 0.029818024749753102
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.6859504132231405,
        "acc_stderr,none": 0.04236964753041018
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.6666666666666666,
        "acc_stderr,none": 0.04557239513497751
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.5276073619631901,
        "acc_stderr,none": 0.039223782906109894
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.5260115606936416,
        "acc_stderr,none": 0.026882643434022885
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.2424581005586592,
        "acc_stderr,none": 0.014333522059217892
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.5852090032154341,
        "acc_stderr,none": 0.02798268045975956
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.6234567901234568,
        "acc_stderr,none": 0.026959344518747784
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.3709256844850065,
        "acc_stderr,none": 0.012337391684530312
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.7660818713450293,
        "acc_stderr,none": 0.032467217651178264
    },
    "mmlu_other": {
        "acc,none": 0.6324428709365948,
        "acc_stderr,none": 0.00839369019691107,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.58,
        "acc_stderr,none": 0.049604496374885836
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.6528301886792452,
        "acc_stderr,none": 0.02930010170554965
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.49710982658959535,
        "acc_stderr,none": 0.038124005659748335
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.34,
        "acc_stderr,none": 0.04760952285695235
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.6143497757847534,
        "acc_stderr,none": 0.03266842214289201
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.7766990291262136,
        "acc_stderr,none": 0.04123553189891431
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.7905982905982906,
        "acc_stderr,none": 0.026655699653922723
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.65,
        "acc_stderr,none": 0.0479372485441102
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.7343550446998723,
        "acc_stderr,none": 0.015794302487888722
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.5915032679738562,
        "acc_stderr,none": 0.02814640599309636
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.4148936170212766,
        "acc_stderr,none": 0.0293922365846125
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.6875,
        "acc_stderr,none": 0.02815637344037142
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.5240963855421686,
        "acc_stderr,none": 0.03887971849597264
    },
    "mmlu_social_sciences": {
        "acc,none": 0.5862853428664283,
        "acc_stderr,none": 0.0086202734281084,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.3333333333333333,
        "acc_stderr,none": 0.04434600701584925
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.5909090909090909,
        "acc_stderr,none": 0.03502975799413008
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.7305699481865285,
        "acc_stderr,none": 0.032018671228777947
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.4230769230769231,
        "acc_stderr,none": 0.02504919787604234
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.4117647058823529,
        "acc_stderr,none": 0.03196876989195778
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.7119266055045872,
        "acc_stderr,none": 0.019416445892636025
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.6412213740458015,
        "acc_stderr,none": 0.04206739313864909
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.553921568627451,
        "acc_stderr,none": 0.020109864547181354
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.6,
        "acc_stderr,none": 0.0469237132203465
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.5959183673469388,
        "acc_stderr,none": 0.03141470802586589
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.7412935323383084,
        "acc_stderr,none": 0.03096590312357302
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.73,
        "acc_stderr,none": 0.044619604333847394
    },
    "mmlu_stem": {
        "acc,none": 0.3685379004123057,
        "acc_stderr,none": 0.008338392006729228,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.24,
        "acc_stderr,none": 0.042923469599092816
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.5037037037037037,
        "acc_stderr,none": 0.04319223625811331
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.4276315789473684,
        "acc_stderr,none": 0.04026097083296558
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.5555555555555556,
        "acc_stderr,none": 0.041553199555931467
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.33,
        "acc_stderr,none": 0.04725815626252605
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.3,
        "acc_stderr,none": 0.046056618647183814
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.24,
        "acc_stderr,none": 0.042923469599092816
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.2549019607843137,
        "acc_stderr,none": 0.04336432707993176
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.65,
        "acc_stderr,none": 0.04793724854411019
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.39148936170212767,
        "acc_stderr,none": 0.03190701242326812
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.3793103448275862,
        "acc_stderr,none": 0.04043461861916747
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.2804232804232804,
        "acc_stderr,none": 0.02313528797432563
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.5516129032258065,
        "acc_stderr,none": 0.028292056830112735
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.33497536945812806,
        "acc_stderr,none": 0.033208527423483104
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.5,
        "acc_stderr,none": 0.050251890762960605
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.24814814814814815,
        "acc_stderr,none": 0.026335739404055803
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.19205298013245034,
        "acc_stderr,none": 0.032162984205936156
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.2916666666666667,
        "acc_stderr,none": 0.030998666304560524
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.4107142857142857,
        "acc_stderr,none": 0.04669510663875191
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.7986942328618063,
        "acc_stderr,none": 0.009355431098990435,
        "acc_norm,none": 0.8041349292709467,
        "acc_norm_stderr,none": 0.009259518041395787
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.47389969293756395,
        "acc_stderr,none": 0.011298645160980824
    }
}