{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.4445392491467577,
        "acc_stderr,none": 0.01452122640562707,
        "acc_norm,none": 0.4462457337883959,
        "acc_norm_stderr,none": 0.014526705548539976
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.7226430976430976,
        "acc_stderr,none": 0.0091864901051119,
        "acc_norm,none": 0.6393097643097643,
        "acc_norm_stderr,none": 0.009853512108416741
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.7978593272171254,
        "acc_stderr,none": 0.007023968517730727
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.43745261561789234,
        "exact_match_stderr,flexible-extract": 0.013664299060751919
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.5276837283409679,
        "acc_stderr,none": 0.004982127315605207,
        "acc_norm,none": 0.6827325234017128,
        "acc_norm_stderr,none": 0.004644613601104146
    },
    "mmlu": {
        "acc,none": 0.5286284005127475,
        "acc_stderr,none": 0.004012530834697541,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.4858660998937301,
        "acc_stderr,none": 0.006896090254578555,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.35714285714285715,
        "acc_stderr,none": 0.04285714285714281
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.6484848484848484,
        "acc_stderr,none": 0.0372820699868265
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.7058823529411765,
        "acc_stderr,none": 0.03198001660115069
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.6962025316455697,
        "acc_stderr,none": 0.029936696387138632
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.6776859504132231,
        "acc_stderr,none": 0.04266416363352168
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.6574074074074074,
        "acc_stderr,none": 0.04587904741301812
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.6073619631901841,
        "acc_stderr,none": 0.03836740907831029
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.5635838150289018,
        "acc_stderr,none": 0.026700545424943684
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.23798882681564246,
        "acc_stderr,none": 0.014242630070574885
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.5627009646302251,
        "acc_stderr,none": 0.028173917761762896
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.6111111111111112,
        "acc_stderr,none": 0.027125115513166858
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.42503259452411996,
        "acc_stderr,none": 0.012625879884892003
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.8187134502923976,
        "acc_stderr,none": 0.029547741687640038
    },
    "mmlu_other": {
        "acc,none": 0.6137753459929192,
        "acc_stderr,none": 0.008424452519358264,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.51,
        "acc_stderr,none": 0.05024183937956911
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.5773584905660377,
        "acc_stderr,none": 0.030402331445769537
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.4913294797687861,
        "acc_stderr,none": 0.03811890988940412
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.36,
        "acc_stderr,none": 0.048241815132442176
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.5919282511210763,
        "acc_stderr,none": 0.03298574607842821
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.7572815533980582,
        "acc_stderr,none": 0.04245022486384495
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.7692307692307693,
        "acc_stderr,none": 0.027601921381417593
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.65,
        "acc_stderr,none": 0.0479372485441102
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.7611749680715197,
        "acc_stderr,none": 0.015246803197398691
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.545751633986928,
        "acc_stderr,none": 0.02850980780262659
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.38652482269503546,
        "acc_stderr,none": 0.02904919034254345
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.6397058823529411,
        "acc_stderr,none": 0.029163128570670733
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.4879518072289157,
        "acc_stderr,none": 0.038913644958358196
    },
    "mmlu_social_sciences": {
        "acc,none": 0.6022099447513812,
        "acc_stderr,none": 0.008583867792808866,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.38596491228070173,
        "acc_stderr,none": 0.04579639422070435
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.6717171717171717,
        "acc_stderr,none": 0.03345678422756777
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.7772020725388601,
        "acc_stderr,none": 0.030031147977641545
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.5076923076923077,
        "acc_stderr,none": 0.025348006031534767
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.46218487394957986,
        "acc_stderr,none": 0.0323854694875898
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.7321100917431193,
        "acc_stderr,none": 0.018987462257978652
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.6641221374045801,
        "acc_stderr,none": 0.04142313771996665
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.5343137254901961,
        "acc_stderr,none": 0.020180144843307293
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.5727272727272728,
        "acc_stderr,none": 0.047381987035454834
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.4857142857142857,
        "acc_stderr,none": 0.03199615232806286
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.7263681592039801,
        "acc_stderr,none": 0.03152439186555401
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.77,
        "acc_stderr,none": 0.04229525846816506
    },
    "mmlu_stem": {
        "acc,none": 0.4367269267364415,
        "acc_stderr,none": 0.008623051895253753,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.28,
        "acc_stderr,none": 0.04512608598542129
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.4888888888888889,
        "acc_stderr,none": 0.04318275491977978
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.4934210526315789,
        "acc_stderr,none": 0.040685900502249704
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.6111111111111112,
        "acc_stderr,none": 0.04076663253918567
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.39,
        "acc_stderr,none": 0.04902071300001974
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.43,
        "acc_stderr,none": 0.04975698519562429
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.34,
        "acc_stderr,none": 0.04760952285695235
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.37254901960784315,
        "acc_stderr,none": 0.04810840148082635
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.66,
        "acc_stderr,none": 0.04760952285695238
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.46382978723404256,
        "acc_stderr,none": 0.032600385118357715
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.496551724137931,
        "acc_stderr,none": 0.041665675771015785
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.38095238095238093,
        "acc_stderr,none": 0.025010749116137595
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.6483870967741936,
        "acc_stderr,none": 0.027162537826948458
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.4039408866995074,
        "acc_stderr,none": 0.03452453903822039
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.49,
        "acc_stderr,none": 0.05024183937956913
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.2962962962962963,
        "acc_stderr,none": 0.027840811495871927
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.33112582781456956,
        "acc_stderr,none": 0.038425817186598696
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.3148148148148148,
        "acc_stderr,none": 0.031674687068289784
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.4017857142857143,
        "acc_stderr,none": 0.04653333146973646
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.750272034820457,
        "acc_stderr,none": 0.010099232969867507,
        "acc_norm,none": 0.7464635473340587,
        "acc_norm_stderr,none": 0.010150090834551786
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.44882292732855683,
        "acc_stderr,none": 0.011254649314820134
    }
}