{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.3771331058020478,
        "acc_stderr,none": 0.0141633668961926,
        "acc_norm,none": 0.39334470989761094,
        "acc_norm_stderr,none": 0.014275101465693024
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.5273569023569024,
        "acc_stderr,none": 0.01024441516439053,
        "acc_norm,none": 0.4684343434343434,
        "acc_norm_stderr,none": 0.010239317603199512
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.8177370030581039,
        "acc_stderr,none": 0.006752251630113674
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.18423047763457165,
        "exact_match_stderr,flexible-extract": 0.010678414428555008
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.4064927305317666,
        "acc_stderr,none": 0.00490174742633173,
        "acc_norm,none": 0.4718183628759211,
        "acc_norm_stderr,none": 0.004981849291299654
    },
    "mmlu": {
        "acc,none": 0.506266913545079,
        "acc_stderr,none": 0.0039897970088237505,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.45462274176408074,
        "acc_stderr,none": 0.00684861029496676,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.3253968253968254,
        "acc_stderr,none": 0.04190596438871136
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.5696969696969697,
        "acc_stderr,none": 0.03866225962879077
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.6568627450980392,
        "acc_stderr,none": 0.033321399446680854
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.6329113924050633,
        "acc_stderr,none": 0.03137624072561619
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.6859504132231405,
        "acc_stderr,none": 0.04236964753041019
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.6666666666666666,
        "acc_stderr,none": 0.04557239513497751
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.6687116564417178,
        "acc_stderr,none": 0.03697983910025588
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.5635838150289018,
        "acc_stderr,none": 0.02670054542494368
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.23798882681564246,
        "acc_stderr,none": 0.014242630070574885
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.5852090032154341,
        "acc_stderr,none": 0.027982680459759567
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.5864197530864198,
        "acc_stderr,none": 0.02740204204026996
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.35267275097783574,
        "acc_stderr,none": 0.012203286846053889
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.7894736842105263,
        "acc_stderr,none": 0.031267817146631786
    },
    "mmlu_other": {
        "acc,none": 0.5925329900225298,
        "acc_stderr,none": 0.008461759414668845,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.56,
        "acc_stderr,none": 0.049888765156985884
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.5924528301886792,
        "acc_stderr,none": 0.030242233800854494
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.5086705202312138,
        "acc_stderr,none": 0.03811890988940412
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.25,
        "acc_stderr,none": 0.04351941398892446
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.5695067264573991,
        "acc_stderr,none": 0.033231973029429394
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.7281553398058253,
        "acc_stderr,none": 0.044052680241409216
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.7863247863247863,
        "acc_stderr,none": 0.02685345037700914
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.62,
        "acc_stderr,none": 0.048783173121456316
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.7381864623243933,
        "acc_stderr,none": 0.01572083867844527
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.5915032679738562,
        "acc_stderr,none": 0.028146405993096358
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.3829787234042553,
        "acc_stderr,none": 0.028999080904806167
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.4227941176470588,
        "acc_stderr,none": 0.030008562845003483
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.5120481927710844,
        "acc_stderr,none": 0.03891364495835816
    },
    "mmlu_social_sciences": {
        "acc,none": 0.6035099122521937,
        "acc_stderr,none": 0.008603815380333984,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.3684210526315789,
        "acc_stderr,none": 0.04537815354939391
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.6414141414141414,
        "acc_stderr,none": 0.034169036403915214
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.7772020725388601,
        "acc_stderr,none": 0.03003114797764154
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.4948717948717949,
        "acc_stderr,none": 0.025349672906838653
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.5126050420168067,
        "acc_stderr,none": 0.03246816765752173
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.728440366972477,
        "acc_stderr,none": 0.01906909836319145
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.5725190839694656,
        "acc_stderr,none": 0.04338920305792401
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.5179738562091504,
        "acc_stderr,none": 0.020214761037872404
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.6090909090909091,
        "acc_stderr,none": 0.04673752333670237
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.5836734693877551,
        "acc_stderr,none": 0.03155782816556165
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.746268656716418,
        "acc_stderr,none": 0.030769444967296018
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.74,
        "acc_stderr,none": 0.044084400227680794
    },
    "mmlu_stem": {
        "acc,none": 0.4034253092293054,
        "acc_stderr,none": 0.008441532892970793,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.24,
        "acc_stderr,none": 0.042923469599092816
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.5111111111111111,
        "acc_stderr,none": 0.043182754919779756
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.5657894736842105,
        "acc_stderr,none": 0.040335656678483184
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.625,
        "acc_stderr,none": 0.04048439222695598
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.36,
        "acc_stderr,none": 0.04824181513244218
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.39,
        "acc_stderr,none": 0.04902071300001975
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.25,
        "acc_stderr,none": 0.04351941398892446
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.35294117647058826,
        "acc_stderr,none": 0.047551296160629475
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.65,
        "acc_stderr,none": 0.04793724854411019
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.4340425531914894,
        "acc_stderr,none": 0.03240038086792747
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.496551724137931,
        "acc_stderr,none": 0.04166567577101579
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.31216931216931215,
        "acc_stderr,none": 0.023865206836972595
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.6064516129032258,
        "acc_stderr,none": 0.02779187875313227
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.31527093596059114,
        "acc_stderr,none": 0.032690808719701876
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.41,
        "acc_stderr,none": 0.049431107042371025
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.23333333333333334,
        "acc_stderr,none": 0.025787874220959333
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.2913907284768212,
        "acc_stderr,none": 0.03710185726119995
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.3148148148148148,
        "acc_stderr,none": 0.03167468706828979
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.375,
        "acc_stderr,none": 0.04595091388086298
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.6795429815016322,
        "acc_stderr,none": 0.010887766073814868,
        "acc_norm,none": 0.661588683351469,
        "acc_norm_stderr,none": 0.011039817512986811
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.38843398157625386,
        "acc_stderr,none": 0.011028822814998108
    }
}