{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4513651877133106,
    "acc_stderr,none": 0.014542104569955264,
    "acc_norm,none": 0.4598976109215017,
    "acc_norm_stderr,none": 0.01456431885692485
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6973905723905723,
    "acc_stderr,none": 0.009426434542371223,
    "acc_norm,none": 0.57996632996633,
    "acc_norm_stderr,none": 0.010127718838529321
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8584097859327217,
    "acc_stderr,none": 0.006097565706068083
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.17361637604245642,
    "exact_match_stderr,flexible-extract": 0.010433463221257641
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5738896634136627,
    "acc_stderr,none": 0.004934995402995954,
    "acc_norm,none": 0.6733718382792272,
    "acc_norm_stderr,none": 0.00468021500339592
  },
  "mmlu": {
    "acc,none": 0.4044295684375445,
    "acc_stderr,none": 0.0039744144797355,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.4055260361317747,
    "acc_stderr,none": 0.006795790883388519,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.2777777777777778,
    "acc_stderr,none": 0.04006168083848878
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.6424242424242425,
    "acc_stderr,none": 0.03742597043806587
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.6617647058823529,
    "acc_stderr,none": 0.0332057461294543
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.6666666666666666,
    "acc_stderr,none": 0.030685820596610815
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.6446280991735537,
    "acc_stderr,none": 0.04369236326573981
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.5277777777777778,
    "acc_stderr,none": 0.04826217294139894
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.5276073619631901,
    "acc_stderr,none": 0.0392237829061099
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.42485549132947975,
    "acc_stderr,none": 0.026613350840261736
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.23798882681564246,
    "acc_stderr,none": 0.014242630070574885
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.3633440514469453,
    "acc_stderr,none": 0.027316847674192707
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.5216049382716049,
    "acc_stderr,none": 0.027794760105008722
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.31486310299869624,
    "acc_stderr,none": 0.011862561755715926
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7485380116959064,
    "acc_stderr,none": 0.033275044238468436
  },
  "mmlu_other": {
    "acc,none": 0.4850337946572256,
    "acc_stderr,none": 0.00874771791794648,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.5,
    "acc_stderr,none": 0.050251890762960605
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.45660377358490567,
    "acc_stderr,none": 0.03065674869673943
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.34104046242774566,
    "acc_stderr,none": 0.036146654241808254
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.22,
    "acc_stderr,none": 0.04163331998932269
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.5426008968609866,
    "acc_stderr,none": 0.033435777055830646
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.5533980582524272,
    "acc_stderr,none": 0.04922424153458933
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.6752136752136753,
    "acc_stderr,none": 0.03067902276549883
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.53,
    "acc_stderr,none": 0.05016135580465919
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.5836526181353767,
    "acc_stderr,none": 0.0176279480304303
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.49019607843137253,
    "acc_stderr,none": 0.02862441255016795
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.32978723404255317,
    "acc_stderr,none": 0.028045946942042405
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.35661764705882354,
    "acc_stderr,none": 0.02909720956841195
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.41566265060240964,
    "acc_stderr,none": 0.038367221765980515
  },
  "mmlu_social_sciences": {
    "acc,none": 0.4494637634059149,
    "acc_stderr,none": 0.00882236633216108,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.30701754385964913,
    "acc_stderr,none": 0.0433913832257986
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.45454545454545453,
    "acc_stderr,none": 0.03547601494006936
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.5595854922279793,
    "acc_stderr,none": 0.03582724530036096
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.3128205128205128,
    "acc_stderr,none": 0.023507579020645347
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.3067226890756303,
    "acc_stderr,none": 0.029953823891887048
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.5211009174311927,
    "acc_stderr,none": 0.021418224754264643
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.45038167938931295,
    "acc_stderr,none": 0.04363643698524779
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.46078431372549017,
    "acc_stderr,none": 0.02016552331390791
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.4636363636363636,
    "acc_stderr,none": 0.04776449162396197
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.40408163265306124,
    "acc_stderr,none": 0.03141470802586589
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.6268656716417911,
    "acc_stderr,none": 0.03419832608176007
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.54,
    "acc_stderr,none": 0.05009082659620332
  },
  "mmlu_stem": {
    "acc,none": 0.2794164287979702,
    "acc_stderr,none": 0.007874801050145094,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.22,
    "acc_stderr,none": 0.04163331998932268
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.34814814814814815,
    "acc_stderr,none": 0.041153246103369526
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.3618421052631579,
    "acc_stderr,none": 0.03910525752849724
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.4166666666666667,
    "acc_stderr,none": 0.041227287076512825
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.24,
    "acc_stderr,none": 0.04292346959909282
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.29,
    "acc_stderr,none": 0.04560480215720684
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.21,
    "acc_stderr,none": 0.040936018074033256
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.22549019607843138,
    "acc_stderr,none": 0.041583075330832865
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.43,
    "acc_stderr,none": 0.049756985195624284
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.28936170212765955,
    "acc_stderr,none": 0.02964400657700962
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.30344827586206896,
    "acc_stderr,none": 0.038312260488503336
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.20899470899470898,
    "acc_stderr,none": 0.02094048156533485
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.4064516129032258,
    "acc_stderr,none": 0.027941727346256315
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.1921182266009852,
    "acc_stderr,none": 0.027719315709614778
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.35,
    "acc_stderr,none": 0.047937248544110196
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.2111111111111111,
    "acc_stderr,none": 0.024882116857655113
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.2052980132450331,
    "acc_stderr,none": 0.03297986648473836
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.17592592592592593,
    "acc_stderr,none": 0.02596742095825853
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.35714285714285715,
    "acc_stderr,none": 0.04547960999764376
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7589771490750816,
    "acc_stderr,none": 0.009979042717267312,
    "acc_norm,none": 0.764417845484222,
    "acc_norm_stderr,none": 0.009901067586473909
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.43654042988741043,
    "acc_stderr,none": 0.011222574420844785
  }
}