{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.41467576791808874,
    "acc_stderr,none": 0.014397070564409174,
    "acc_norm,none": 0.4283276450511945,
    "acc_norm_stderr,none": 0.014460496367599027
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6275252525252525,
    "acc_stderr,none": 0.009920469215736014,
    "acc_norm,none": 0.5370370370370371,
    "acc_norm_stderr,none": 0.010231597249131063
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8018348623853211,
    "acc_stderr,none": 0.006971859616072257
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.8908263836239575,
    "exact_match_stderr,flexible-extract": 0.00859008930051116
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5258912567217686,
    "acc_stderr,none": 0.004983087049281749,
    "acc_norm,none": 0.6590320653256323,
    "acc_norm_stderr,none": 0.004730658073041547
  },
  "mmlu": {
    "acc,none": 0.6535393818544367,
    "acc_stderr,none": 0.003816346513083389,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.571519659936238,
    "acc_stderr,none": 0.0068394887645455505,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5555555555555556,
    "acc_stderr,none": 0.044444444444444495
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7333333333333333,
    "acc_stderr,none": 0.03453131801885417
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7696078431372549,
    "acc_stderr,none": 0.029554292605695066
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7890295358649789,
    "acc_stderr,none": 0.02655837250266192
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7603305785123967,
    "acc_stderr,none": 0.038968789850704164
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7314814814814815,
    "acc_stderr,none": 0.042844679680521934
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.7791411042944786,
    "acc_stderr,none": 0.03259177392742178
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.7052023121387283,
    "acc_stderr,none": 0.024547617794803835
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.3664804469273743,
    "acc_stderr,none": 0.01611523550486547
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.6591639871382636,
    "acc_stderr,none": 0.026920841260776155
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7376543209876543,
    "acc_stderr,none": 0.024477222856135114
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4602346805736636,
    "acc_stderr,none": 0.012729785386598564
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.783625730994152,
    "acc_stderr,none": 0.03158149539338733
  },
  "mmlu_other": {
    "acc,none": 0.6971355004827808,
    "acc_stderr,none": 0.007977286533466934,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.71,
    "acc_stderr,none": 0.045604802157206845
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.6867924528301886,
    "acc_stderr,none": 0.028544793319055326
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.6416184971098265,
    "acc_stderr,none": 0.0365634365335316
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.35,
    "acc_stderr,none": 0.04793724854411018
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6681614349775785,
    "acc_stderr,none": 0.031602951437766785
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7184466019417476,
    "acc_stderr,none": 0.044532548363264673
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8974358974358975,
    "acc_stderr,none": 0.019875655027867454
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.75,
    "acc_stderr,none": 0.04351941398892446
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7905491698595147,
    "acc_stderr,none": 0.014551310568143695
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7189542483660131,
    "acc_stderr,none": 0.025738854797818723
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5390070921985816,
    "acc_stderr,none": 0.02973659252642444
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.6911764705882353,
    "acc_stderr,none": 0.028064998167040094
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4819277108433735,
    "acc_stderr,none": 0.03889951252827216
  },
  "mmlu_social_sciences": {
    "acc,none": 0.758856028599285,
    "acc_stderr,none": 0.007615272522266677,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.5701754385964912,
    "acc_stderr,none": 0.04657047260594963
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.803030303030303,
    "acc_stderr,none": 0.028335609732463362
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8238341968911918,
    "acc_stderr,none": 0.027493504244548057
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7256410256410256,
    "acc_stderr,none": 0.022622765767493218
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.819327731092437,
    "acc_stderr,none": 0.024991964966600746
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8385321100917431,
    "acc_stderr,none": 0.015776239256163248
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7175572519083969,
    "acc_stderr,none": 0.03948406125768361
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.6928104575163399,
    "acc_stderr,none": 0.018663359671463677
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6454545454545455,
    "acc_stderr,none": 0.04582004841505415
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.726530612244898,
    "acc_stderr,none": 0.028535560337128448
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8258706467661692,
    "acc_stderr,none": 0.026814951200421603
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.84,
    "acc_stderr,none": 0.0368452949177471
  },
  "mmlu_stem": {
    "acc,none": 0.6301934665398033,
    "acc_stderr,none": 0.00822682733218626,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.41,
    "acc_stderr,none": 0.04943110704237102
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6518518518518519,
    "acc_stderr,none": 0.041153246103369526
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.7894736842105263,
    "acc_stderr,none": 0.03317672787533157
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.7986111111111112,
    "acc_stderr,none": 0.0335364746971384
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.44,
    "acc_stderr,none": 0.04988876515698589
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.59,
    "acc_stderr,none": 0.049431107042371025
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.44,
    "acc_stderr,none": 0.04988876515698589
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.45098039215686275,
    "acc_stderr,none": 0.049512182523962625
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.75,
    "acc_stderr,none": 0.04351941398892446
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.7404255319148936,
    "acc_stderr,none": 0.028659179374292316
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.6137931034482759,
    "acc_stderr,none": 0.04057324734419035
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6349206349206349,
    "acc_stderr,none": 0.02479606060269994
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8387096774193549,
    "acc_stderr,none": 0.0209233270064233
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6699507389162561,
    "acc_stderr,none": 0.03308530426228258
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.82,
    "acc_stderr,none": 0.03861229196653694
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.37777777777777777,
    "acc_stderr,none": 0.029560707392465704
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.48344370860927155,
    "acc_stderr,none": 0.04080244185628972
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.6203703703703703,
    "acc_stderr,none": 0.03309682581119035
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5803571428571429,
    "acc_stderr,none": 0.04684099321077106
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.76550598476605,
    "acc_stderr,none": 0.009885203143240547,
    "acc_norm,none": 0.7557127312295974,
    "acc_norm_stderr,none": 0.01002476517228424
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.42988741044012285,
    "acc_stderr,none": 0.011202283451328796
  }
}