{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.42150170648464164,
    "acc_stderr,none": 0.014430197069326025,
    "acc_norm,none": 0.44795221843003413,
    "acc_norm_stderr,none": 0.01453201149821167
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6662457912457912,
    "acc_stderr,none": 0.009676065683575473,
    "acc_norm,none": 0.5664983164983165,
    "acc_norm_stderr,none": 0.01016864062545411
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8269113149847095,
    "acc_stderr,none": 0.0066169270438866555
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.7475360121304018,
    "exact_match_stderr,flexible-extract": 0.011966250044833997
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5138418641704839,
    "acc_stderr,none": 0.004987868988630004,
    "acc_norm,none": 0.658334993029277,
    "acc_norm_stderr,none": 0.004732986187325897
  },
  "mmlu": {
    "acc,none": 0.6554621848739496,
    "acc_stderr,none": 0.003811736421678586,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.567268862911796,
    "acc_stderr,none": 0.006822679879400017,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5079365079365079,
    "acc_stderr,none": 0.044715725362943486
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7272727272727273,
    "acc_stderr,none": 0.0347769116216366
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7401960784313726,
    "acc_stderr,none": 0.03077855467869326
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7679324894514767,
    "acc_stderr,none": 0.027479744550808514
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7603305785123967,
    "acc_stderr,none": 0.03896878985070416
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7777777777777778,
    "acc_stderr,none": 0.040191074725573483
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.803680981595092,
    "acc_stderr,none": 0.031207970394709215
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6734104046242775,
    "acc_stderr,none": 0.025248264774242832
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.3664804469273743,
    "acc_stderr,none": 0.016115235504865474
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.707395498392283,
    "acc_stderr,none": 0.025839898334877976
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7592592592592593,
    "acc_stderr,none": 0.023788583551658537
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.44589308996088656,
    "acc_stderr,none": 0.012695244711379778
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.783625730994152,
    "acc_stderr,none": 0.03158149539338733
  },
  "mmlu_other": {
    "acc,none": 0.6997103315094947,
    "acc_stderr,none": 0.007974981422946179,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.7,
    "acc_stderr,none": 0.04605661864718381
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.7169811320754716,
    "acc_stderr,none": 0.027724236492700918
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.6358381502890174,
    "acc_stderr,none": 0.03669072477416905
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.38,
    "acc_stderr,none": 0.04878317312145633
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6636771300448431,
    "acc_stderr,none": 0.031708824268455
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7378640776699029,
    "acc_stderr,none": 0.04354631077260595
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8931623931623932,
    "acc_stderr,none": 0.020237149008990957
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.74,
    "acc_stderr,none": 0.0440844002276808
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7777777777777778,
    "acc_stderr,none": 0.01486682166470959
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7058823529411765,
    "acc_stderr,none": 0.02609016250427904
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5212765957446809,
    "acc_stderr,none": 0.029800481645628693
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.75,
    "acc_stderr,none": 0.026303648393696036
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.5,
    "acc_stderr,none": 0.03892494720807614
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7666558336041599,
    "acc_stderr,none": 0.0075396139768817315,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.5877192982456141,
    "acc_stderr,none": 0.04630653203366595
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8131313131313131,
    "acc_stderr,none": 0.027772533334218967
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8393782383419689,
    "acc_stderr,none": 0.026499057701397453
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7384615384615385,
    "acc_stderr,none": 0.022282141204204405
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8235294117647058,
    "acc_stderr,none": 0.024762902678057922
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8458715596330275,
    "acc_stderr,none": 0.015480826865374282
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.6946564885496184,
    "acc_stderr,none": 0.040393149787245605
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.7026143790849673,
    "acc_stderr,none": 0.01849259653639695
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6727272727272727,
    "acc_stderr,none": 0.04494290866252088
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7591836734693878,
    "acc_stderr,none": 0.027372942201788163
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.7960199004975125,
    "acc_stderr,none": 0.02849317624532607
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.83,
    "acc_stderr,none": 0.03775251680686371
  },
  "mmlu_stem": {
    "acc,none": 0.6349508404693942,
    "acc_stderr,none": 0.00828401842673178,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.49,
    "acc_stderr,none": 0.05024183937956911
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6888888888888889,
    "acc_stderr,none": 0.03999262876617722
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.756578947368421,
    "acc_stderr,none": 0.034923496688842384
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8055555555555556,
    "acc_stderr,none": 0.03309615177059006
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.5,
    "acc_stderr,none": 0.050251890762960605
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.57,
    "acc_stderr,none": 0.049756985195624284
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.46,
    "acc_stderr,none": 0.05009082659620333
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.5294117647058824,
    "acc_stderr,none": 0.049665709039785295
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.7,
    "acc_stderr,none": 0.046056618647183814
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.7361702127659574,
    "acc_stderr,none": 0.028809989854102956
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.6896551724137931,
    "acc_stderr,none": 0.0385528961637895
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.5873015873015873,
    "acc_stderr,none": 0.02535574126305526
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8258064516129032,
    "acc_stderr,none": 0.02157624818451457
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6699507389162561,
    "acc_stderr,none": 0.033085304262282574
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.82,
    "acc_stderr,none": 0.03861229196653694
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.4,
    "acc_stderr,none": 0.0298696050953169
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.4966887417218543,
    "acc_stderr,none": 0.04082393379449654
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.6296296296296297,
    "acc_stderr,none": 0.03293377139415191
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5714285714285714,
    "acc_stderr,none": 0.04697113923010213
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7693144722524483,
    "acc_stderr,none": 0.009828959550983089,
    "acc_norm,none": 0.7595212187159956,
    "acc_norm_stderr,none": 0.009971345364651076
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4421699078812692,
    "acc_stderr,none": 0.01123814002932692
  }
}