{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.41638225255972694,
    "acc_stderr,none": 0.014405618279436169,
    "acc_norm,none": 0.44112627986348124,
    "acc_norm_stderr,none": 0.014509747749064664
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6590909090909091,
    "acc_stderr,none": 0.00972657959342402,
    "acc_norm,none": 0.5589225589225589,
    "acc_norm_stderr,none": 0.010188293221040551
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.825382262996942,
    "acc_stderr,none": 0.0066399419638474115
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.7702805155420773,
    "exact_match_stderr,flexible-extract": 0.011586857544997503
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.513343955387373,
    "acc_stderr,none": 0.004988004122536529,
    "acc_norm,none": 0.6555467038438558,
    "acc_norm_stderr,none": 0.004742185169264752
  },
  "mmlu": {
    "acc,none": 0.6571713431135165,
    "acc_stderr,none": 0.0038005064333683333,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5670563230605739,
    "acc_stderr,none": 0.0068129533828951835,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5079365079365079,
    "acc_stderr,none": 0.044715725362943486
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7272727272727273,
    "acc_stderr,none": 0.0347769116216366
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7352941176470589,
    "acc_stderr,none": 0.030964517926923393
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7637130801687764,
    "acc_stderr,none": 0.02765215314415928
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.768595041322314,
    "acc_stderr,none": 0.038498560987940904
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7870370370370371,
    "acc_stderr,none": 0.03957835471980981
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.803680981595092,
    "acc_stderr,none": 0.031207970394709215
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6763005780346821,
    "acc_stderr,none": 0.025190181327608405
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.3642458100558659,
    "acc_stderr,none": 0.016094338768474593
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.707395498392283,
    "acc_stderr,none": 0.025839898334877976
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7654320987654321,
    "acc_stderr,none": 0.02357688174400572
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4439374185136897,
    "acc_stderr,none": 0.012689708167787682
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7894736842105263,
    "acc_stderr,none": 0.031267817146631786
  },
  "mmlu_other": {
    "acc,none": 0.700032185387834,
    "acc_stderr,none": 0.007956986767191256,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.7,
    "acc_stderr,none": 0.04605661864718381
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.7169811320754716,
    "acc_stderr,none": 0.027724236492700918
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.6647398843930635,
    "acc_stderr,none": 0.03599586301247078
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.35,
    "acc_stderr,none": 0.047937248544110196
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6636771300448431,
    "acc_stderr,none": 0.031708824268455
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7572815533980582,
    "acc_stderr,none": 0.04245022486384495
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8888888888888888,
    "acc_stderr,none": 0.020588491316092358
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.74,
    "acc_stderr,none": 0.0440844002276808
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7803320561941252,
    "acc_stderr,none": 0.014805384478371146
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.6993464052287581,
    "acc_stderr,none": 0.026256053835718964
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5141843971631206,
    "acc_stderr,none": 0.02981549448368206
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.75,
    "acc_stderr,none": 0.026303648393696036
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.5,
    "acc_stderr,none": 0.03892494720807614
  },
  "mmlu_social_sciences": {
    "acc,none": 0.769580760480988,
    "acc_stderr,none": 0.0075020978078128,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.5877192982456141,
    "acc_stderr,none": 0.04630653203366595
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8181818181818182,
    "acc_stderr,none": 0.0274796030105388
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.844559585492228,
    "acc_stderr,none": 0.02614848346915332
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7410256410256411,
    "acc_stderr,none": 0.022211106810061672
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8235294117647058,
    "acc_stderr,none": 0.024762902678057922
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8495412844036697,
    "acc_stderr,none": 0.015328563932669235
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7022900763358778,
    "acc_stderr,none": 0.04010358942462203
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.6993464052287581,
    "acc_stderr,none": 0.018550634502952964
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6818181818181818,
    "acc_stderr,none": 0.04461272175910507
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7714285714285715,
    "acc_stderr,none": 0.026882144922307748
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8009950248756219,
    "acc_stderr,none": 0.028231365092758406
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.83,
    "acc_stderr,none": 0.03775251680686371
  },
  "mmlu_stem": {
    "acc,none": 0.6397082143989851,
    "acc_stderr,none": 0.008248628328532811,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.49,
    "acc_stderr,none": 0.05024183937956911
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6962962962962963,
    "acc_stderr,none": 0.039725528847851375
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.7631578947368421,
    "acc_stderr,none": 0.034597776068105365
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8055555555555556,
    "acc_stderr,none": 0.03309615177059006
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.5,
    "acc_stderr,none": 0.050251890762960605
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.58,
    "acc_stderr,none": 0.04960449637488583
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.45,
    "acc_stderr,none": 0.05
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.5294117647058824,
    "acc_stderr,none": 0.049665709039785295
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.72,
    "acc_stderr,none": 0.04512608598542127
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.7446808510638298,
    "acc_stderr,none": 0.028504856470514255
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.696551724137931,
    "acc_stderr,none": 0.038312260488503336
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.5846560846560847,
    "acc_stderr,none": 0.025379524910778408
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8387096774193549,
    "acc_stderr,none": 0.020923327006423305
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6699507389162561,
    "acc_stderr,none": 0.033085304262282574
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.83,
    "acc_stderr,none": 0.0377525168068637
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.4111111111111111,
    "acc_stderr,none": 0.029999923508706686
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.5099337748344371,
    "acc_stderr,none": 0.04081677107248437
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.6296296296296297,
    "acc_stderr,none": 0.03293377139415191
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5625,
    "acc_stderr,none": 0.04708567521880525
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7671381936887922,
    "acc_stderr,none": 0.009861236071080751,
    "acc_norm,none": 0.7595212187159956,
    "acc_norm_stderr,none": 0.009971345364651073
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.44421699078812693,
    "acc_stderr,none": 0.01124343708855982
  }
}