{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4112627986348123,
    "acc_stderr,none": 0.014379441068522082,
    "acc_norm,none": 0.42150170648464164,
    "acc_norm_stderr,none": 0.014430197069326025
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6494107744107744,
    "acc_stderr,none": 0.009791003829831555,
    "acc_norm,none": 0.5408249158249159,
    "acc_norm_stderr,none": 0.010225526906982618
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.7207951070336391,
    "acc_stderr,none": 0.007846210712706131
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.7376800606520091,
    "exact_match_stderr,flexible-extract": 0.012116912419925706
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.49283011352320255,
    "acc_stderr,none": 0.004989268362968715,
    "acc_norm,none": 0.6125273849830711,
    "acc_norm_stderr,none": 0.004861774129612516
  },
  "mmlu": {
    "acc,none": 0.6615154536390827,
    "acc_stderr,none": 0.0037879948898885954,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5725823591923486,
    "acc_stderr,none": 0.006803956810463235,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5476190476190477,
    "acc_stderr,none": 0.044518079590553275
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7454545454545455,
    "acc_stderr,none": 0.03401506715249039
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.75,
    "acc_stderr,none": 0.03039153369274154
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7890295358649789,
    "acc_stderr,none": 0.02655837250266192
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7603305785123967,
    "acc_stderr,none": 0.03896878985070416
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7777777777777778,
    "acc_stderr,none": 0.040191074725573483
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.803680981595092,
    "acc_stderr,none": 0.031207970394709218
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6820809248554913,
    "acc_stderr,none": 0.025070713719153183
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.36983240223463687,
    "acc_stderr,none": 0.016145881256056215
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.7041800643086816,
    "acc_stderr,none": 0.025922371788818784
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7530864197530864,
    "acc_stderr,none": 0.02399350170904211
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4485006518904824,
    "acc_stderr,none": 0.012702317490559813
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.8011695906432749,
    "acc_stderr,none": 0.030611116557432528
  },
  "mmlu_other": {
    "acc,none": 0.7009977470228517,
    "acc_stderr,none": 0.007964391366157383,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.72,
    "acc_stderr,none": 0.04512608598542129
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.7056603773584905,
    "acc_stderr,none": 0.028049186315695245
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.653179190751445,
    "acc_stderr,none": 0.036291466701596636
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.37,
    "acc_stderr,none": 0.04852365870939099
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6681614349775785,
    "acc_stderr,none": 0.03160295143776679
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7475728155339806,
    "acc_stderr,none": 0.04301250399690878
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8846153846153846,
    "acc_stderr,none": 0.02093019318517934
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.73,
    "acc_stderr,none": 0.044619604333847415
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7816091954022989,
    "acc_stderr,none": 0.01477435831993448
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7124183006535948,
    "acc_stderr,none": 0.02591780611714716
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5283687943262412,
    "acc_stderr,none": 0.029779450957303062
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.7463235294117647,
    "acc_stderr,none": 0.02643132987078955
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4879518072289157,
    "acc_stderr,none": 0.038913644958358196
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7757556061098473,
    "acc_stderr,none": 0.007432881162185533,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.6140350877192983,
    "acc_stderr,none": 0.04579639422070435
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8131313131313131,
    "acc_stderr,none": 0.027772533334218967
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8549222797927462,
    "acc_stderr,none": 0.025416343096306422
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7307692307692307,
    "acc_stderr,none": 0.02248938979365483
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8361344537815126,
    "acc_stderr,none": 0.02404405494044048
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8568807339449541,
    "acc_stderr,none": 0.015014462497168554
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7022900763358778,
    "acc_stderr,none": 0.04010358942462203
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.7140522875816994,
    "acc_stderr,none": 0.01828048507295467
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6818181818181818,
    "acc_stderr,none": 0.04461272175910507
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7836734693877551,
    "acc_stderr,none": 0.026358916334904035
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.7960199004975125,
    "acc_stderr,none": 0.02849317624532607
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.84,
    "acc_stderr,none": 0.0368452949177471
  },
  "mmlu_stem": {
    "acc,none": 0.6438312718046305,
    "acc_stderr,none": 0.008203575098649144,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.46,
    "acc_stderr,none": 0.05009082659620333
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6888888888888889,
    "acc_stderr,none": 0.03999262876617722
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.7631578947368421,
    "acc_stderr,none": 0.034597776068105365
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8194444444444444,
    "acc_stderr,none": 0.03216600808802268
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.5,
    "acc_stderr,none": 0.050251890762960605
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.56,
    "acc_stderr,none": 0.04988876515698589
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.48,
    "acc_stderr,none": 0.050211673156867795
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.5294117647058824,
    "acc_stderr,none": 0.049665709039785295
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.73,
    "acc_stderr,none": 0.04461960433384739
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.7446808510638298,
    "acc_stderr,none": 0.028504856470514258
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.696551724137931,
    "acc_stderr,none": 0.038312260488503336
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6031746031746031,
    "acc_stderr,none": 0.025197101074246473
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8548387096774194,
    "acc_stderr,none": 0.020039563628053304
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6650246305418719,
    "acc_stderr,none": 0.033208527423483104
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.83,
    "acc_stderr,none": 0.0377525168068637
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.3962962962962963,
    "acc_stderr,none": 0.029822619458533997
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.5165562913907285,
    "acc_stderr,none": 0.0408024418562897
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.6435185185185185,
    "acc_stderr,none": 0.032664783315272714
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5803571428571429,
    "acc_stderr,none": 0.04684099321077106
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7437431991294886,
    "acc_stderr,none": 0.010185787831565063,
    "acc_norm,none": 0.7415669205658324,
    "acc_norm_stderr,none": 0.010213971636773317
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4421699078812692,
    "acc_stderr,none": 0.011238140029326923
  }
}