{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4112627986348123,
    "acc_stderr,none": 0.014379441068522084,
    "acc_norm,none": 0.4308873720136519,
    "acc_norm_stderr,none": 0.014471133392642476
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6300505050505051,
    "acc_stderr,none": 0.009906656266021151,
    "acc_norm,none": 0.5387205387205387,
    "acc_norm_stderr,none": 0.010228972678389592
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.7510703363914373,
    "acc_stderr,none": 0.007562600702970115
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.6914329037149356,
    "exact_match_stderr,flexible-extract": 0.01272307604981589
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5230033857797252,
    "acc_stderr,none": 0.004984497871025247,
    "acc_norm,none": 0.6605257916749652,
    "acc_norm_stderr,none": 0.004725630911520339
  },
  "mmlu": {
    "acc,none": 0.6505483549351944,
    "acc_stderr,none": 0.0038267674460107886,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5685441020191286,
    "acc_stderr,none": 0.006838565630785959,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5476190476190477,
    "acc_stderr,none": 0.044518079590553275
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7333333333333333,
    "acc_stderr,none": 0.03453131801885417
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7647058823529411,
    "acc_stderr,none": 0.029771775228145628
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7848101265822784,
    "acc_stderr,none": 0.026750826994676177
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.768595041322314,
    "acc_stderr,none": 0.03849856098794088
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7407407407407407,
    "acc_stderr,none": 0.04236511258094631
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.7975460122699386,
    "acc_stderr,none": 0.031570650789119005
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6994219653179191,
    "acc_stderr,none": 0.02468531686725781
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.3675977653631285,
    "acc_stderr,none": 0.016125543823552958
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.6655948553054662,
    "acc_stderr,none": 0.026795422327893947
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7283950617283951,
    "acc_stderr,none": 0.02474862449053737
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4517601043024772,
    "acc_stderr,none": 0.012710662233660247
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7777777777777778,
    "acc_stderr,none": 0.03188578017686397
  },
  "mmlu_other": {
    "acc,none": 0.6926295461860316,
    "acc_stderr,none": 0.008009470618196882,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.7,
    "acc_stderr,none": 0.046056618647183814
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.6792452830188679,
    "acc_stderr,none": 0.028727502957880263
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.630057803468208,
    "acc_stderr,none": 0.03681229633394319
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.36,
    "acc_stderr,none": 0.048241815132442176
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6591928251121076,
    "acc_stderr,none": 0.03181149747055359
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7184466019417476,
    "acc_stderr,none": 0.044532548363264673
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8974358974358975,
    "acc_stderr,none": 0.019875655027867457
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.74,
    "acc_stderr,none": 0.0440844002276808
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7854406130268199,
    "acc_stderr,none": 0.014680033956893346
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7189542483660131,
    "acc_stderr,none": 0.025738854797818723
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5212765957446809,
    "acc_stderr,none": 0.029800481645628693
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.6985294117647058,
    "acc_stderr,none": 0.027875982114273168
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4819277108433735,
    "acc_stderr,none": 0.03889951252827216
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7543061423464413,
    "acc_stderr,none": 0.007669107228634795,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.5789473684210527,
    "acc_stderr,none": 0.046446020912223177
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.797979797979798,
    "acc_stderr,none": 0.02860620428922987
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8082901554404145,
    "acc_stderr,none": 0.02840895362624528
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7205128205128205,
    "acc_stderr,none": 0.022752388839776823
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8025210084033614,
    "acc_stderr,none": 0.02585916412205147
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8366972477064221,
    "acc_stderr,none": 0.015848255806501548
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7099236641221374,
    "acc_stderr,none": 0.03980066246467766
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.6911764705882353,
    "acc_stderr,none": 0.01869085027359528
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6272727272727273,
    "acc_stderr,none": 0.04631381319425463
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.726530612244898,
    "acc_stderr,none": 0.028535560337128445
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8258706467661692,
    "acc_stderr,none": 0.026814951200421603
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.84,
    "acc_stderr,none": 0.0368452949177471
  },
  "mmlu_stem": {
    "acc,none": 0.6301934665398033,
    "acc_stderr,none": 0.008246569492975055,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.4,
    "acc_stderr,none": 0.04923659639173309
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6592592592592592,
    "acc_stderr,none": 0.04094376269996794
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.7763157894736842,
    "acc_stderr,none": 0.033911609343436025
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.7916666666666666,
    "acc_stderr,none": 0.033961162058453336
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.42,
    "acc_stderr,none": 0.049604496374885836
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.59,
    "acc_stderr,none": 0.049431107042371025
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.43,
    "acc_stderr,none": 0.04975698519562428
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.4411764705882353,
    "acc_stderr,none": 0.049406356306056595
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.75,
    "acc_stderr,none": 0.04351941398892446
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.7404255319148936,
    "acc_stderr,none": 0.028659179374292312
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.6137931034482759,
    "acc_stderr,none": 0.04057324734419035
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6296296296296297,
    "acc_stderr,none": 0.024870815251057107
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8290322580645161,
    "acc_stderr,none": 0.021417242936321565
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6798029556650246,
    "acc_stderr,none": 0.032826493853041504
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.82,
    "acc_stderr,none": 0.03861229196653694
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.3962962962962963,
    "acc_stderr,none": 0.029822619458533997
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.5099337748344371,
    "acc_stderr,none": 0.04081677107248437
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.6342592592592593,
    "acc_stderr,none": 0.03284738857647207
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5625,
    "acc_stderr,none": 0.04708567521880525
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7665941240478781,
    "acc_stderr,none": 0.009869247889521008,
    "acc_norm,none": 0.7578890097932536,
    "acc_norm_stderr,none": 0.00999437126910438
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4278403275332651,
    "acc_stderr,none": 0.011195625418198208
  }
}