{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.39419795221843,
    "acc_stderr,none": 0.01428052266746732,
    "acc_norm,none": 0.41552901023890787,
    "acc_norm_stderr,none": 0.014401366641216391
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6308922558922558,
    "acc_stderr,none": 0.009901987410242731,
    "acc_norm,none": 0.5134680134680135,
    "acc_norm_stderr,none": 0.010256060854840751
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.7189602446483181,
    "acc_stderr,none": 0.007861924290695653
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.7422289613343442,
    "exact_match_stderr,flexible-extract": 0.012048370213576604
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.49044015136427005,
    "acc_stderr,none": 0.004988869288786874,
    "acc_norm,none": 0.6194981079466242,
    "acc_norm_stderr,none": 0.004845180034271624
  },
  "mmlu": {
    "acc,none": 0.6672126477709728,
    "acc_stderr,none": 0.0037688482058625228,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5738575982996812,
    "acc_stderr,none": 0.006775983662908779,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5476190476190477,
    "acc_stderr,none": 0.044518079590553275
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7393939393939394,
    "acc_stderr,none": 0.03427743175816524
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7696078431372549,
    "acc_stderr,none": 0.029554292605695053
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.8016877637130801,
    "acc_stderr,none": 0.025955020841621105
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7603305785123967,
    "acc_stderr,none": 0.03896878985070416
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7592592592592593,
    "acc_stderr,none": 0.041331194402438376
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.803680981595092,
    "acc_stderr,none": 0.031207970394709218
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6820809248554913,
    "acc_stderr,none": 0.025070713719153183
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.3642458100558659,
    "acc_stderr,none": 0.016094338768474593
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.7138263665594855,
    "acc_stderr,none": 0.025670259242188943
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7654320987654321,
    "acc_stderr,none": 0.02357688174400572
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4471968709256845,
    "acc_stderr,none": 0.012698825252435113
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.8128654970760234,
    "acc_stderr,none": 0.029913127232368036
  },
  "mmlu_other": {
    "acc,none": 0.7074348245896364,
    "acc_stderr,none": 0.007926503245812549,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.72,
    "acc_stderr,none": 0.04512608598542129
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.7169811320754716,
    "acc_stderr,none": 0.027724236492700918
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.653179190751445,
    "acc_stderr,none": 0.036291466701596636
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.4,
    "acc_stderr,none": 0.049236596391733084
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6771300448430493,
    "acc_stderr,none": 0.031381476375754995
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7475728155339806,
    "acc_stderr,none": 0.04301250399690878
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8846153846153846,
    "acc_stderr,none": 0.02093019318517934
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.74,
    "acc_stderr,none": 0.0440844002276808
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7918263090676884,
    "acc_stderr,none": 0.014518592248904033
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7091503267973857,
    "acc_stderr,none": 0.02600480036395213
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5460992907801419,
    "acc_stderr,none": 0.02970045324729148
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.7426470588235294,
    "acc_stderr,none": 0.02655651947004152
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4879518072289157,
    "acc_stderr,none": 0.0389136449583582
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7829054273643159,
    "acc_stderr,none": 0.00733903725018595,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.6052631578947368,
    "acc_stderr,none": 0.045981880578165414
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8181818181818182,
    "acc_stderr,none": 0.027479603010538804
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8704663212435233,
    "acc_stderr,none": 0.024233532297758733
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7461538461538462,
    "acc_stderr,none": 0.022066054378726257
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8571428571428571,
    "acc_stderr,none": 0.022730208119306552
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8587155963302753,
    "acc_stderr,none": 0.014933868987028085
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7251908396946565,
    "acc_stderr,none": 0.03915345408847836
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.7205882352941176,
    "acc_stderr,none": 0.018152871051538823
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6727272727272727,
    "acc_stderr,none": 0.04494290866252088
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7755102040816326,
    "acc_stderr,none": 0.02671143055553841
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8059701492537313,
    "acc_stderr,none": 0.027962677604768914
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.85,
    "acc_stderr,none": 0.03588702812826372
  },
  "mmlu_stem": {
    "acc,none": 0.6539803361877576,
    "acc_stderr,none": 0.008196380436935,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.47,
    "acc_stderr,none": 0.050161355804659205
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.674074074074074,
    "acc_stderr,none": 0.040491220417025055
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.756578947368421,
    "acc_stderr,none": 0.034923496688842384
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8194444444444444,
    "acc_stderr,none": 0.032166008088022675
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.5,
    "acc_stderr,none": 0.050251890762960605
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.58,
    "acc_stderr,none": 0.04960449637488583
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.49,
    "acc_stderr,none": 0.05024183937956912
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.5294117647058824,
    "acc_stderr,none": 0.049665709039785295
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.77,
    "acc_stderr,none": 0.04229525846816507
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.7574468085106383,
    "acc_stderr,none": 0.028020226271200217
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.7034482758620689,
    "acc_stderr,none": 0.03806142687309992
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6216931216931217,
    "acc_stderr,none": 0.024976954053155257
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8483870967741935,
    "acc_stderr,none": 0.02040261665441674
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6551724137931034,
    "acc_stderr,none": 0.03344283744280458
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.82,
    "acc_stderr,none": 0.03861229196653694
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.44074074074074077,
    "acc_stderr,none": 0.03027067115728408
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.5496688741721855,
    "acc_stderr,none": 0.040622900186837764
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.6574074074074074,
    "acc_stderr,none": 0.03236585252602158
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5892857142857143,
    "acc_stderr,none": 0.04669510663875192
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.735038084874864,
    "acc_stderr,none": 0.010296557993316052,
    "acc_norm,none": 0.7334058759521219,
    "acc_norm_stderr,none": 0.010316749863541369
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4452405322415558,
    "acc_stderr,none": 0.011246013134156768
  }
}