{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.48464163822525597,
    "acc_stderr,none": 0.014604496129394908,
    "acc_norm,none": 0.48378839590443684,
    "acc_norm_stderr,none": 0.014603708567414943
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.7403198653198653,
    "acc_stderr,none": 0.008996990428562217,
    "acc_norm,none": 0.6153198653198653,
    "acc_norm_stderr,none": 0.009983171707009011
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8458715596330275,
    "acc_stderr,none": 0.006315185954696949
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.7589082638362395,
    "exact_match_stderr,flexible-extract": 0.011782246325099723
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5173272256522605,
    "acc_stderr,none": 0.0049867843197717865,
    "acc_norm,none": 0.6440948018323043,
    "acc_norm_stderr,none": 0.004778081784542396
  },
  "mmlu": {
    "acc,none": 0.6971229169633956,
    "acc_stderr,none": 0.0036536590192328884,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5985122210414453,
    "acc_stderr,none": 0.006718098826450922,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5793650793650794,
    "acc_stderr,none": 0.04415438226743745
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7818181818181819,
    "acc_stderr,none": 0.03225078108306289
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.8137254901960784,
    "acc_stderr,none": 0.027325470966716323
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.8059071729957806,
    "acc_stderr,none": 0.02574490253229092
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7603305785123967,
    "acc_stderr,none": 0.038968789850704164
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7870370370370371,
    "acc_stderr,none": 0.03957835471980981
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.8404907975460123,
    "acc_stderr,none": 0.02876748172598387
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.7225433526011561,
    "acc_stderr,none": 0.024105712607754307
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.3687150837988827,
    "acc_stderr,none": 0.016135759015030122
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.7395498392282959,
    "acc_stderr,none": 0.024926723224845543
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7654320987654321,
    "acc_stderr,none": 0.023576881744005723
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.48891786179921776,
    "acc_stderr,none": 0.012767098998525843
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7894736842105263,
    "acc_stderr,none": 0.031267817146631786
  },
  "mmlu_other": {
    "acc,none": 0.7434824589636305,
    "acc_stderr,none": 0.007579926443322494,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.7,
    "acc_stderr,none": 0.04605661864718381
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.7660377358490567,
    "acc_stderr,none": 0.02605529690115292
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.7456647398843931,
    "acc_stderr,none": 0.0332055644308557
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.38,
    "acc_stderr,none": 0.04878317312145633
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6860986547085202,
    "acc_stderr,none": 0.03114679648297246
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.8737864077669902,
    "acc_stderr,none": 0.03288180278808629
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8803418803418803,
    "acc_stderr,none": 0.021262719400407006
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.78,
    "acc_stderr,none": 0.04163331998932262
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.8212005108556832,
    "acc_stderr,none": 0.013702643715368988
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.761437908496732,
    "acc_stderr,none": 0.02440439492808787
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5921985815602837,
    "acc_stderr,none": 0.029316011776343555
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.7830882352941176,
    "acc_stderr,none": 0.02503584522771125
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.5240963855421686,
    "acc_stderr,none": 0.03887971849597264
  },
  "mmlu_social_sciences": {
    "acc,none": 0.8082547936301593,
    "acc_stderr,none": 0.006960411190275669,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.6052631578947368,
    "acc_stderr,none": 0.045981880578165414
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8535353535353535,
    "acc_stderr,none": 0.025190921114603915
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8963730569948186,
    "acc_stderr,none": 0.02199531196364424
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.8025641025641026,
    "acc_stderr,none": 0.02018264696867484
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.9033613445378151,
    "acc_stderr,none": 0.019192520709708727
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8972477064220183,
    "acc_stderr,none": 0.013018246509173758
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7633587786259542,
    "acc_stderr,none": 0.03727673575596913
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.7238562091503268,
    "acc_stderr,none": 0.018087276935663137
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6727272727272727,
    "acc_stderr,none": 0.04494290866252089
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7877551020408163,
    "acc_stderr,none": 0.026176967197866767
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8208955223880597,
    "acc_stderr,none": 0.027113286753111837
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.84,
    "acc_stderr,none": 0.0368452949177471
  },
  "mmlu_stem": {
    "acc,none": 0.6901363780526483,
    "acc_stderr,none": 0.00789491628701122,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.51,
    "acc_stderr,none": 0.05024183937956914
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.7037037037037037,
    "acc_stderr,none": 0.03944624162501116
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.8289473684210527,
    "acc_stderr,none": 0.03064360707167709
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8472222222222222,
    "acc_stderr,none": 0.030085743248565684
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.56,
    "acc_stderr,none": 0.04988876515698589
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.62,
    "acc_stderr,none": 0.048783173121456316
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.45,
    "acc_stderr,none": 0.05
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.5294117647058824,
    "acc_stderr,none": 0.049665709039785295
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.75,
    "acc_stderr,none": 0.04351941398892446
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.8,
    "acc_stderr,none": 0.0261488180184245
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.7448275862068966,
    "acc_stderr,none": 0.03632984052707842
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6481481481481481,
    "acc_stderr,none": 0.024594975128920945
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.9064516129032258,
    "acc_stderr,none": 0.016565754668270993
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6896551724137931,
    "acc_stderr,none": 0.03255086769970103
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.86,
    "acc_stderr,none": 0.03487350880197772
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.4703703703703704,
    "acc_stderr,none": 0.03043196354793657
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.6026490066225165,
    "acc_stderr,none": 0.03995524007681689
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.7407407407407407,
    "acc_stderr,none": 0.02988691054762697
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5714285714285714,
    "acc_stderr,none": 0.04697113923010213
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7486398258977149,
    "acc_stderr,none": 0.01012115601681926,
    "acc_norm,none": 0.735582154515778,
    "acc_norm_stderr,none": 0.010289787244767163
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4554759467758444,
    "acc_stderr,none": 0.011269123444510764
  }
}