{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.39761092150170646,
    "acc_stderr,none": 0.01430175222327953,
    "acc_norm,none": 0.4189419795221843,
    "acc_norm_stderr,none": 0.01441810695363901
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.627104377104377,
    "acc_stderr,none": 0.009922743197129252,
    "acc_norm,none": 0.5138888888888888,
    "acc_norm_stderr,none": 0.010255824507190344
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.7568807339449541,
    "acc_stderr,none": 0.007502671870926004
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.759666413949962,
    "exact_match_stderr,flexible-extract": 0.011769580703836945
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.4903405696076479,
    "acc_stderr,none": 0.004988850185477503,
    "acc_norm,none": 0.630053774148576,
    "acc_norm_stderr,none": 0.004818031396138927
  },
  "mmlu": {
    "acc,none": 0.6659307790912975,
    "acc_stderr,none": 0.003771136625605338,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5717321997874601,
    "acc_stderr,none": 0.006787172760810059,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5317460317460317,
    "acc_stderr,none": 0.04463112720677171
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7333333333333333,
    "acc_stderr,none": 0.03453131801885417
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7745098039215687,
    "acc_stderr,none": 0.02933116229425172
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7932489451476793,
    "acc_stderr,none": 0.026361651668389094
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7520661157024794,
    "acc_stderr,none": 0.03941897526516302
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.75,
    "acc_stderr,none": 0.04186091791394607
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.803680981595092,
    "acc_stderr,none": 0.031207970394709218
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6763005780346821,
    "acc_stderr,none": 0.025190181327608405
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.3575418994413408,
    "acc_stderr,none": 0.016029394474894883
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.7170418006430869,
    "acc_stderr,none": 0.02558306248998483
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7654320987654321,
    "acc_stderr,none": 0.02357688174400572
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4511082138200782,
    "acc_stderr,none": 0.012709037347346233
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7953216374269005,
    "acc_stderr,none": 0.030944459778533204
  },
  "mmlu_other": {
    "acc,none": 0.7080785323463148,
    "acc_stderr,none": 0.007903812528175798,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.73,
    "acc_stderr,none": 0.0446196043338474
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.720754716981132,
    "acc_stderr,none": 0.027611163402399715
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.6589595375722543,
    "acc_stderr,none": 0.03614665424180826
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.37,
    "acc_stderr,none": 0.04852365870939099
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6860986547085202,
    "acc_stderr,none": 0.031146796482972465
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7475728155339806,
    "acc_stderr,none": 0.04301250399690878
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8846153846153846,
    "acc_stderr,none": 0.02093019318517934
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.74,
    "acc_stderr,none": 0.0440844002276808
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7956577266922095,
    "acc_stderr,none": 0.014419123980931894
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7091503267973857,
    "acc_stderr,none": 0.02600480036395213
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5460992907801419,
    "acc_stderr,none": 0.029700453247291477
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.7352941176470589,
    "acc_stderr,none": 0.026799562024887678
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4819277108433735,
    "acc_stderr,none": 0.038899512528272166
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7809554761130971,
    "acc_stderr,none": 0.007368079099396877,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.6052631578947368,
    "acc_stderr,none": 0.04598188057816542
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8181818181818182,
    "acc_stderr,none": 0.027479603010538797
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8652849740932642,
    "acc_stderr,none": 0.024639789097709443
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7538461538461538,
    "acc_stderr,none": 0.021840866990423084
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8361344537815126,
    "acc_stderr,none": 0.02404405494044048
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8587155963302753,
    "acc_stderr,none": 0.014933868987028085
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7175572519083969,
    "acc_stderr,none": 0.03948406125768361
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.7205882352941176,
    "acc_stderr,none": 0.018152871051538823
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6636363636363637,
    "acc_stderr,none": 0.04525393596302506
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7714285714285715,
    "acc_stderr,none": 0.026882144922307748
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8059701492537313,
    "acc_stderr,none": 0.027962677604768914
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.85,
    "acc_stderr,none": 0.03588702812826372
  },
  "mmlu_stem": {
    "acc,none": 0.6527117031398668,
    "acc_stderr,none": 0.0081930648659352,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.51,
    "acc_stderr,none": 0.05024183937956912
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6814814814814815,
    "acc_stderr,none": 0.04024778401977111
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.7631578947368421,
    "acc_stderr,none": 0.034597776068105365
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8333333333333334,
    "acc_stderr,none": 0.031164899666948614
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.49,
    "acc_stderr,none": 0.05024183937956912
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.58,
    "acc_stderr,none": 0.04960449637488583
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.49,
    "acc_stderr,none": 0.05024183937956912
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.5196078431372549,
    "acc_stderr,none": 0.04971358884367406
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.77,
    "acc_stderr,none": 0.04229525846816507
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.7446808510638298,
    "acc_stderr,none": 0.028504856470514255
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.7103448275862069,
    "acc_stderr,none": 0.03780019230438015
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6111111111111112,
    "acc_stderr,none": 0.025107425481137275
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8548387096774194,
    "acc_stderr,none": 0.020039563628053307
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6600985221674877,
    "acc_stderr,none": 0.033327690684107895
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.81,
    "acc_stderr,none": 0.039427724440366234
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.43703703703703706,
    "acc_stderr,none": 0.030242862397654
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.5231788079470199,
    "acc_stderr,none": 0.04078093859163084
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.6481481481481481,
    "acc_stderr,none": 0.03256850570293648
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5982142857142857,
    "acc_stderr,none": 0.04653333146973647
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7383025027203483,
    "acc_stderr,none": 0.010255630772708229,
    "acc_norm,none": 0.7366702937976061,
    "acc_norm_stderr,none": 0.010276185322196768
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.43705220061412486,
    "acc_stderr,none": 0.011224050108856496
  }
}