{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4104095563139932,
    "acc_stderr,none": 0.014374922192642664,
    "acc_norm,none": 0.4087030716723549,
    "acc_norm_stderr,none": 0.014365750345427
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.5757575757575758,
    "acc_stderr,none": 0.010141333654958578,
    "acc_norm,none": 0.45664983164983164,
    "acc_norm_stderr,none": 0.010221149650118186
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.6510703363914373,
    "acc_stderr,none": 0.008336340399970122
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.6580742987111448,
    "exact_match_stderr,flexible-extract": 0.013066089625182803
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.518621788488349,
    "acc_stderr,none": 0.004986319587524958,
    "acc_norm,none": 0.6070503883688508,
    "acc_norm_stderr,none": 0.004874076250521581
  },
  "mmlu": {
    "acc,none": 0.556687081612306,
    "acc_stderr,none": 0.003938775690245181,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.51817215727949,
    "acc_stderr,none": 0.006812529528234707,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.42063492063492064,
    "acc_stderr,none": 0.04415438226743744
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7151515151515152,
    "acc_stderr,none": 0.03524390844511781
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7647058823529411,
    "acc_stderr,none": 0.02977177522814565
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7805907172995781,
    "acc_stderr,none": 0.026939106581553945
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.6859504132231405,
    "acc_stderr,none": 0.042369647530410184
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.6296296296296297,
    "acc_stderr,none": 0.04668408033024931
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.6380368098159509,
    "acc_stderr,none": 0.037757007291414416
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6242774566473989,
    "acc_stderr,none": 0.026074314851657083
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.23798882681564246,
    "acc_stderr,none": 0.014242630070574885
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.5627009646302251,
    "acc_stderr,none": 0.028173917761762906
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7283950617283951,
    "acc_stderr,none": 0.024748624490537375
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.455019556714472,
    "acc_stderr,none": 0.012718456618701775
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7777777777777778,
    "acc_stderr,none": 0.03188578017686397
  },
  "mmlu_other": {
    "acc,none": 0.6311554554232378,
    "acc_stderr,none": 0.008339710356724352,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.72,
    "acc_stderr,none": 0.04512608598542128
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.5358490566037736,
    "acc_stderr,none": 0.030693675018458003
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.5664739884393064,
    "acc_stderr,none": 0.03778621079092055
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.32,
    "acc_stderr,none": 0.04688261722621505
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.672645739910314,
    "acc_stderr,none": 0.031493846709941306
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7572815533980582,
    "acc_stderr,none": 0.04245022486384495
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8632478632478633,
    "acc_stderr,none": 0.022509033937077785
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.65,
    "acc_stderr,none": 0.0479372485441102
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7522349936143039,
    "acc_stderr,none": 0.01543808308056897
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.5718954248366013,
    "acc_stderr,none": 0.028332397483664278
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.4645390070921986,
    "acc_stderr,none": 0.029752389657427054
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.5441176470588235,
    "acc_stderr,none": 0.030254372573976722
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4759036144578313,
    "acc_stderr,none": 0.03887971849597264
  },
  "mmlu_social_sciences": {
    "acc,none": 0.6467338316542086,
    "acc_stderr,none": 0.00848144208518685,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.38596491228070173,
    "acc_stderr,none": 0.045796394220704355
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.6919191919191919,
    "acc_stderr,none": 0.032894773300986155
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.7564766839378239,
    "acc_stderr,none": 0.030975436386845436
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.5230769230769231,
    "acc_stderr,none": 0.025323990861736232
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.5672268907563025,
    "acc_stderr,none": 0.03218358107742613
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.7394495412844037,
    "acc_stderr,none": 0.018819182034850068
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.6335877862595419,
    "acc_stderr,none": 0.04225875451969637
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.6274509803921569,
    "acc_stderr,none": 0.01955964680921593
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6636363636363637,
    "acc_stderr,none": 0.04525393596302506
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.6816326530612244,
    "acc_stderr,none": 0.029822533793982052
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.6965174129353234,
    "acc_stderr,none": 0.03251006816458618
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.74,
    "acc_stderr,none": 0.044084400227680794
  },
  "mmlu_stem": {
    "acc,none": 0.45290199809705045,
    "acc_stderr,none": 0.008265093402141996,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.23,
    "acc_stderr,none": 0.04229525846816507
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.5777777777777777,
    "acc_stderr,none": 0.04266763404099582
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.6710526315789473,
    "acc_stderr,none": 0.03823428969926605
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.6875,
    "acc_stderr,none": 0.038760854559127644
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.36,
    "acc_stderr,none": 0.04824181513244218
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.55,
    "acc_stderr,none": 0.05
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.21,
    "acc_stderr,none": 0.040936018074033256
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.3235294117647059,
    "acc_stderr,none": 0.04655010411319619
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.72,
    "acc_stderr,none": 0.04512608598542127
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.5617021276595745,
    "acc_stderr,none": 0.03243618636108102
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.5724137931034483,
    "acc_stderr,none": 0.041227371113703316
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.3333333333333333,
    "acc_stderr,none": 0.024278568024307695
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.7032258064516129,
    "acc_stderr,none": 0.025988500792411887
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.3842364532019704,
    "acc_stderr,none": 0.034223985656575494
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.72,
    "acc_stderr,none": 0.045126085985421276
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.2111111111111111,
    "acc_stderr,none": 0.02488211685765511
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.26490066225165565,
    "acc_stderr,none": 0.036030385453603826
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.25462962962962965,
    "acc_stderr,none": 0.02971127586000534
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.42857142857142855,
    "acc_stderr,none": 0.04697113923010212
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7480957562568009,
    "acc_stderr,none": 0.010128421335088683,
    "acc_norm,none": 0.736126224156692,
    "acc_norm_stderr,none": 0.010282996367695559
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.436028659160696,
    "acc_stderr,none": 0.011221086587510792
  }
}