{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4274744027303754,
    "acc_stderr,none": 0.014456862944650647,
    "acc_norm,none": 0.4453924914675768,
    "acc_norm_stderr,none": 0.014523987638344067
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6738215488215489,
    "acc_stderr,none": 0.009619849417035168,
    "acc_norm,none": 0.5505050505050505,
    "acc_norm_stderr,none": 0.010207308833916053
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.808868501529052,
    "acc_stderr,none": 0.006876977982669623
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.7149355572403336,
    "exact_match_stderr,flexible-extract": 0.012435042334904006
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5096594303923522,
    "acc_stderr,none": 0.004988850185477477,
    "acc_norm,none": 0.6461860187213703,
    "acc_norm_stderr,none": 0.004771751187407036
  },
  "mmlu": {
    "acc,none": 0.6685657313772967,
    "acc_stderr,none": 0.0037601472604539814,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5738575982996812,
    "acc_stderr,none": 0.006773391317033884,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5158730158730159,
    "acc_stderr,none": 0.044698818540726076
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7333333333333333,
    "acc_stderr,none": 0.03453131801885417
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7696078431372549,
    "acc_stderr,none": 0.029554292605695053
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7932489451476793,
    "acc_stderr,none": 0.02636165166838909
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7603305785123967,
    "acc_stderr,none": 0.03896878985070416
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7592592592592593,
    "acc_stderr,none": 0.041331194402438376
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.8098159509202454,
    "acc_stderr,none": 0.030833491146281245
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6820809248554913,
    "acc_stderr,none": 0.025070713719153183
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.36312849162011174,
    "acc_stderr,none": 0.0160837499868537
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.729903536977492,
    "acc_stderr,none": 0.02521804037341062
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7746913580246914,
    "acc_stderr,none": 0.02324620264781975
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4485006518904824,
    "acc_stderr,none": 0.012702317490559814
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7953216374269005,
    "acc_stderr,none": 0.03094445977853321
  },
  "mmlu_other": {
    "acc,none": 0.7096878017380109,
    "acc_stderr,none": 0.007876276505793102,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.73,
    "acc_stderr,none": 0.044619604333847394
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.7396226415094339,
    "acc_stderr,none": 0.027008766090708056
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.653179190751445,
    "acc_stderr,none": 0.036291466701596636
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.36,
    "acc_stderr,none": 0.048241815132442176
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.672645739910314,
    "acc_stderr,none": 0.03149384670994131
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7475728155339806,
    "acc_stderr,none": 0.04301250399690878
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8888888888888888,
    "acc_stderr,none": 0.020588491316092358
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.74,
    "acc_stderr,none": 0.0440844002276808
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7943805874840357,
    "acc_stderr,none": 0.01445250045678583
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7222222222222222,
    "acc_stderr,none": 0.025646863097137915
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5319148936170213,
    "acc_stderr,none": 0.029766675075873866
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.7463235294117647,
    "acc_stderr,none": 0.026431329870789548
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4939759036144578,
    "acc_stderr,none": 0.03892212195333045
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7816054598635034,
    "acc_stderr,none": 0.007345998108376459,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.6140350877192983,
    "acc_stderr,none": 0.04579639422070435
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8181818181818182,
    "acc_stderr,none": 0.0274796030105388
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8601036269430051,
    "acc_stderr,none": 0.02503387058301518
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7564102564102564,
    "acc_stderr,none": 0.021763733684173926
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8529411764705882,
    "acc_stderr,none": 0.02300545944667395
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8715596330275229,
    "acc_stderr,none": 0.014344977542914313
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7175572519083969,
    "acc_stderr,none": 0.03948406125768361
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.7009803921568627,
    "acc_stderr,none": 0.018521756215423017
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6909090909090909,
    "acc_stderr,none": 0.044262946482000985
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7755102040816326,
    "acc_stderr,none": 0.02671143055553841
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8059701492537313,
    "acc_stderr,none": 0.027962677604768914
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.83,
    "acc_stderr,none": 0.03775251680686371
  },
  "mmlu_stem": {
    "acc,none": 0.6590548683793213,
    "acc_stderr,none": 0.008162832794384293,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.5,
    "acc_stderr,none": 0.050251890762960605
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6814814814814815,
    "acc_stderr,none": 0.0402477840197711
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.7697368421052632,
    "acc_stderr,none": 0.034260594244031654
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8055555555555556,
    "acc_stderr,none": 0.03309615177059006
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.52,
    "acc_stderr,none": 0.050211673156867795
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.57,
    "acc_stderr,none": 0.04975698519562429
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.47,
    "acc_stderr,none": 0.05016135580465919
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.5196078431372549,
    "acc_stderr,none": 0.04971358884367406
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.78,
    "acc_stderr,none": 0.04163331998932263
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.7659574468085106,
    "acc_stderr,none": 0.027678452578212387
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.7172413793103448,
    "acc_stderr,none": 0.037528339580033376
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6216931216931217,
    "acc_stderr,none": 0.024976954053155257
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.864516129032258,
    "acc_stderr,none": 0.019469334586486906
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6699507389162561,
    "acc_stderr,none": 0.033085304262282574
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.81,
    "acc_stderr,none": 0.039427724440366234
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.45925925925925926,
    "acc_stderr,none": 0.03038416923235082
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.543046357615894,
    "acc_stderr,none": 0.04067325174247443
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.6574074074074074,
    "acc_stderr,none": 0.03236585252602159
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5714285714285714,
    "acc_stderr,none": 0.04697113923010213
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7557127312295974,
    "acc_stderr,none": 0.010024765172284246,
    "acc_norm,none": 0.7535364526659413,
    "acc_norm_stderr,none": 0.01005481078967183
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4503582395087001,
    "acc_stderr,none": 0.011258169830122285
  }
}