{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.5093856655290102,
    "acc_stderr,none": 0.014608816322065003,
    "acc_norm,none": 0.5162116040955631,
    "acc_norm_stderr,none": 0.014603708567414952
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.7840909090909091,
    "acc_stderr,none": 0.008442807100457746,
    "acc_norm,none": 0.7032828282828283,
    "acc_norm_stderr,none": 0.009373559492986846
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8223241590214068,
    "acc_stderr,none": 0.006685413197479381
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.7005307050796058,
    "exact_match_stderr,flexible-extract": 0.012616300735519647
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5552678749253137,
    "acc_stderr,none": 0.004959204773046215,
    "acc_norm,none": 0.6879107747460665,
    "acc_norm_stderr,none": 0.004623990785158486
  },
  "mmlu": {
    "acc,none": 0.6870816123059393,
    "acc_stderr,none": 0.0037045069207892636,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5936238044633368,
    "acc_stderr,none": 0.006767071200344547,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.6111111111111112,
    "acc_stderr,none": 0.04360314860077459
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7757575757575758,
    "acc_stderr,none": 0.03256866661681102
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7843137254901961,
    "acc_stderr,none": 0.028867431449849313
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.8059071729957806,
    "acc_stderr,none": 0.02574490253229092
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7520661157024794,
    "acc_stderr,none": 0.039418975265163025
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.75,
    "acc_stderr,none": 0.04186091791394607
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.8159509202453987,
    "acc_stderr,none": 0.030446777687971716
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6994219653179191,
    "acc_stderr,none": 0.0246853168672578
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.38100558659217876,
    "acc_stderr,none": 0.01624202883405362
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.7491961414790996,
    "acc_stderr,none": 0.024619771956697168
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7654320987654321,
    "acc_stderr,none": 0.023576881744005723
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.47979139504563234,
    "acc_stderr,none": 0.012759801427767559
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7719298245614035,
    "acc_stderr,none": 0.03218093795602357
  },
  "mmlu_other": {
    "acc,none": 0.7328612809784358,
    "acc_stderr,none": 0.007671837968244023,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.77,
    "acc_stderr,none": 0.04229525846816506
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.7811320754716982,
    "acc_stderr,none": 0.02544786382510862
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.7052023121387283,
    "acc_stderr,none": 0.034765996075164785
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.34,
    "acc_stderr,none": 0.04760952285695235
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6905829596412556,
    "acc_stderr,none": 0.031024411740572223
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.8446601941747572,
    "acc_stderr,none": 0.03586594738573975
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8675213675213675,
    "acc_stderr,none": 0.02220930907316561
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.75,
    "acc_stderr,none": 0.04351941398892446
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.8084291187739464,
    "acc_stderr,none": 0.014072859310451949
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7450980392156863,
    "acc_stderr,none": 0.024954184324879912
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5886524822695035,
    "acc_stderr,none": 0.02935491115994098
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.7683823529411765,
    "acc_stderr,none": 0.025626533803777565
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4939759036144578,
    "acc_stderr,none": 0.03892212195333045
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7981800454988626,
    "acc_stderr,none": 0.007096589682897244,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.6140350877192983,
    "acc_stderr,none": 0.04579639422070434
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8383838383838383,
    "acc_stderr,none": 0.026225919863629283
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8860103626943006,
    "acc_stderr,none": 0.022935144053919443
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.782051282051282,
    "acc_stderr,none": 0.020932445774463185
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.9033613445378151,
    "acc_stderr,none": 0.019192520709708727
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8954128440366973,
    "acc_stderr,none": 0.013120530245265568
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7480916030534351,
    "acc_stderr,none": 0.03807387116306086
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.7189542483660131,
    "acc_stderr,none": 0.018185218954318075
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6454545454545455,
    "acc_stderr,none": 0.04582004841505415
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.763265306122449,
    "acc_stderr,none": 0.02721283588407316
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8059701492537313,
    "acc_stderr,none": 0.027962677604768914
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.83,
    "acc_stderr,none": 0.03775251680686371
  },
  "mmlu_stem": {
    "acc,none": 0.6730098319061212,
    "acc_stderr,none": 0.008068402776473768,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.53,
    "acc_stderr,none": 0.05016135580465919
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6814814814814815,
    "acc_stderr,none": 0.04024778401977111
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.8421052631578947,
    "acc_stderr,none": 0.029674167520101456
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8333333333333334,
    "acc_stderr,none": 0.031164899666948607
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.54,
    "acc_stderr,none": 0.05009082659620332
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.59,
    "acc_stderr,none": 0.049431107042371025
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.49,
    "acc_stderr,none": 0.05024183937956911
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.5196078431372549,
    "acc_stderr,none": 0.04971358884367405
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.77,
    "acc_stderr,none": 0.04229525846816508
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.7617021276595745,
    "acc_stderr,none": 0.02785125297388977
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.7034482758620689,
    "acc_stderr,none": 0.03806142687309992
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6481481481481481,
    "acc_stderr,none": 0.02459497512892094
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8806451612903226,
    "acc_stderr,none": 0.0184434113253154
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6650246305418719,
    "acc_stderr,none": 0.033208527423483104
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.81,
    "acc_stderr,none": 0.03942772444036623
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.4740740740740741,
    "acc_stderr,none": 0.030444528528810747
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.5364238410596026,
    "acc_stderr,none": 0.04071636065944214
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.6666666666666666,
    "acc_stderr,none": 0.03214952147802749
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.6160714285714286,
    "acc_stderr,none": 0.04616143075028546
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7704026115342764,
    "acc_stderr,none": 0.009812682950815197,
    "acc_norm,none": 0.766050054406964,
    "acc_norm_stderr,none": 0.009877236895137453
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4795291709314227,
    "acc_stderr,none": 0.011304584145927804
  }
}