{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4035836177474403,
    "acc_stderr,none": 0.014337158914268447,
    "acc_norm,none": 0.41638225255972694,
    "acc_norm_stderr,none": 0.014405618279436169
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.5989057239057239,
    "acc_stderr,none": 0.010057051106534374,
    "acc_norm,none": 0.5016835016835017,
    "acc_norm_stderr,none": 0.010259725364582795
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8048929663608563,
    "acc_stderr,none": 0.006931034632130355
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.6777862016679302,
    "exact_match_stderr,flexible-extract": 0.012872435481188778
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5205138418641705,
    "acc_stderr,none": 0.004985580065946455,
    "acc_norm,none": 0.6554471220872337,
    "acc_norm_stderr,none": 0.0047425103547779025
  },
  "mmlu": {
    "acc,none": 0.6476285429426007,
    "acc_stderr,none": 0.0038359586299725637,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5708820403825717,
    "acc_stderr,none": 0.006850043050156336,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5317460317460317,
    "acc_stderr,none": 0.04463112720677171
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7272727272727273,
    "acc_stderr,none": 0.03477691162163659
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7598039215686274,
    "acc_stderr,none": 0.02998373305591362
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7932489451476793,
    "acc_stderr,none": 0.026361651668389094
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7520661157024794,
    "acc_stderr,none": 0.03941897526516303
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7222222222222222,
    "acc_stderr,none": 0.0433004374965074
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.7852760736196319,
    "acc_stderr,none": 0.03226219377286774
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6994219653179191,
    "acc_stderr,none": 0.02468531686725781
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.37206703910614525,
    "acc_stderr,none": 0.0161658475835633
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.6495176848874598,
    "acc_stderr,none": 0.027098652621301747
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7469135802469136,
    "acc_stderr,none": 0.024191808600713
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4602346805736636,
    "acc_stderr,none": 0.01272978538659856
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.783625730994152,
    "acc_stderr,none": 0.03158149539338733
  },
  "mmlu_other": {
    "acc,none": 0.6900547151593177,
    "acc_stderr,none": 0.008038627658969495,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.69,
    "acc_stderr,none": 0.04648231987117316
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.6716981132075471,
    "acc_stderr,none": 0.02890159361241178
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.6184971098265896,
    "acc_stderr,none": 0.03703851193099522
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.39,
    "acc_stderr,none": 0.04902071300001975
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.672645739910314,
    "acc_stderr,none": 0.03149384670994131
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7184466019417476,
    "acc_stderr,none": 0.044532548363264673
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.9017094017094017,
    "acc_stderr,none": 0.019503444900757567
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.76,
    "acc_stderr,none": 0.042923469599092816
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7739463601532567,
    "acc_stderr,none": 0.014957458504335844
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7254901960784313,
    "acc_stderr,none": 0.025553169991826514
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5141843971631206,
    "acc_stderr,none": 0.02981549448368206
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.6911764705882353,
    "acc_stderr,none": 0.028064998167040094
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4759036144578313,
    "acc_stderr,none": 0.03887971849597264
  },
  "mmlu_social_sciences": {
    "acc,none": 0.750731231719207,
    "acc_stderr,none": 0.007699976525933856,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.5614035087719298,
    "acc_stderr,none": 0.04668000738510455
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.803030303030303,
    "acc_stderr,none": 0.028335609732463362
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8082901554404145,
    "acc_stderr,none": 0.02840895362624528
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7102564102564103,
    "acc_stderr,none": 0.023000628243687964
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8109243697478992,
    "acc_stderr,none": 0.02543511943810535
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8366972477064221,
    "acc_stderr,none": 0.01584825580650155
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7175572519083969,
    "acc_stderr,none": 0.03948406125768361
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.6830065359477124,
    "acc_stderr,none": 0.018824219512706214
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6545454545454545,
    "acc_stderr,none": 0.04554619617541054
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.710204081632653,
    "acc_stderr,none": 0.02904308868330433
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8159203980099502,
    "acc_stderr,none": 0.027403859410786848
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.83,
    "acc_stderr,none": 0.03775251680686371
  },
  "mmlu_stem": {
    "acc,none": 0.6197272438947035,
    "acc_stderr,none": 0.008255093628139924,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.39,
    "acc_stderr,none": 0.04902071300001975
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6592592592592592,
    "acc_stderr,none": 0.04094376269996794
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.7763157894736842,
    "acc_stderr,none": 0.033911609343436025
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.7916666666666666,
    "acc_stderr,none": 0.033961162058453336
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.45,
    "acc_stderr,none": 0.049999999999999996
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.57,
    "acc_stderr,none": 0.049756985195624284
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.42,
    "acc_stderr,none": 0.049604496374885836
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.43137254901960786,
    "acc_stderr,none": 0.04928099597287534
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.74,
    "acc_stderr,none": 0.04408440022768077
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.723404255319149,
    "acc_stderr,none": 0.02924188386962882
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.6551724137931034,
    "acc_stderr,none": 0.039609335494512087
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6058201058201058,
    "acc_stderr,none": 0.025167982333894143
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.832258064516129,
    "acc_stderr,none": 0.021255464065371342
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6699507389162561,
    "acc_stderr,none": 0.033085304262282574
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.82,
    "acc_stderr,none": 0.038612291966536955
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.35185185185185186,
    "acc_stderr,none": 0.029116617606083018
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.4768211920529801,
    "acc_stderr,none": 0.04078093859163085
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.6018518518518519,
    "acc_stderr,none": 0.033384734032074016
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5803571428571429,
    "acc_stderr,none": 0.04684099321077106
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.766050054406964,
    "acc_stderr,none": 0.009877236895137463,
    "acc_norm,none": 0.7600652883569097,
    "acc_norm_stderr,none": 0.009963625892809545
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4273285568065507,
    "acc_stderr,none": 0.011193930340551272
  }
}