{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4283276450511945,
    "acc_stderr,none": 0.01446049636759903,
    "acc_norm,none": 0.43856655290102387,
    "acc_norm_stderr,none": 0.014500682618212862
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6822390572390572,
    "acc_stderr,none": 0.009554033064443064,
    "acc_norm,none": 0.5664983164983165,
    "acc_norm_stderr,none": 0.010168640625454115
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8159021406727829,
    "acc_stderr,none": 0.006778536599685004
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.7103866565579985,
    "exact_match_stderr,flexible-extract": 0.012493927348659629
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5067715594503087,
    "acc_stderr,none": 0.004989323787413519,
    "acc_norm,none": 0.6374228241386178,
    "acc_norm_stderr,none": 0.004797616754372308
  },
  "mmlu": {
    "acc,none": 0.6630109670987039,
    "acc_stderr,none": 0.003776874434087005,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5702444208289055,
    "acc_stderr,none": 0.006791548609087373,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5396825396825397,
    "acc_stderr,none": 0.04458029125470973
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7333333333333333,
    "acc_stderr,none": 0.03453131801885417
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7549019607843137,
    "acc_stderr,none": 0.030190282453501954
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7848101265822784,
    "acc_stderr,none": 0.02675082699467618
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.768595041322314,
    "acc_stderr,none": 0.0384985609879409
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7592592592592593,
    "acc_stderr,none": 0.041331194402438376
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.8098159509202454,
    "acc_stderr,none": 0.030833491146281245
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6705202312138728,
    "acc_stderr,none": 0.02530525813187971
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.36312849162011174,
    "acc_stderr,none": 0.0160837499868537
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.7138263665594855,
    "acc_stderr,none": 0.025670259242188943
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7654320987654321,
    "acc_stderr,none": 0.02357688174400572
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4445893089960887,
    "acc_stderr,none": 0.012691575792657114
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.8070175438596491,
    "acc_stderr,none": 0.030267457554898458
  },
  "mmlu_other": {
    "acc,none": 0.7048599935629224,
    "acc_stderr,none": 0.007915499995445628,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.72,
    "acc_stderr,none": 0.04512608598542129
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.720754716981132,
    "acc_stderr,none": 0.027611163402399715
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.6589595375722543,
    "acc_stderr,none": 0.03614665424180826
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.35,
    "acc_stderr,none": 0.047937248544110196
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6636771300448431,
    "acc_stderr,none": 0.031708824268455
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7281553398058253,
    "acc_stderr,none": 0.044052680241409216
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8846153846153846,
    "acc_stderr,none": 0.02093019318517934
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.73,
    "acc_stderr,none": 0.044619604333847415
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.789272030651341,
    "acc_stderr,none": 0.014583812465862525
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7222222222222222,
    "acc_stderr,none": 0.025646863097137915
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.524822695035461,
    "acc_stderr,none": 0.029790719243829714
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.7573529411764706,
    "acc_stderr,none": 0.026040662474201285
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4939759036144578,
    "acc_stderr,none": 0.03892212195333045
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7799805004874878,
    "acc_stderr,none": 0.00737475226683132,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.6052631578947368,
    "acc_stderr,none": 0.045981880578165414
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8232323232323232,
    "acc_stderr,none": 0.027178752639044915
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8549222797927462,
    "acc_stderr,none": 0.025416343096306426
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7487179487179487,
    "acc_stderr,none": 0.021992016662370554
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8403361344537815,
    "acc_stderr,none": 0.023793353997528802
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8660550458715597,
    "acc_stderr,none": 0.014602811435592635
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7099236641221374,
    "acc_stderr,none": 0.03980066246467766
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.7091503267973857,
    "acc_stderr,none": 0.018373116915903973
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6909090909090909,
    "acc_stderr,none": 0.044262946482000985
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7877551020408163,
    "acc_stderr,none": 0.026176967197866767
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.7960199004975125,
    "acc_stderr,none": 0.02849317624532607
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.83,
    "acc_stderr,none": 0.03775251680686371
  },
  "mmlu_stem": {
    "acc,none": 0.6460513796384396,
    "acc_stderr,none": 0.008220735797122805,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.51,
    "acc_stderr,none": 0.05024183937956913
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6888888888888889,
    "acc_stderr,none": 0.03999262876617722
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.7631578947368421,
    "acc_stderr,none": 0.034597776068105365
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8125,
    "acc_stderr,none": 0.032639560491693344
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.5,
    "acc_stderr,none": 0.050251890762960605
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.57,
    "acc_stderr,none": 0.049756985195624284
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.44,
    "acc_stderr,none": 0.04988876515698589
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.5294117647058824,
    "acc_stderr,none": 0.049665709039785295
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.73,
    "acc_stderr,none": 0.044619604333847394
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.7404255319148936,
    "acc_stderr,none": 0.02865917937429232
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.7172413793103448,
    "acc_stderr,none": 0.037528339580033376
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6005291005291006,
    "acc_stderr,none": 0.025225450284067932
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8483870967741935,
    "acc_stderr,none": 0.020402616654416734
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6798029556650246,
    "acc_stderr,none": 0.032826493853041504
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.81,
    "acc_stderr,none": 0.039427724440366234
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.4185185185185185,
    "acc_stderr,none": 0.030078013075022055
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.5231788079470199,
    "acc_stderr,none": 0.04078093859163083
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.6388888888888888,
    "acc_stderr,none": 0.03275773486100999
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5803571428571429,
    "acc_stderr,none": 0.04684099321077106
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7562568008705114,
    "acc_stderr,none": 0.010017199471500614,
    "acc_norm,none": 0.7589771490750816,
    "acc_norm_stderr,none": 0.009979042717267312
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.45138178096212894,
    "acc_stderr,none": 0.011260456681624443
  }
}