{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.39505119453924914,
    "acc_stderr,none": 0.014285898292938172,
    "acc_norm,none": 0.41552901023890787,
    "acc_norm_stderr,none": 0.014401366641216391
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6266835016835017,
    "acc_stderr,none": 0.009925009142802907,
    "acc_norm,none": 0.5113636363636364,
    "acc_norm_stderr,none": 0.010257133441117108
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.7412844036697248,
    "acc_stderr,none": 0.007659426910763691
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.7429871114480667,
    "exact_match_stderr,flexible-extract": 0.01203678175742868
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.4986058554072894,
    "acc_stderr,none": 0.004989762014739189,
    "acc_norm,none": 0.6355307707627963,
    "acc_norm_stderr,none": 0.004802974070507195
  },
  "mmlu": {
    "acc,none": 0.6662868537245407,
    "acc_stderr,none": 0.0037696119530606243,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5727948990435706,
    "acc_stderr,none": 0.006780078529952026,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5396825396825397,
    "acc_stderr,none": 0.04458029125470973
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7454545454545455,
    "acc_stderr,none": 0.03401506715249039
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7794117647058824,
    "acc_stderr,none": 0.02910225438967409
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.8059071729957806,
    "acc_stderr,none": 0.025744902532290937
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7603305785123967,
    "acc_stderr,none": 0.03896878985070416
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7407407407407407,
    "acc_stderr,none": 0.042365112580946315
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.7975460122699386,
    "acc_stderr,none": 0.03157065078911902
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6763005780346821,
    "acc_stderr,none": 0.025190181327608405
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.3675977653631285,
    "acc_stderr,none": 0.016125543823552954
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.7138263665594855,
    "acc_stderr,none": 0.025670259242188943
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7716049382716049,
    "acc_stderr,none": 0.023358211840626267
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4439374185136897,
    "acc_stderr,none": 0.01268970816778768
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7953216374269005,
    "acc_stderr,none": 0.030944459778533204
  },
  "mmlu_other": {
    "acc,none": 0.7071129707112971,
    "acc_stderr,none": 0.007916495898842097,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.73,
    "acc_stderr,none": 0.044619604333847394
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.7169811320754716,
    "acc_stderr,none": 0.027724236492700918
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.6473988439306358,
    "acc_stderr,none": 0.03643037168958548
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.37,
    "acc_stderr,none": 0.04852365870939099
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6905829596412556,
    "acc_stderr,none": 0.03102441174057221
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7475728155339806,
    "acc_stderr,none": 0.04301250399690878
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8846153846153846,
    "acc_stderr,none": 0.02093019318517934
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.74,
    "acc_stderr,none": 0.0440844002276808
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7931034482758621,
    "acc_stderr,none": 0.014485656041669195
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7058823529411765,
    "acc_stderr,none": 0.02609016250427904
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5425531914893617,
    "acc_stderr,none": 0.029719281272236837
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.7389705882352942,
    "acc_stderr,none": 0.02667925227010311
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4939759036144578,
    "acc_stderr,none": 0.03892212195333045
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7812804679883003,
    "acc_stderr,none": 0.007355878148702391,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.5964912280701754,
    "acc_stderr,none": 0.046151869625837054
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8232323232323232,
    "acc_stderr,none": 0.027178752639044915
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8704663212435233,
    "acc_stderr,none": 0.02423353229775873
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7487179487179487,
    "acc_stderr,none": 0.021992016662370547
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8487394957983193,
    "acc_stderr,none": 0.02327425589870794
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8587155963302753,
    "acc_stderr,none": 0.014933868987028085
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7099236641221374,
    "acc_stderr,none": 0.03980066246467766
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.7189542483660131,
    "acc_stderr,none": 0.01818521895431808
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6727272727272727,
    "acc_stderr,none": 0.04494290866252088
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7714285714285715,
    "acc_stderr,none": 0.026882144922307748
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8059701492537313,
    "acc_stderr,none": 0.027962677604768914
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.85,
    "acc_stderr,none": 0.03588702812826372
  },
  "mmlu_stem": {
    "acc,none": 0.6533460196638122,
    "acc_stderr,none": 0.008190793905641614,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.48,
    "acc_stderr,none": 0.050211673156867795
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6888888888888889,
    "acc_stderr,none": 0.03999262876617722
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.7631578947368421,
    "acc_stderr,none": 0.034597776068105365
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8263888888888888,
    "acc_stderr,none": 0.03167473383795717
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.52,
    "acc_stderr,none": 0.050211673156867795
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.57,
    "acc_stderr,none": 0.049756985195624284
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.49,
    "acc_stderr,none": 0.05024183937956912
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.5196078431372549,
    "acc_stderr,none": 0.04971358884367406
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.76,
    "acc_stderr,none": 0.042923469599092816
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.7489361702127659,
    "acc_stderr,none": 0.028346963777162452
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.7034482758620689,
    "acc_stderr,none": 0.03806142687309992
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6137566137566137,
    "acc_stderr,none": 0.02507598176760169
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8516129032258064,
    "acc_stderr,none": 0.020222737554330378
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6600985221674877,
    "acc_stderr,none": 0.033327690684107895
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.81,
    "acc_stderr,none": 0.039427724440366234
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.42962962962962964,
    "acc_stderr,none": 0.030182099804387262
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.5298013245033113,
    "acc_stderr,none": 0.040752249922169775
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.6620370370370371,
    "acc_stderr,none": 0.03225941352631295
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.6160714285714286,
    "acc_stderr,none": 0.046161430750285455
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7388465723612623,
    "acc_stderr,none": 0.010248738649935571,
    "acc_norm,none": 0.736126224156692,
    "acc_norm_stderr,none": 0.010282996367695557
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.43654042988741043,
    "acc_stderr,none": 0.011222574420844783
  }
}