{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4249146757679181,
    "acc_stderr,none": 0.014445698968520769,
    "acc_norm,none": 0.43856655290102387,
    "acc_norm_stderr,none": 0.014500682618212862
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6721380471380471,
    "acc_stderr,none": 0.009632587076170014,
    "acc_norm,none": 0.5521885521885522,
    "acc_norm_stderr,none": 0.010203742451111504
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8149847094801224,
    "acc_stderr,none": 0.006791584070894006
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.7399545109931767,
    "exact_match_stderr,flexible-extract": 0.012082852340334092
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5109539932284406,
    "acc_stderr,none": 0.0049885838203099185,
    "acc_norm,none": 0.651364270065724,
    "acc_norm_stderr,none": 0.004755645016263827
  },
  "mmlu": {
    "acc,none": 0.6663580686511893,
    "acc_stderr,none": 0.003764712164634201,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5717321997874601,
    "acc_stderr,none": 0.006781473552209103,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5317460317460317,
    "acc_stderr,none": 0.04463112720677171
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7333333333333333,
    "acc_stderr,none": 0.03453131801885417
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7696078431372549,
    "acc_stderr,none": 0.029554292605695053
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7763713080168776,
    "acc_stderr,none": 0.027123298205229966
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7603305785123967,
    "acc_stderr,none": 0.03896878985070416
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7592592592592593,
    "acc_stderr,none": 0.041331194402438376
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.8098159509202454,
    "acc_stderr,none": 0.030833491146281245
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6791907514450867,
    "acc_stderr,none": 0.025131000233647904
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.3642458100558659,
    "acc_stderr,none": 0.016094338768474593
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.7234726688102894,
    "acc_stderr,none": 0.025403832978179615
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7777777777777778,
    "acc_stderr,none": 0.023132376234543332
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4439374185136897,
    "acc_stderr,none": 0.01268970816778768
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7953216374269005,
    "acc_stderr,none": 0.030944459778533204
  },
  "mmlu_other": {
    "acc,none": 0.7074348245896364,
    "acc_stderr,none": 0.007897164009110159,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.73,
    "acc_stderr,none": 0.044619604333847394
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.7320754716981132,
    "acc_stderr,none": 0.027257260322494845
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.6589595375722543,
    "acc_stderr,none": 0.03614665424180826
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.36,
    "acc_stderr,none": 0.048241815132442176
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6636771300448431,
    "acc_stderr,none": 0.031708824268455
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7378640776699029,
    "acc_stderr,none": 0.04354631077260595
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8888888888888888,
    "acc_stderr,none": 0.020588491316092358
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.74,
    "acc_stderr,none": 0.0440844002276808
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7931034482758621,
    "acc_stderr,none": 0.014485656041669195
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7156862745098039,
    "acc_stderr,none": 0.025829163272757468
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5354609929078015,
    "acc_stderr,none": 0.02975238965742705
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.7463235294117647,
    "acc_stderr,none": 0.026431329870789548
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4879518072289157,
    "acc_stderr,none": 0.0389136449583582
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7819304517387066,
    "acc_stderr,none": 0.007344226102167747,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.6140350877192983,
    "acc_stderr,none": 0.04579639422070435
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8232323232323232,
    "acc_stderr,none": 0.027178752639044915
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8652849740932642,
    "acc_stderr,none": 0.024639789097709443
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7564102564102564,
    "acc_stderr,none": 0.021763733684173937
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8529411764705882,
    "acc_stderr,none": 0.02300545944667395
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8697247706422019,
    "acc_stderr,none": 0.014431862852473247
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7099236641221374,
    "acc_stderr,none": 0.03980066246467766
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.704248366013072,
    "acc_stderr,none": 0.018463154132632817
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.7,
    "acc_stderr,none": 0.04389311454644286
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7714285714285715,
    "acc_stderr,none": 0.026882144922307748
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8009950248756219,
    "acc_stderr,none": 0.028231365092758406
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.83,
    "acc_stderr,none": 0.03775251680686371
  },
  "mmlu_stem": {
    "acc,none": 0.6542974944497304,
    "acc_stderr,none": 0.00817154010745141,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.51,
    "acc_stderr,none": 0.05024183937956913
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6962962962962963,
    "acc_stderr,none": 0.039725528847851375
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.7631578947368421,
    "acc_stderr,none": 0.034597776068105365
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8055555555555556,
    "acc_stderr,none": 0.03309615177059006
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.51,
    "acc_stderr,none": 0.05024183937956911
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.55,
    "acc_stderr,none": 0.049999999999999996
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.48,
    "acc_stderr,none": 0.050211673156867795
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.5196078431372549,
    "acc_stderr,none": 0.04971358884367406
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.79,
    "acc_stderr,none": 0.040936018074033256
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.7617021276595745,
    "acc_stderr,none": 0.027851252973889778
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.7103448275862069,
    "acc_stderr,none": 0.03780019230438015
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6164021164021164,
    "acc_stderr,none": 0.025043757318520193
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8580645161290322,
    "acc_stderr,none": 0.01985300367655978
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6748768472906403,
    "acc_stderr,none": 0.03295797566311271
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.81,
    "acc_stderr,none": 0.039427724440366234
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.42592592592592593,
    "acc_stderr,none": 0.03014913560136595
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.5364238410596026,
    "acc_stderr,none": 0.04071636065944215
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.6527777777777778,
    "acc_stderr,none": 0.032468872436376486
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5714285714285714,
    "acc_stderr,none": 0.04697113923010213
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7578890097932536,
    "acc_stderr,none": 0.009994371269104381,
    "acc_norm,none": 0.7562568008705114,
    "acc_norm_stderr,none": 0.010017199471500626
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4539406345957011,
    "acc_stderr,none": 0.011265963467510477
  }
}