{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.48293515358361777,
    "acc_stderr,none": 0.0146028783885366,
    "acc_norm,none": 0.48208191126279865,
    "acc_norm_stderr,none": 0.01460200558549098
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.7550505050505051,
    "acc_stderr,none": 0.00882458861121908,
    "acc_norm,none": 0.6224747474747475,
    "acc_norm_stderr,none": 0.00994722783346943
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8336391437308869,
    "acc_stderr,none": 0.006513391193734422
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.7589082638362395,
    "exact_match_stderr,flexible-extract": 0.011782246325099725
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5103565026887075,
    "acc_stderr,none": 0.004988710917169332,
    "acc_norm,none": 0.6518621788488349,
    "acc_norm_stderr,none": 0.004754063867700175
  },
  "mmlu": {
    "acc,none": 0.6892892750320467,
    "acc_stderr,none": 0.003677798865943158,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.59192348565356,
    "acc_stderr,none": 0.006726596869390048,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5555555555555556,
    "acc_stderr,none": 0.044444444444444495
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7878787878787878,
    "acc_stderr,none": 0.031922715695482995
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.8284313725490197,
    "acc_stderr,none": 0.026460569561240647
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.8185654008438819,
    "acc_stderr,none": 0.025085961144579647
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7603305785123967,
    "acc_stderr,none": 0.038968789850704164
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7777777777777778,
    "acc_stderr,none": 0.0401910747255735
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.8220858895705522,
    "acc_stderr,none": 0.03004735765580664
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.7167630057803468,
    "acc_stderr,none": 0.024257901705323368
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.3653631284916201,
    "acc_stderr,none": 0.016104833880142295
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.7363344051446945,
    "acc_stderr,none": 0.02502553850053234
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7376543209876543,
    "acc_stderr,none": 0.024477222856135114
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.47783572359843546,
    "acc_stderr,none": 0.012757683047716175
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7953216374269005,
    "acc_stderr,none": 0.030944459778533207
  },
  "mmlu_other": {
    "acc,none": 0.7380109430318635,
    "acc_stderr,none": 0.007640909064846662,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.7,
    "acc_stderr,none": 0.04605661864718381
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.769811320754717,
    "acc_stderr,none": 0.025907897122408173
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.7109826589595376,
    "acc_stderr,none": 0.034564257450869995
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.38,
    "acc_stderr,none": 0.048783173121456344
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6905829596412556,
    "acc_stderr,none": 0.03102441174057221
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.8446601941747572,
    "acc_stderr,none": 0.03586594738573974
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8803418803418803,
    "acc_stderr,none": 0.021262719400407002
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.76,
    "acc_stderr,none": 0.04292346959909284
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.8148148148148148,
    "acc_stderr,none": 0.013890862162876166
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7712418300653595,
    "acc_stderr,none": 0.024051029739912258
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5921985815602837,
    "acc_stderr,none": 0.029316011776343555
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.7683823529411765,
    "acc_stderr,none": 0.025626533803777565
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.5120481927710844,
    "acc_stderr,none": 0.03891364495835816
  },
  "mmlu_social_sciences": {
    "acc,none": 0.8059798505037374,
    "acc_stderr,none": 0.007000121889404928,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.6052631578947368,
    "acc_stderr,none": 0.045981880578165414
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8434343434343434,
    "acc_stderr,none": 0.025890520358141454
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8911917098445595,
    "acc_stderr,none": 0.022473253332768766
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7974358974358975,
    "acc_stderr,none": 0.020377660970371383
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8991596638655462,
    "acc_stderr,none": 0.019559663430480777
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8935779816513761,
    "acc_stderr,none": 0.013221554674594372
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7480916030534351,
    "acc_stderr,none": 0.03807387116306085
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.7271241830065359,
    "acc_stderr,none": 0.018020474148393577
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6818181818181818,
    "acc_stderr,none": 0.04461272175910507
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7755102040816326,
    "acc_stderr,none": 0.026711430555538408
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8308457711442786,
    "acc_stderr,none": 0.026508590656233264
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.85,
    "acc_stderr,none": 0.03588702812826373
  },
  "mmlu_stem": {
    "acc,none": 0.6726926736441484,
    "acc_stderr,none": 0.008009964097968118,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.49,
    "acc_stderr,none": 0.05024183937956911
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6962962962962963,
    "acc_stderr,none": 0.039725528847851375
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.8092105263157895,
    "acc_stderr,none": 0.03197565821032499
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8472222222222222,
    "acc_stderr,none": 0.030085743248565684
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.58,
    "acc_stderr,none": 0.049604496374885836
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.62,
    "acc_stderr,none": 0.048783173121456316
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.47,
    "acc_stderr,none": 0.05016135580465919
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.5,
    "acc_stderr,none": 0.04975185951049946
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.79,
    "acc_stderr,none": 0.040936018074033256
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.7872340425531915,
    "acc_stderr,none": 0.02675439134803978
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.7103448275862069,
    "acc_stderr,none": 0.03780019230438015
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6164021164021164,
    "acc_stderr,none": 0.0250437573185202
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8903225806451613,
    "acc_stderr,none": 0.017776778700485177
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6699507389162561,
    "acc_stderr,none": 0.033085304262282574
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.84,
    "acc_stderr,none": 0.036845294917747094
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.45185185185185184,
    "acc_stderr,none": 0.030343862998512636
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.5298013245033113,
    "acc_stderr,none": 0.040752249922169775
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.7129629629629629,
    "acc_stderr,none": 0.03085199299325701
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5625,
    "acc_stderr,none": 0.04708567521880525
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7616974972796517,
    "acc_stderr,none": 0.009940334245876209,
    "acc_norm,none": 0.7475516866158868,
    "acc_norm_stderr,none": 0.01013566554736237
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4564994882292733,
    "acc_stderr,none": 0.011271170113045128
  }
}