{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4880546075085324,
    "acc_stderr,none": 0.014607220340597171,
    "acc_norm,none": 0.49146757679180886,
    "acc_norm_stderr,none": 0.01460926316563219
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.7323232323232324,
    "acc_stderr,none": 0.009085000147099351,
    "acc_norm,none": 0.6275252525252525,
    "acc_norm_stderr,none": 0.009920469215736019
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8296636085626912,
    "acc_stderr,none": 0.006575023078014515
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.759666413949962,
    "exact_match_stderr,flexible-extract": 0.011769580703836945
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5155347540330611,
    "acc_stderr,none": 0.004987372476207027,
    "acc_norm,none": 0.6429994025094603,
    "acc_norm_stderr,none": 0.004781358113341945
  },
  "mmlu": {
    "acc,none": 0.6969092721834497,
    "acc_stderr,none": 0.003654681947749904,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5970244420828905,
    "acc_stderr,none": 0.006716290195844046,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5714285714285714,
    "acc_stderr,none": 0.0442626668137991
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7818181818181819,
    "acc_stderr,none": 0.03225078108306289
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.8137254901960784,
    "acc_stderr,none": 0.027325470966716323
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.810126582278481,
    "acc_stderr,none": 0.025530100460233494
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7520661157024794,
    "acc_stderr,none": 0.039418975265163025
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7685185185185185,
    "acc_stderr,none": 0.04077494709252626
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.8343558282208589,
    "acc_stderr,none": 0.029208296231259104
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.7138728323699421,
    "acc_stderr,none": 0.024332146779134135
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.3653631284916201,
    "acc_stderr,none": 0.01610483388014229
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.7363344051446945,
    "acc_stderr,none": 0.02502553850053234
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7901234567901234,
    "acc_stderr,none": 0.022658344085981365
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4876140808344198,
    "acc_stderr,none": 0.01276631731547356
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7777777777777778,
    "acc_stderr,none": 0.03188578017686398
  },
  "mmlu_other": {
    "acc,none": 0.7425168973286128,
    "acc_stderr,none": 0.007594808758930677,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.69,
    "acc_stderr,none": 0.04648231987117316
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.7849056603773585,
    "acc_stderr,none": 0.02528839450289137
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.7398843930635838,
    "acc_stderr,none": 0.03345036916788991
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.39,
    "acc_stderr,none": 0.04902071300001975
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6860986547085202,
    "acc_stderr,none": 0.031146796482972465
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.8543689320388349,
    "acc_stderr,none": 0.03492606476623789
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8846153846153846,
    "acc_stderr,none": 0.02093019318517934
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.79,
    "acc_stderr,none": 0.040936018074033256
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.8199233716475096,
    "acc_stderr,none": 0.013740797258579825
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7516339869281046,
    "acc_stderr,none": 0.02473998135511359
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5815602836879432,
    "acc_stderr,none": 0.02942799403941999
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.7757352941176471,
    "acc_stderr,none": 0.02533684856333236
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.536144578313253,
    "acc_stderr,none": 0.03882310850890594
  },
  "mmlu_social_sciences": {
    "acc,none": 0.8085797855053624,
    "acc_stderr,none": 0.006961713450595309,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.631578947368421,
    "acc_stderr,none": 0.04537815354939392
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8535353535353535,
    "acc_stderr,none": 0.025190921114603915
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.9015544041450777,
    "acc_stderr,none": 0.021500249576033456
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.8128205128205128,
    "acc_stderr,none": 0.01977660108655003
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8991596638655462,
    "acc_stderr,none": 0.019559663430480788
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8935779816513761,
    "acc_stderr,none": 0.013221554674594372
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7709923664122137,
    "acc_stderr,none": 0.036853466317118506
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.7238562091503268,
    "acc_stderr,none": 0.018087276935663137
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6545454545454545,
    "acc_stderr,none": 0.04554619617541054
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7755102040816326,
    "acc_stderr,none": 0.02671143055553841
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8159203980099502,
    "acc_stderr,none": 0.027403859410786848
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.85,
    "acc_stderr,none": 0.035887028128263734
  },
  "mmlu_stem": {
    "acc,none": 0.6920393276244846,
    "acc_stderr,none": 0.007892753178493587,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.5,
    "acc_stderr,none": 0.050251890762960605
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.7037037037037037,
    "acc_stderr,none": 0.03944624162501116
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.8223684210526315,
    "acc_stderr,none": 0.031103182383123384
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8472222222222222,
    "acc_stderr,none": 0.030085743248565684
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.54,
    "acc_stderr,none": 0.05009082659620332
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.6,
    "acc_stderr,none": 0.04923659639173309
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.47,
    "acc_stderr,none": 0.050161355804659205
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.5196078431372549,
    "acc_stderr,none": 0.04971358884367405
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.77,
    "acc_stderr,none": 0.04229525846816507
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.8085106382978723,
    "acc_stderr,none": 0.025722149992637774
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.7517241379310344,
    "acc_stderr,none": 0.036001056927277696
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6587301587301587,
    "acc_stderr,none": 0.02441923496681906
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.9,
    "acc_stderr,none": 0.01706640371965727
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6798029556650246,
    "acc_stderr,none": 0.032826493853041504
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.85,
    "acc_stderr,none": 0.0358870281282637
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.48518518518518516,
    "acc_stderr,none": 0.030472153249328584
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.5894039735099338,
    "acc_stderr,none": 0.04016689594849927
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.75,
    "acc_stderr,none": 0.029531221160930918
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5982142857142857,
    "acc_stderr,none": 0.04653333146973646
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7448313384113167,
    "acc_stderr,none": 0.01017157159252182,
    "acc_norm,none": 0.7393906420021763,
    "acc_norm_stderr,none": 0.010241826155811618
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4564994882292733,
    "acc_stderr,none": 0.011271170113045128
  }
}