{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4104095563139932,
    "acc_stderr,none": 0.014374922192642662,
    "acc_norm,none": 0.4129692832764505,
    "acc_norm_stderr,none": 0.014388344935398322
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6014309764309764,
    "acc_stderr,none": 0.010046455400477937,
    "acc_norm,none": 0.4797979797979798,
    "acc_norm_stderr,none": 0.01025140562130537
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.7116207951070337,
    "acc_stderr,none": 0.00792316727779508
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.003032600454890068,
    "exact_match_stderr,strict-match": 0.0015145735612245455,
    "exact_match,flexible-extract": 0.5678544351781653,
    "exact_match_stderr,flexible-extract": 0.013645072137842447
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5265883290181239,
    "acc_stderr,none": 0.0049827214724073405,
    "acc_norm,none": 0.6399123680541725,
    "acc_norm_stderr,none": 0.004790445139186365
  },
  "mmlu": {
    "acc,none": 0.6388691069648198,
    "acc_stderr,none": 0.0038432547228289034,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5776833156216791,
    "acc_stderr,none": 0.006855606335617063,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5079365079365079,
    "acc_stderr,none": 0.044715725362943486
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7212121212121212,
    "acc_stderr,none": 0.03501438706296781
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7696078431372549,
    "acc_stderr,none": 0.029554292605695066
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7932489451476793,
    "acc_stderr,none": 0.026361651668389094
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7768595041322314,
    "acc_stderr,none": 0.03800754475228733
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.6944444444444444,
    "acc_stderr,none": 0.04453197507374983
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.7975460122699386,
    "acc_stderr,none": 0.031570650789119005
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.7023121387283237,
    "acc_stderr,none": 0.024617055388677006
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.39664804469273746,
    "acc_stderr,none": 0.016361354769822475
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.6495176848874598,
    "acc_stderr,none": 0.027098652621301747
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7592592592592593,
    "acc_stderr,none": 0.023788583551658537
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4641460234680574,
    "acc_stderr,none": 0.012737361318730581
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7777777777777778,
    "acc_stderr,none": 0.03188578017686398
  },
  "mmlu_other": {
    "acc,none": 0.6916639845510139,
    "acc_stderr,none": 0.008041141094224408,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.7,
    "acc_stderr,none": 0.04605661864718381
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.6754716981132075,
    "acc_stderr,none": 0.028815615713432115
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.6589595375722543,
    "acc_stderr,none": 0.036146654241808254
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.42,
    "acc_stderr,none": 0.04960449637488584
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6860986547085202,
    "acc_stderr,none": 0.03114679648297246
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7281553398058253,
    "acc_stderr,none": 0.044052680241409216
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8974358974358975,
    "acc_stderr,none": 0.019875655027867447
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.72,
    "acc_stderr,none": 0.04512608598542128
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7828863346104725,
    "acc_stderr,none": 0.014743125394823298
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7058823529411765,
    "acc_stderr,none": 0.02609016250427904
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5070921985815603,
    "acc_stderr,none": 0.02982449855912901
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.6654411764705882,
    "acc_stderr,none": 0.02866199620233531
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4879518072289157,
    "acc_stderr,none": 0.038913644958358196
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7474813129671758,
    "acc_stderr,none": 0.007724792845276029,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.5263157894736842,
    "acc_stderr,none": 0.046970851366478626
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.7929292929292929,
    "acc_stderr,none": 0.028869778460267045
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8290155440414507,
    "acc_stderr,none": 0.02717121368316453
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.6948717948717948,
    "acc_stderr,none": 0.023346335293325884
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.7899159663865546,
    "acc_stderr,none": 0.026461398717471874
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8311926605504587,
    "acc_stderr,none": 0.016060056268530333
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7175572519083969,
    "acc_stderr,none": 0.03948406125768361
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.6928104575163399,
    "acc_stderr,none": 0.018663359671463677
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6454545454545455,
    "acc_stderr,none": 0.04582004841505415
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7061224489795919,
    "acc_stderr,none": 0.029162738410249765
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8258706467661692,
    "acc_stderr,none": 0.026814951200421603
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.83,
    "acc_stderr,none": 0.03775251680686371
  },
  "mmlu_stem": {
    "acc,none": 0.5721535045987948,
    "acc_stderr,none": 0.008287643655951955,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.35,
    "acc_stderr,none": 0.04793724854411019
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6444444444444445,
    "acc_stderr,none": 0.04135176749720386
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.7763157894736842,
    "acc_stderr,none": 0.03391160934343603
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.7916666666666666,
    "acc_stderr,none": 0.033961162058453336
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.42,
    "acc_stderr,none": 0.049604496374885836
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.55,
    "acc_stderr,none": 0.05
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.32,
    "acc_stderr,none": 0.04688261722621505
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.37254901960784315,
    "acc_stderr,none": 0.04810840148082633
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.75,
    "acc_stderr,none": 0.04351941398892446
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.6893617021276596,
    "acc_stderr,none": 0.03025123757921317
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.6620689655172414,
    "acc_stderr,none": 0.0394170763206489
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.5105820105820106,
    "acc_stderr,none": 0.02574554227604548
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8161290322580645,
    "acc_stderr,none": 0.022037217340267843
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6157635467980296,
    "acc_stderr,none": 0.03422398565657551
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.77,
    "acc_stderr,none": 0.04229525846816506
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.26666666666666666,
    "acc_stderr,none": 0.026962424325073828
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.4370860927152318,
    "acc_stderr,none": 0.04050035722230636
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.49537037037037035,
    "acc_stderr,none": 0.03409825519163572
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5089285714285714,
    "acc_stderr,none": 0.04745033255489123
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7600652883569097,
    "acc_stderr,none": 0.009963625892809545,
    "acc_norm,none": 0.7546245919477693,
    "acc_norm_stderr,none": 0.010039831320422405
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.43705220061412486,
    "acc_stderr,none": 0.011224050108856496
  }
}