{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4206484641638225,
    "acc_stderr,none": 0.0144262112525084,
    "acc_norm,none": 0.42150170648464164,
    "acc_norm_stderr,none": 0.01443019706932603
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6102693602693603,
    "acc_stderr,none": 0.01000716939179705,
    "acc_norm,none": 0.5037878787878788,
    "acc_norm_stderr,none": 0.010259489101351842
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.7651376146788991,
    "acc_stderr,none": 0.007414281137444414
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.6277482941622441,
    "exact_match_stderr,flexible-extract": 0.013315375362565038
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5248954391555467,
    "acc_stderr,none": 0.004983592410934178,
    "acc_norm,none": 0.6557458673571002,
    "acc_norm_stderr,none": 0.004741534106470316
  },
  "mmlu": {
    "acc,none": 0.6464891041162227,
    "acc_stderr,none": 0.003832018549148781,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5700318809776833,
    "acc_stderr,none": 0.006853379983890594,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5555555555555556,
    "acc_stderr,none": 0.04444444444444449
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7212121212121212,
    "acc_stderr,none": 0.03501438706296781
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7598039215686274,
    "acc_stderr,none": 0.02998373305591362
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7805907172995781,
    "acc_stderr,none": 0.026939106581553945
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.768595041322314,
    "acc_stderr,none": 0.03849856098794088
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7037037037037037,
    "acc_stderr,none": 0.04414343666854934
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.7914110429447853,
    "acc_stderr,none": 0.031921934489347235
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6878612716763006,
    "acc_stderr,none": 0.024946792225272314
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.37318435754189944,
    "acc_stderr,none": 0.016175692013381957
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.6655948553054662,
    "acc_stderr,none": 0.026795422327893947
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7561728395061729,
    "acc_stderr,none": 0.02389187954195961
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.45632333767926986,
    "acc_stderr,none": 0.012721420501462547
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7660818713450293,
    "acc_stderr,none": 0.03246721765117826
  },
  "mmlu_other": {
    "acc,none": 0.6935951078210493,
    "acc_stderr,none": 0.008026174246050103,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.7,
    "acc_stderr,none": 0.04605661864718381
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.690566037735849,
    "acc_stderr,none": 0.028450154794118634
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.653179190751445,
    "acc_stderr,none": 0.036291466701596636
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.43,
    "acc_stderr,none": 0.04975698519562428
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.672645739910314,
    "acc_stderr,none": 0.03149384670994131
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7281553398058253,
    "acc_stderr,none": 0.044052680241409216
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.9017094017094017,
    "acc_stderr,none": 0.019503444900757567
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.75,
    "acc_stderr,none": 0.04351941398892446
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.776500638569604,
    "acc_stderr,none": 0.01489723522945071
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7124183006535948,
    "acc_stderr,none": 0.02591780611714716
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5035460992907801,
    "acc_stderr,none": 0.02982674915328092
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.6875,
    "acc_stderr,none": 0.02815637344037142
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4819277108433735,
    "acc_stderr,none": 0.03889951252827216
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7546311342216444,
    "acc_stderr,none": 0.007664004288588055,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.5701754385964912,
    "acc_stderr,none": 0.04657047260594964
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.797979797979798,
    "acc_stderr,none": 0.028606204289229872
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8031088082901554,
    "acc_stderr,none": 0.02869787397186068
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7153846153846154,
    "acc_stderr,none": 0.022878322799706297
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8235294117647058,
    "acc_stderr,none": 0.024762902678057933
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8366972477064221,
    "acc_stderr,none": 0.01584825580650155
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7175572519083969,
    "acc_stderr,none": 0.03948406125768361
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.6928104575163399,
    "acc_stderr,none": 0.018663359671463677
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6545454545454545,
    "acc_stderr,none": 0.04554619617541054
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.710204081632653,
    "acc_stderr,none": 0.02904308868330433
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8258706467661692,
    "acc_stderr,none": 0.026814951200421603
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.83,
    "acc_stderr,none": 0.03775251680686371
  },
  "mmlu_stem": {
    "acc,none": 0.608626704725658,
    "acc_stderr,none": 0.008256276461847793,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.35,
    "acc_stderr,none": 0.04793724854411019
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.674074074074074,
    "acc_stderr,none": 0.040491220417025055
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.7763157894736842,
    "acc_stderr,none": 0.03391160934343603
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.7986111111111112,
    "acc_stderr,none": 0.0335364746971384
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.43,
    "acc_stderr,none": 0.049756985195624284
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.58,
    "acc_stderr,none": 0.04960449637488583
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.38,
    "acc_stderr,none": 0.04878317312145632
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.43137254901960786,
    "acc_stderr,none": 0.04928099597287534
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.74,
    "acc_stderr,none": 0.04408440022768078
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.723404255319149,
    "acc_stderr,none": 0.02924188386962882
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.6344827586206897,
    "acc_stderr,none": 0.04013124195424386
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.5714285714285714,
    "acc_stderr,none": 0.025487187147859375
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8290322580645161,
    "acc_stderr,none": 0.021417242936321565
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6502463054187192,
    "acc_stderr,none": 0.03355400904969565
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.82,
    "acc_stderr,none": 0.038612291966536955
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.32592592592592595,
    "acc_stderr,none": 0.028578348365473072
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.4966887417218543,
    "acc_stderr,none": 0.04082393379449654
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.5833333333333334,
    "acc_stderr,none": 0.03362277436608043
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5803571428571429,
    "acc_stderr,none": 0.04684099321077106
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7704026115342764,
    "acc_stderr,none": 0.009812682950815194,
    "acc_norm,none": 0.7611534276387377,
    "acc_norm_stderr,none": 0.0099481203853375
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.43091095189355166,
    "acc_stderr,none": 0.011205539177566719
  }
}