{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.42235494880546076,
    "acc_stderr,none": 0.014434138713379988,
    "acc_norm,none": 0.4129692832764505,
    "acc_norm_stderr,none": 0.014388344935398324
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.5871212121212122,
    "acc_stderr,none": 0.010102837421104667,
    "acc_norm,none": 0.48863636363636365,
    "acc_norm_stderr,none": 0.01025713344111711
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.6972477064220184,
    "acc_stderr,none": 0.008035812741801866
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.000758150113722517,
    "exact_match_stderr,strict-match": 0.0007581501137225225,
    "exact_match,flexible-extract": 0.6398786959818044,
    "exact_match_stderr,flexible-extract": 0.01322255942325049
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5363473411670981,
    "acc_stderr,none": 0.004976579655169274,
    "acc_norm,none": 0.6351324437363075,
    "acc_norm_stderr,none": 0.004804091708812554
  },
  "mmlu": {
    "acc,none": 0.6071072496795328,
    "acc_stderr,none": 0.0038471374271458313,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5394261424017003,
    "acc_stderr,none": 0.0067428621947799505,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5,
    "acc_stderr,none": 0.04472135954999579
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7272727272727273,
    "acc_stderr,none": 0.0347769116216366
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7647058823529411,
    "acc_stderr,none": 0.029771775228145628
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7848101265822784,
    "acc_stderr,none": 0.026750826994676166
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7024793388429752,
    "acc_stderr,none": 0.04173349148083499
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.6666666666666666,
    "acc_stderr,none": 0.04557239513497751
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.7300613496932515,
    "acc_stderr,none": 0.034878251684978906
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6791907514450867,
    "acc_stderr,none": 0.025131000233647904
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.24134078212290502,
    "acc_stderr,none": 0.014310999547961455
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.6302250803858521,
    "acc_stderr,none": 0.027417996705630998
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7592592592592593,
    "acc_stderr,none": 0.023788583551658537
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.46284224250325945,
    "acc_stderr,none": 0.01273492357953206
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.783625730994152,
    "acc_stderr,none": 0.03158149539338733
  },
  "mmlu_other": {
    "acc,none": 0.6826520759575153,
    "acc_stderr,none": 0.008076431618509322,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.73,
    "acc_stderr,none": 0.044619604333847394
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.6377358490566037,
    "acc_stderr,none": 0.0295822451283843
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.6473988439306358,
    "acc_stderr,none": 0.03643037168958548
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.41,
    "acc_stderr,none": 0.04943110704237101
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6995515695067265,
    "acc_stderr,none": 0.030769352008229146
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7572815533980582,
    "acc_stderr,none": 0.04245022486384495
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8931623931623932,
    "acc_stderr,none": 0.020237149008990957
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.73,
    "acc_stderr,none": 0.0446196043338474
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7867177522349936,
    "acc_stderr,none": 0.014648172749593505
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.6633986928104575,
    "acc_stderr,none": 0.027057974624494382
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.48936170212765956,
    "acc_stderr,none": 0.02982074719142248
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.6360294117647058,
    "acc_stderr,none": 0.029227192460032025
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4819277108433735,
    "acc_stderr,none": 0.03889951252827216
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7218069548261293,
    "acc_stderr,none": 0.007958197222170543,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.47368421052631576,
    "acc_stderr,none": 0.046970851366478626
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.7676767676767676,
    "acc_stderr,none": 0.03008862949021749
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8031088082901554,
    "acc_stderr,none": 0.02869787397186068
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.6282051282051282,
    "acc_stderr,none": 0.024503472557110943
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.7605042016806722,
    "acc_stderr,none": 0.027722065493361266
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8146788990825689,
    "acc_stderr,none": 0.016659279700295824
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7251908396946565,
    "acc_stderr,none": 0.03915345408847836
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.6764705882352942,
    "acc_stderr,none": 0.018926082916083383
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6545454545454545,
    "acc_stderr,none": 0.04554619617541054
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.6979591836734694,
    "acc_stderr,none": 0.0293936093198798
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8009950248756219,
    "acc_stderr,none": 0.028231365092758406
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.77,
    "acc_stderr,none": 0.04229525846816506
  },
  "mmlu_stem": {
    "acc,none": 0.5217253409451316,
    "acc_stderr,none": 0.008285930364554286,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.28,
    "acc_stderr,none": 0.04512608598542127
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6296296296296297,
    "acc_stderr,none": 0.04171654161354543
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.75,
    "acc_stderr,none": 0.03523807393012047
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.7916666666666666,
    "acc_stderr,none": 0.033961162058453336
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.4,
    "acc_stderr,none": 0.049236596391733084
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.56,
    "acc_stderr,none": 0.049888765156985884
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.24,
    "acc_stderr,none": 0.042923469599092816
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.3627450980392157,
    "acc_stderr,none": 0.04784060704105655
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.74,
    "acc_stderr,none": 0.04408440022768077
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.6085106382978723,
    "acc_stderr,none": 0.03190701242326812
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.6206896551724138,
    "acc_stderr,none": 0.040434618619167466
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.41798941798941797,
    "acc_stderr,none": 0.02540255550326091
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.7967741935483871,
    "acc_stderr,none": 0.02289168798455496
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.5467980295566502,
    "acc_stderr,none": 0.03502544650845872
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.72,
    "acc_stderr,none": 0.04512608598542128
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.23703703703703705,
    "acc_stderr,none": 0.02592887613276611
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.33112582781456956,
    "acc_stderr,none": 0.038425817186598696
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.4074074074074074,
    "acc_stderr,none": 0.03350991604696043
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.44642857142857145,
    "acc_stderr,none": 0.047184714852195886
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7584330794341676,
    "acc_stderr,none": 0.009986718001804474,
    "acc_norm,none": 0.7459194776931447,
    "acc_norm_stderr,none": 0.010157271999135034
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.43551688843398156,
    "acc_stderr,none": 0.011219586604022598
  }
}