{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.5042662116040956,
    "acc_stderr,none": 0.014610858923956948,
    "acc_norm,none": 0.5051194539249146,
    "acc_norm_stderr,none": 0.014610624890309157
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.7882996632996633,
    "acc_stderr,none": 0.008382520764977723,
    "acc_norm,none": 0.7015993265993266,
    "acc_norm_stderr,none": 0.00938885591404043
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8366972477064221,
    "acc_stderr,none": 0.0064650734321900095
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.7149355572403336,
    "exact_match_stderr,flexible-extract": 0.01243504233490401
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.547998406691894,
    "acc_stderr,none": 0.004966736811010489,
    "acc_norm,none": 0.6850229038040231,
    "acc_norm_stderr,none": 0.004635574339176307
  },
  "mmlu": {
    "acc,none": 0.689787779518587,
    "acc_stderr,none": 0.0036876980054700794,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.6010626992561106,
    "acc_stderr,none": 0.006736474922323387,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.6111111111111112,
    "acc_stderr,none": 0.04360314860077459
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7757575757575758,
    "acc_stderr,none": 0.03256866661681102
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.803921568627451,
    "acc_stderr,none": 0.027865942286639325
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.8016877637130801,
    "acc_stderr,none": 0.025955020841621112
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7851239669421488,
    "acc_stderr,none": 0.03749492448709695
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7685185185185185,
    "acc_stderr,none": 0.04077494709252628
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.8220858895705522,
    "acc_stderr,none": 0.03004735765580662
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.7225433526011561,
    "acc_stderr,none": 0.024105712607754307
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.38212290502793295,
    "acc_stderr,none": 0.01625113971157077
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.7331189710610932,
    "acc_stderr,none": 0.02512263760881665
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7777777777777778,
    "acc_stderr,none": 0.02313237623454333
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4895697522816167,
    "acc_stderr,none": 0.012767457253930652
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.783625730994152,
    "acc_stderr,none": 0.03158149539338733
  },
  "mmlu_other": {
    "acc,none": 0.7347924042484711,
    "acc_stderr,none": 0.007669537644929993,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.75,
    "acc_stderr,none": 0.04351941398892446
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.7547169811320755,
    "acc_stderr,none": 0.02648035717989568
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.7225433526011561,
    "acc_stderr,none": 0.034140140070440354
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.37,
    "acc_stderr,none": 0.04852365870939099
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.695067264573991,
    "acc_stderr,none": 0.030898610882477515
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.8543689320388349,
    "acc_stderr,none": 0.0349260647662379
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8717948717948718,
    "acc_stderr,none": 0.021901905115073318
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.78,
    "acc_stderr,none": 0.04163331998932261
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.8084291187739464,
    "acc_stderr,none": 0.01407285931045195
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7450980392156863,
    "acc_stderr,none": 0.02495418432487991
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5815602836879432,
    "acc_stderr,none": 0.02942799403941999
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.7794117647058824,
    "acc_stderr,none": 0.025187786660227272
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.5060240963855421,
    "acc_stderr,none": 0.03892212195333045
  },
  "mmlu_social_sciences": {
    "acc,none": 0.805004874878128,
    "acc_stderr,none": 0.00700724138089306,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.6140350877192983,
    "acc_stderr,none": 0.04579639422070435
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8383838383838383,
    "acc_stderr,none": 0.026225919863629283
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8860103626943006,
    "acc_stderr,none": 0.022935144053919443
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.8051282051282052,
    "acc_stderr,none": 0.020083167595181393
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8907563025210085,
    "acc_stderr,none": 0.020262987400605344
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8972477064220183,
    "acc_stderr,none": 0.013018246509173761
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7709923664122137,
    "acc_stderr,none": 0.036853466317118506
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.7222222222222222,
    "acc_stderr,none": 0.018120224251484594
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6454545454545455,
    "acc_stderr,none": 0.04582004841505415
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7714285714285715,
    "acc_stderr,none": 0.026882144922307744
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8208955223880597,
    "acc_stderr,none": 0.027113286753111837
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.87,
    "acc_stderr,none": 0.03379976689896308
  },
  "mmlu_stem": {
    "acc,none": 0.6653980336187758,
    "acc_stderr,none": 0.00804921304083928,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.46,
    "acc_stderr,none": 0.05009082659620332
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6666666666666666,
    "acc_stderr,none": 0.04072314811876837
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.8486842105263158,
    "acc_stderr,none": 0.029162631596843982
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8055555555555556,
    "acc_stderr,none": 0.03309615177059006
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.49,
    "acc_stderr,none": 0.05024183937956912
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.59,
    "acc_stderr,none": 0.04943110704237102
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.47,
    "acc_stderr,none": 0.05016135580465919
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.5,
    "acc_stderr,none": 0.04975185951049946
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.8,
    "acc_stderr,none": 0.04020151261036846
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.774468085106383,
    "acc_stderr,none": 0.02732107841738753
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.7103448275862069,
    "acc_stderr,none": 0.037800192304380135
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6296296296296297,
    "acc_stderr,none": 0.0248708152510571
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8838709677419355,
    "acc_stderr,none": 0.018225757949432306
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6798029556650246,
    "acc_stderr,none": 0.032826493853041504
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.81,
    "acc_stderr,none": 0.03942772444036623
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.44074074074074077,
    "acc_stderr,none": 0.030270671157284063
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.5165562913907285,
    "acc_stderr,none": 0.0408024418562897
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.6759259259259259,
    "acc_stderr,none": 0.03191923445686186
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.6428571428571429,
    "acc_stderr,none": 0.04547960999764376
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7829162132752993,
    "acc_stderr,none": 0.009618708415756783,
    "acc_norm,none": 0.7747551686615887,
    "acc_norm_stderr,none": 0.00974664347103215
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4866939611054248,
    "acc_stderr,none": 0.011310063517892637
  }
}