{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4667235494880546,
    "acc_stderr,none": 0.014578995859605814,
    "acc_norm,none": 0.4872013651877133,
    "acc_norm_stderr,none": 0.014606603181012534
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.7525252525252525,
    "acc_stderr,none": 0.00885511441483471,
    "acc_norm,none": 0.6262626262626263,
    "acc_norm_stderr,none": 0.009927267058259625
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8403669724770643,
    "acc_stderr,none": 0.006406021659710515
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.2714177407126611,
    "exact_match_stderr,flexible-extract": 0.012249002026150594
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5684126667994424,
    "acc_stderr,none": 0.004942853459371541,
    "acc_norm,none": 0.6955785700059749,
    "acc_norm_stderr,none": 0.004592215118295276
  },
  "mmlu": {
    "acc,none": 0.46838057256801024,
    "acc_stderr,none": 0.004013632765772537,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.4722635494155154,
    "acc_stderr,none": 0.006953082441710287,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.29365079365079366,
    "acc_stderr,none": 0.04073524322147125
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.6727272727272727,
    "acc_stderr,none": 0.036639749943912434
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.696078431372549,
    "acc_stderr,none": 0.03228210387037892
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7172995780590717,
    "acc_stderr,none": 0.029312814153955917
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.6776859504132231,
    "acc_stderr,none": 0.042664163633521685
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.6018518518518519,
    "acc_stderr,none": 0.04732332615978813
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.5276073619631901,
    "acc_stderr,none": 0.0392237829061099
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.5,
    "acc_stderr,none": 0.026919095102908273
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.27150837988826815,
    "acc_stderr,none": 0.014874252168095268
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.4758842443729904,
    "acc_stderr,none": 0.028365041542564577
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.6141975308641975,
    "acc_stderr,none": 0.027085401226132143
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.41395045632333766,
    "acc_stderr,none": 0.012579699631289265
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7660818713450293,
    "acc_stderr,none": 0.03246721765117827
  },
  "mmlu_other": {
    "acc,none": 0.5339555841647892,
    "acc_stderr,none": 0.008741113861179189,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.5,
    "acc_stderr,none": 0.050251890762960605
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.5509433962264151,
    "acc_stderr,none": 0.030612730713641092
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.4046242774566474,
    "acc_stderr,none": 0.03742461193887248
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.23,
    "acc_stderr,none": 0.04229525846816506
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.5829596412556054,
    "acc_stderr,none": 0.03309266936071721
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.6990291262135923,
    "acc_stderr,none": 0.04541609446503949
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.6837606837606838,
    "acc_stderr,none": 0.030463656747340247
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.54,
    "acc_stderr,none": 0.05009082659620332
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.6232439335887612,
    "acc_stderr,none": 0.017328292907303037
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.5326797385620915,
    "acc_stderr,none": 0.02856869975222588
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.35106382978723405,
    "acc_stderr,none": 0.02847350127296376
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.45955882352941174,
    "acc_stderr,none": 0.030273325077345755
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4759036144578313,
    "acc_stderr,none": 0.03887971849597264
  },
  "mmlu_social_sciences": {
    "acc,none": 0.5459863503412414,
    "acc_stderr,none": 0.0087181379996854,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.3157894736842105,
    "acc_stderr,none": 0.04372748290278007
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.51010101010101,
    "acc_stderr,none": 0.035616254886737454
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.6839378238341969,
    "acc_stderr,none": 0.033553973696861736
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.37435897435897436,
    "acc_stderr,none": 0.024537591572830506
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.3739495798319328,
    "acc_stderr,none": 0.031429466378837076
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.6256880733944954,
    "acc_stderr,none": 0.020748959408988323
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.6106870229007634,
    "acc_stderr,none": 0.04276486542814591
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.5294117647058824,
    "acc_stderr,none": 0.020192808271433788
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.5363636363636364,
    "acc_stderr,none": 0.04776449162396197
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.5959183673469388,
    "acc_stderr,none": 0.03141470802586589
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.7512437810945274,
    "acc_stderr,none": 0.030567675938916718
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.75,
    "acc_stderr,none": 0.04351941398892446
  },
  "mmlu_stem": {
    "acc,none": 0.32223279416428796,
    "acc_stderr,none": 0.008078627446491408,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.22,
    "acc_stderr,none": 0.04163331998932268
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.45185185185185184,
    "acc_stderr,none": 0.04299268905480864
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.4473684210526316,
    "acc_stderr,none": 0.0404633688397825
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.4930555555555556,
    "acc_stderr,none": 0.04180806750294938
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.28,
    "acc_stderr,none": 0.04512608598542128
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.3,
    "acc_stderr,none": 0.046056618647183814
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.21,
    "acc_stderr,none": 0.040936018074033256
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.24509803921568626,
    "acc_stderr,none": 0.04280105837364396
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.48,
    "acc_stderr,none": 0.050211673156867795
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.3276595744680851,
    "acc_stderr,none": 0.030683020843231004
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.33793103448275863,
    "acc_stderr,none": 0.039417076320648906
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.2222222222222222,
    "acc_stderr,none": 0.021411684393694196
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.5451612903225806,
    "acc_stderr,none": 0.028327743091561053
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.23645320197044334,
    "acc_stderr,none": 0.02989611429173355
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.4,
    "acc_stderr,none": 0.049236596391733084
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.2111111111111111,
    "acc_stderr,none": 0.024882116857655113
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.2185430463576159,
    "acc_stderr,none": 0.03374235550425694
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.20833333333333334,
    "acc_stderr,none": 0.02769691071309394
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.35714285714285715,
    "acc_stderr,none": 0.04547960999764376
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7878128400435256,
    "acc_stderr,none": 0.009539299828174079,
    "acc_norm,none": 0.7872687704026116,
    "acc_norm_stderr,none": 0.00954822312304735
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4595701125895599,
    "acc_stderr,none": 0.01127702248607995
  }
}