{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.49146757679180886,
    "acc_stderr,none": 0.01460926316563219,
    "acc_norm,none": 0.4880546075085324,
    "acc_norm_stderr,none": 0.014607220340597171
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.7529461279461279,
    "acc_stderr,none": 0.008850055161459239,
    "acc_norm,none": 0.6317340067340067,
    "acc_norm_stderr,none": 0.00989728620901089
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8360856269113149,
    "acc_stderr,none": 0.006474801177302569
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.7604245640636846,
    "exact_match_stderr,flexible-extract": 0.01175686434407741
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5131447918741286,
    "acc_stderr,none": 0.004988056789119677,
    "acc_norm,none": 0.6470822545309699,
    "acc_norm_stderr,none": 0.004769007545082278
  },
  "mmlu": {
    "acc,none": 0.689431704885344,
    "acc_stderr,none": 0.003678837607898647,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5934112646121148,
    "acc_stderr,none": 0.006713338770199462,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.6111111111111112,
    "acc_stderr,none": 0.04360314860077459
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7757575757575758,
    "acc_stderr,none": 0.03256866661681102
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.8333333333333334,
    "acc_stderr,none": 0.02615686752393104
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.8270042194092827,
    "acc_stderr,none": 0.024621562866768424
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.768595041322314,
    "acc_stderr,none": 0.03849856098794088
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7777777777777778,
    "acc_stderr,none": 0.040191074725573483
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.8159509202453987,
    "acc_stderr,none": 0.03044677768797174
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.7138728323699421,
    "acc_stderr,none": 0.02433214677913414
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.3687150837988827,
    "acc_stderr,none": 0.016135759015030122
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.7427652733118971,
    "acc_stderr,none": 0.024826171289250888
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7530864197530864,
    "acc_stderr,none": 0.02399350170904211
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.47196870925684486,
    "acc_stderr,none": 0.01275015180292244
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7894736842105263,
    "acc_stderr,none": 0.031267817146631786
  },
  "mmlu_other": {
    "acc,none": 0.7370453813968458,
    "acc_stderr,none": 0.007654121735886417,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.73,
    "acc_stderr,none": 0.04461960433384739
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.7547169811320755,
    "acc_stderr,none": 0.026480357179895688
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.7167630057803468,
    "acc_stderr,none": 0.034355680560478746
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.38,
    "acc_stderr,none": 0.04878317312145634
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6995515695067265,
    "acc_stderr,none": 0.030769352008229146
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.8543689320388349,
    "acc_stderr,none": 0.0349260647662379
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8803418803418803,
    "acc_stderr,none": 0.021262719400407006
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.77,
    "acc_stderr,none": 0.04229525846816506
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.8122605363984674,
    "acc_stderr,none": 0.01396439376989914
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7450980392156863,
    "acc_stderr,none": 0.02495418432487991
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5957446808510638,
    "acc_stderr,none": 0.029275532159704725
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.7794117647058824,
    "acc_stderr,none": 0.025187786660227276
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.5060240963855421,
    "acc_stderr,none": 0.03892212195333045
  },
  "mmlu_social_sciences": {
    "acc,none": 0.8027299317517063,
    "acc_stderr,none": 0.007039770192889797,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.5964912280701754,
    "acc_stderr,none": 0.046151869625837054
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8434343434343434,
    "acc_stderr,none": 0.025890520358141454
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.9015544041450777,
    "acc_stderr,none": 0.021500249576033456
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7974358974358975,
    "acc_stderr,none": 0.020377660970371383
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.9033613445378151,
    "acc_stderr,none": 0.019192520709708727
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8880733944954129,
    "acc_stderr,none": 0.013517352714958781
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7480916030534351,
    "acc_stderr,none": 0.03807387116306085
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.7205882352941176,
    "acc_stderr,none": 0.018152871051538816
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6818181818181818,
    "acc_stderr,none": 0.04461272175910507
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7755102040816326,
    "acc_stderr,none": 0.02671143055553841
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8159203980099502,
    "acc_stderr,none": 0.027403859410786848
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.83,
    "acc_stderr,none": 0.03775251680686371
  },
  "mmlu_stem": {
    "acc,none": 0.6752299397399302,
    "acc_stderr,none": 0.00799884950925438,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.5,
    "acc_stderr,none": 0.050251890762960605
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.674074074074074,
    "acc_stderr,none": 0.040491220417025055
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.8289473684210527,
    "acc_stderr,none": 0.03064360707167709
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8472222222222222,
    "acc_stderr,none": 0.030085743248565684
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.54,
    "acc_stderr,none": 0.05009082659620332
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.62,
    "acc_stderr,none": 0.048783173121456316
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.46,
    "acc_stderr,none": 0.05009082659620333
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.5196078431372549,
    "acc_stderr,none": 0.04971358884367405
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.77,
    "acc_stderr,none": 0.04229525846816507
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.7957446808510639,
    "acc_stderr,none": 0.026355158413349407
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.7103448275862069,
    "acc_stderr,none": 0.03780019230438015
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6296296296296297,
    "acc_stderr,none": 0.024870815251057093
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.9064516129032258,
    "acc_stderr,none": 0.016565754668270993
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6403940886699507,
    "acc_stderr,none": 0.03376458246509567
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.82,
    "acc_stderr,none": 0.038612291966536955
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.4703703703703704,
    "acc_stderr,none": 0.030431963547936577
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.5695364238410596,
    "acc_stderr,none": 0.04042809961395634
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.7037037037037037,
    "acc_stderr,none": 0.03114144782353603
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5535714285714286,
    "acc_stderr,none": 0.047184714852195886
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7513601741022851,
    "acc_stderr,none": 0.010084511234296867,
    "acc_norm,none": 0.7426550598476604,
    "acc_norm_stderr,none": 0.01019992106479251
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4534288638689867,
    "acc_stderr,none": 0.011264886135301374
  }
}