{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.44880546075085326,
    "acc_stderr,none": 0.014534599585097674,
    "acc_norm,none": 0.46501706484641636,
    "acc_norm_stderr,none": 0.014575583922019675
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6712962962962963,
    "acc_stderr,none": 0.009638903167022178,
    "acc_norm,none": 0.5757575757575758,
    "acc_norm_stderr,none": 0.010141333654958572
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8376146788990826,
    "acc_stderr,none": 0.006450421045061457
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.2259287338893101,
    "exact_match_stderr,flexible-extract": 0.011519098777279965
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.4945230033857797,
    "acc_stderr,none": 0.0049894820406101124,
    "acc_norm,none": 0.6406094403505278,
    "acc_norm_stderr,none": 0.0047884120623757
  },
  "mmlu": {
    "acc,none": 0.5749181028343541,
    "acc_stderr,none": 0.003912138774886954,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5207226354941552,
    "acc_stderr,none": 0.006771474241593176,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.3888888888888889,
    "acc_stderr,none": 0.04360314860077459
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7333333333333333,
    "acc_stderr,none": 0.03453131801885417
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7794117647058824,
    "acc_stderr,none": 0.029102254389674093
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.759493670886076,
    "acc_stderr,none": 0.027820781981149675
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7355371900826446,
    "acc_stderr,none": 0.04026187527591207
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7037037037037037,
    "acc_stderr,none": 0.044143436668549335
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.6871165644171779,
    "acc_stderr,none": 0.036429145782924055
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.630057803468208,
    "acc_stderr,none": 0.025992472029306383
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.23798882681564246,
    "acc_stderr,none": 0.014242630070574885
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.6366559485530546,
    "acc_stderr,none": 0.027316847674192714
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.6728395061728395,
    "acc_stderr,none": 0.026105673861409818
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.44198174706649285,
    "acc_stderr,none": 0.012683972513598804
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.8128654970760234,
    "acc_stderr,none": 0.029913127232368025
  },
  "mmlu_other": {
    "acc,none": 0.663018989378822,
    "acc_stderr,none": 0.008161795518308614,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.62,
    "acc_stderr,none": 0.04878317312145633
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.6339622641509434,
    "acc_stderr,none": 0.029647813539365245
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.5606936416184971,
    "acc_stderr,none": 0.037842719328874674
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.36,
    "acc_stderr,none": 0.04824181513244218
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6771300448430493,
    "acc_stderr,none": 0.031381476375755
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7864077669902912,
    "acc_stderr,none": 0.040580420156460364
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8290598290598291,
    "acc_stderr,none": 0.024662496845209835
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.7,
    "acc_stderr,none": 0.046056618647183814
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7969348659003831,
    "acc_stderr,none": 0.014385525076611555
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.6699346405228758,
    "acc_stderr,none": 0.02692565465361569
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.425531914893617,
    "acc_stderr,none": 0.029494827600144373
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.6102941176470589,
    "acc_stderr,none": 0.029624663581159696
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.5180722891566265,
    "acc_stderr,none": 0.03889951252827216
  },
  "mmlu_social_sciences": {
    "acc,none": 0.6714332141696457,
    "acc_stderr,none": 0.008184054969435669,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.38596491228070173,
    "acc_stderr,none": 0.045796394220704355
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.7222222222222222,
    "acc_stderr,none": 0.031911782267135466
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8497409326424871,
    "acc_stderr,none": 0.025787723180723886
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.5384615384615384,
    "acc_stderr,none": 0.02527589207024064
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.5336134453781513,
    "acc_stderr,none": 0.03240501447690071
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.7908256880733945,
    "acc_stderr,none": 0.017437937173343236
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.732824427480916,
    "acc_stderr,none": 0.03880848301082395
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.6013071895424836,
    "acc_stderr,none": 0.01980828131744984
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6,
    "acc_stderr,none": 0.0469237132203465
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.6775510204081633,
    "acc_stderr,none": 0.029923100563683906
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.835820895522388,
    "acc_stderr,none": 0.026193923544454125
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.83,
    "acc_stderr,none": 0.03775251680686371
  },
  "mmlu_stem": {
    "acc,none": 0.4747859181731684,
    "acc_stderr,none": 0.008543300439037194,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.29,
    "acc_stderr,none": 0.045604802157206845
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.5259259259259259,
    "acc_stderr,none": 0.04313531696750574
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.625,
    "acc_stderr,none": 0.039397364351956274
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.7083333333333334,
    "acc_stderr,none": 0.03800968060554859
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.39,
    "acc_stderr,none": 0.04902071300001975
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.5,
    "acc_stderr,none": 0.050251890762960605
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.34,
    "acc_stderr,none": 0.047609522856952344
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.3431372549019608,
    "acc_stderr,none": 0.04724007352383888
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.7,
    "acc_stderr,none": 0.046056618647183814
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.5361702127659574,
    "acc_stderr,none": 0.03260038511835771
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.5448275862068965,
    "acc_stderr,none": 0.04149886942192118
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.37037037037037035,
    "acc_stderr,none": 0.02487081525105708
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.7129032258064516,
    "acc_stderr,none": 0.025736542745594525
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.4433497536945813,
    "acc_stderr,none": 0.034953345821629324
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.58,
    "acc_stderr,none": 0.049604496374885836
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.2777777777777778,
    "acc_stderr,none": 0.027309140588230182
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.304635761589404,
    "acc_stderr,none": 0.03757949922943342
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.4166666666666667,
    "acc_stderr,none": 0.033622774366080424
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.41964285714285715,
    "acc_stderr,none": 0.04684099321077106
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.73449401523395,
    "acc_stderr,none": 0.010303308653024425,
    "acc_norm,none": 0.7219804134929271,
    "acc_norm_stderr,none": 0.01045311735833281
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.43705220061412486,
    "acc_stderr,none": 0.011224050108856496
  }
}