{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4180887372013652,
    "acc_stderr,none": 0.014413988396996084,
    "acc_norm,none": 0.4496587030716723,
    "acc_norm_stderr,none": 0.014537144444284748
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6696127946127947,
    "acc_stderr,none": 0.009651430216428178,
    "acc_norm,none": 0.5715488215488216,
    "acc_norm_stderr,none": 0.010154195733990972
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.82782874617737,
    "acc_stderr,none": 0.006603027596591702
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.7520849128127369,
    "exact_match_stderr,flexible-extract": 0.011893980214826173
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5116510655247959,
    "acc_stderr,none": 0.004988426528513013,
    "acc_norm,none": 0.6603266281617207,
    "acc_norm_stderr,none": 0.004726304225137333
  },
  "mmlu": {
    "acc,none": 0.653681811707734,
    "acc_stderr,none": 0.003811127248991467,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5642933049946866,
    "acc_stderr,none": 0.006817815738017944,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5,
    "acc_stderr,none": 0.04472135954999579
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7272727272727273,
    "acc_stderr,none": 0.0347769116216366
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7401960784313726,
    "acc_stderr,none": 0.03077855467869326
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7721518987341772,
    "acc_stderr,none": 0.027303484599069432
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7851239669421488,
    "acc_stderr,none": 0.037494924487096966
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.8055555555555556,
    "acc_stderr,none": 0.03826076324884863
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.7852760736196319,
    "acc_stderr,none": 0.03226219377286774
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6734104046242775,
    "acc_stderr,none": 0.025248264774242832
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.36201117318435755,
    "acc_stderr,none": 0.016073067350153087
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.6945337620578779,
    "acc_stderr,none": 0.026160584450140457
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.75,
    "acc_stderr,none": 0.02409347123262133
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.44132985658409385,
    "acc_stderr,none": 0.012682016335646676
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7894736842105263,
    "acc_stderr,none": 0.031267817146631786
  },
  "mmlu_other": {
    "acc,none": 0.6990666237528163,
    "acc_stderr,none": 0.007984051242060208,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.7,
    "acc_stderr,none": 0.04605661864718381
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.7169811320754716,
    "acc_stderr,none": 0.027724236492700918
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.6416184971098265,
    "acc_stderr,none": 0.0365634365335316
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.36,
    "acc_stderr,none": 0.048241815132442176
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6681614349775785,
    "acc_stderr,none": 0.03160295143776679
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7572815533980582,
    "acc_stderr,none": 0.04245022486384495
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8888888888888888,
    "acc_stderr,none": 0.020588491316092358
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.73,
    "acc_stderr,none": 0.04461960433384741
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7726692209450831,
    "acc_stderr,none": 0.014987270640946017
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7091503267973857,
    "acc_stderr,none": 0.02600480036395213
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5212765957446809,
    "acc_stderr,none": 0.029800481645628693
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.7463235294117647,
    "acc_stderr,none": 0.026431329870789548
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.5120481927710844,
    "acc_stderr,none": 0.03891364495835816
  },
  "mmlu_social_sciences": {
    "acc,none": 0.766980825479363,
    "acc_stderr,none": 0.0075329129077762735,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.5877192982456141,
    "acc_stderr,none": 0.046306532033665956
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8181818181818182,
    "acc_stderr,none": 0.0274796030105388
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.844559585492228,
    "acc_stderr,none": 0.02614848346915332
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7282051282051282,
    "acc_stderr,none": 0.022556551010132358
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8235294117647058,
    "acc_stderr,none": 0.024762902678057922
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8440366972477065,
    "acc_stderr,none": 0.015555802713590148
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7022900763358778,
    "acc_stderr,none": 0.04010358942462203
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.704248366013072,
    "acc_stderr,none": 0.018463154132632817
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6636363636363637,
    "acc_stderr,none": 0.04525393596302506
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.763265306122449,
    "acc_stderr,none": 0.027212835884073156
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8059701492537313,
    "acc_stderr,none": 0.027962677604768914
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.83,
    "acc_stderr,none": 0.03775251680686371
  },
  "mmlu_stem": {
    "acc,none": 0.6317792578496669,
    "acc_stderr,none": 0.0082847142044089,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.49,
    "acc_stderr,none": 0.05024183937956911
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6814814814814815,
    "acc_stderr,none": 0.0402477840197711
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.756578947368421,
    "acc_stderr,none": 0.034923496688842384
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8125,
    "acc_stderr,none": 0.032639560491693344
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.47,
    "acc_stderr,none": 0.05016135580465919
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.57,
    "acc_stderr,none": 0.049756985195624284
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.45,
    "acc_stderr,none": 0.05
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.5196078431372549,
    "acc_stderr,none": 0.04971358884367406
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.68,
    "acc_stderr,none": 0.04688261722621505
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.7361702127659574,
    "acc_stderr,none": 0.028809989854102963
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.6827586206896552,
    "acc_stderr,none": 0.038783523721386236
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.5873015873015873,
    "acc_stderr,none": 0.025355741263055263
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.832258064516129,
    "acc_stderr,none": 0.021255464065371342
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6699507389162561,
    "acc_stderr,none": 0.033085304262282574
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.81,
    "acc_stderr,none": 0.039427724440366234
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.3962962962962963,
    "acc_stderr,none": 0.029822619458533997
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.4966887417218543,
    "acc_stderr,none": 0.04082393379449654
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.6296296296296297,
    "acc_stderr,none": 0.03293377139415191
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5535714285714286,
    "acc_stderr,none": 0.047184714852195886
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7627856365614799,
    "acc_stderr,none": 0.009924694933586354,
    "acc_norm,none": 0.7568008705114254,
    "acc_norm_stderr,none": 0.01000961195385893
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4421699078812692,
    "acc_stderr,none": 0.011238140029326922
  }
}