{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4138225255972696,
    "acc_stderr,none": 0.01439273000922101,
    "acc_norm,none": 0.4249146757679181,
    "acc_norm_stderr,none": 0.014445698968520769
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6531986531986532,
    "acc_stderr,none": 0.009766326091716005,
    "acc_norm,none": 0.5441919191919192,
    "acc_norm_stderr,none": 0.010219631763437851
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.7740061162079511,
    "acc_stderr,none": 0.007314978918224591
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.7460197119029568,
    "exact_match_stderr,flexible-extract": 0.011989952209548078
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5056761601274646,
    "acc_stderr,none": 0.004989459871609183,
    "acc_norm,none": 0.6358295160326628,
    "acc_norm_stderr,none": 0.004802133511654253
  },
  "mmlu": {
    "acc,none": 0.6641504059250819,
    "acc_stderr,none": 0.003775276538709252,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5702444208289055,
    "acc_stderr,none": 0.006795161889647177,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5238095238095238,
    "acc_stderr,none": 0.04467062628403273
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7333333333333333,
    "acc_stderr,none": 0.03453131801885417
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7647058823529411,
    "acc_stderr,none": 0.029771775228145628
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.8016877637130801,
    "acc_stderr,none": 0.02595502084162111
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7603305785123967,
    "acc_stderr,none": 0.03896878985070416
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7685185185185185,
    "acc_stderr,none": 0.04077494709252626
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.7975460122699386,
    "acc_stderr,none": 0.03157065078911902
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6763005780346821,
    "acc_stderr,none": 0.025190181327608405
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.3642458100558659,
    "acc_stderr,none": 0.016094338768474593
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.7041800643086816,
    "acc_stderr,none": 0.025922371788818784
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7530864197530864,
    "acc_stderr,none": 0.023993501709042114
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.44589308996088656,
    "acc_stderr,none": 0.012695244711379778
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.8070175438596491,
    "acc_stderr,none": 0.030267457554898458
  },
  "mmlu_other": {
    "acc,none": 0.7061474090762794,
    "acc_stderr,none": 0.007921160552887442,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.73,
    "acc_stderr,none": 0.0446196043338474
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.7094339622641509,
    "acc_stderr,none": 0.027943219989337124
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.653179190751445,
    "acc_stderr,none": 0.036291466701596636
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.37,
    "acc_stderr,none": 0.04852365870939099
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6636771300448431,
    "acc_stderr,none": 0.031708824268455
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7572815533980582,
    "acc_stderr,none": 0.04245022486384495
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8888888888888888,
    "acc_stderr,none": 0.020588491316092358
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.74,
    "acc_stderr,none": 0.0440844002276808
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7867177522349936,
    "acc_stderr,none": 0.014648172749593503
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7189542483660131,
    "acc_stderr,none": 0.025738854797818716
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5390070921985816,
    "acc_stderr,none": 0.029736592526424438
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.7536764705882353,
    "acc_stderr,none": 0.02617343857052
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4939759036144578,
    "acc_stderr,none": 0.03892212195333045
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7812804679883003,
    "acc_stderr,none": 0.00735081507694977,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.5964912280701754,
    "acc_stderr,none": 0.046151869625837054
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8232323232323232,
    "acc_stderr,none": 0.027178752639044915
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8652849740932642,
    "acc_stderr,none": 0.024639789097709443
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7384615384615385,
    "acc_stderr,none": 0.022282141204204405
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8529411764705882,
    "acc_stderr,none": 0.023005459446673957
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8642201834862385,
    "acc_stderr,none": 0.014686907556340008
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7099236641221374,
    "acc_stderr,none": 0.03980066246467766
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.7107843137254902,
    "acc_stderr,none": 0.018342529845275908
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.7,
    "acc_stderr,none": 0.04389311454644286
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7877551020408163,
    "acc_stderr,none": 0.026176967197866767
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8059701492537313,
    "acc_stderr,none": 0.027962677604768914
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.84,
    "acc_stderr,none": 0.0368452949177471
  },
  "mmlu_stem": {
    "acc,none": 0.6485886457342214,
    "acc_stderr,none": 0.008214633885710336,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.48,
    "acc_stderr,none": 0.050211673156867795
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6888888888888889,
    "acc_stderr,none": 0.03999262876617722
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.756578947368421,
    "acc_stderr,none": 0.034923496688842384
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8194444444444444,
    "acc_stderr,none": 0.03216600808802268
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.49,
    "acc_stderr,none": 0.05024183937956912
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.57,
    "acc_stderr,none": 0.049756985195624284
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.49,
    "acc_stderr,none": 0.05024183937956912
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.5196078431372549,
    "acc_stderr,none": 0.04971358884367406
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.73,
    "acc_stderr,none": 0.04461960433384739
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.7446808510638298,
    "acc_stderr,none": 0.028504856470514258
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.7034482758620689,
    "acc_stderr,none": 0.03806142687309992
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6084656084656085,
    "acc_stderr,none": 0.025138091388851112
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8548387096774194,
    "acc_stderr,none": 0.020039563628053304
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6699507389162561,
    "acc_stderr,none": 0.033085304262282574
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.81,
    "acc_stderr,none": 0.039427724440366234
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.42592592592592593,
    "acc_stderr,none": 0.030149135601365947
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.5364238410596026,
    "acc_stderr,none": 0.04071636065944215
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.6435185185185185,
    "acc_stderr,none": 0.032664783315272714
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5892857142857143,
    "acc_stderr,none": 0.04669510663875192
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7475516866158868,
    "acc_stderr,none": 0.010135665547362366,
    "acc_norm,none": 0.7513601741022851,
    "acc_norm_stderr,none": 0.010084511234296862
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4437052200614125,
    "acc_stderr,none": 0.011242130951086891
  }
}