{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4061433447098976,
    "acc_stderr,none": 0.014351656690097862,
    "acc_norm,none": 0.4061433447098976,
    "acc_norm_stderr,none": 0.014351656690097863
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.5395622895622896,
    "acc_stderr,none": 0.01022761638628901,
    "acc_norm,none": 0.43897306397306396,
    "acc_norm_stderr,none": 0.010183076012972074
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.6501529051987768,
    "acc_stderr,none": 0.008341409251946768
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.000758150113722517,
    "exact_match_stderr,strict-match": 0.0007581501137225345,
    "exact_match,flexible-extract": 0.5481425322213799,
    "exact_match_stderr,flexible-extract": 0.013708494995677632
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5228042222664808,
    "acc_stderr,none": 0.004984589012289359,
    "acc_norm,none": 0.5946026687910775,
    "acc_norm_stderr,none": 0.004899653704032844
  },
  "mmlu": {
    "acc,none": 0.5006409343398376,
    "acc_stderr,none": 0.003977920000631001,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.4820403825717322,
    "acc_stderr,none": 0.006843852106313869,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.35714285714285715,
    "acc_stderr,none": 0.04285714285714281
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.696969696969697,
    "acc_stderr,none": 0.03588624800091709
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7696078431372549,
    "acc_stderr,none": 0.029554292605695053
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.759493670886076,
    "acc_stderr,none": 0.027820781981149678
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.5950413223140496,
    "acc_stderr,none": 0.04481137755942469
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.5462962962962963,
    "acc_stderr,none": 0.04812917324536823
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.5521472392638037,
    "acc_stderr,none": 0.03906947479456607
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.5173410404624278,
    "acc_stderr,none": 0.026902900458666647
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.23798882681564246,
    "acc_stderr,none": 0.014242630070574885
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.5209003215434084,
    "acc_stderr,none": 0.028373270961069414
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7129629629629629,
    "acc_stderr,none": 0.02517104191530968
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.41264667535853977,
    "acc_stderr,none": 0.012573836633799016
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7719298245614035,
    "acc_stderr,none": 0.032180937956023566
  },
  "mmlu_other": {
    "acc,none": 0.5854522046990667,
    "acc_stderr,none": 0.008403581214966602,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.71,
    "acc_stderr,none": 0.045604802157206845
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.44528301886792454,
    "acc_stderr,none": 0.030588052974270655
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.44508670520231214,
    "acc_stderr,none": 0.03789401760283647
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.31,
    "acc_stderr,none": 0.04648231987117316
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6367713004484304,
    "acc_stderr,none": 0.03227790442850499
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7378640776699029,
    "acc_stderr,none": 0.04354631077260595
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8589743589743589,
    "acc_stderr,none": 0.022801382534597552
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.62,
    "acc_stderr,none": 0.048783173121456316
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7381864623243933,
    "acc_stderr,none": 0.015720838678445266
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.4477124183006536,
    "acc_stderr,none": 0.02847293847803353
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.4397163120567376,
    "acc_stderr,none": 0.029609912075594113
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.4632352941176471,
    "acc_stderr,none": 0.03029061918048569
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4578313253012048,
    "acc_stderr,none": 0.0387862677100236
  },
  "mmlu_social_sciences": {
    "acc,none": 0.5521611959701007,
    "acc_stderr,none": 0.008820336837702935,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.30701754385964913,
    "acc_stderr,none": 0.04339138322579861
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.5858585858585859,
    "acc_stderr,none": 0.03509438348879629
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.6373056994818653,
    "acc_stderr,none": 0.03469713791704372
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.40512820512820513,
    "acc_stderr,none": 0.024890471769938145
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.4369747899159664,
    "acc_stderr,none": 0.03221943636566196
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.6293577981651376,
    "acc_stderr,none": 0.020707458164352984
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.6335877862595419,
    "acc_stderr,none": 0.04225875451969638
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.5441176470588235,
    "acc_stderr,none": 0.02014893942041575
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6545454545454545,
    "acc_stderr,none": 0.04554619617541054
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.6163265306122448,
    "acc_stderr,none": 0.031130880396235943
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.5920398009950248,
    "acc_stderr,none": 0.03475116365194092
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.62,
    "acc_stderr,none": 0.048783173121456316
  },
  "mmlu_stem": {
    "acc,none": 0.39454487789406917,
    "acc_stderr,none": 0.00817790971543249,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.22,
    "acc_stderr,none": 0.04163331998932268
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.5259259259259259,
    "acc_stderr,none": 0.04313531696750574
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.5855263157894737,
    "acc_stderr,none": 0.04008973785779206
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.6111111111111112,
    "acc_stderr,none": 0.04076663253918567
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.28,
    "acc_stderr,none": 0.04512608598542127
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.49,
    "acc_stderr,none": 0.05024183937956912
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.21,
    "acc_stderr,none": 0.040936018074033256
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.2647058823529412,
    "acc_stderr,none": 0.0438986995680878
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.73,
    "acc_stderr,none": 0.04461960433384739
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.4553191489361702,
    "acc_stderr,none": 0.03255525359340356
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.5103448275862069,
    "acc_stderr,none": 0.041657747757287644
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.26455026455026454,
    "acc_stderr,none": 0.022717467897708607
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.6225806451612903,
    "acc_stderr,none": 0.027575960723278246
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.30049261083743845,
    "acc_stderr,none": 0.03225799476233483
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.65,
    "acc_stderr,none": 0.0479372485441102
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.2111111111111111,
    "acc_stderr,none": 0.024882116857655113
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.2185430463576159,
    "acc_stderr,none": 0.03374235550425694
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.19444444444444445,
    "acc_stderr,none": 0.02699145450203672
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.39285714285714285,
    "acc_stderr,none": 0.04635550135609976
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7415669205658324,
    "acc_stderr,none": 0.010213971636773322,
    "acc_norm,none": 0.7285092491838956,
    "acc_norm_stderr,none": 0.010376251176596137
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.43705220061412486,
    "acc_stderr,none": 0.011224050108856498
  }
}