{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4189419795221843,
    "acc_stderr,none": 0.014418106953639008,
    "acc_norm,none": 0.44368600682593856,
    "acc_norm_stderr,none": 0.014518421825670442
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6519360269360269,
    "acc_stderr,none": 0.009774627600259014,
    "acc_norm,none": 0.5349326599326599,
    "acc_norm_stderr,none": 0.010234713052723667
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8232415902140673,
    "acc_stderr,none": 0.006671849390579372
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.07505686125852919,
    "exact_match_stderr,flexible-extract": 0.007257633145486642
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5816570404301932,
    "acc_stderr,none": 0.004922789247319875,
    "acc_norm,none": 0.6770563632742481,
    "acc_norm_stderr,none": 0.004666457279979415
  },
  "mmlu": {
    "acc,none": 0.2595784076342401,
    "acc_stderr,none": 0.003678638161764881,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.27013815090329435,
    "acc_stderr,none": 0.006438812601466085,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.2857142857142857,
    "acc_stderr,none": 0.040406101782088394
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.2606060606060606,
    "acc_stderr,none": 0.03427743175816524
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.4068627450980392,
    "acc_stderr,none": 0.03447891136353382
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.3333333333333333,
    "acc_stderr,none": 0.030685820596610798
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.371900826446281,
    "acc_stderr,none": 0.04412015806624503
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.3055555555555556,
    "acc_stderr,none": 0.04453197507374984
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.26993865030674846,
    "acc_stderr,none": 0.03487825168497892
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.25722543352601157,
    "acc_stderr,none": 0.02353292543104428
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.23798882681564246,
    "acc_stderr,none": 0.014242630070574885
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.19292604501607716,
    "acc_stderr,none": 0.022411516780911363
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.2962962962962963,
    "acc_stderr,none": 0.02540719779889016
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.24967405475880053,
    "acc_stderr,none": 0.011054538377832325
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.391812865497076,
    "acc_stderr,none": 0.037439798259264016
  },
  "mmlu_other": {
    "acc,none": 0.2880592211136144,
    "acc_stderr,none": 0.008078708428373299,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.34,
    "acc_stderr,none": 0.047609522856952365
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.28679245283018867,
    "acc_stderr,none": 0.027834912527544074
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.23699421965317918,
    "acc_stderr,none": 0.03242414757483098
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.18,
    "acc_stderr,none": 0.03861229196653694
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.3811659192825112,
    "acc_stderr,none": 0.032596251184168264
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.2815533980582524,
    "acc_stderr,none": 0.044532548363264673
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.3888888888888889,
    "acc_stderr,none": 0.03193705726200293
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.39,
    "acc_stderr,none": 0.04902071300001975
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.2656449553001277,
    "acc_stderr,none": 0.015794302487888733
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.29411764705882354,
    "acc_stderr,none": 0.026090162504279066
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.26595744680851063,
    "acc_stderr,none": 0.026358065698880592
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.20588235294117646,
    "acc_stderr,none": 0.024562204314142317
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.3192771084337349,
    "acc_stderr,none": 0.03629335329947861
  },
  "mmlu_social_sciences": {
    "acc,none": 0.2567435814104647,
    "acc_stderr,none": 0.007853806013289031,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.23684210526315788,
    "acc_stderr,none": 0.03999423879281335
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.21717171717171718,
    "acc_stderr,none": 0.02937661648494564
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.30569948186528495,
    "acc_stderr,none": 0.03324837939758159
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.21025641025641026,
    "acc_stderr,none": 0.020660597485026924
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.2184873949579832,
    "acc_stderr,none": 0.02684151432295894
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.23119266055045873,
    "acc_stderr,none": 0.018075750241633156
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.3053435114503817,
    "acc_stderr,none": 0.040393149787245605
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.29248366013071897,
    "acc_stderr,none": 0.01840341571010979
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.2636363636363636,
    "acc_stderr,none": 0.04220224692971987
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.22857142857142856,
    "acc_stderr,none": 0.02688214492230774
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.2935323383084577,
    "acc_stderr,none": 0.032200241045342054
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.38,
    "acc_stderr,none": 0.04878317312145632
  },
  "mmlu_stem": {
    "acc,none": 0.2185220424992071,
    "acc_stderr,none": 0.007345004395633717,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.22,
    "acc_stderr,none": 0.04163331998932268
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.2222222222222222,
    "acc_stderr,none": 0.0359144408419697
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.18421052631578946,
    "acc_stderr,none": 0.0315469804508223
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.2569444444444444,
    "acc_stderr,none": 0.03653946969442099
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.2,
    "acc_stderr,none": 0.04020151261036846
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.26,
    "acc_stderr,none": 0.0440844002276808
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.21,
    "acc_stderr,none": 0.040936018074033256
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.21568627450980393,
    "acc_stderr,none": 0.04092563958237655
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.32,
    "acc_stderr,none": 0.046882617226215034
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.26382978723404255,
    "acc_stderr,none": 0.02880998985410297
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.2482758620689655,
    "acc_stderr,none": 0.03600105692727771
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.20899470899470898,
    "acc_stderr,none": 0.02094048156533485
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.2032258064516129,
    "acc_stderr,none": 0.022891687984554963
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.15763546798029557,
    "acc_stderr,none": 0.025639014131172404
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.25,
    "acc_stderr,none": 0.04351941398892446
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.2111111111111111,
    "acc_stderr,none": 0.024882116857655113
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.1986754966887417,
    "acc_stderr,none": 0.032578473844367774
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.14814814814814814,
    "acc_stderr,none": 0.02422762927372836
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.3125,
    "acc_stderr,none": 0.043994650575715215
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7442872687704026,
    "acc_stderr,none": 0.01017869010945985,
    "acc_norm,none": 0.7513601741022851,
    "acc_norm_stderr,none": 0.010084511234296864
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4390992835209826,
    "acc_stderr,none": 0.011229831502847806
  }
}