{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.40955631399317405,
    "acc_stderr,none": 0.01437035863247245,
    "acc_norm,none": 0.41467576791808874,
    "acc_norm_stderr,none": 0.014397070564409177
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6026936026936027,
    "acc_stderr,none": 0.010041053078884279,
    "acc_norm,none": 0.4898989898989899,
    "acc_norm_stderr,none": 0.010257689687458356
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.7877675840978593,
    "acc_stderr,none": 0.007151505082440095
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.000758150113722517,
    "exact_match_stderr,strict-match": 0.0007581501137225315,
    "exact_match,flexible-extract": 0.5868081880212282,
    "exact_match_stderr,flexible-extract": 0.013563326951984365
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.519717187811193,
    "acc_stderr,none": 0.0049859001723176945,
    "acc_norm,none": 0.6396136227843059,
    "acc_norm_stderr,none": 0.004791313101877033
  },
  "mmlu": {
    "acc,none": 0.6457057399230879,
    "acc_stderr,none": 0.0038311872461730175,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5757704569606801,
    "acc_stderr,none": 0.0068476684593098075,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5317460317460317,
    "acc_stderr,none": 0.04463112720677171
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7272727272727273,
    "acc_stderr,none": 0.03477691162163659
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7647058823529411,
    "acc_stderr,none": 0.029771775228145628
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.8016877637130801,
    "acc_stderr,none": 0.025955020841621105
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7768595041322314,
    "acc_stderr,none": 0.03800754475228733
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7129629629629629,
    "acc_stderr,none": 0.043733130409147614
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.7975460122699386,
    "acc_stderr,none": 0.031570650789119005
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.684971098265896,
    "acc_stderr,none": 0.025009313790069713
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.39217877094972065,
    "acc_stderr,none": 0.01632906107320745
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.6655948553054662,
    "acc_stderr,none": 0.026795422327893947
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7561728395061729,
    "acc_stderr,none": 0.023891879541959603
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4576271186440678,
    "acc_stderr,none": 0.012724296550980188
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7777777777777778,
    "acc_stderr,none": 0.03188578017686398
  },
  "mmlu_other": {
    "acc,none": 0.6919858384293531,
    "acc_stderr,none": 0.008035016310878,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.69,
    "acc_stderr,none": 0.04648231987117316
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.6830188679245283,
    "acc_stderr,none": 0.02863723563980089
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.6242774566473989,
    "acc_stderr,none": 0.03692820767264867
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.43,
    "acc_stderr,none": 0.04975698519562428
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6860986547085202,
    "acc_stderr,none": 0.031146796482972465
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7281553398058253,
    "acc_stderr,none": 0.044052680241409216
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.9017094017094017,
    "acc_stderr,none": 0.019503444900757567
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.76,
    "acc_stderr,none": 0.042923469599092816
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7752234993614304,
    "acc_stderr,none": 0.01492744710193715
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7124183006535948,
    "acc_stderr,none": 0.02591780611714716
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5106382978723404,
    "acc_stderr,none": 0.02982074719142244
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.6875,
    "acc_stderr,none": 0.02815637344037142
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.46987951807228917,
    "acc_stderr,none": 0.03885425420866767
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7536561585960351,
    "acc_stderr,none": 0.007666246891437949,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.543859649122807,
    "acc_stderr,none": 0.046854730419077895
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8131313131313131,
    "acc_stderr,none": 0.027772533334218974
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8134715025906736,
    "acc_stderr,none": 0.028112091210117467
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7153846153846154,
    "acc_stderr,none": 0.022878322799706297
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8151260504201681,
    "acc_stderr,none": 0.025215992877954205
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8366972477064221,
    "acc_stderr,none": 0.01584825580650157
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7099236641221374,
    "acc_stderr,none": 0.03980066246467766
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.6977124183006536,
    "acc_stderr,none": 0.01857923271111387
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6363636363636364,
    "acc_stderr,none": 0.04607582090719976
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.710204081632653,
    "acc_stderr,none": 0.02904308868330433
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8159203980099502,
    "acc_stderr,none": 0.027403859410786848
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.82,
    "acc_stderr,none": 0.03861229196653695
  },
  "mmlu_stem": {
    "acc,none": 0.5991119568664763,
    "acc_stderr,none": 0.008248839619836025,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.35,
    "acc_stderr,none": 0.04793724854411019
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6592592592592592,
    "acc_stderr,none": 0.040943762699967946
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.7828947368421053,
    "acc_stderr,none": 0.03355045304882924
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.7847222222222222,
    "acc_stderr,none": 0.034370793441061344
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.44,
    "acc_stderr,none": 0.04988876515698589
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.54,
    "acc_stderr,none": 0.05009082659620333
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.34,
    "acc_stderr,none": 0.047609522856952344
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.4019607843137255,
    "acc_stderr,none": 0.04878608714466997
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.73,
    "acc_stderr,none": 0.04461960433384739
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.723404255319149,
    "acc_stderr,none": 0.02924188386962882
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.6758620689655173,
    "acc_stderr,none": 0.03900432069185555
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.5767195767195767,
    "acc_stderr,none": 0.02544636563440679
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.832258064516129,
    "acc_stderr,none": 0.021255464065371342
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.645320197044335,
    "acc_stderr,none": 0.03366124489051449
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.82,
    "acc_stderr,none": 0.038612291966536955
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.3148148148148148,
    "acc_stderr,none": 0.02831753349606647
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.46357615894039733,
    "acc_stderr,none": 0.04071636065944217
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.5324074074074074,
    "acc_stderr,none": 0.03402801581358966
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5357142857142857,
    "acc_stderr,none": 0.04733667890053756
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7633297062023939,
    "acc_stderr,none": 0.009916841655042809,
    "acc_norm,none": 0.7513601741022851,
    "acc_norm_stderr,none": 0.010084511234296857
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.43244626407369496,
    "acc_stderr,none": 0.011210331273967561
  }
}