{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4121160409556314,
    "acc_stderr,none": 0.014383915302225403,
    "acc_norm,none": 0.41467576791808874,
    "acc_norm_stderr,none": 0.014397070564409175
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.5900673400673401,
    "acc_stderr,none": 0.010091953527506239,
    "acc_norm,none": 0.4739057239057239,
    "acc_norm_stderr,none": 0.010245801990240045
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.7152905198776759,
    "acc_stderr,none": 0.007892866159005748
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.5625473843821076,
    "exact_match_stderr,flexible-extract": 0.013664299060751915
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5305715992830113,
    "acc_stderr,none": 0.004980445551991274,
    "acc_norm,none": 0.6311491734714201,
    "acc_norm_stderr,none": 0.004815073334000604
  },
  "mmlu": {
    "acc,none": 0.6255519156815268,
    "acc_stderr,none": 0.0038330284945234574,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5545164718384697,
    "acc_stderr,none": 0.006764100692788427,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5158730158730159,
    "acc_stderr,none": 0.044698818540726076
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7212121212121212,
    "acc_stderr,none": 0.03501438706296781
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7745098039215687,
    "acc_stderr,none": 0.029331162294251728
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7932489451476793,
    "acc_stderr,none": 0.026361651668389094
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7603305785123967,
    "acc_stderr,none": 0.03896878985070417
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7037037037037037,
    "acc_stderr,none": 0.04414343666854934
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.7668711656441718,
    "acc_stderr,none": 0.03322015795776741
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6936416184971098,
    "acc_stderr,none": 0.024818350129436586
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.28268156424581004,
    "acc_stderr,none": 0.015060381730018089
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.639871382636656,
    "acc_stderr,none": 0.027264297599804012
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7592592592592593,
    "acc_stderr,none": 0.023788583551658537
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4661016949152542,
    "acc_stderr,none": 0.012740853872949836
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7777777777777778,
    "acc_stderr,none": 0.03188578017686398
  },
  "mmlu_other": {
    "acc,none": 0.6923076923076923,
    "acc_stderr,none": 0.008040208485779547,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.71,
    "acc_stderr,none": 0.045604802157206845
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.6716981132075471,
    "acc_stderr,none": 0.02890159361241178
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.6416184971098265,
    "acc_stderr,none": 0.0365634365335316
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.44,
    "acc_stderr,none": 0.049888765156985884
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.7040358744394619,
    "acc_stderr,none": 0.030636591348699796
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7475728155339806,
    "acc_stderr,none": 0.04301250399690878
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8974358974358975,
    "acc_stderr,none": 0.019875655027867447
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.75,
    "acc_stderr,none": 0.04351941398892446
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7828863346104725,
    "acc_stderr,none": 0.014743125394823298
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.6895424836601307,
    "acc_stderr,none": 0.026493033225145894
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5035460992907801,
    "acc_stderr,none": 0.02982674915328092
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.6580882352941176,
    "acc_stderr,none": 0.028814722422254177
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.5,
    "acc_stderr,none": 0.03892494720807614
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7367565810854728,
    "acc_stderr,none": 0.007816843343394761,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.49122807017543857,
    "acc_stderr,none": 0.04702880432049615
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.7878787878787878,
    "acc_stderr,none": 0.029126522834586818
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8341968911917098,
    "acc_stderr,none": 0.026839845022314415
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.6538461538461539,
    "acc_stderr,none": 0.024121125416941187
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.773109243697479,
    "acc_stderr,none": 0.027205371538279472
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8238532110091743,
    "acc_stderr,none": 0.016332882393431364
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7175572519083969,
    "acc_stderr,none": 0.03948406125768361
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.6911764705882353,
    "acc_stderr,none": 0.01869085027359528
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6545454545454545,
    "acc_stderr,none": 0.04554619617541054
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7020408163265306,
    "acc_stderr,none": 0.02927956741106567
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8109452736318408,
    "acc_stderr,none": 0.02768691358801302
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.82,
    "acc_stderr,none": 0.03861229196653695
  },
  "mmlu_stem": {
    "acc,none": 0.5572470662860768,
    "acc_stderr,none": 0.008280003623513214,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.3,
    "acc_stderr,none": 0.046056618647183814
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6222222222222222,
    "acc_stderr,none": 0.04188307537595853
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.7631578947368421,
    "acc_stderr,none": 0.03459777606810537
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.7986111111111112,
    "acc_stderr,none": 0.033536474697138406
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.42,
    "acc_stderr,none": 0.049604496374885836
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.53,
    "acc_stderr,none": 0.050161355804659205
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.3,
    "acc_stderr,none": 0.046056618647183814
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.3627450980392157,
    "acc_stderr,none": 0.04784060704105655
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.75,
    "acc_stderr,none": 0.04351941398892446
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.6851063829787234,
    "acc_stderr,none": 0.030363582197238167
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.6551724137931034,
    "acc_stderr,none": 0.039609335494512087
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.49206349206349204,
    "acc_stderr,none": 0.02574806587167329
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8161290322580645,
    "acc_stderr,none": 0.022037217340267843
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.5911330049261084,
    "acc_stderr,none": 0.034590588158832314
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.75,
    "acc_stderr,none": 0.04351941398892446
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.25555555555555554,
    "acc_stderr,none": 0.026593939101844072
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.3973509933774834,
    "acc_stderr,none": 0.039955240076816806
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.46296296296296297,
    "acc_stderr,none": 0.03400603625538271
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5,
    "acc_stderr,none": 0.04745789978762494
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7606093579978237,
    "acc_stderr,none": 0.009955884250291699,
    "acc_norm,none": 0.7546245919477693,
    "acc_norm_stderr,none": 0.010039831320422405
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4344933469805527,
    "acc_stderr,none": 0.011216550167046256
  }
}