{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.41467576791808874,
    "acc_stderr,none": 0.014397070564409174,
    "acc_norm,none": 0.4180887372013652,
    "acc_norm_stderr,none": 0.014413988396996088
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6140572390572391,
    "acc_stderr,none": 0.009989277329503953,
    "acc_norm,none": 0.49326599326599324,
    "acc_norm_stderr,none": 0.010258852980991825
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.754434250764526,
    "acc_stderr,none": 0.007528130421348603
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.5860500379075056,
    "exact_match_stderr,flexible-extract": 0.013566991960151788
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5321649073889664,
    "acc_stderr,none": 0.00497944603882476,
    "acc_norm,none": 0.6477793268273252,
    "acc_norm_stderr,none": 0.0047668609071715475
  },
  "mmlu": {
    "acc,none": 0.6434980771969805,
    "acc_stderr,none": 0.003830136064071148,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.571519659936238,
    "acc_stderr,none": 0.006838898414989912,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5317460317460317,
    "acc_stderr,none": 0.04463112720677171
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7272727272727273,
    "acc_stderr,none": 0.0347769116216366
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7549019607843137,
    "acc_stderr,none": 0.030190282453501954
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7932489451476793,
    "acc_stderr,none": 0.026361651668389094
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7768595041322314,
    "acc_stderr,none": 0.03800754475228733
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7129629629629629,
    "acc_stderr,none": 0.043733130409147614
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.7791411042944786,
    "acc_stderr,none": 0.03259177392742178
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.7023121387283237,
    "acc_stderr,none": 0.024617055388677003
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.37094972067039106,
    "acc_stderr,none": 0.01615591072134177
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.6688102893890675,
    "acc_stderr,none": 0.026730620728004917
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7561728395061729,
    "acc_stderr,none": 0.023891879541959614
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4576271186440678,
    "acc_stderr,none": 0.012724296550980188
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7719298245614035,
    "acc_stderr,none": 0.032180937956023566
  },
  "mmlu_other": {
    "acc,none": 0.6926295461860316,
    "acc_stderr,none": 0.008024684405967273,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.7,
    "acc_stderr,none": 0.04605661864718381
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.6981132075471698,
    "acc_stderr,none": 0.028254200344438655
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.630057803468208,
    "acc_stderr,none": 0.03681229633394319
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.39,
    "acc_stderr,none": 0.04902071300001975
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.672645739910314,
    "acc_stderr,none": 0.03149384670994131
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7184466019417476,
    "acc_stderr,none": 0.044532548363264673
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.9017094017094017,
    "acc_stderr,none": 0.019503444900757567
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.75,
    "acc_stderr,none": 0.04351941398892446
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7752234993614304,
    "acc_stderr,none": 0.01492744710193715
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7091503267973857,
    "acc_stderr,none": 0.02600480036395213
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5177304964539007,
    "acc_stderr,none": 0.02980873964223777
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.6985294117647058,
    "acc_stderr,none": 0.027875982114273168
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4759036144578313,
    "acc_stderr,none": 0.03887971849597264
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7549561260968476,
    "acc_stderr,none": 0.007650491442686857,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.5701754385964912,
    "acc_stderr,none": 0.04657047260594964
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.803030303030303,
    "acc_stderr,none": 0.028335609732463362
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8290155440414507,
    "acc_stderr,none": 0.027171213683164542
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7076923076923077,
    "acc_stderr,none": 0.02306043838085774
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8361344537815126,
    "acc_stderr,none": 0.02404405494044048
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8348623853211009,
    "acc_stderr,none": 0.015919557829976047
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7175572519083969,
    "acc_stderr,none": 0.03948406125768361
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.6862745098039216,
    "acc_stderr,none": 0.018771683893528186
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6636363636363637,
    "acc_stderr,none": 0.04525393596302506
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.710204081632653,
    "acc_stderr,none": 0.02904308868330433
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8258706467661692,
    "acc_stderr,none": 0.026814951200421603
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.82,
    "acc_stderr,none": 0.03861229196653695
  },
  "mmlu_stem": {
    "acc,none": 0.5937202664129401,
    "acc_stderr,none": 0.008279082383746205,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.36,
    "acc_stderr,none": 0.048241815132442176
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.674074074074074,
    "acc_stderr,none": 0.040491220417025055
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.7763157894736842,
    "acc_stderr,none": 0.03391160934343603
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8125,
    "acc_stderr,none": 0.032639560491693344
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.42,
    "acc_stderr,none": 0.049604496374885836
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.54,
    "acc_stderr,none": 0.05009082659620332
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.38,
    "acc_stderr,none": 0.048783173121456316
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.4019607843137255,
    "acc_stderr,none": 0.04878608714466996
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.76,
    "acc_stderr,none": 0.042923469599092816
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.6893617021276596,
    "acc_stderr,none": 0.03025123757921317
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.6344827586206897,
    "acc_stderr,none": 0.04013124195424386
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.5370370370370371,
    "acc_stderr,none": 0.02568056464005688
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8290322580645161,
    "acc_stderr,none": 0.021417242936321565
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6403940886699507,
    "acc_stderr,none": 0.03376458246509567
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.81,
    "acc_stderr,none": 0.03942772444036623
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.3111111111111111,
    "acc_stderr,none": 0.028226446749683522
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.47019867549668876,
    "acc_stderr,none": 0.04075224992216979
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.5370370370370371,
    "acc_stderr,none": 0.03400603625538272
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5625,
    "acc_stderr,none": 0.04708567521880525
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7714907508161044,
    "acc_stderr,none": 0.009796313511829522,
    "acc_norm,none": 0.7589771490750816,
    "acc_norm_stderr,none": 0.009979042717267312
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4278403275332651,
    "acc_stderr,none": 0.01119562541819821
  }
}