{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4539249146757679,
    "acc_stderr,none": 0.014549221105171862,
    "acc_norm,none": 0.46075085324232085,
    "acc_norm_stderr,none": 0.01456630367663659
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6788720538720538,
    "acc_stderr,none": 0.009580787536986797,
    "acc_norm,none": 0.5778619528619529,
    "acc_norm_stderr,none": 0.010134620524592268
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8360856269113149,
    "acc_stderr,none": 0.0064748011773025655
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.2357846853677028,
    "exact_match_stderr,flexible-extract": 0.011692515650666797
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.4836685919139614,
    "acc_stderr,none": 0.004987119003151494,
    "acc_norm,none": 0.6321449910376419,
    "acc_norm_stderr,none": 0.0048123610604939165
  },
  "mmlu": {
    "acc,none": 0.579119783506623,
    "acc_stderr,none": 0.0039122701127851794,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5275239107332624,
    "acc_stderr,none": 0.00675413249362413,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.38095238095238093,
    "acc_stderr,none": 0.04343525428949098
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7151515151515152,
    "acc_stderr,none": 0.0352439084451178
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7843137254901961,
    "acc_stderr,none": 0.028867431449849313
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7805907172995781,
    "acc_stderr,none": 0.026939106581553945
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7355371900826446,
    "acc_stderr,none": 0.04026187527591206
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7314814814814815,
    "acc_stderr,none": 0.042844679680521934
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.7423312883435583,
    "acc_stderr,none": 0.03436150827846917
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6560693641618497,
    "acc_stderr,none": 0.025574123786546648
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.24022346368715083,
    "acc_stderr,none": 0.014288343803925307
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.639871382636656,
    "acc_stderr,none": 0.027264297599804012
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.6697530864197531,
    "acc_stderr,none": 0.026168298456732846
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4491525423728814,
    "acc_stderr,none": 0.012704030518851491
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7894736842105263,
    "acc_stderr,none": 0.03126781714663179
  },
  "mmlu_other": {
    "acc,none": 0.6662375281622144,
    "acc_stderr,none": 0.008167474486447538,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.63,
    "acc_stderr,none": 0.048523658709391
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.6566037735849056,
    "acc_stderr,none": 0.029224526469124792
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.5722543352601156,
    "acc_stderr,none": 0.037724468575180276
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.37,
    "acc_stderr,none": 0.04852365870939098
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6636771300448431,
    "acc_stderr,none": 0.031708824268455005
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7961165048543689,
    "acc_stderr,none": 0.039891398595317706
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8333333333333334,
    "acc_stderr,none": 0.024414947304543688
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.65,
    "acc_stderr,none": 0.047937248544110196
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7931034482758621,
    "acc_stderr,none": 0.01448565604166919
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.6535947712418301,
    "acc_stderr,none": 0.02724561304721537
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.4397163120567376,
    "acc_stderr,none": 0.029609912075594106
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.6470588235294118,
    "acc_stderr,none": 0.029029422815681404
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.5180722891566265,
    "acc_stderr,none": 0.038899512528272166
  },
  "mmlu_social_sciences": {
    "acc,none": 0.6756581085472864,
    "acc_stderr,none": 0.008183694386901898,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.38596491228070173,
    "acc_stderr,none": 0.045796394220704355
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.7272727272727273,
    "acc_stderr,none": 0.03173071239071724
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8393782383419689,
    "acc_stderr,none": 0.02649905770139746
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.5538461538461539,
    "acc_stderr,none": 0.02520357177302833
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.5378151260504201,
    "acc_stderr,none": 0.032385469487589795
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.7889908256880734,
    "acc_stderr,none": 0.01749392240411265
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7557251908396947,
    "acc_stderr,none": 0.037683359597287434
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.6111111111111112,
    "acc_stderr,none": 0.01972205893961807
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6545454545454545,
    "acc_stderr,none": 0.04554619617541054
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.6612244897959184,
    "acc_stderr,none": 0.030299506562154185
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8258706467661692,
    "acc_stderr,none": 0.026814951200421606
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.82,
    "acc_stderr,none": 0.038612291966536934
  },
  "mmlu_stem": {
    "acc,none": 0.47605455122105933,
    "acc_stderr,none": 0.008570171487097409,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.28,
    "acc_stderr,none": 0.045126085985421276
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.562962962962963,
    "acc_stderr,none": 0.042849586397534
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.5921052631578947,
    "acc_stderr,none": 0.039993097127774734
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.6944444444444444,
    "acc_stderr,none": 0.03852084696008534
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.38,
    "acc_stderr,none": 0.04878317312145632
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.5,
    "acc_stderr,none": 0.050251890762960605
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.34,
    "acc_stderr,none": 0.04760952285695235
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.3137254901960784,
    "acc_stderr,none": 0.04617034827006718
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.7,
    "acc_stderr,none": 0.046056618647183814
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.5276595744680851,
    "acc_stderr,none": 0.03263597118409769
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.5310344827586206,
    "acc_stderr,none": 0.04158632762097828
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.3862433862433862,
    "acc_stderr,none": 0.02507598176760168
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.7193548387096774,
    "acc_stderr,none": 0.025560604721022884
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.42857142857142855,
    "acc_stderr,none": 0.03481904844438803
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.55,
    "acc_stderr,none": 0.05
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.2962962962962963,
    "acc_stderr,none": 0.02784081149587193
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.32450331125827814,
    "acc_stderr,none": 0.03822746937658754
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.4444444444444444,
    "acc_stderr,none": 0.03388857118502325
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.4107142857142857,
    "acc_stderr,none": 0.04669510663875191
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7301414581066377,
    "acc_stderr,none": 0.010356595421852216,
    "acc_norm,none": 0.7230685527747551,
    "acc_norm_stderr,none": 0.01044049996933452
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4273285568065507,
    "acc_stderr,none": 0.011193930340551274
  }
}