{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4453924914675768,
    "acc_stderr,none": 0.014523987638344069,
    "acc_norm,none": 0.46245733788395904,
    "acc_norm_stderr,none": 0.014570144495075574
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6982323232323232,
    "acc_stderr,none": 0.009418994158522534,
    "acc_norm,none": 0.622895622895623,
    "acc_norm_stderr,none": 0.009945041946366515
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8428134556574923,
    "acc_stderr,none": 0.006365990015975543
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.29112964366944655,
    "exact_match_stderr,flexible-extract": 0.012513215297888463
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5060744871539534,
    "acc_stderr,none": 0.004989413158034801,
    "acc_norm,none": 0.6598287193786099,
    "acc_norm_stderr,none": 0.0047279834341955
  },
  "mmlu": {
    "acc,none": 0.5816123059393249,
    "acc_stderr,none": 0.003914918646438467,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5226354941551541,
    "acc_stderr,none": 0.006752285174043489,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.36507936507936506,
    "acc_stderr,none": 0.04306241259127153
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7393939393939394,
    "acc_stderr,none": 0.03427743175816524
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7794117647058824,
    "acc_stderr,none": 0.029102254389674093
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7721518987341772,
    "acc_stderr,none": 0.02730348459906944
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.743801652892562,
    "acc_stderr,none": 0.03984979653302873
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.75,
    "acc_stderr,none": 0.04186091791394607
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.6809815950920245,
    "acc_stderr,none": 0.03661997551073836
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6329479768786127,
    "acc_stderr,none": 0.02595005433765408
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.23798882681564246,
    "acc_stderr,none": 0.014242630070574885
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.6591639871382636,
    "acc_stderr,none": 0.026920841260776162
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.6666666666666666,
    "acc_stderr,none": 0.02622964917882116
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.44132985658409385,
    "acc_stderr,none": 0.012682016335646664
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.8011695906432749,
    "acc_stderr,none": 0.03061111655743253
  },
  "mmlu_other": {
    "acc,none": 0.6739620212423559,
    "acc_stderr,none": 0.008173464029898398,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.66,
    "acc_stderr,none": 0.04760952285695238
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.6716981132075471,
    "acc_stderr,none": 0.02890159361241178
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.5722543352601156,
    "acc_stderr,none": 0.03772446857518028
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.43,
    "acc_stderr,none": 0.04975698519562428
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6322869955156951,
    "acc_stderr,none": 0.03236198350928276
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.8058252427184466,
    "acc_stderr,none": 0.03916667762822584
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8076923076923077,
    "acc_stderr,none": 0.025819233256483744
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.71,
    "acc_stderr,none": 0.045604802157206845
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7867177522349936,
    "acc_stderr,none": 0.014648172749593503
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.6764705882352942,
    "acc_stderr,none": 0.026787453111906494
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.46808510638297873,
    "acc_stderr,none": 0.029766675075873866
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.6764705882352942,
    "acc_stderr,none": 0.028418208619406752
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.5120481927710844,
    "acc_stderr,none": 0.03891364495835817
  },
  "mmlu_social_sciences": {
    "acc,none": 0.6789080272993175,
    "acc_stderr,none": 0.008214092509086545,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.40350877192982454,
    "acc_stderr,none": 0.04615186962583703
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.7323232323232324,
    "acc_stderr,none": 0.03154449888270286
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8341968911917098,
    "acc_stderr,none": 0.026839845022314415
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.5820512820512821,
    "acc_stderr,none": 0.025007329882461217
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.5714285714285714,
    "acc_stderr,none": 0.03214536859788639
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.7889908256880734,
    "acc_stderr,none": 0.01749392240411265
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7099236641221374,
    "acc_stderr,none": 0.03980066246467765
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.6160130718954249,
    "acc_stderr,none": 0.019675808135281515
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6363636363636364,
    "acc_stderr,none": 0.04607582090719976
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.6612244897959184,
    "acc_stderr,none": 0.030299506562154185
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8109452736318408,
    "acc_stderr,none": 0.02768691358801301
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.79,
    "acc_stderr,none": 0.04093601807403326
  },
  "mmlu_stem": {
    "acc,none": 0.4836663495084047,
    "acc_stderr,none": 0.008564146840196633,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.29,
    "acc_stderr,none": 0.045604802157206845
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.5481481481481482,
    "acc_stderr,none": 0.04299268905480864
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.5986842105263158,
    "acc_stderr,none": 0.03988903703336284
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.6666666666666666,
    "acc_stderr,none": 0.03942082639927213
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.4,
    "acc_stderr,none": 0.049236596391733084
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.48,
    "acc_stderr,none": 0.050211673156867795
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.37,
    "acc_stderr,none": 0.04852365870939099
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.3235294117647059,
    "acc_stderr,none": 0.046550104113196177
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.72,
    "acc_stderr,none": 0.04512608598542127
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.5361702127659574,
    "acc_stderr,none": 0.03260038511835771
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.5517241379310345,
    "acc_stderr,none": 0.04144311810878152
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.36507936507936506,
    "acc_stderr,none": 0.02479606060269995
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.7387096774193549,
    "acc_stderr,none": 0.02499305339776482
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.47783251231527096,
    "acc_stderr,none": 0.03514528562175007
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.53,
    "acc_stderr,none": 0.05016135580465919
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.2814814814814815,
    "acc_stderr,none": 0.027420019350945277
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.33774834437086093,
    "acc_stderr,none": 0.038615575462551684
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.4722222222222222,
    "acc_stderr,none": 0.0340470532865388
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.4732142857142857,
    "acc_stderr,none": 0.047389751192741546
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7393906420021763,
    "acc_stderr,none": 0.01024182615581162,
    "acc_norm,none": 0.7404787812840044,
    "acc_norm_stderr,none": 0.010227939888173915
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.44626407369498466,
    "acc_stderr,none": 0.011248540901547959
  }
}