{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.45307167235494883,
    "acc_stderr,none": 0.014546892052005628,
    "acc_norm,none": 0.4598976109215017,
    "acc_norm_stderr,none": 0.01456431885692485
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.7003367003367004,
    "acc_stderr,none": 0.00940022858620597,
    "acc_norm,none": 0.6085858585858586,
    "acc_norm_stderr,none": 0.010014917532627812
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8366972477064221,
    "acc_stderr,none": 0.006465073432190019
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.3593631539044731,
    "exact_match_stderr,flexible-extract": 0.013216456309851516
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5191196972714599,
    "acc_stderr,none": 0.00498613191967397,
    "acc_norm,none": 0.6829316869149572,
    "acc_norm_stderr,none": 0.004643832742876636
  },
  "mmlu": {
    "acc,none": 0.5725680102549494,
    "acc_stderr,none": 0.003930193528460342,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5268862911795962,
    "acc_stderr,none": 0.00676788651275952,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.40476190476190477,
    "acc_stderr,none": 0.04390259265377563
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7333333333333333,
    "acc_stderr,none": 0.03453131801885416
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7696078431372549,
    "acc_stderr,none": 0.029554292605695046
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7679324894514767,
    "acc_stderr,none": 0.02747974455080852
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7355371900826446,
    "acc_stderr,none": 0.04026187527591207
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7129629629629629,
    "acc_stderr,none": 0.043733130409147614
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.7055214723926381,
    "acc_stderr,none": 0.03581165790474082
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.653179190751445,
    "acc_stderr,none": 0.025624723994030457
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.24134078212290502,
    "acc_stderr,none": 0.014310999547961455
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.6527331189710611,
    "acc_stderr,none": 0.027040745502307336
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.6574074074074074,
    "acc_stderr,none": 0.026406145973625676
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4491525423728814,
    "acc_stderr,none": 0.012704030518851496
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.8187134502923976,
    "acc_stderr,none": 0.029547741687640038
  },
  "mmlu_other": {
    "acc,none": 0.657547473447055,
    "acc_stderr,none": 0.00820460840632717,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.63,
    "acc_stderr,none": 0.04852365870939099
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.6377358490566037,
    "acc_stderr,none": 0.0295822451283843
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.5664739884393064,
    "acc_stderr,none": 0.03778621079092055
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.35,
    "acc_stderr,none": 0.04793724854411018
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6547085201793722,
    "acc_stderr,none": 0.03191100192835794
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7961165048543689,
    "acc_stderr,none": 0.039891398595317706
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8205128205128205,
    "acc_stderr,none": 0.02514093595033546
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.7,
    "acc_stderr,none": 0.046056618647183814
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7854406130268199,
    "acc_stderr,none": 0.014680033956893346
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.6535947712418301,
    "acc_stderr,none": 0.027245613047215372
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.4219858156028369,
    "acc_stderr,none": 0.029462189233370573
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.6323529411764706,
    "acc_stderr,none": 0.029289413409403192
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4939759036144578,
    "acc_stderr,none": 0.03892212195333045
  },
  "mmlu_social_sciences": {
    "acc,none": 0.6629834254143646,
    "acc_stderr,none": 0.008282372542398843,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.3684210526315789,
    "acc_stderr,none": 0.04537815354939391
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.7171717171717171,
    "acc_stderr,none": 0.03208779558786752
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8341968911917098,
    "acc_stderr,none": 0.026839845022314415
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.5666666666666667,
    "acc_stderr,none": 0.025124653525885117
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.5504201680672269,
    "acc_stderr,none": 0.03231293497137707
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.7761467889908257,
    "acc_stderr,none": 0.017871217767790215
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7022900763358778,
    "acc_stderr,none": 0.04010358942462202
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.5898692810457516,
    "acc_stderr,none": 0.019898412717635906
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.5818181818181818,
    "acc_stderr,none": 0.04724577405731571
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.6489795918367347,
    "acc_stderr,none": 0.030555316755573637
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8159203980099502,
    "acc_stderr,none": 0.02740385941078685
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.8,
    "acc_stderr,none": 0.040201512610368445
  },
  "mmlu_stem": {
    "acc,none": 0.46875991119568666,
    "acc_stderr,none": 0.008583914697959421,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.25,
    "acc_stderr,none": 0.04351941398892446
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.5481481481481482,
    "acc_stderr,none": 0.04299268905480864
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.5855263157894737,
    "acc_stderr,none": 0.04008973785779206
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.6805555555555556,
    "acc_stderr,none": 0.03899073687357336
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.38,
    "acc_stderr,none": 0.04878317312145633
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.47,
    "acc_stderr,none": 0.05016135580465919
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.33,
    "acc_stderr,none": 0.04725815626252604
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.38235294117647056,
    "acc_stderr,none": 0.04835503696107223
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.72,
    "acc_stderr,none": 0.04512608598542127
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.4851063829787234,
    "acc_stderr,none": 0.032671518489247764
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.5724137931034483,
    "acc_stderr,none": 0.041227371113703316
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.38095238095238093,
    "acc_stderr,none": 0.025010749116137602
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.6903225806451613,
    "acc_stderr,none": 0.026302774983517418
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.4236453201970443,
    "acc_stderr,none": 0.03476725747649037
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.58,
    "acc_stderr,none": 0.049604496374885836
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.3037037037037037,
    "acc_stderr,none": 0.02803792996911499
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.33112582781456956,
    "acc_stderr,none": 0.038425817186598696
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.39351851851851855,
    "acc_stderr,none": 0.03331747876370312
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.41964285714285715,
    "acc_stderr,none": 0.04684099321077106
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7584330794341676,
    "acc_stderr,none": 0.009986718001804475,
    "acc_norm,none": 0.7595212187159956,
    "acc_norm_stderr,none": 0.009971345364651088
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.44933469805527126,
    "acc_stderr,none": 0.011255834856639547
  }
}