{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.41723549488054607,
    "acc_stderr,none": 0.014409825518403079,
    "acc_norm,none": 0.41638225255972694,
    "acc_norm_stderr,none": 0.014405618279436167
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.5976430976430976,
    "acc_stderr,none": 0.010062244711011534,
    "acc_norm,none": 0.47895622895622897,
    "acc_norm_stderr,none": 0.010250692602022556
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.7064220183486238,
    "acc_stderr,none": 0.007965011249420072
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.46398786959818045,
    "exact_match_stderr,flexible-extract": 0.013736715929950318
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5336586337382991,
    "acc_stderr,none": 0.004978462690966913,
    "acc_norm,none": 0.6352320254929297,
    "acc_norm_stderr,none": 0.004803812631994946
  },
  "mmlu": {
    "acc,none": 0.6237715425153112,
    "acc_stderr,none": 0.0038276747166348297,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5513283740701381,
    "acc_stderr,none": 0.006748447425510858,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5238095238095238,
    "acc_stderr,none": 0.04467062628403273
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7212121212121212,
    "acc_stderr,none": 0.03501438706296781
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7696078431372549,
    "acc_stderr,none": 0.029554292605695053
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7890295358649789,
    "acc_stderr,none": 0.026558372502661923
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7520661157024794,
    "acc_stderr,none": 0.039418975265163025
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7037037037037037,
    "acc_stderr,none": 0.04414343666854934
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.754601226993865,
    "acc_stderr,none": 0.03380939813943354
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.7023121387283237,
    "acc_stderr,none": 0.024617055388677006
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.2670391061452514,
    "acc_stderr,none": 0.014796502622562555
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.6430868167202572,
    "acc_stderr,none": 0.027210420375934023
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7530864197530864,
    "acc_stderr,none": 0.023993501709042114
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4661016949152542,
    "acc_stderr,none": 0.012740853872949837
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.783625730994152,
    "acc_stderr,none": 0.03158149539338733
  },
  "mmlu_other": {
    "acc,none": 0.6926295461860316,
    "acc_stderr,none": 0.008033147055170144,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.71,
    "acc_stderr,none": 0.045604802157206845
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.6641509433962264,
    "acc_stderr,none": 0.02906722014664483
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.6705202312138728,
    "acc_stderr,none": 0.03583901754736412
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.42,
    "acc_stderr,none": 0.049604496374885836
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6995515695067265,
    "acc_stderr,none": 0.030769352008229143
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7669902912621359,
    "acc_stderr,none": 0.04185832598928315
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8888888888888888,
    "acc_stderr,none": 0.02058849131609236
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.75,
    "acc_stderr,none": 0.04351941398892446
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7879948914431673,
    "acc_stderr,none": 0.014616099385833674
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.6797385620915033,
    "acc_stderr,none": 0.02671611838015683
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5035460992907801,
    "acc_stderr,none": 0.02982674915328092
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.6544117647058824,
    "acc_stderr,none": 0.028888193103988644
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.5060240963855421,
    "acc_stderr,none": 0.03892212195333045
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7400064998375041,
    "acc_stderr,none": 0.007783071131951106,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.4824561403508772,
    "acc_stderr,none": 0.04700708033551038
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8080808080808081,
    "acc_stderr,none": 0.02805779167298902
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8393782383419689,
    "acc_stderr,none": 0.026499057701397453
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.6743589743589744,
    "acc_stderr,none": 0.02375966576741229
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.7899159663865546,
    "acc_stderr,none": 0.026461398717471874
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.818348623853211,
    "acc_stderr,none": 0.016530617409266857
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7251908396946565,
    "acc_stderr,none": 0.03915345408847836
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.6813725490196079,
    "acc_stderr,none": 0.018850084696468702
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6636363636363637,
    "acc_stderr,none": 0.04525393596302506
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7061224489795919,
    "acc_stderr,none": 0.029162738410249765
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8109452736318408,
    "acc_stderr,none": 0.02768691358801302
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.82,
    "acc_stderr,none": 0.03861229196653695
  },
  "mmlu_stem": {
    "acc,none": 0.5505867427846496,
    "acc_stderr,none": 0.00829633447614584,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.3,
    "acc_stderr,none": 0.046056618647183814
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6148148148148148,
    "acc_stderr,none": 0.042039210401562783
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.756578947368421,
    "acc_stderr,none": 0.034923496688842384
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8055555555555556,
    "acc_stderr,none": 0.03309615177059006
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.4,
    "acc_stderr,none": 0.049236596391733084
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.56,
    "acc_stderr,none": 0.049888765156985884
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.29,
    "acc_stderr,none": 0.045604802157206845
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.37254901960784315,
    "acc_stderr,none": 0.04810840148082634
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.73,
    "acc_stderr,none": 0.04461960433384739
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.6638297872340425,
    "acc_stderr,none": 0.030881618520676942
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.6344827586206897,
    "acc_stderr,none": 0.040131241954243856
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.48412698412698413,
    "acc_stderr,none": 0.02573833063941215
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8161290322580645,
    "acc_stderr,none": 0.022037217340267843
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.5862068965517241,
    "acc_stderr,none": 0.03465304488406796
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.74,
    "acc_stderr,none": 0.044084400227680794
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.2518518518518518,
    "acc_stderr,none": 0.02646611753895992
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.3973509933774834,
    "acc_stderr,none": 0.039955240076816806
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.4444444444444444,
    "acc_stderr,none": 0.03388857118502326
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.49107142857142855,
    "acc_stderr,none": 0.04745033255489123
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7573449401523396,
    "acc_stderr,none": 0.010002002569708698,
    "acc_norm,none": 0.749183895538629,
    "acc_norm_stderr,none": 0.010113869547069042
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.43244626407369496,
    "acc_stderr,none": 0.011210331273967561
  }
}