{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4658703071672355,
    "acc_stderr,none": 0.014577311315231097,
    "acc_norm,none": 0.4761092150170648,
    "acc_norm_stderr,none": 0.014594701798071654
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.752104377104377,
    "acc_stderr,none": 0.008860162361464035,
    "acc_norm,none": 0.625,
    "acc_norm_stderr,none": 0.009933992677987828
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8318042813455657,
    "acc_stderr,none": 0.006542000663168084
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.3115996967399545,
    "exact_match_stderr,flexible-extract": 0.012757375376754941
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5481975702051384,
    "acc_stderr,none": 0.00496654472445223,
    "acc_norm,none": 0.6791475801633141,
    "acc_norm_stderr,none": 0.004658501662277606
  },
  "mmlu": {
    "acc,none": 0.3720979917390685,
    "acc_stderr,none": 0.003952610654810294,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.40106269925611054,
    "acc_stderr,none": 0.00688499390180384,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.29365079365079366,
    "acc_stderr,none": 0.04073524322147125
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.6121212121212121,
    "acc_stderr,none": 0.038049136539710114
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.6617647058823529,
    "acc_stderr,none": 0.0332057461294543
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.679324894514768,
    "acc_stderr,none": 0.03038193194999042
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.6446280991735537,
    "acc_stderr,none": 0.04369236326573981
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.5462962962962963,
    "acc_stderr,none": 0.04812917324536823
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.4171779141104294,
    "acc_stderr,none": 0.038741028598180814
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.3988439306358382,
    "acc_stderr,none": 0.026362437574546545
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.23798882681564246,
    "acc_stderr,none": 0.014242630070574885
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.2958199356913183,
    "acc_stderr,none": 0.025922371788818777
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.4166666666666667,
    "acc_stderr,none": 0.02743162372241502
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.37353324641460234,
    "acc_stderr,none": 0.012354994823515264
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.5672514619883041,
    "acc_stderr,none": 0.03799978644370608
  },
  "mmlu_other": {
    "acc,none": 0.41229481815255875,
    "acc_stderr,none": 0.008704495893010457,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.47,
    "acc_stderr,none": 0.05016135580465919
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.42641509433962266,
    "acc_stderr,none": 0.03043779434298305
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.3063583815028902,
    "acc_stderr,none": 0.03514942551267437
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.2,
    "acc_stderr,none": 0.04020151261036846
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.5336322869955157,
    "acc_stderr,none": 0.033481800170603065
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.42718446601941745,
    "acc_stderr,none": 0.04897957737781168
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.5769230769230769,
    "acc_stderr,none": 0.032366121762202014
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.48,
    "acc_stderr,none": 0.050211673156867795
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.4112388250319285,
    "acc_stderr,none": 0.017595971908056576
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.46405228758169936,
    "acc_stderr,none": 0.028555827516528777
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.32978723404255317,
    "acc_stderr,none": 0.0280459469420424
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.27941176470588236,
    "acc_stderr,none": 0.027257202606114948
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.41566265060240964,
    "acc_stderr,none": 0.03836722176598052
  },
  "mmlu_social_sciences": {
    "acc,none": 0.4107897302567436,
    "acc_stderr,none": 0.008690847715518063,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.2807017543859649,
    "acc_stderr,none": 0.042270544512322
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.3434343434343434,
    "acc_stderr,none": 0.033832012232444406
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.49740932642487046,
    "acc_stderr,none": 0.03608390745384488
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.258974358974359,
    "acc_stderr,none": 0.022211106810061672
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.27310924369747897,
    "acc_stderr,none": 0.02894200404099817
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.4055045871559633,
    "acc_stderr,none": 0.021050997991896837
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.4732824427480916,
    "acc_stderr,none": 0.04379024936553894
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.4362745098039216,
    "acc_stderr,none": 0.02006287424353913
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.39090909090909093,
    "acc_stderr,none": 0.04673752333670237
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.5387755102040817,
    "acc_stderr,none": 0.03191282052669278
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.6069651741293532,
    "acc_stderr,none": 0.0345368246603156
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.55,
    "acc_stderr,none": 0.04999999999999999
  },
  "mmlu_stem": {
    "acc,none": 0.25150650174437045,
    "acc_stderr,none": 0.007667031622410437,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.22,
    "acc_stderr,none": 0.04163331998932268
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.22962962962962963,
    "acc_stderr,none": 0.03633384414073463
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.28289473684210525,
    "acc_stderr,none": 0.03665349695640767
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.3333333333333333,
    "acc_stderr,none": 0.039420826399272135
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.22,
    "acc_stderr,none": 0.04163331998932269
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.27,
    "acc_stderr,none": 0.04461960433384741
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.21,
    "acc_stderr,none": 0.040936018074033256
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.21568627450980393,
    "acc_stderr,none": 0.04092563958237655
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.4,
    "acc_stderr,none": 0.049236596391733084
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.2723404255319149,
    "acc_stderr,none": 0.0291012906983867
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.27586206896551724,
    "acc_stderr,none": 0.03724563619774632
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.20899470899470898,
    "acc_stderr,none": 0.02094048156533485
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.33225806451612905,
    "acc_stderr,none": 0.0267955608481228
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.1625615763546798,
    "acc_stderr,none": 0.025960300064605576
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.35,
    "acc_stderr,none": 0.047937248544110196
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.2111111111111111,
    "acc_stderr,none": 0.024882116857655113
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.1986754966887417,
    "acc_stderr,none": 0.032578473844367774
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.1712962962962963,
    "acc_stderr,none": 0.025695341643824674
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.3482142857142857,
    "acc_stderr,none": 0.04521829902833585
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7916213275299239,
    "acc_stderr,none": 0.009476125383049447,
    "acc_norm,none": 0.7899891186071817,
    "acc_norm_stderr,none": 0.009503353305818554
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4564994882292733,
    "acc_stderr,none": 0.011271170113045128
  }
}