{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4061433447098976,
    "acc_stderr,none": 0.014351656690097862,
    "acc_norm,none": 0.4044368600682594,
    "acc_norm_stderr,none": 0.014342036483436177
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.5357744107744108,
    "acc_stderr,none": 0.010233488709726552,
    "acc_norm,none": 0.4444444444444444,
    "acc_norm_stderr,none": 0.010196254838691684
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.6522935779816513,
    "acc_stderr,none": 0.008329529048948698
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0037907505686125853,
    "exact_match_stderr,strict-match": 0.001692700740150188,
    "exact_match,flexible-extract": 0.5852918877937832,
    "exact_match_stderr,flexible-extract": 0.01357062384230451
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5262895837482573,
    "acc_stderr,none": 0.004982879340691414,
    "acc_norm,none": 0.5920135431189006,
    "acc_norm_stderr,none": 0.004904561795919009
  },
  "mmlu": {
    "acc,none": 0.45826805298390544,
    "acc_stderr,none": 0.003974466103021502,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.45908607863974493,
    "acc_stderr,none": 0.0068411034233697215,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.31746031746031744,
    "acc_stderr,none": 0.04163453031302859
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.6727272727272727,
    "acc_stderr,none": 0.03663974994391242
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7745098039215687,
    "acc_stderr,none": 0.029331162294251718
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.759493670886076,
    "acc_stderr,none": 0.027820781981149678
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.4793388429752066,
    "acc_stderr,none": 0.04560456086387235
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.5277777777777778,
    "acc_stderr,none": 0.04826217294139894
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.49693251533742333,
    "acc_stderr,none": 0.03928297078179663
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.45375722543352603,
    "acc_stderr,none": 0.026803720583206184
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.23798882681564246,
    "acc_stderr,none": 0.014242630070574885
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.4758842443729904,
    "acc_stderr,none": 0.02836504154256457
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.6944444444444444,
    "acc_stderr,none": 0.025630824975621344
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.39374185136897,
    "acc_stderr,none": 0.012478532272564437
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7485380116959064,
    "acc_stderr,none": 0.033275044238468436
  },
  "mmlu_other": {
    "acc,none": 0.5423237850016093,
    "acc_stderr,none": 0.008464736553639335,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.67,
    "acc_stderr,none": 0.047258156262526094
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.4188679245283019,
    "acc_stderr,none": 0.0303650508291152
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.3930635838150289,
    "acc_stderr,none": 0.03724249595817729
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.25,
    "acc_stderr,none": 0.04351941398892446
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.5605381165919282,
    "acc_stderr,none": 0.03331092511038179
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7087378640776699,
    "acc_stderr,none": 0.044986763205729224
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8376068376068376,
    "acc_stderr,none": 0.02416161812798774
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.56,
    "acc_stderr,none": 0.04988876515698589
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7037037037037037,
    "acc_stderr,none": 0.016328814422102052
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.37254901960784315,
    "acc_stderr,none": 0.027684181883302898
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.4078014184397163,
    "acc_stderr,none": 0.02931601177634356
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.40808823529411764,
    "acc_stderr,none": 0.029855261393483924
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4397590361445783,
    "acc_stderr,none": 0.03864139923699121
  },
  "mmlu_social_sciences": {
    "acc,none": 0.48976275593110175,
    "acc_stderr,none": 0.00884676538285771,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.2894736842105263,
    "acc_stderr,none": 0.04266339443159394
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.5404040404040404,
    "acc_stderr,none": 0.035507024651313425
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.5699481865284974,
    "acc_stderr,none": 0.03572954333144808
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.32051282051282054,
    "acc_stderr,none": 0.023661296393964273
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.3319327731092437,
    "acc_stderr,none": 0.030588697013783663
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.5596330275229358,
    "acc_stderr,none": 0.02128431062376154
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.5954198473282443,
    "acc_stderr,none": 0.043046937953806645
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.5081699346405228,
    "acc_stderr,none": 0.020225134343057265
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6181818181818182,
    "acc_stderr,none": 0.046534298079135075
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.5551020408163265,
    "acc_stderr,none": 0.031814251181977865
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.5124378109452736,
    "acc_stderr,none": 0.0353443984853958
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.52,
    "acc_stderr,none": 0.050211673156867795
  },
  "mmlu_stem": {
    "acc,none": 0.3434823977164605,
    "acc_stderr,none": 0.008060223184692142,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.22,
    "acc_stderr,none": 0.04163331998932268
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.4740740740740741,
    "acc_stderr,none": 0.04313531696750574
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.45394736842105265,
    "acc_stderr,none": 0.04051646342874143
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.5555555555555556,
    "acc_stderr,none": 0.04155319955593146
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.25,
    "acc_stderr,none": 0.04351941398892446
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.44,
    "acc_stderr,none": 0.0498887651569859
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.21,
    "acc_stderr,none": 0.040936018074033256
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.23529411764705882,
    "acc_stderr,none": 0.04220773659171453
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.69,
    "acc_stderr,none": 0.04648231987117316
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.3659574468085106,
    "acc_stderr,none": 0.031489558297455304
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.42758620689655175,
    "acc_stderr,none": 0.04122737111370333
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.2275132275132275,
    "acc_stderr,none": 0.021591269407823774
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.5225806451612903,
    "acc_stderr,none": 0.02841498501970786
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.23645320197044334,
    "acc_stderr,none": 0.029896114291733545
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.6,
    "acc_stderr,none": 0.049236596391733084
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.2111111111111111,
    "acc_stderr,none": 0.024882116857655113
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.1986754966887417,
    "acc_stderr,none": 0.032578473844367774
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.16203703703703703,
    "acc_stderr,none": 0.025130453652268455
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.3482142857142857,
    "acc_stderr,none": 0.04521829902833585
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7475516866158868,
    "acc_stderr,none": 0.010135665547362366,
    "acc_norm,none": 0.7312295973884657,
    "acc_norm_stderr,none": 0.010343392940090018
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.43705220061412486,
    "acc_stderr,none": 0.0112240501088565
  }
}