{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4052901023890785,
    "acc_stderr,none": 0.014346869060229327,
    "acc_norm,none": 0.4069965870307167,
    "acc_norm_stderr,none": 0.014356399418009135
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.5311447811447811,
    "acc_stderr,none": 0.01023986025002175,
    "acc_norm,none": 0.4431818181818182,
    "acc_norm_stderr,none": 0.010193324837773495
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.6522935779816513,
    "acc_stderr,none": 0.008329529048948698
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.000758150113722517,
    "exact_match_stderr,strict-match": 0.0007581501137225298,
    "exact_match,flexible-extract": 0.5504169825625473,
    "exact_match_stderr,flexible-extract": 0.013702290047884749
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5309699263095001,
    "acc_stderr,none": 0.004980200451851668,
    "acc_norm,none": 0.5956980681139216,
    "acc_norm_stderr,none": 0.0048975346866863305
  },
  "mmlu": {
    "acc,none": 0.4465888050135308,
    "acc_stderr,none": 0.003969281703264318,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.4512221041445271,
    "acc_stderr,none": 0.006825991640702845,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.31746031746031744,
    "acc_stderr,none": 0.04163453031302859
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.6848484848484848,
    "acc_stderr,none": 0.0362773057502241
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7794117647058824,
    "acc_stderr,none": 0.0291022543896741
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7552742616033755,
    "acc_stderr,none": 0.02798569938703642
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.47107438016528924,
    "acc_stderr,none": 0.04556710331269498
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.5462962962962963,
    "acc_stderr,none": 0.04812917324536823
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.4662576687116564,
    "acc_stderr,none": 0.039194155450484096
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.4508670520231214,
    "acc_stderr,none": 0.026788811931562757
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.23798882681564246,
    "acc_stderr,none": 0.014242630070574885
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.4565916398713826,
    "acc_stderr,none": 0.028290869054197604
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.6728395061728395,
    "acc_stderr,none": 0.026105673861409814
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.379400260756193,
    "acc_stderr,none": 0.012393202029825403
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7543859649122807,
    "acc_stderr,none": 0.0330140594698725
  },
  "mmlu_other": {
    "acc,none": 0.5300933376247183,
    "acc_stderr,none": 0.00846054509199804,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.67,
    "acc_stderr,none": 0.047258156262526094
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.4075471698113208,
    "acc_stderr,none": 0.030242233800854494
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.3815028901734104,
    "acc_stderr,none": 0.037038511930995194
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.26,
    "acc_stderr,none": 0.044084400227680794
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.5426008968609866,
    "acc_stderr,none": 0.03343577705583064
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7087378640776699,
    "acc_stderr,none": 0.044986763205729224
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8162393162393162,
    "acc_stderr,none": 0.025372139671722933
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.54,
    "acc_stderr,none": 0.05009082659620332
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7011494252873564,
    "acc_stderr,none": 0.016369256815093127
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.35294117647058826,
    "acc_stderr,none": 0.027363593284684934
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.40070921985815605,
    "acc_stderr,none": 0.029233465745573093
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.3639705882352941,
    "acc_stderr,none": 0.029227192460032025
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.43373493975903615,
    "acc_stderr,none": 0.03858158940685516
  },
  "mmlu_social_sciences": {
    "acc,none": 0.4676633084172896,
    "acc_stderr,none": 0.008835692067691713,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.2807017543859649,
    "acc_stderr,none": 0.04227054451232199
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.5050505050505051,
    "acc_stderr,none": 0.035621707606254015
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.5284974093264249,
    "acc_stderr,none": 0.036025735712884414
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.29743589743589743,
    "acc_stderr,none": 0.02317740813146594
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.31932773109243695,
    "acc_stderr,none": 0.030283995525884396
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.5247706422018349,
    "acc_stderr,none": 0.02141099975363592
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.5954198473282443,
    "acc_stderr,none": 0.043046937953806645
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.48856209150326796,
    "acc_stderr,none": 0.02022254151561087
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6090909090909091,
    "acc_stderr,none": 0.04673752333670237
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.5224489795918368,
    "acc_stderr,none": 0.03197694118713672
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.48756218905472637,
    "acc_stderr,none": 0.035344398485395785
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.57,
    "acc_stderr,none": 0.049756985195624284
  },
  "mmlu_stem": {
    "acc,none": 0.3368220742150333,
    "acc_stderr,none": 0.008053946117806049,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.22,
    "acc_stderr,none": 0.04163331998932268
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.43703703703703706,
    "acc_stderr,none": 0.04284958639753399
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.4342105263157895,
    "acc_stderr,none": 0.04033565667848319
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.5486111111111112,
    "acc_stderr,none": 0.041614023984032786
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.25,
    "acc_stderr,none": 0.04351941398892446
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.44,
    "acc_stderr,none": 0.04988876515698589
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.21,
    "acc_stderr,none": 0.040936018074033256
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.22549019607843138,
    "acc_stderr,none": 0.041583075330832865
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.71,
    "acc_stderr,none": 0.045604802157206845
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.3659574468085106,
    "acc_stderr,none": 0.031489558297455304
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.4206896551724138,
    "acc_stderr,none": 0.0411391498118926
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.2328042328042328,
    "acc_stderr,none": 0.021765961672154527
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.47419354838709676,
    "acc_stderr,none": 0.028406095057653315
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.2413793103448276,
    "acc_stderr,none": 0.030108330718011625
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.6,
    "acc_stderr,none": 0.049236596391733084
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.2111111111111111,
    "acc_stderr,none": 0.024882116857655113
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.1986754966887417,
    "acc_stderr,none": 0.032578473844367774
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.1574074074074074,
    "acc_stderr,none": 0.02483717351824239
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.35714285714285715,
    "acc_stderr,none": 0.04547960999764376
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7431991294885746,
    "acc_stderr,none": 0.010192864802278058,
    "acc_norm,none": 0.7306855277475517,
    "acc_norm_stderr,none": 0.01035000407058876
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4375639713408393,
    "acc_stderr,none": 0.011225513656335442
  }
}