{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.45307167235494883,
    "acc_stderr,none": 0.014546892052005626,
    "acc_norm,none": 0.46928327645051193,
    "acc_norm_stderr,none": 0.014583792546304038
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.7205387205387206,
    "acc_stderr,none": 0.00920783814259724,
    "acc_norm,none": 0.5715488215488216,
    "acc_norm_stderr,none": 0.010154195733990965
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8406727828746178,
    "acc_stderr,none": 0.006401047038101462
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.16224412433661864,
    "exact_match_stderr,flexible-extract": 0.010155130880393524
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5646285600477993,
    "acc_stderr,none": 0.004947922692688842,
    "acc_norm,none": 0.6819358693487353,
    "acc_norm_stderr,none": 0.004647727222445389
  },
  "mmlu": {
    "acc,none": 0.3231021222048141,
    "acc_stderr,none": 0.0038700439885078373,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.3287991498405951,
    "acc_stderr,none": 0.006683188805883924,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.2857142857142857,
    "acc_stderr,none": 0.040406101782088394
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.44242424242424244,
    "acc_stderr,none": 0.038783721137112745
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.5441176470588235,
    "acc_stderr,none": 0.03495624522015478
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.5654008438818565,
    "acc_stderr,none": 0.03226759995510145
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.5619834710743802,
    "acc_stderr,none": 0.04529146804435793
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.3611111111111111,
    "acc_stderr,none": 0.04643454608906275
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.3067484662576687,
    "acc_stderr,none": 0.036230899157241474
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.3439306358381503,
    "acc_stderr,none": 0.025574123786546648
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.23798882681564246,
    "acc_stderr,none": 0.014242630070574885
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.2347266881028939,
    "acc_stderr,none": 0.024071805887677045
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.3950617283950617,
    "acc_stderr,none": 0.027201117666925657
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.2737940026075619,
    "acc_stderr,none": 0.01138861216797938
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.4853801169590643,
    "acc_stderr,none": 0.038331852752130205
  },
  "mmlu_other": {
    "acc,none": 0.3656260057933698,
    "acc_stderr,none": 0.008562738848735423,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.41,
    "acc_stderr,none": 0.049431107042371025
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.39622641509433965,
    "acc_stderr,none": 0.030102793781791197
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.30057803468208094,
    "acc_stderr,none": 0.03496101481191181
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.21,
    "acc_stderr,none": 0.040936018074033256
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.47085201793721976,
    "acc_stderr,none": 0.03350073248773403
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.3786407766990291,
    "acc_stderr,none": 0.048026946982589726
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.452991452991453,
    "acc_stderr,none": 0.0326109987309862
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.46,
    "acc_stderr,none": 0.05009082659620333
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.3486590038314176,
    "acc_stderr,none": 0.017041243143490935
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.4411764705882353,
    "acc_stderr,none": 0.028431095444176643
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.29432624113475175,
    "acc_stderr,none": 0.027187127011503793
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.25,
    "acc_stderr,none": 0.026303648393696036
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.37349397590361444,
    "acc_stderr,none": 0.03765845117168863
  },
  "mmlu_social_sciences": {
    "acc,none": 0.35456613584660385,
    "acc_stderr,none": 0.008495916041765268,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.24561403508771928,
    "acc_stderr,none": 0.04049339297748142
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.30808080808080807,
    "acc_stderr,none": 0.03289477330098616
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.44559585492227977,
    "acc_stderr,none": 0.03587014986075658
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.24102564102564103,
    "acc_stderr,none": 0.02168554666533319
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.24369747899159663,
    "acc_stderr,none": 0.027886828078380554
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.3321100917431193,
    "acc_stderr,none": 0.020192682985423344
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.45038167938931295,
    "acc_stderr,none": 0.04363643698524779
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.37745098039215685,
    "acc_stderr,none": 0.019610851474880276
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.2636363636363636,
    "acc_stderr,none": 0.04220224692971987
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.45714285714285713,
    "acc_stderr,none": 0.031891418324213966
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.5074626865671642,
    "acc_stderr,none": 0.03535140084276719
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.5,
    "acc_stderr,none": 0.050251890762960605
  },
  "mmlu_stem": {
    "acc,none": 0.2419917538851887,
    "acc_stderr,none": 0.007592321542609074,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.22,
    "acc_stderr,none": 0.04163331998932268
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.2518518518518518,
    "acc_stderr,none": 0.03749850709174023
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.28289473684210525,
    "acc_stderr,none": 0.03665349695640767
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.3125,
    "acc_stderr,none": 0.038760854559127644
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.21,
    "acc_stderr,none": 0.040936018074033256
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.27,
    "acc_stderr,none": 0.0446196043338474
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.21,
    "acc_stderr,none": 0.040936018074033256
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.21568627450980393,
    "acc_stderr,none": 0.04092563958237655
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.38,
    "acc_stderr,none": 0.04878317312145632
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.2680851063829787,
    "acc_stderr,none": 0.028957342788342347
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.2482758620689655,
    "acc_stderr,none": 0.03600105692727771
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.20899470899470898,
    "acc_stderr,none": 0.02094048156533485
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.29354838709677417,
    "acc_stderr,none": 0.025906087021319288
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.1625615763546798,
    "acc_stderr,none": 0.025960300064605576
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.28,
    "acc_stderr,none": 0.04512608598542127
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.2111111111111111,
    "acc_stderr,none": 0.024882116857655113
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.1986754966887417,
    "acc_stderr,none": 0.032578473844367774
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.16666666666666666,
    "acc_stderr,none": 0.025416428388767478
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.33035714285714285,
    "acc_stderr,none": 0.044642857142857116
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.764417845484222,
    "acc_stderr,none": 0.009901067586473916,
    "acc_norm,none": 0.7616974972796517,
    "acc_norm_stderr,none": 0.009940334245876188
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4452405322415558,
    "acc_stderr,none": 0.011246013134156768
  }
}