{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.40187713310580203,
    "acc_stderr,none": 0.014327268614578276,
    "acc_norm,none": 0.4035836177474403,
    "acc_norm_stderr,none": 0.014337158914268448
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6056397306397306,
    "acc_stderr,none": 0.010028176038393,
    "acc_norm,none": 0.4764309764309764,
    "acc_norm_stderr,none": 0.010248378585554028
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.7097859327217125,
    "acc_stderr,none": 0.007938079855173709
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.004548900682335102,
    "exact_match_stderr,strict-match": 0.0018535550440036204,
    "exact_match,flexible-extract": 0.6353297952994693,
    "exact_match_stderr,flexible-extract": 0.013258428375662247
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5107548297151961,
    "acc_stderr,none": 0.004988626978173083,
    "acc_norm,none": 0.6164110734913364,
    "acc_norm_stderr,none": 0.0048526588767753755
  },
  "mmlu": {
    "acc,none": 0.6070360347528843,
    "acc_stderr,none": 0.00386166617543964,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5396386822529224,
    "acc_stderr,none": 0.006747475985624616,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5238095238095238,
    "acc_stderr,none": 0.04467062628403273
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7333333333333333,
    "acc_stderr,none": 0.03453131801885417
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7696078431372549,
    "acc_stderr,none": 0.029554292605695053
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7974683544303798,
    "acc_stderr,none": 0.02616056824660146
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.768595041322314,
    "acc_stderr,none": 0.03849856098794089
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.6759259259259259,
    "acc_stderr,none": 0.045245960070300476
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.7177914110429447,
    "acc_stderr,none": 0.03536117886664742
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6878612716763006,
    "acc_stderr,none": 0.02494679222527231
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.2536312849162011,
    "acc_stderr,none": 0.014551553659369916
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.6077170418006431,
    "acc_stderr,none": 0.027731258647012
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.75,
    "acc_stderr,none": 0.02409347123262133
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.45241199478487615,
    "acc_stderr,none": 0.012712265105889133
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7719298245614035,
    "acc_stderr,none": 0.032180937956023566
  },
  "mmlu_other": {
    "acc,none": 0.6733183134856775,
    "acc_stderr,none": 0.008159984651781776,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.7,
    "acc_stderr,none": 0.046056618647183814
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.6452830188679245,
    "acc_stderr,none": 0.029445175328199586
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.6242774566473989,
    "acc_stderr,none": 0.036928207672648664
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.39,
    "acc_stderr,none": 0.04902071300001975
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.672645739910314,
    "acc_stderr,none": 0.031493846709941306
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7475728155339806,
    "acc_stderr,none": 0.04301250399690878
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8803418803418803,
    "acc_stderr,none": 0.021262719400407002
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.7,
    "acc_stderr,none": 0.046056618647183814
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7739463601532567,
    "acc_stderr,none": 0.014957458504335835
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.6666666666666666,
    "acc_stderr,none": 0.02699254433929723
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.4858156028368794,
    "acc_stderr,none": 0.02981549448368206
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.6360294117647058,
    "acc_stderr,none": 0.029227192460032025
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4879518072289157,
    "acc_stderr,none": 0.038913644958358196
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7175820604484888,
    "acc_stderr,none": 0.007997482772218479,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.4824561403508772,
    "acc_stderr,none": 0.04700708033551038
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.7626262626262627,
    "acc_stderr,none": 0.03031371053819889
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8186528497409327,
    "acc_stderr,none": 0.02780703236068609
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.6205128205128205,
    "acc_stderr,none": 0.02460362692409742
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.7142857142857143,
    "acc_stderr,none": 0.02934457250063434
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8018348623853211,
    "acc_stderr,none": 0.017090573804217905
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.6793893129770993,
    "acc_stderr,none": 0.040933292298342784
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.6748366013071896,
    "acc_stderr,none": 0.018950886770806304
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6727272727272727,
    "acc_stderr,none": 0.04494290866252088
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7142857142857143,
    "acc_stderr,none": 0.028920583220675585
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8059701492537313,
    "acc_stderr,none": 0.02796267760476891
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.82,
    "acc_stderr,none": 0.03861229196653695
  },
  "mmlu_stem": {
    "acc,none": 0.5344116714240406,
    "acc_stderr,none": 0.008296106272814055,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.3,
    "acc_stderr,none": 0.046056618647183814
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6,
    "acc_stderr,none": 0.04232073695151589
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.756578947368421,
    "acc_stderr,none": 0.034923496688842384
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.7916666666666666,
    "acc_stderr,none": 0.033961162058453336
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.41,
    "acc_stderr,none": 0.049431107042371025
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.53,
    "acc_stderr,none": 0.050161355804659205
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.25,
    "acc_stderr,none": 0.04351941398892446
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.35294117647058826,
    "acc_stderr,none": 0.04755129616062949
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.74,
    "acc_stderr,none": 0.04408440022768077
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.6680851063829787,
    "acc_stderr,none": 0.030783736757745643
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.593103448275862,
    "acc_stderr,none": 0.04093793981266236
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.4656084656084656,
    "acc_stderr,none": 0.025690321762493844
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.7935483870967742,
    "acc_stderr,none": 0.02302589961718872
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.5320197044334976,
    "acc_stderr,none": 0.03510766597959215
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.76,
    "acc_stderr,none": 0.04292346959909282
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.23703703703703705,
    "acc_stderr,none": 0.02592887613276611
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.3443708609271523,
    "acc_stderr,none": 0.03879687024073327
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.4305555555555556,
    "acc_stderr,none": 0.03376922151252336
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5178571428571429,
    "acc_stderr,none": 0.04742762361243011
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.750272034820457,
    "acc_stderr,none": 0.010099232969867497,
    "acc_norm,none": 0.7470076169749728,
    "acc_norm_stderr,none": 0.010142888698862465
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.43858751279426816,
    "acc_stderr,none": 0.011228404348646078
  }
}