{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.47440273037542663,
    "acc_stderr,none": 0.014592230885298959,
    "acc_norm,none": 0.48378839590443684,
    "acc_norm_stderr,none": 0.014603708567414941
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.7272727272727273,
    "acc_stderr,none": 0.00913863072636423,
    "acc_norm,none": 0.6245791245791246,
    "acc_norm_stderr,none": 0.009936218527114295
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8351681957186544,
    "acc_stderr,none": 0.006489332389894505
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.7604245640636846,
    "exact_match_stderr,flexible-extract": 0.01175686434407741
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5150368452499502,
    "acc_stderr,none": 0.004987524454849716,
    "acc_norm,none": 0.6408086038637721,
    "acc_norm_stderr,none": 0.004787829168255662
  },
  "mmlu": {
    "acc,none": 0.6970517020367469,
    "acc_stderr,none": 0.0036595499634494,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5978746014877789,
    "acc_stderr,none": 0.006717089653366038,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5793650793650794,
    "acc_stderr,none": 0.04415438226743745
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7818181818181819,
    "acc_stderr,none": 0.03225078108306289
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.8186274509803921,
    "acc_stderr,none": 0.027044621719474072
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.8016877637130801,
    "acc_stderr,none": 0.025955020841621112
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7603305785123967,
    "acc_stderr,none": 0.03896878985070416
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7777777777777778,
    "acc_stderr,none": 0.040191074725573483
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.8404907975460123,
    "acc_stderr,none": 0.02876748172598387
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.7196531791907514,
    "acc_stderr,none": 0.024182427496577622
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.3675977653631285,
    "acc_stderr,none": 0.01612554382355295
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.7427652733118971,
    "acc_stderr,none": 0.024826171289250888
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7777777777777778,
    "acc_stderr,none": 0.023132376234543332
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4876140808344198,
    "acc_stderr,none": 0.01276631731547356
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7719298245614035,
    "acc_stderr,none": 0.032180937956023566
  },
  "mmlu_other": {
    "acc,none": 0.7454135822336659,
    "acc_stderr,none": 0.0075946640131693945,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.73,
    "acc_stderr,none": 0.044619604333847394
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.7811320754716982,
    "acc_stderr,none": 0.02544786382510862
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.7398843930635838,
    "acc_stderr,none": 0.03345036916788991
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.42,
    "acc_stderr,none": 0.04960449637488584
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6905829596412556,
    "acc_stderr,none": 0.03102441174057221
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.8446601941747572,
    "acc_stderr,none": 0.03586594738573975
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8803418803418803,
    "acc_stderr,none": 0.021262719400407012
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.8,
    "acc_stderr,none": 0.04020151261036845
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.8135376756066411,
    "acc_stderr,none": 0.013927751372001505
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7647058823529411,
    "acc_stderr,none": 0.02428861946604611
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5921985815602837,
    "acc_stderr,none": 0.02931601177634356
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.7794117647058824,
    "acc_stderr,none": 0.025187786660227272
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.536144578313253,
    "acc_stderr,none": 0.03882310850890593
  },
  "mmlu_social_sciences": {
    "acc,none": 0.8056548586285343,
    "acc_stderr,none": 0.006990882230489451,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.631578947368421,
    "acc_stderr,none": 0.04537815354939392
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8484848484848485,
    "acc_stderr,none": 0.025545650426603634
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.9015544041450777,
    "acc_stderr,none": 0.021500249576033456
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.8153846153846154,
    "acc_stderr,none": 0.019671632413100295
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8949579831932774,
    "acc_stderr,none": 0.019916300758805225
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8972477064220183,
    "acc_stderr,none": 0.013018246509173761
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7557251908396947,
    "acc_stderr,none": 0.037683359597287414
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.7238562091503268,
    "acc_stderr,none": 0.018087276935663137
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6272727272727273,
    "acc_stderr,none": 0.04631381319425464
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7551020408163265,
    "acc_stderr,none": 0.02752963744017492
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8159203980099502,
    "acc_stderr,none": 0.02740385941078684
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.85,
    "acc_stderr,none": 0.035887028128263734
  },
  "mmlu_stem": {
    "acc,none": 0.6914050111005392,
    "acc_stderr,none": 0.007911565921655001,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.51,
    "acc_stderr,none": 0.05024183937956912
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.7111111111111111,
    "acc_stderr,none": 0.0391545063041425
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.8223684210526315,
    "acc_stderr,none": 0.031103182383123384
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8333333333333334,
    "acc_stderr,none": 0.031164899666948617
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.53,
    "acc_stderr,none": 0.05016135580465919
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.61,
    "acc_stderr,none": 0.04902071300001975
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.48,
    "acc_stderr,none": 0.050211673156867795
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.5392156862745098,
    "acc_stderr,none": 0.04959859966384181
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.78,
    "acc_stderr,none": 0.04163331998932264
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.8085106382978723,
    "acc_stderr,none": 0.025722149992637795
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.7448275862068966,
    "acc_stderr,none": 0.03632984052707842
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6613756613756614,
    "acc_stderr,none": 0.024373197867983067
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.9032258064516129,
    "acc_stderr,none": 0.016818943416345197
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6748768472906403,
    "acc_stderr,none": 0.03295797566311271
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.85,
    "acc_stderr,none": 0.0358870281282637
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.4888888888888889,
    "acc_stderr,none": 0.030478009819615817
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.5827814569536424,
    "acc_stderr,none": 0.0402614149763461
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.7222222222222222,
    "acc_stderr,none": 0.03054674526495319
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5982142857142857,
    "acc_stderr,none": 0.04653333146973646
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.73449401523395,
    "acc_stderr,none": 0.010303308653024427,
    "acc_norm,none": 0.7393906420021763,
    "acc_norm_stderr,none": 0.010241826155811618
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4524053224155578,
    "acc_stderr,none": 0.011262695440459566
  }
}