{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.5127986348122867,
    "acc_stderr,none": 0.014606603181012544,
    "acc_norm,none": 0.515358361774744,
    "acc_norm_stderr,none": 0.014604496129394906
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.7891414141414141,
    "acc_stderr,none": 0.008370304468544143,
    "acc_norm,none": 0.6999158249158249,
    "acc_norm_stderr,none": 0.009404000558513346
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8327217125382264,
    "acc_stderr,none": 0.006527731323590484
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.7179681576952237,
    "exact_match_stderr,flexible-extract": 0.012394926584335693
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5534754033061143,
    "acc_stderr,none": 0.004961161589228394,
    "acc_norm,none": 0.689205337582155,
    "acc_norm_stderr,none": 0.004618730353217041
  },
  "mmlu": {
    "acc,none": 0.6832360062669135,
    "acc_stderr,none": 0.003689857992599713,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.591710945802338,
    "acc_stderr,none": 0.0067135350207152885,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.6349206349206349,
    "acc_stderr,none": 0.04306241259127153
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7878787878787878,
    "acc_stderr,none": 0.031922715695483
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.8186274509803921,
    "acc_stderr,none": 0.02704462171947408
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.8143459915611815,
    "acc_stderr,none": 0.02531049537694486
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7603305785123967,
    "acc_stderr,none": 0.03896878985070417
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.75,
    "acc_stderr,none": 0.04186091791394607
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.852760736196319,
    "acc_stderr,none": 0.02783991527833965
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.7023121387283237,
    "acc_stderr,none": 0.024617055388677
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.3854748603351955,
    "acc_stderr,none": 0.01627792703963819
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.7331189710610932,
    "acc_stderr,none": 0.025122637608816646
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7716049382716049,
    "acc_stderr,none": 0.023358211840626267
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.455019556714472,
    "acc_stderr,none": 0.012718456618701785
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.8070175438596491,
    "acc_stderr,none": 0.030267457554898458
  },
  "mmlu_other": {
    "acc,none": 0.7293208883167042,
    "acc_stderr,none": 0.007673367600259794,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.78,
    "acc_stderr,none": 0.041633319989322626
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.7584905660377359,
    "acc_stderr,none": 0.026341480371118362
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.7052023121387283,
    "acc_stderr,none": 0.03476599607516479
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.33,
    "acc_stderr,none": 0.047258156262526045
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.695067264573991,
    "acc_stderr,none": 0.030898610882477515
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.8349514563106796,
    "acc_stderr,none": 0.036756688322331886
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8675213675213675,
    "acc_stderr,none": 0.02220930907316561
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.76,
    "acc_stderr,none": 0.042923469599092816
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.8109833971902938,
    "acc_stderr,none": 0.014000791294407003
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7549019607843137,
    "acc_stderr,none": 0.02463004897982477
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5319148936170213,
    "acc_stderr,none": 0.029766675075873866
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.7757352941176471,
    "acc_stderr,none": 0.02533684856333236
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.5120481927710844,
    "acc_stderr,none": 0.03891364495835817
  },
  "mmlu_social_sciences": {
    "acc,none": 0.8007799805004875,
    "acc_stderr,none": 0.007053513696701648,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.5964912280701754,
    "acc_stderr,none": 0.04615186962583706
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8232323232323232,
    "acc_stderr,none": 0.027178752639044915
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8911917098445595,
    "acc_stderr,none": 0.022473253332768766
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.8051282051282052,
    "acc_stderr,none": 0.020083167595181393
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8823529411764706,
    "acc_stderr,none": 0.020928472557788794
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8990825688073395,
    "acc_stderr,none": 0.012914673545364415
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7404580152671756,
    "acc_stderr,none": 0.03844876139785271
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.7238562091503268,
    "acc_stderr,none": 0.018087276935663137
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6181818181818182,
    "acc_stderr,none": 0.04653429807913508
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7714285714285715,
    "acc_stderr,none": 0.026882144922307748
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8159203980099502,
    "acc_stderr,none": 0.027403859410786848
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.86,
    "acc_stderr,none": 0.03487350880197772
  },
  "mmlu_stem": {
    "acc,none": 0.6596891849032668,
    "acc_stderr,none": 0.008069553380191679,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.4,
    "acc_stderr,none": 0.049236596391733084
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.674074074074074,
    "acc_stderr,none": 0.040491220417025055
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.8421052631578947,
    "acc_stderr,none": 0.029674167520101442
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8194444444444444,
    "acc_stderr,none": 0.032166008088022675
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.48,
    "acc_stderr,none": 0.050211673156867795
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.58,
    "acc_stderr,none": 0.049604496374885836
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.41,
    "acc_stderr,none": 0.049431107042371025
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.4803921568627451,
    "acc_stderr,none": 0.04971358884367406
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.79,
    "acc_stderr,none": 0.040936018074033256
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.774468085106383,
    "acc_stderr,none": 0.02732107841738753
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.6896551724137931,
    "acc_stderr,none": 0.03855289616378947
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6375661375661376,
    "acc_stderr,none": 0.024757473902752063
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8774193548387097,
    "acc_stderr,none": 0.018656720991789413
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6551724137931034,
    "acc_stderr,none": 0.03344283744280458
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.8,
    "acc_stderr,none": 0.04020151261036845
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.45185185185185184,
    "acc_stderr,none": 0.030343862998512633
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.543046357615894,
    "acc_stderr,none": 0.04067325174247443
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.6620370370370371,
    "acc_stderr,none": 0.03225941352631295
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.6517857142857143,
    "acc_stderr,none": 0.04521829902833585
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7747551686615887,
    "acc_stderr,none": 0.009746643471032148,
    "acc_norm,none": 0.7704026115342764,
    "acc_norm_stderr,none": 0.009812682950815176
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4836233367451382,
    "acc_stderr,none": 0.011308000169997617
  }
}