{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4129692832764505,
    "acc_stderr,none": 0.014388344935398322,
    "acc_norm,none": 0.4206484641638225,
    "acc_norm_stderr,none": 0.014426211252508397
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6372053872053872,
    "acc_stderr,none": 0.009865936757013942,
    "acc_norm,none": 0.5336700336700336,
    "acc_norm_stderr,none": 0.010236494647406476
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.7767584097859327,
    "acc_stderr,none": 0.007283214076520012
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.7824109173616376,
    "exact_match_stderr,flexible-extract": 0.01136523176118958
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5132443736307508,
    "acc_stderr,none": 0.004988030554894816,
    "acc_norm,none": 0.6463851822346146,
    "acc_norm_stderr,none": 0.0047711430744261304
  },
  "mmlu": {
    "acc,none": 0.6669990029910269,
    "acc_stderr,none": 0.00376370058024083,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5727948990435706,
    "acc_stderr,none": 0.006770418229376138,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5396825396825397,
    "acc_stderr,none": 0.04458029125470973
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7393939393939394,
    "acc_stderr,none": 0.03427743175816524
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7745098039215687,
    "acc_stderr,none": 0.02933116229425172
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.8059071729957806,
    "acc_stderr,none": 0.02574490253229094
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7603305785123967,
    "acc_stderr,none": 0.03896878985070416
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.75,
    "acc_stderr,none": 0.04186091791394607
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.8098159509202454,
    "acc_stderr,none": 0.030833491146281245
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6763005780346821,
    "acc_stderr,none": 0.025190181327608405
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.36089385474860336,
    "acc_stderr,none": 0.016062290671110462
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.7202572347266881,
    "acc_stderr,none": 0.025494259350694902
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7685185185185185,
    "acc_stderr,none": 0.023468429832451163
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.44589308996088656,
    "acc_stderr,none": 0.012695244711379778
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.8011695906432749,
    "acc_stderr,none": 0.030611116557432528
  },
  "mmlu_other": {
    "acc,none": 0.7074348245896364,
    "acc_stderr,none": 0.007909755900012153,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.72,
    "acc_stderr,none": 0.04512608598542129
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.7283018867924528,
    "acc_stderr,none": 0.02737770662467071
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.653179190751445,
    "acc_stderr,none": 0.036291466701596636
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.35,
    "acc_stderr,none": 0.04793724854411019
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6816143497757847,
    "acc_stderr,none": 0.03126580522513713
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7281553398058253,
    "acc_stderr,none": 0.044052680241409216
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8846153846153846,
    "acc_stderr,none": 0.02093019318517934
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.74,
    "acc_stderr,none": 0.0440844002276808
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7931034482758621,
    "acc_stderr,none": 0.014485656041669195
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7156862745098039,
    "acc_stderr,none": 0.025829163272757468
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.549645390070922,
    "acc_stderr,none": 0.02968010556502904
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.7352941176470589,
    "acc_stderr,none": 0.026799562024887678
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4939759036144578,
    "acc_stderr,none": 0.03892212195333045
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7842053948651284,
    "acc_stderr,none": 0.007320957187684336,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.6228070175438597,
    "acc_stderr,none": 0.04559522141958216
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8131313131313131,
    "acc_stderr,none": 0.027772533334218967
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8652849740932642,
    "acc_stderr,none": 0.024639789097709443
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7538461538461538,
    "acc_stderr,none": 0.02184086699042308
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8613445378151261,
    "acc_stderr,none": 0.022448264476832593
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8697247706422019,
    "acc_stderr,none": 0.014431862852473247
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7099236641221374,
    "acc_stderr,none": 0.03980066246467766
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.7140522875816994,
    "acc_stderr,none": 0.01828048507295467
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6909090909090909,
    "acc_stderr,none": 0.044262946482000985
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7755102040816326,
    "acc_stderr,none": 0.02671143055553841
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8009950248756219,
    "acc_stderr,none": 0.028231365092758406
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.84,
    "acc_stderr,none": 0.0368452949177471
  },
  "mmlu_stem": {
    "acc,none": 0.6533460196638122,
    "acc_stderr,none": 0.00819081695586364,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.47,
    "acc_stderr,none": 0.050161355804659205
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.674074074074074,
    "acc_stderr,none": 0.040491220417025055
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.7631578947368421,
    "acc_stderr,none": 0.034597776068105365
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8194444444444444,
    "acc_stderr,none": 0.03216600808802268
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.5,
    "acc_stderr,none": 0.050251890762960605
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.55,
    "acc_stderr,none": 0.04999999999999999
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.48,
    "acc_stderr,none": 0.050211673156867795
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.5196078431372549,
    "acc_stderr,none": 0.04971358884367406
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.77,
    "acc_stderr,none": 0.04229525846816507
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.7574468085106383,
    "acc_stderr,none": 0.028020226271200217
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.7103448275862069,
    "acc_stderr,none": 0.03780019230438015
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6216931216931217,
    "acc_stderr,none": 0.024976954053155257
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8548387096774194,
    "acc_stderr,none": 0.020039563628053307
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6650246305418719,
    "acc_stderr,none": 0.0332085274234831
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.81,
    "acc_stderr,none": 0.039427724440366234
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.45185185185185184,
    "acc_stderr,none": 0.030343862998512633
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.5298013245033113,
    "acc_stderr,none": 0.040752249922169775
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.6527777777777778,
    "acc_stderr,none": 0.032468872436376486
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5803571428571429,
    "acc_stderr,none": 0.04684099321077106
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7470076169749728,
    "acc_stderr,none": 0.010142888698862462,
    "acc_norm,none": 0.749183895538629,
    "acc_norm_stderr,none": 0.010113869547069042
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.44626407369498466,
    "acc_stderr,none": 0.011248540901547959
  }
}