{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4206484641638225,
    "acc_stderr,none": 0.014426211252508397,
    "acc_norm,none": 0.4325938566552901,
    "acc_norm_stderr,none": 0.014478005694182531
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6666666666666666,
    "acc_stderr,none": 0.00967301666813338,
    "acc_norm,none": 0.5593434343434344,
    "acc_norm_stderr,none": 0.010187264635711983
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.7522935779816514,
    "acc_stderr,none": 0.007550137311318823
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.7475360121304018,
    "exact_match_stderr,flexible-extract": 0.011966250044833997
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5036845249950209,
    "acc_stderr,none": 0.004989645929811452,
    "acc_norm,none": 0.6353316072495518,
    "acc_norm_stderr,none": 0.004803533333364225
  },
  "mmlu": {
    "acc,none": 0.6621563879789204,
    "acc_stderr,none": 0.0037830435639416436,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5713071200850159,
    "acc_stderr,none": 0.006793142023238839,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5396825396825397,
    "acc_stderr,none": 0.04458029125470973
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7393939393939394,
    "acc_stderr,none": 0.03427743175816524
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7549019607843137,
    "acc_stderr,none": 0.030190282453501954
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7848101265822784,
    "acc_stderr,none": 0.02675082699467618
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7603305785123967,
    "acc_stderr,none": 0.03896878985070416
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7685185185185185,
    "acc_stderr,none": 0.04077494709252626
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.803680981595092,
    "acc_stderr,none": 0.031207970394709218
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6791907514450867,
    "acc_stderr,none": 0.025131000233647904
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.36312849162011174,
    "acc_stderr,none": 0.016083749986853697
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.7106109324758842,
    "acc_stderr,none": 0.025755865922632938
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7623456790123457,
    "acc_stderr,none": 0.023683591837008557
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4471968709256845,
    "acc_stderr,none": 0.012698825252435113
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.8070175438596491,
    "acc_stderr,none": 0.030267457554898458
  },
  "mmlu_other": {
    "acc,none": 0.7022851625362085,
    "acc_stderr,none": 0.00794699652990119,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.72,
    "acc_stderr,none": 0.04512608598542129
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.7094339622641509,
    "acc_stderr,none": 0.027943219989337124
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.653179190751445,
    "acc_stderr,none": 0.036291466701596636
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.36,
    "acc_stderr,none": 0.048241815132442176
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6636771300448431,
    "acc_stderr,none": 0.031708824268455
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7572815533980582,
    "acc_stderr,none": 0.04245022486384495
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8846153846153846,
    "acc_stderr,none": 0.02093019318517934
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.74,
    "acc_stderr,none": 0.0440844002276808
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7828863346104725,
    "acc_stderr,none": 0.014743125394823291
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7156862745098039,
    "acc_stderr,none": 0.025829163272757468
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5177304964539007,
    "acc_stderr,none": 0.02980873964223777
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.75,
    "acc_stderr,none": 0.026303648393696036
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.5060240963855421,
    "acc_stderr,none": 0.03892212195333045
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7780305492362691,
    "acc_stderr,none": 0.007402006432839058,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.6140350877192983,
    "acc_stderr,none": 0.045796394220704355
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8232323232323232,
    "acc_stderr,none": 0.027178752639044915
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8497409326424871,
    "acc_stderr,none": 0.025787723180723882
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7333333333333333,
    "acc_stderr,none": 0.022421273612923707
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8403361344537815,
    "acc_stderr,none": 0.023793353997528802
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8642201834862385,
    "acc_stderr,none": 0.014686907556340008
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7099236641221374,
    "acc_stderr,none": 0.03980066246467766
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.7140522875816994,
    "acc_stderr,none": 0.01828048507295467
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6818181818181818,
    "acc_stderr,none": 0.04461272175910507
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7877551020408163,
    "acc_stderr,none": 0.026176967197866767
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.7910447761194029,
    "acc_stderr,none": 0.028748298931728658
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.83,
    "acc_stderr,none": 0.03775251680686371
  },
  "mmlu_stem": {
    "acc,none": 0.6450999048525214,
    "acc_stderr,none": 0.008221227382117864,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.47,
    "acc_stderr,none": 0.050161355804659205
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6814814814814815,
    "acc_stderr,none": 0.040247784019771096
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.7631578947368421,
    "acc_stderr,none": 0.034597776068105365
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8194444444444444,
    "acc_stderr,none": 0.03216600808802268
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.49,
    "acc_stderr,none": 0.05024183937956912
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.57,
    "acc_stderr,none": 0.049756985195624284
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.5,
    "acc_stderr,none": 0.050251890762960605
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.5196078431372549,
    "acc_stderr,none": 0.04971358884367406
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.74,
    "acc_stderr,none": 0.04408440022768078
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.7404255319148936,
    "acc_stderr,none": 0.02865917937429232
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.7034482758620689,
    "acc_stderr,none": 0.03806142687309992
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6058201058201058,
    "acc_stderr,none": 0.02516798233389414
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8483870967741935,
    "acc_stderr,none": 0.02040261665441674
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6650246305418719,
    "acc_stderr,none": 0.033208527423483104
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.82,
    "acc_stderr,none": 0.03861229196653694
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.4111111111111111,
    "acc_stderr,none": 0.029999923508706682
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.5231788079470199,
    "acc_stderr,none": 0.04078093859163083
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.6388888888888888,
    "acc_stderr,none": 0.03275773486100999
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5803571428571429,
    "acc_stderr,none": 0.04684099321077106
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7513601741022851,
    "acc_stderr,none": 0.010084511234296857,
    "acc_norm,none": 0.7513601741022851,
    "acc_norm_stderr,none": 0.010084511234296864
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4467758444216991,
    "acc_stderr,none": 0.011249786691110375
  }
}