{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4044368600682594,
    "acc_stderr,none": 0.014342036483436174,
    "acc_norm,none": 0.4180887372013652,
    "acc_norm_stderr,none": 0.014413988396996083
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6405723905723906,
    "acc_stderr,none": 0.009845958893373767,
    "acc_norm,none": 0.5223063973063973,
    "acc_norm_stderr,none": 0.010249568404555645
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.7293577981651376,
    "acc_stderr,none": 0.007770708434960571
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.7452615617892343,
    "exact_match_stderr,flexible-extract": 0.012001731232879138
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.49193387771360286,
    "acc_stderr,none": 0.004989132075598774,
    "acc_norm,none": 0.6129257120095598,
    "acc_norm_stderr,none": 0.004860854240821971
  },
  "mmlu": {
    "acc,none": 0.6653610596781085,
    "acc_stderr,none": 0.0037724154304392466,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.573645058448459,
    "acc_stderr,none": 0.00678945517054231,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5396825396825397,
    "acc_stderr,none": 0.04458029125470973
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7333333333333333,
    "acc_stderr,none": 0.03453131801885417
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7696078431372549,
    "acc_stderr,none": 0.029554292605695053
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.8016877637130801,
    "acc_stderr,none": 0.02595502084162111
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7603305785123967,
    "acc_stderr,none": 0.03896878985070416
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7592592592592593,
    "acc_stderr,none": 0.04133119440243839
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.803680981595092,
    "acc_stderr,none": 0.031207970394709218
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.684971098265896,
    "acc_stderr,none": 0.025009313790069706
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.36983240223463687,
    "acc_stderr,none": 0.016145881256056212
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.707395498392283,
    "acc_stderr,none": 0.025839898334877983
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7592592592592593,
    "acc_stderr,none": 0.023788583551658533
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4471968709256845,
    "acc_stderr,none": 0.012698825252435113
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.8070175438596491,
    "acc_stderr,none": 0.030267457554898465
  },
  "mmlu_other": {
    "acc,none": 0.7058255551979401,
    "acc_stderr,none": 0.007920166489670612,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.72,
    "acc_stderr,none": 0.04512608598542129
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.7169811320754716,
    "acc_stderr,none": 0.027724236492700918
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.653179190751445,
    "acc_stderr,none": 0.036291466701596636
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.35,
    "acc_stderr,none": 0.04793724854411019
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6816143497757847,
    "acc_stderr,none": 0.03126580522513713
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7475728155339806,
    "acc_stderr,none": 0.04301250399690878
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8846153846153846,
    "acc_stderr,none": 0.02093019318517934
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.74,
    "acc_stderr,none": 0.0440844002276808
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7879948914431673,
    "acc_stderr,none": 0.014616099385833666
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7189542483660131,
    "acc_stderr,none": 0.025738854797818716
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5390070921985816,
    "acc_stderr,none": 0.029736592526424438
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.7426470588235294,
    "acc_stderr,none": 0.026556519470041527
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4939759036144578,
    "acc_stderr,none": 0.03892212195333045
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7793305167370815,
    "acc_stderr,none": 0.007382880478576551,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.6052631578947368,
    "acc_stderr,none": 0.045981880578165414
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8131313131313131,
    "acc_stderr,none": 0.027772533334218967
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8601036269430051,
    "acc_stderr,none": 0.025033870583015184
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7410256410256411,
    "acc_stderr,none": 0.02221110681006167
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8445378151260504,
    "acc_stderr,none": 0.023536818625398893
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8623853211009175,
    "acc_stderr,none": 0.014770105878649416
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7099236641221374,
    "acc_stderr,none": 0.03980066246467766
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.7156862745098039,
    "acc_stderr,none": 0.01824902441120765
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6727272727272727,
    "acc_stderr,none": 0.04494290866252088
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7836734693877551,
    "acc_stderr,none": 0.02635891633490403
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8009950248756219,
    "acc_stderr,none": 0.028231365092758406
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.84,
    "acc_stderr,none": 0.0368452949177471
  },
  "mmlu_stem": {
    "acc,none": 0.6511259118300031,
    "acc_stderr,none": 0.008172432970342099,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.48,
    "acc_stderr,none": 0.050211673156867795
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6962962962962963,
    "acc_stderr,none": 0.039725528847851375
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.7697368421052632,
    "acc_stderr,none": 0.034260594244031654
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8263888888888888,
    "acc_stderr,none": 0.03167473383795717
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.48,
    "acc_stderr,none": 0.050211673156867795
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.58,
    "acc_stderr,none": 0.04960449637488583
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.47,
    "acc_stderr,none": 0.05016135580465919
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.5294117647058824,
    "acc_stderr,none": 0.049665709039785295
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.76,
    "acc_stderr,none": 0.04292346959909281
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.7489361702127659,
    "acc_stderr,none": 0.028346963777162452
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.7103448275862069,
    "acc_stderr,none": 0.03780019230438015
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6164021164021164,
    "acc_stderr,none": 0.02504375731852019
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8548387096774194,
    "acc_stderr,none": 0.020039563628053304
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6650246305418719,
    "acc_stderr,none": 0.033208527423483104
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.83,
    "acc_stderr,none": 0.0377525168068637
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.4185185185185185,
    "acc_stderr,none": 0.030078013075022055
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.5165562913907285,
    "acc_stderr,none": 0.0408024418562897
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.6481481481481481,
    "acc_stderr,none": 0.03256850570293648
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5892857142857143,
    "acc_stderr,none": 0.04669510663875192
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7377584330794341,
    "acc_stderr,none": 0.010262502565172454,
    "acc_norm,none": 0.7393906420021763,
    "acc_norm_stderr,none": 0.010241826155811616
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.44575230296827023,
    "acc_stderr,none": 0.011247283050579059
  }
}