{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.41552901023890787,
    "acc_stderr,none": 0.014401366641216391,
    "acc_norm,none": 0.4274744027303754,
    "acc_norm_stderr,none": 0.014456862944650647
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6342592592592593,
    "acc_stderr,none": 0.009882988069418832,
    "acc_norm,none": 0.5420875420875421,
    "acc_norm_stderr,none": 0.010223371342195897
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.7761467889908257,
    "acc_stderr,none": 0.007290312360053285
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.7088703563305534,
    "exact_match_stderr,flexible-extract": 0.012513215297888465
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5247958573989245,
    "acc_stderr,none": 0.004983641854351155,
    "acc_norm,none": 0.6612228639713205,
    "acc_norm_stderr,none": 0.004723266971563369
  },
  "mmlu": {
    "acc,none": 0.6526135878080046,
    "acc_stderr,none": 0.0038199518382317126,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5696068012752391,
    "acc_stderr,none": 0.0068311386722731795,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5476190476190477,
    "acc_stderr,none": 0.044518079590553275
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7272727272727273,
    "acc_stderr,none": 0.03477691162163659
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7647058823529411,
    "acc_stderr,none": 0.029771775228145628
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7848101265822784,
    "acc_stderr,none": 0.026750826994676177
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7603305785123967,
    "acc_stderr,none": 0.038968789850704164
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7407407407407407,
    "acc_stderr,none": 0.04236511258094631
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.7914110429447853,
    "acc_stderr,none": 0.031921934489347235
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.708092485549133,
    "acc_stderr,none": 0.024476994076247344
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.3687150837988827,
    "acc_stderr,none": 0.01613575901503012
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.6591639871382636,
    "acc_stderr,none": 0.026920841260776155
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7469135802469136,
    "acc_stderr,none": 0.024191808600713
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4511082138200782,
    "acc_stderr,none": 0.012709037347346233
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.783625730994152,
    "acc_stderr,none": 0.03158149539338733
  },
  "mmlu_other": {
    "acc,none": 0.6935951078210493,
    "acc_stderr,none": 0.00801743404678838,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.7,
    "acc_stderr,none": 0.046056618647183814
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.6830188679245283,
    "acc_stderr,none": 0.028637235639800893
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.6416184971098265,
    "acc_stderr,none": 0.0365634365335316
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.37,
    "acc_stderr,none": 0.048523658709391
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6636771300448431,
    "acc_stderr,none": 0.031708824268455
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7184466019417476,
    "acc_stderr,none": 0.044532548363264673
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8974358974358975,
    "acc_stderr,none": 0.019875655027867454
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.75,
    "acc_stderr,none": 0.04351941398892446
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7790549169859514,
    "acc_stderr,none": 0.014836205167333555
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7222222222222222,
    "acc_stderr,none": 0.02564686309713791
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5283687943262412,
    "acc_stderr,none": 0.029779450957303055
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.6948529411764706,
    "acc_stderr,none": 0.027971541370170598
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4819277108433735,
    "acc_stderr,none": 0.03889951252827216
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7591810204744881,
    "acc_stderr,none": 0.007627134383905769,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.5877192982456141,
    "acc_stderr,none": 0.04630653203366596
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8080808080808081,
    "acc_stderr,none": 0.028057791672989024
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8031088082901554,
    "acc_stderr,none": 0.02869787397186068
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7307692307692307,
    "acc_stderr,none": 0.022489389793654824
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8109243697478992,
    "acc_stderr,none": 0.02543511943810535
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8366972477064221,
    "acc_stderr,none": 0.015848255806501548
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7175572519083969,
    "acc_stderr,none": 0.03948406125768361
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.6993464052287581,
    "acc_stderr,none": 0.01855063450295296
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6454545454545455,
    "acc_stderr,none": 0.04582004841505415
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.726530612244898,
    "acc_stderr,none": 0.028535560337128448
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8258706467661692,
    "acc_stderr,none": 0.026814951200421603
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.83,
    "acc_stderr,none": 0.03775251680686371
  },
  "mmlu_stem": {
    "acc,none": 0.6320964161116397,
    "acc_stderr,none": 0.008227124693315404,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.4,
    "acc_stderr,none": 0.04923659639173309
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6592592592592592,
    "acc_stderr,none": 0.040943762699967946
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.7894736842105263,
    "acc_stderr,none": 0.03317672787533157
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8055555555555556,
    "acc_stderr,none": 0.03309615177059006
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.44,
    "acc_stderr,none": 0.04988876515698589
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.59,
    "acc_stderr,none": 0.049431107042371025
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.46,
    "acc_stderr,none": 0.05009082659620333
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.45098039215686275,
    "acc_stderr,none": 0.04951218252396264
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.75,
    "acc_stderr,none": 0.04351941398892446
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.7361702127659574,
    "acc_stderr,none": 0.028809989854102956
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.6275862068965518,
    "acc_stderr,none": 0.04028731532947559
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6216931216931217,
    "acc_stderr,none": 0.02497695405315526
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8419354838709677,
    "acc_stderr,none": 0.020752831511875264
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6650246305418719,
    "acc_stderr,none": 0.033208527423483104
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.82,
    "acc_stderr,none": 0.03861229196653694
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.3851851851851852,
    "acc_stderr,none": 0.02967090612463089
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.5099337748344371,
    "acc_stderr,none": 0.04081677107248437
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.6342592592592593,
    "acc_stderr,none": 0.03284738857647207
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5625,
    "acc_stderr,none": 0.04708567521880525
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.766050054406964,
    "acc_stderr,none": 0.009877236895137462,
    "acc_norm,none": 0.7568008705114254,
    "acc_norm_stderr,none": 0.010009611953858924
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4314227226202661,
    "acc_stderr,none": 0.011207148736838401
  }
}