{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.4189419795221843,
        "acc_stderr,none": 0.01441810695363901,
        "acc_norm,none": 0.4232081911262799,
        "acc_norm_stderr,none": 0.014438036220848029
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.5673400673400674,
        "acc_stderr,none": 0.010166307932642863,
        "acc_norm,none": 0.4890572390572391,
        "acc_norm_stderr,none": 0.010257326131172874
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.8116207951070337,
        "acc_stderr,none": 0.0068388897481771755
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.24791508718726307,
        "exact_match_stderr,flexible-extract": 0.011893980214826171
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.4541923919537941,
        "acc_stderr,none": 0.004968796800410415,
        "acc_norm,none": 0.5632344154550887,
        "acc_norm_stderr,none": 0.004949716368890493
    },
    "mmlu": {
        "acc,none": 0.502278877652756,
        "acc_stderr,none": 0.004010450496948222,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.46397449521785333,
        "acc_stderr,none": 0.006845477995515234,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.3253968253968254,
        "acc_stderr,none": 0.04190596438871135
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.6666666666666666,
        "acc_stderr,none": 0.036810508691615486
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.7352941176470589,
        "acc_stderr,none": 0.03096451792692341
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.7215189873417721,
        "acc_stderr,none": 0.029178682304842565
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.6446280991735537,
        "acc_stderr,none": 0.04369236326573981
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.6111111111111112,
        "acc_stderr,none": 0.04712821257426769
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.6257668711656442,
        "acc_stderr,none": 0.03802068102899615
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.5433526011560693,
        "acc_stderr,none": 0.026817718130348923
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.23798882681564246,
        "acc_stderr,none": 0.014242630070574885
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.5401929260450161,
        "acc_stderr,none": 0.028306190403305696
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.5771604938271605,
        "acc_stderr,none": 0.027487472980871598
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.37353324641460234,
        "acc_stderr,none": 0.012354994823515248
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.7953216374269005,
        "acc_stderr,none": 0.030944459778533193
    },
    "mmlu_other": {
        "acc,none": 0.5838429353073704,
        "acc_stderr,none": 0.008519833513682669,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.57,
        "acc_stderr,none": 0.04975698519562428
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.5471698113207547,
        "acc_stderr,none": 0.03063562795796182
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.48554913294797686,
        "acc_stderr,none": 0.03810871630454764
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.23,
        "acc_stderr,none": 0.04229525846816506
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.5246636771300448,
        "acc_stderr,none": 0.03351695167652627
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.6893203883495146,
        "acc_stderr,none": 0.04582124160161549
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.7393162393162394,
        "acc_stderr,none": 0.028760348956523414
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.59,
        "acc_stderr,none": 0.04943110704237102
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.7445721583652618,
        "acc_stderr,none": 0.015594955384455765
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.545751633986928,
        "acc_stderr,none": 0.02850980780262659
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.3723404255319149,
        "acc_stderr,none": 0.02883892147125146
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.5404411764705882,
        "acc_stderr,none": 0.030273325077345748
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.5,
        "acc_stderr,none": 0.03892494720807614
    },
    "mmlu_social_sciences": {
        "acc,none": 0.5778355541111472,
        "acc_stderr,none": 0.00868799528201586,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.32456140350877194,
        "acc_stderr,none": 0.04404556157374768
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.5808080808080808,
        "acc_stderr,none": 0.035155207286704175
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.7150259067357513,
        "acc_stderr,none": 0.032577140777096614
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.4846153846153846,
        "acc_stderr,none": 0.025339003010106515
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.4327731092436975,
        "acc_stderr,none": 0.03218358107742613
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.7192660550458716,
        "acc_stderr,none": 0.019266055045871613
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.5954198473282443,
        "acc_stderr,none": 0.043046937953806645
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.5130718954248366,
        "acc_stderr,none": 0.020220920829626916
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.5454545454545454,
        "acc_stderr,none": 0.04769300568972746
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.563265306122449,
        "acc_stderr,none": 0.031751952375833226
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.736318407960199,
        "acc_stderr,none": 0.03115715086935558
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.66,
        "acc_stderr,none": 0.04760952285695237
    },
    "mmlu_stem": {
        "acc,none": 0.40532825880114176,
        "acc_stderr,none": 0.008502373695126126,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.24,
        "acc_stderr,none": 0.042923469599092816
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.4666666666666667,
        "acc_stderr,none": 0.043097329010363554
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.5131578947368421,
        "acc_stderr,none": 0.04067533136309174
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.5763888888888888,
        "acc_stderr,none": 0.04132125019723369
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.36,
        "acc_stderr,none": 0.04824181513244218
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.42,
        "acc_stderr,none": 0.049604496374885836
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.24,
        "acc_stderr,none": 0.042923469599092816
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.3431372549019608,
        "acc_stderr,none": 0.04724007352383888
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.6,
        "acc_stderr,none": 0.04923659639173309
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.4425531914893617,
        "acc_stderr,none": 0.032469569197899575
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.503448275862069,
        "acc_stderr,none": 0.041665675771015785
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.31216931216931215,
        "acc_stderr,none": 0.023865206836972595
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.6225806451612903,
        "acc_stderr,none": 0.02757596072327824
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.32019704433497537,
        "acc_stderr,none": 0.03282649385304149
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.44,
        "acc_stderr,none": 0.04988876515698589
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.25555555555555554,
        "acc_stderr,none": 0.02659393910184408
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.304635761589404,
        "acc_stderr,none": 0.037579499229433426
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.3611111111111111,
        "acc_stderr,none": 0.03275773486100999
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.38392857142857145,
        "acc_stderr,none": 0.04616143075028547
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.6969532100108814,
        "acc_stderr,none": 0.010722648689531525,
        "acc_norm,none": 0.7040261153427638,
        "acc_norm_stderr,none": 0.01065041431714812
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.406345957011259,
        "acc_stderr,none": 0.01111382527548005
    }
}