{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.38310580204778155,
        "acc_stderr,none": 0.014206472661672877,
        "acc_norm,none": 0.3993174061433447,
        "acc_norm_stderr,none": 0.0143120945579467
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.5290404040404041,
        "acc_stderr,none": 0.01024246382639562,
        "acc_norm,none": 0.460016835016835,
        "acc_norm_stderr,none": 0.010226927233491499
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.8339449541284404,
        "acc_stderr,none": 0.006508595338469725
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.21607278241091737,
        "exact_match_stderr,flexible-extract": 0.01133653148963886
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.44542919737104164,
        "acc_stderr,none": 0.0049599735147725045,
        "acc_norm,none": 0.5496912965544712,
        "acc_norm_stderr,none": 0.0049650784774355914
    },
    "mmlu": {
        "acc,none": 0.4865403788634098,
        "acc_stderr,none": 0.004037247111710939,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.461211477151966,
        "acc_stderr,none": 0.006910809568372539,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.373015873015873,
        "acc_stderr,none": 0.04325506042017086
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.6242424242424243,
        "acc_stderr,none": 0.037818873532059816
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.7205882352941176,
        "acc_stderr,none": 0.03149328104507956
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.6919831223628692,
        "acc_stderr,none": 0.03005238933560569
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.628099173553719,
        "acc_stderr,none": 0.04412015806624505
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.5740740740740741,
        "acc_stderr,none": 0.047803436269367894
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.588957055214724,
        "acc_stderr,none": 0.038656978537853624
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.5664739884393064,
        "acc_stderr,none": 0.026680134761679217
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.23798882681564246,
        "acc_stderr,none": 0.014242630070574885
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.5209003215434084,
        "acc_stderr,none": 0.028373270961069414
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.5370370370370371,
        "acc_stderr,none": 0.027744313443376536
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.3898305084745763,
        "acc_stderr,none": 0.012456386619082611
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.7719298245614035,
        "acc_stderr,none": 0.032180937956023566
    },
    "mmlu_other": {
        "acc,none": 0.5687158030254265,
        "acc_stderr,none": 0.008591378850227887,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.52,
        "acc_stderr,none": 0.050211673156867795
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.5433962264150943,
        "acc_stderr,none": 0.03065674869673943
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.4682080924855491,
        "acc_stderr,none": 0.03804749744364764
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.27,
        "acc_stderr,none": 0.044619604333847394
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.4798206278026906,
        "acc_stderr,none": 0.03353046167412301
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.5728155339805825,
        "acc_stderr,none": 0.04897957737781168
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.7051282051282052,
        "acc_stderr,none": 0.0298725777088912
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.61,
        "acc_stderr,none": 0.04902071300001975
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.7318007662835249,
        "acc_stderr,none": 0.01584243083526943
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.5326797385620915,
        "acc_stderr,none": 0.028568699752225868
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.35815602836879434,
        "acc_stderr,none": 0.028602085862759415
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.5698529411764706,
        "acc_stderr,none": 0.030074971917302875
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.4759036144578313,
        "acc_stderr,none": 0.03887971849597264
    },
    "mmlu_social_sciences": {
        "acc,none": 0.5466363340916477,
        "acc_stderr,none": 0.008804460346105374,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.30701754385964913,
        "acc_stderr,none": 0.043391383225798594
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.5555555555555556,
        "acc_stderr,none": 0.03540294377095368
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.6787564766839378,
        "acc_stderr,none": 0.033699508685490674
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.45897435897435895,
        "acc_stderr,none": 0.025265525491284295
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.42016806722689076,
        "acc_stderr,none": 0.03206183783236152
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.6788990825688074,
        "acc_stderr,none": 0.02001814977273375
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.5725190839694656,
        "acc_stderr,none": 0.04338920305792401
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.47549019607843135,
        "acc_stderr,none": 0.02020351728026145
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.5636363636363636,
        "acc_stderr,none": 0.04750185058907297
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.5795918367346938,
        "acc_stderr,none": 0.03160106993449601
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.6119402985074627,
        "acc_stderr,none": 0.0344578996436275
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.64,
        "acc_stderr,none": 0.04824181513244218
    },
    "mmlu_stem": {
        "acc,none": 0.3847129717729147,
        "acc_stderr,none": 0.00845179845031901,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.24,
        "acc_stderr,none": 0.042923469599092816
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.45925925925925926,
        "acc_stderr,none": 0.04304979692464242
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.506578947368421,
        "acc_stderr,none": 0.040685900502249704
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.5694444444444444,
        "acc_stderr,none": 0.04140685639111502
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.32,
        "acc_stderr,none": 0.04688261722621504
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.36,
        "acc_stderr,none": 0.04824181513244218
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.24,
        "acc_stderr,none": 0.04292346959909282
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.3235294117647059,
        "acc_stderr,none": 0.046550104113196177
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.51,
        "acc_stderr,none": 0.05024183937956912
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.42127659574468085,
        "acc_stderr,none": 0.03227834510146267
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.4827586206896552,
        "acc_stderr,none": 0.04164188720169377
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.30423280423280424,
        "acc_stderr,none": 0.023695415009463087
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.5903225806451613,
        "acc_stderr,none": 0.027976054915347368
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.30049261083743845,
        "acc_stderr,none": 0.03225799476233484
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.42,
        "acc_stderr,none": 0.049604496374885836
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.25925925925925924,
        "acc_stderr,none": 0.026719240783712152
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.2847682119205298,
        "acc_stderr,none": 0.03684881521389024
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.3055555555555556,
        "acc_stderr,none": 0.03141554629402544
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.38392857142857145,
        "acc_stderr,none": 0.04616143075028547
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.6817192600652884,
        "acc_stderr,none": 0.010868093932082237,
        "acc_norm,none": 0.6920565832426551,
        "acc_norm_stderr,none": 0.01077089236746369
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.390992835209826,
        "acc_stderr,none": 0.011041917016537851
    }
}