{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.4598976109215017,
        "acc_stderr,none": 0.01456431885692485,
        "acc_norm,none": 0.46501706484641636,
        "acc_norm_stderr,none": 0.014575583922019675
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.7377946127946128,
        "acc_stderr,none": 0.009025197991724835,
        "acc_norm,none": 0.6519360269360269,
        "acc_norm_stderr,none": 0.009774627600259012
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.8189602446483181,
        "acc_stderr,none": 0.006734586365395127
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.44200151630022744,
        "exact_match_stderr,flexible-extract": 0.013679514492814569
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.5400318661621191,
        "acc_stderr,none": 0.004973762948302802,
        "acc_norm,none": 0.7051384186417048,
        "acc_norm_stderr,none": 0.0045504861860190746
    },
    "mmlu": {
        "acc,none": 0.5478564307078764,
        "acc_stderr,none": 0.003977739234999278,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.5013815090329437,
        "acc_stderr,none": 0.006827542661061695,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.373015873015873,
        "acc_stderr,none": 0.04325506042017086
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.696969696969697,
        "acc_stderr,none": 0.035886248000917075
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.75,
        "acc_stderr,none": 0.03039153369274154
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.759493670886076,
        "acc_stderr,none": 0.027820781981149675
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.7107438016528925,
        "acc_stderr,none": 0.041391127276354626
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.6574074074074074,
        "acc_stderr,none": 0.04587904741301811
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.6380368098159509,
        "acc_stderr,none": 0.037757007291414416
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.6011560693641619,
        "acc_stderr,none": 0.02636243757454654
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.23798882681564246,
        "acc_stderr,none": 0.014242630070574885
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.5852090032154341,
        "acc_stderr,none": 0.027982680459759556
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.6234567901234568,
        "acc_stderr,none": 0.026959344518747787
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.4276401564537158,
        "acc_stderr,none": 0.012635799922765839
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.8304093567251462,
        "acc_stderr,none": 0.02878210810540171
    },
    "mmlu_other": {
        "acc,none": 0.6308336015448986,
        "acc_stderr,none": 0.008340905265118768,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.52,
        "acc_stderr,none": 0.050211673156867795
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.5924528301886792,
        "acc_stderr,none": 0.030242233800854494
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.5317919075144508,
        "acc_stderr,none": 0.038047497443647646
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.36,
        "acc_stderr,none": 0.048241815132442176
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.5829596412556054,
        "acc_stderr,none": 0.03309266936071721
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.7475728155339806,
        "acc_stderr,none": 0.04301250399690879
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.7905982905982906,
        "acc_stderr,none": 0.026655699653922733
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.64,
        "acc_stderr,none": 0.048241815132442176
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.7790549169859514,
        "acc_stderr,none": 0.01483620516733355
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.5915032679738562,
        "acc_stderr,none": 0.028146405993096358
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.40425531914893614,
        "acc_stderr,none": 0.02927553215970473
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.6691176470588235,
        "acc_stderr,none": 0.028582709753898435
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.4819277108433735,
        "acc_stderr,none": 0.03889951252827216
    },
    "mmlu_social_sciences": {
        "acc,none": 0.6301592460188495,
        "acc_stderr,none": 0.008453813989305189,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.34210526315789475,
        "acc_stderr,none": 0.044629175353369376
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.6767676767676768,
        "acc_stderr,none": 0.03332299921070645
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.7772020725388601,
        "acc_stderr,none": 0.030031147977641545
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.5256410256410257,
        "acc_stderr,none": 0.025317649726448666
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.5168067226890757,
        "acc_stderr,none": 0.03246013680375308
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.763302752293578,
        "acc_stderr,none": 0.01822407811729908
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.6946564885496184,
        "acc_stderr,none": 0.040393149787245626
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.5555555555555556,
        "acc_stderr,none": 0.020102583895887184
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.5818181818181818,
        "acc_stderr,none": 0.047245774057315705
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.5755102040816327,
        "acc_stderr,none": 0.031642094879429414
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.7810945273631841,
        "acc_stderr,none": 0.029239174636647
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.79,
        "acc_stderr,none": 0.04093601807403326
    },
    "mmlu_stem": {
        "acc,none": 0.4551221059308595,
        "acc_stderr,none": 0.008626207299494133,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.25,
        "acc_stderr,none": 0.04351941398892446
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.5111111111111111,
        "acc_stderr,none": 0.04318275491977976
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.5263157894736842,
        "acc_stderr,none": 0.04063302731486671
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.6458333333333334,
        "acc_stderr,none": 0.039994111357535424
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.42,
        "acc_stderr,none": 0.04960449637488584
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.49,
        "acc_stderr,none": 0.05024183937956912
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.32,
        "acc_stderr,none": 0.046882617226215034
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.35294117647058826,
        "acc_stderr,none": 0.04755129616062947
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.69,
        "acc_stderr,none": 0.04648231987117316
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.46808510638297873,
        "acc_stderr,none": 0.03261936918467382
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.5241379310344828,
        "acc_stderr,none": 0.0416180850350153
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.38095238095238093,
        "acc_stderr,none": 0.025010749116137595
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.6741935483870968,
        "acc_stderr,none": 0.026662010578567104
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.39901477832512317,
        "acc_stderr,none": 0.03445487686264715
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.55,
        "acc_stderr,none": 0.049999999999999996
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.3333333333333333,
        "acc_stderr,none": 0.028742040903948492
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.36423841059602646,
        "acc_stderr,none": 0.03929111781242742
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.3472222222222222,
        "acc_stderr,none": 0.032468872436376486
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.4017857142857143,
        "acc_stderr,none": 0.04653333146973646
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.7758433079434167,
        "acc_stderr,none": 0.009729897956410029,
        "acc_norm,none": 0.7747551686615887,
        "acc_norm_stderr,none": 0.009746643471032157
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.45598771750255884,
        "acc_stderr,none": 0.011270152774853937
    }
}