{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.45563139931740615,
        "acc_stderr,none": 0.014553749939306863,
        "acc_norm,none": 0.45819112627986347,
        "acc_norm_stderr,none": 0.014560220308714705
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.7281144781144782,
        "acc_stderr,none": 0.009129795867310492,
        "acc_norm,none": 0.6401515151515151,
        "acc_norm_stderr,none": 0.00984848484848484
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.7978593272171254,
        "acc_stderr,none": 0.007023968517730726
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.44200151630022744,
        "exact_match_stderr,flexible-extract": 0.013679514492814569
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.5309699263095001,
        "acc_stderr,none": 0.0049802004518516695,
        "acc_norm,none": 0.692989444333798,
        "acc_norm_stderr,none": 0.004603111343213068
    },
    "mmlu": {
        "acc,none": 0.5282723258795043,
        "acc_stderr,none": 0.0040098851072823015,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.4869287991498406,
        "acc_stderr,none": 0.006883968677698824,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.38095238095238093,
        "acc_stderr,none": 0.04343525428949097
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.6606060606060606,
        "acc_stderr,none": 0.03697442205031595
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.7303921568627451,
        "acc_stderr,none": 0.031145570659486782
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.7046413502109705,
        "acc_stderr,none": 0.029696338713422903
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.6942148760330579,
        "acc_stderr,none": 0.04205953933884122
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.6481481481481481,
        "acc_stderr,none": 0.046166311118017146
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.6196319018404908,
        "acc_stderr,none": 0.038142698932618374
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.5606936416184971,
        "acc_stderr,none": 0.026720034380514995
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.23687150837988827,
        "acc_stderr,none": 0.014219570788103986
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.5562700964630225,
        "acc_stderr,none": 0.02821768355665231
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.6018518518518519,
        "acc_stderr,none": 0.02723741509459248
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.4230769230769231,
        "acc_stderr,none": 0.012618204066588389
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.8187134502923976,
        "acc_stderr,none": 0.029547741687640038
    },
    "mmlu_other": {
        "acc,none": 0.6112005149662053,
        "acc_stderr,none": 0.008450520123950632,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.48,
        "acc_stderr,none": 0.050211673156867795
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.5660377358490566,
        "acc_stderr,none": 0.030503292013342596
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.4797687861271676,
        "acc_stderr,none": 0.03809342081273957
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.4,
        "acc_stderr,none": 0.04923659639173309
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.57847533632287,
        "acc_stderr,none": 0.033141902221106564
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.7087378640776699,
        "acc_stderr,none": 0.044986763205729245
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.7564102564102564,
        "acc_stderr,none": 0.028120966503914418
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.63,
        "acc_stderr,none": 0.048523658709391
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.7650063856960408,
        "acc_stderr,none": 0.015162024152278452
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.565359477124183,
        "acc_stderr,none": 0.028384256704883037
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.3900709219858156,
        "acc_stderr,none": 0.029097675599463926
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.6397058823529411,
        "acc_stderr,none": 0.029163128570670736
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.4819277108433735,
        "acc_stderr,none": 0.03889951252827216
    },
    "mmlu_social_sciences": {
        "acc,none": 0.6057848553786155,
        "acc_stderr,none": 0.008586126767468755,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.3684210526315789,
        "acc_stderr,none": 0.04537815354939391
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.6666666666666666,
        "acc_stderr,none": 0.03358618145732523
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.7409326424870466,
        "acc_stderr,none": 0.031618779179354115
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.5102564102564102,
        "acc_stderr,none": 0.025345672221942374
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.47478991596638653,
        "acc_stderr,none": 0.0324371805513741
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.7357798165137615,
        "acc_stderr,none": 0.018904164171510193
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.6641221374045801,
        "acc_stderr,none": 0.04142313771996664
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.5359477124183006,
        "acc_stderr,none": 0.020175488765484043
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.5818181818181818,
        "acc_stderr,none": 0.0472457740573157
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.5224489795918368,
        "acc_stderr,none": 0.031976941187136725
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.736318407960199,
        "acc_stderr,none": 0.03115715086935557
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.79,
        "acc_stderr,none": 0.04093601807403326
    },
    "mmlu_stem": {
        "acc,none": 0.4326038693307961,
        "acc_stderr,none": 0.008593219556209425,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.27,
        "acc_stderr,none": 0.044619604333847394
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.5111111111111111,
        "acc_stderr,none": 0.04318275491977976
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.5131578947368421,
        "acc_stderr,none": 0.04067533136309174
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.6111111111111112,
        "acc_stderr,none": 0.04076663253918567
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.35,
        "acc_stderr,none": 0.047937248544110196
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.48,
        "acc_stderr,none": 0.050211673156867795
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.32,
        "acc_stderr,none": 0.04688261722621504
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.38235294117647056,
        "acc_stderr,none": 0.04835503696107224
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.67,
        "acc_stderr,none": 0.047258156262526066
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.44680851063829785,
        "acc_stderr,none": 0.0325005368436584
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.45517241379310347,
        "acc_stderr,none": 0.04149886942192117
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.37037037037037035,
        "acc_stderr,none": 0.024870815251057093
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.6451612903225806,
        "acc_stderr,none": 0.02721888977330876
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.39901477832512317,
        "acc_stderr,none": 0.03445487686264715
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.51,
        "acc_stderr,none": 0.05024183937956911
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.3074074074074074,
        "acc_stderr,none": 0.028133252578815635
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.2847682119205298,
        "acc_stderr,none": 0.03684881521389023
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.3148148148148148,
        "acc_stderr,none": 0.03167468706828979
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.39285714285714285,
        "acc_stderr,none": 0.04635550135609976
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.7622415669205659,
        "acc_stderr,none": 0.009932525779525485,
        "acc_norm,none": 0.7606093579978237,
        "acc_norm_stderr,none": 0.00995588425029169
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.4564994882292733,
        "acc_stderr,none": 0.011271170113045128
    }
}