{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.4598976109215017,
        "acc_stderr,none": 0.01456431885692485,
        "acc_norm,none": 0.4718430034129693,
        "acc_norm_stderr,none": 0.014588204105102203
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.7436868686868687,
        "acc_stderr,none": 0.008958775997918356,
        "acc_norm,none": 0.6485690235690236,
        "acc_norm_stderr,none": 0.00979639558281772
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.8195718654434251,
        "acc_stderr,none": 0.006725710812517876
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.4473085670962851,
        "exact_match_stderr,flexible-extract": 0.013695795709089898
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.5443138816968731,
        "acc_stderr,none": 0.00497014570818799,
        "acc_norm,none": 0.7094204341764588,
        "acc_norm_stderr,none": 0.004531019159414104
    },
    "mmlu": {
        "acc,none": 0.5744195983478138,
        "acc_stderr,none": 0.003943695959211974,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.5249734325185972,
        "acc_stderr,none": 0.006807121064866188,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.40476190476190477,
        "acc_stderr,none": 0.043902592653775614
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.7333333333333333,
        "acc_stderr,none": 0.03453131801885415
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.7696078431372549,
        "acc_stderr,none": 0.02955429260569506
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.7721518987341772,
        "acc_stderr,none": 0.027303484599069436
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.71900826446281,
        "acc_stderr,none": 0.04103203830514512
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.6944444444444444,
        "acc_stderr,none": 0.044531975073749834
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.7055214723926381,
        "acc_stderr,none": 0.03581165790474082
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.6445086705202312,
        "acc_stderr,none": 0.025770292082977243
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.2670391061452514,
        "acc_stderr,none": 0.014796502622562557
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.6463022508038585,
        "acc_stderr,none": 0.027155208103200865
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.6419753086419753,
        "acc_stderr,none": 0.026675611926037103
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.43546284224250326,
        "acc_stderr,none": 0.012663412101248328
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.8304093567251462,
        "acc_stderr,none": 0.02878210810540171
    },
    "mmlu_other": {
        "acc,none": 0.6514322497586096,
        "acc_stderr,none": 0.008280559344285664,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.61,
        "acc_stderr,none": 0.04902071300001975
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.6113207547169811,
        "acc_stderr,none": 0.030000485448675986
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.5202312138728323,
        "acc_stderr,none": 0.03809342081273957
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.37,
        "acc_stderr,none": 0.048523658709391
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.6412556053811659,
        "acc_stderr,none": 0.03219079200419994
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.7961165048543689,
        "acc_stderr,none": 0.039891398595317706
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.8034188034188035,
        "acc_stderr,none": 0.026035386098951282
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.65,
        "acc_stderr,none": 0.0479372485441102
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.776500638569604,
        "acc_stderr,none": 0.01489723522945071
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.6339869281045751,
        "acc_stderr,none": 0.027582811415159624
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.45390070921985815,
        "acc_stderr,none": 0.02970045324729146
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.6727941176470589,
        "acc_stderr,none": 0.02850145286039657
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.5,
        "acc_stderr,none": 0.03892494720807614
    },
    "mmlu_social_sciences": {
        "acc,none": 0.6714332141696457,
        "acc_stderr,none": 0.008246354152274942,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.3684210526315789,
        "acc_stderr,none": 0.04537815354939391
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.7121212121212122,
        "acc_stderr,none": 0.03225883512300993
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.8134715025906736,
        "acc_stderr,none": 0.02811209121011746
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.5820512820512821,
        "acc_stderr,none": 0.025007329882461213
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.5798319327731093,
        "acc_stderr,none": 0.03206183783236152
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.7871559633027523,
        "acc_stderr,none": 0.01754937638931369
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.6870229007633588,
        "acc_stderr,none": 0.04066962905677697
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.5947712418300654,
        "acc_stderr,none": 0.019861155193829163
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.6181818181818182,
        "acc_stderr,none": 0.04653429807913508
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.6693877551020408,
        "acc_stderr,none": 0.030116426296540603
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.8258706467661692,
        "acc_stderr,none": 0.026814951200421603
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.8,
        "acc_stderr,none": 0.040201512610368445
    },
    "mmlu_stem": {
        "acc,none": 0.4776403425309229,
        "acc_stderr,none": 0.00859980887894179,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.22,
        "acc_stderr,none": 0.04163331998932268
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.5703703703703704,
        "acc_stderr,none": 0.04276349494376599
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.5921052631578947,
        "acc_stderr,none": 0.039993097127774734
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.6458333333333334,
        "acc_stderr,none": 0.039994111357535424
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.43,
        "acc_stderr,none": 0.04975698519562428
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.51,
        "acc_stderr,none": 0.05024183937956912
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.3,
        "acc_stderr,none": 0.046056618647183814
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.38235294117647056,
        "acc_stderr,none": 0.04835503696107223
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.7,
        "acc_stderr,none": 0.046056618647183814
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.5063829787234042,
        "acc_stderr,none": 0.032683358999363366
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.5379310344827586,
        "acc_stderr,none": 0.04154659671707548
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.3968253968253968,
        "acc_stderr,none": 0.025197101074246494
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.7129032258064516,
        "acc_stderr,none": 0.025736542745594528
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.45320197044334976,
        "acc_stderr,none": 0.03502544650845872
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.59,
        "acc_stderr,none": 0.04943110704237102
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.31851851851851853,
        "acc_stderr,none": 0.02840653309060846
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.3576158940397351,
        "acc_stderr,none": 0.03913453431177258
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.39351851851851855,
        "acc_stderr,none": 0.03331747876370312
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.41964285714285715,
        "acc_stderr,none": 0.04684099321077106
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.779107725788901,
        "acc_stderr,none": 0.00967908804884222,
        "acc_norm,none": 0.7834602829162133,
        "acc_norm_stderr,none": 0.00960998471438462
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.4554759467758444,
        "acc_stderr,none": 0.011269123444510762
    }
}