{
    "arc_challenge": {
        "alias": "arc_challenge",
        "acc,none": 0.49146757679180886,
        "acc_stderr,none": 0.014609263165632182,
        "acc_norm,none": 0.5059726962457338,
        "acc_norm_stderr,none": 0.014610348300255795
    },
    "arc_easy": {
        "alias": "arc_easy",
        "acc,none": 0.7916666666666666,
        "acc_stderr,none": 0.008333333333333194,
        "acc_norm,none": 0.7314814814814815,
        "acc_norm_stderr,none": 0.00909404255499485
    },
    "boolq": {
        "alias": "boolq",
        "acc,none": 0.8275229357798165,
        "acc_stderr,none": 0.0066076683238905365
    },
    "gsm8k": {
        "alias": "gsm8k",
        "exact_match,strict-match": 0.0,
        "exact_match_stderr,strict-match": 0.0,
        "exact_match,flexible-extract": 0.4632297194844579,
        "exact_match_stderr,flexible-extract": 0.013735191956468652
    },
    "hellaswag": {
        "alias": "hellaswag",
        "acc,none": 0.5829516032662816,
        "acc_stderr,none": 0.004920633227844461,
        "acc_norm,none": 0.7463652658832902,
        "acc_norm_stderr,none": 0.004342017709967977
    },
    "mmlu": {
        "acc,none": 0.5130323315766985,
        "acc_stderr,none": 0.003978347805932047,
        "alias": "mmlu"
    },
    "mmlu_humanities": {
        "acc,none": 0.4971307120085016,
        "acc_stderr,none": 0.006904467940091514,
        "alias": " - humanities"
    },
    "mmlu_formal_logic": {
        "alias": "  - formal_logic",
        "acc,none": 0.3412698412698413,
        "acc_stderr,none": 0.04240799327574925
    },
    "mmlu_high_school_european_history": {
        "alias": "  - high_school_european_history",
        "acc,none": 0.703030303030303,
        "acc_stderr,none": 0.03567969772268049
    },
    "mmlu_high_school_us_history": {
        "alias": "  - high_school_us_history",
        "acc,none": 0.7549019607843137,
        "acc_stderr,none": 0.030190282453501943
    },
    "mmlu_high_school_world_history": {
        "alias": "  - high_school_world_history",
        "acc,none": 0.729957805907173,
        "acc_stderr,none": 0.028900721906293426
    },
    "mmlu_international_law": {
        "alias": "  - international_law",
        "acc,none": 0.6942148760330579,
        "acc_stderr,none": 0.04205953933884123
    },
    "mmlu_jurisprudence": {
        "alias": "  - jurisprudence",
        "acc,none": 0.6388888888888888,
        "acc_stderr,none": 0.046434546089062764
    },
    "mmlu_logical_fallacies": {
        "alias": "  - logical_fallacies",
        "acc,none": 0.6441717791411042,
        "acc_stderr,none": 0.03761521380046734
    },
    "mmlu_moral_disputes": {
        "alias": "  - moral_disputes",
        "acc,none": 0.5433526011560693,
        "acc_stderr,none": 0.026817718130348927
    },
    "mmlu_moral_scenarios": {
        "alias": "  - moral_scenarios",
        "acc,none": 0.2849162011173184,
        "acc_stderr,none": 0.015096222302469795
    },
    "mmlu_philosophy": {
        "alias": "  - philosophy",
        "acc,none": 0.6045016077170418,
        "acc_stderr,none": 0.027770918531427834
    },
    "mmlu_prehistory": {
        "alias": "  - prehistory",
        "acc,none": 0.6327160493827161,
        "acc_stderr,none": 0.026822801759507894
    },
    "mmlu_professional_law": {
        "alias": "  - professional_law",
        "acc,none": 0.4074315514993481,
        "acc_stderr,none": 0.012549473714212226
    },
    "mmlu_world_religions": {
        "alias": "  - world_religions",
        "acc,none": 0.783625730994152,
        "acc_stderr,none": 0.03158149539338733
    },
    "mmlu_other": {
        "acc,none": 0.6137753459929192,
        "acc_stderr,none": 0.008431101057154186,
        "alias": " - other"
    },
    "mmlu_business_ethics": {
        "alias": "  - business_ethics",
        "acc,none": 0.59,
        "acc_stderr,none": 0.04943110704237102
    },
    "mmlu_clinical_knowledge": {
        "alias": "  - clinical_knowledge",
        "acc,none": 0.5962264150943396,
        "acc_stderr,none": 0.03019761160019795
    },
    "mmlu_college_medicine": {
        "alias": "  - college_medicine",
        "acc,none": 0.4624277456647399,
        "acc_stderr,none": 0.0380168510452446
    },
    "mmlu_global_facts": {
        "alias": "  - global_facts",
        "acc,none": 0.31,
        "acc_stderr,none": 0.04648231987117316
    },
    "mmlu_human_aging": {
        "alias": "  - human_aging",
        "acc,none": 0.5739910313901345,
        "acc_stderr,none": 0.033188332862172806
    },
    "mmlu_management": {
        "alias": "  - management",
        "acc,none": 0.7475728155339806,
        "acc_stderr,none": 0.04301250399690878
    },
    "mmlu_marketing": {
        "alias": "  - marketing",
        "acc,none": 0.7692307692307693,
        "acc_stderr,none": 0.027601921381417566
    },
    "mmlu_medical_genetics": {
        "alias": "  - medical_genetics",
        "acc,none": 0.62,
        "acc_stderr,none": 0.04878317312145632
    },
    "mmlu_miscellaneous": {
        "alias": "  - miscellaneous",
        "acc,none": 0.7432950191570882,
        "acc_stderr,none": 0.01562048026306452
    },
    "mmlu_nutrition": {
        "alias": "  - nutrition",
        "acc,none": 0.5784313725490197,
        "acc_stderr,none": 0.02827549015679145
    },
    "mmlu_professional_accounting": {
        "alias": "  - professional_accounting",
        "acc,none": 0.3723404255319149,
        "acc_stderr,none": 0.02883892147125146
    },
    "mmlu_professional_medicine": {
        "alias": "  - professional_medicine",
        "acc,none": 0.6654411764705882,
        "acc_stderr,none": 0.02866199620233531
    },
    "mmlu_virology": {
        "alias": "  - virology",
        "acc,none": 0.5240963855421686,
        "acc_stderr,none": 0.03887971849597264
    },
    "mmlu_social_sciences": {
        "acc,none": 0.585635359116022,
        "acc_stderr,none": 0.008580384541663792,
        "alias": " - social sciences"
    },
    "mmlu_econometrics": {
        "alias": "  - econometrics",
        "acc,none": 0.30701754385964913,
        "acc_stderr,none": 0.0433913832257986
    },
    "mmlu_high_school_geography": {
        "alias": "  - high_school_geography",
        "acc,none": 0.5858585858585859,
        "acc_stderr,none": 0.03509438348879629
    },
    "mmlu_high_school_government_and_politics": {
        "alias": "  - high_school_government_and_politics",
        "acc,none": 0.7305699481865285,
        "acc_stderr,none": 0.03201867122877794
    },
    "mmlu_high_school_macroeconomics": {
        "alias": "  - high_school_macroeconomics",
        "acc,none": 0.4307692307692308,
        "acc_stderr,none": 0.025106820660539753
    },
    "mmlu_high_school_microeconomics": {
        "alias": "  - high_school_microeconomics",
        "acc,none": 0.3697478991596639,
        "acc_stderr,none": 0.031357095996135904
    },
    "mmlu_high_school_psychology": {
        "alias": "  - high_school_psychology",
        "acc,none": 0.7302752293577982,
        "acc_stderr,none": 0.01902848671111544
    },
    "mmlu_human_sexuality": {
        "alias": "  - human_sexuality",
        "acc,none": 0.6717557251908397,
        "acc_stderr,none": 0.041184385658062976
    },
    "mmlu_professional_psychology": {
        "alias": "  - professional_psychology",
        "acc,none": 0.5555555555555556,
        "acc_stderr,none": 0.020102583895887184
    },
    "mmlu_public_relations": {
        "alias": "  - public_relations",
        "acc,none": 0.5909090909090909,
        "acc_stderr,none": 0.047093069786618966
    },
    "mmlu_security_studies": {
        "alias": "  - security_studies",
        "acc,none": 0.5877551020408164,
        "acc_stderr,none": 0.03151236044674269
    },
    "mmlu_sociology": {
        "alias": "  - sociology",
        "acc,none": 0.7263681592039801,
        "acc_stderr,none": 0.031524391865554016
    },
    "mmlu_us_foreign_policy": {
        "alias": "  - us_foreign_policy",
        "acc,none": 0.73,
        "acc_stderr,none": 0.044619604333847394
    },
    "mmlu_stem": {
        "acc,none": 0.3666349508404694,
        "acc_stderr,none": 0.008283619334816894,
        "alias": " - stem"
    },
    "mmlu_abstract_algebra": {
        "alias": "  - abstract_algebra",
        "acc,none": 0.22,
        "acc_stderr,none": 0.04163331998932268
    },
    "mmlu_anatomy": {
        "alias": "  - anatomy",
        "acc,none": 0.4666666666666667,
        "acc_stderr,none": 0.043097329010363554
    },
    "mmlu_astronomy": {
        "alias": "  - astronomy",
        "acc,none": 0.4934210526315789,
        "acc_stderr,none": 0.04068590050224971
    },
    "mmlu_college_biology": {
        "alias": "  - college_biology",
        "acc,none": 0.5694444444444444,
        "acc_stderr,none": 0.04140685639111503
    },
    "mmlu_college_chemistry": {
        "alias": "  - college_chemistry",
        "acc,none": 0.28,
        "acc_stderr,none": 0.04512608598542127
    },
    "mmlu_college_computer_science": {
        "alias": "  - college_computer_science",
        "acc,none": 0.33,
        "acc_stderr,none": 0.04725815626252606
    },
    "mmlu_college_mathematics": {
        "alias": "  - college_mathematics",
        "acc,none": 0.23,
        "acc_stderr,none": 0.04229525846816507
    },
    "mmlu_college_physics": {
        "alias": "  - college_physics",
        "acc,none": 0.27450980392156865,
        "acc_stderr,none": 0.04440521906179325
    },
    "mmlu_computer_security": {
        "alias": "  - computer_security",
        "acc,none": 0.63,
        "acc_stderr,none": 0.048523658709391
    },
    "mmlu_conceptual_physics": {
        "alias": "  - conceptual_physics",
        "acc,none": 0.4127659574468085,
        "acc_stderr,none": 0.03218471141400351
    },
    "mmlu_electrical_engineering": {
        "alias": "  - electrical_engineering",
        "acc,none": 0.4068965517241379,
        "acc_stderr,none": 0.04093793981266237
    },
    "mmlu_elementary_mathematics": {
        "alias": "  - elementary_mathematics",
        "acc,none": 0.2751322751322751,
        "acc_stderr,none": 0.023000086859068642
    },
    "mmlu_high_school_biology": {
        "alias": "  - high_school_biology",
        "acc,none": 0.5774193548387097,
        "acc_stderr,none": 0.02810096472427264
    },
    "mmlu_high_school_chemistry": {
        "alias": "  - high_school_chemistry",
        "acc,none": 0.3054187192118227,
        "acc_stderr,none": 0.03240661565868408
    },
    "mmlu_high_school_computer_science": {
        "alias": "  - high_school_computer_science",
        "acc,none": 0.46,
        "acc_stderr,none": 0.05009082659620332
    },
    "mmlu_high_school_mathematics": {
        "alias": "  - high_school_mathematics",
        "acc,none": 0.21851851851851853,
        "acc_stderr,none": 0.025195752251823793
    },
    "mmlu_high_school_physics": {
        "alias": "  - high_school_physics",
        "acc,none": 0.2185430463576159,
        "acc_stderr,none": 0.03374235550425694
    },
    "mmlu_high_school_statistics": {
        "alias": "  - high_school_statistics",
        "acc,none": 0.25,
        "acc_stderr,none": 0.029531221160930918
    },
    "mmlu_machine_learning": {
        "alias": "  - machine_learning",
        "acc,none": 0.4107142857142857,
        "acc_stderr,none": 0.04669510663875191
    },
    "piqa": {
        "alias": "piqa",
        "acc,none": 0.8052230685527747,
        "acc_stderr,none": 0.009240006693317721,
        "acc_norm,none": 0.8133841131664853,
        "acc_norm_stderr,none": 0.009090077190470834
    },
    "social_iqa": {
        "alias": "social_iqa",
        "acc,none": 0.46212896622313204,
        "acc_stderr,none": 0.011281570251648944
    }
}