{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4812286689419795,
    "acc_stderr,none": 0.014601090150633964,
    "acc_norm,none": 0.4948805460750853,
    "acc_norm_stderr,none": 0.014610624890309157
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.7790404040404041,
    "acc_stderr,none": 0.00851343094701945,
    "acc_norm,none": 0.6784511784511784,
    "acc_norm_stderr,none": 0.00958409157564062
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.818348623853211,
    "acc_stderr,none": 0.006743433267054218
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.7134192570128886,
    "exact_match_stderr,flexible-extract": 0.012454841668337706
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5397331208922526,
    "acc_stderr,none": 0.004974001515580939,
    "acc_norm,none": 0.6824337781318462,
    "acc_norm_stderr,none": 0.004645783048004549
  },
  "mmlu": {
    "acc,none": 0.6720552627830794,
    "acc_stderr,none": 0.0037014634056679405,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5859723698193411,
    "acc_stderr,none": 0.006725664147118033,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5873015873015873,
    "acc_stderr,none": 0.04403438954768177
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7757575757575758,
    "acc_stderr,none": 0.03256866661681102
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.8186274509803921,
    "acc_stderr,none": 0.02704462171947408
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.810126582278481,
    "acc_stderr,none": 0.025530100460233497
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.8016528925619835,
    "acc_stderr,none": 0.03640118271990946
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7407407407407407,
    "acc_stderr,none": 0.042365112580946315
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.8343558282208589,
    "acc_stderr,none": 0.029208296231259104
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.7138728323699421,
    "acc_stderr,none": 0.024332146779134138
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.38212290502793295,
    "acc_stderr,none": 0.016251139711570765
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.729903536977492,
    "acc_stderr,none": 0.025218040373410622
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7561728395061729,
    "acc_stderr,none": 0.023891879541959607
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4471968709256845,
    "acc_stderr,none": 0.012698825252435113
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7953216374269005,
    "acc_stderr,none": 0.030944459778533204
  },
  "mmlu_other": {
    "acc,none": 0.728999034438365,
    "acc_stderr,none": 0.007675097646274066,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.76,
    "acc_stderr,none": 0.042923469599092816
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.7584905660377359,
    "acc_stderr,none": 0.026341480371118362
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.6878612716763006,
    "acc_stderr,none": 0.03533133389323657
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.39,
    "acc_stderr,none": 0.04902071300001975
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6995515695067265,
    "acc_stderr,none": 0.030769352008229146
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.8252427184466019,
    "acc_stderr,none": 0.037601780060266196
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8760683760683761,
    "acc_stderr,none": 0.02158649400128135
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.77,
    "acc_stderr,none": 0.04229525846816505
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.8186462324393359,
    "acc_stderr,none": 0.013778693778464064
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7483660130718954,
    "acc_stderr,none": 0.02484801826387519
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5070921985815603,
    "acc_stderr,none": 0.02982449855912901
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.7720588235294118,
    "acc_stderr,none": 0.025483081468029804
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.5060240963855421,
    "acc_stderr,none": 0.03892212195333045
  },
  "mmlu_social_sciences": {
    "acc,none": 0.793955151121222,
    "acc_stderr,none": 0.007146438928087716,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.5263157894736842,
    "acc_stderr,none": 0.046970851366478626
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8232323232323232,
    "acc_stderr,none": 0.027178752639044915
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.9015544041450777,
    "acc_stderr,none": 0.021500249576033463
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7794871794871795,
    "acc_stderr,none": 0.02102067268082791
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8739495798319328,
    "acc_stderr,none": 0.021559623121213917
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8862385321100917,
    "acc_stderr,none": 0.013613614800232819
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7557251908396947,
    "acc_stderr,none": 0.03768335959728744
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.7352941176470589,
    "acc_stderr,none": 0.017848089574913226
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6636363636363637,
    "acc_stderr,none": 0.04525393596302506
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.746938775510204,
    "acc_stderr,none": 0.027833023871399677
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8109452736318408,
    "acc_stderr,none": 0.02768691358801302
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.83,
    "acc_stderr,none": 0.03775251680686371
  },
  "mmlu_stem": {
    "acc,none": 0.6254360926102125,
    "acc_stderr,none": 0.008072981558684795,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.33,
    "acc_stderr,none": 0.047258156262526045
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6666666666666666,
    "acc_stderr,none": 0.04072314811876837
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.8026315789473685,
    "acc_stderr,none": 0.03238981601699397
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8125,
    "acc_stderr,none": 0.032639560491693344
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.45,
    "acc_stderr,none": 0.049999999999999996
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.6,
    "acc_stderr,none": 0.04923659639173309
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.32,
    "acc_stderr,none": 0.04688261722621505
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.38235294117647056,
    "acc_stderr,none": 0.04835503696107224
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.81,
    "acc_stderr,none": 0.03942772444036623
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.7702127659574468,
    "acc_stderr,none": 0.027501752944412424
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.7034482758620689,
    "acc_stderr,none": 0.03806142687309992
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6216931216931217,
    "acc_stderr,none": 0.024976954053155257
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.864516129032258,
    "acc_stderr,none": 0.019469334586486937
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6403940886699507,
    "acc_stderr,none": 0.033764582465095665
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.82,
    "acc_stderr,none": 0.038612291966536955
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.337037037037037,
    "acc_stderr,none": 0.028820884666253252
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.47019867549668876,
    "acc_stderr,none": 0.04075224992216979
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.5972222222222222,
    "acc_stderr,none": 0.033448873829978666
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5714285714285714,
    "acc_stderr,none": 0.04697113923010212
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7829162132752993,
    "acc_stderr,none": 0.009618708415756783,
    "acc_norm,none": 0.76550598476605,
    "acc_norm_stderr,none": 0.009885203143240552
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.47850562947799385,
    "acc_stderr,none": 0.011303611339153365
  }
}