{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4684300341296928,
    "acc_stderr,none": 0.014582236460866978,
    "acc_norm,none": 0.4778156996587031,
    "acc_norm_stderr,none": 0.01459700192707613
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.765993265993266,
    "acc_stderr,none": 0.00868750057802318,
    "acc_norm,none": 0.6540404040404041,
    "acc_norm_stderr,none": 0.009760749624427523
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.7837920489296636,
    "acc_stderr,none": 0.007199938669487885
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.714177407126611,
    "exact_match_stderr,flexible-extract": 0.01244496346061563
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5247958573989245,
    "acc_stderr,none": 0.0049836418543511545,
    "acc_norm,none": 0.6770563632742481,
    "acc_norm_stderr,none": 0.004666457279979416
  },
  "mmlu": {
    "acc,none": 0.6763993733086455,
    "acc_stderr,none": 0.0037221796838170225,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5991498405951116,
    "acc_stderr,none": 0.00674168116158947,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5793650793650794,
    "acc_stderr,none": 0.04415438226743745
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7757575757575758,
    "acc_stderr,none": 0.03256866661681102
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.8382352941176471,
    "acc_stderr,none": 0.025845017986926917
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.8143459915611815,
    "acc_stderr,none": 0.025310495376944856
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.8016528925619835,
    "acc_stderr,none": 0.03640118271990947
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7685185185185185,
    "acc_stderr,none": 0.04077494709252626
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.803680981595092,
    "acc_stderr,none": 0.031207970394709218
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.684971098265896,
    "acc_stderr,none": 0.025009313790069713
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.38212290502793295,
    "acc_stderr,none": 0.01625113971157077
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.7202572347266881,
    "acc_stderr,none": 0.025494259350694905
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7685185185185185,
    "acc_stderr,none": 0.023468429832451156
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4908735332464146,
    "acc_stderr,none": 0.012768108601640012
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.8070175438596491,
    "acc_stderr,none": 0.030267457554898458
  },
  "mmlu_other": {
    "acc,none": 0.7248149340199549,
    "acc_stderr,none": 0.007739387540969357,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.72,
    "acc_stderr,none": 0.04512608598542129
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.7320754716981132,
    "acc_stderr,none": 0.027257260322494845
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.6994219653179191,
    "acc_stderr,none": 0.0349610148119118
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.39,
    "acc_stderr,none": 0.04902071300001975
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6816143497757847,
    "acc_stderr,none": 0.03126580522513713
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.8446601941747572,
    "acc_stderr,none": 0.035865947385739755
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8760683760683761,
    "acc_stderr,none": 0.021586494001281344
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.78,
    "acc_stderr,none": 0.04163331998932261
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.8058748403575989,
    "acc_stderr,none": 0.01414397027665757
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7516339869281046,
    "acc_stderr,none": 0.02473998135511359
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5390070921985816,
    "acc_stderr,none": 0.029736592526424438
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.7720588235294118,
    "acc_stderr,none": 0.025483081468029804
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4879518072289157,
    "acc_stderr,none": 0.038913644958358196
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7933051673708157,
    "acc_stderr,none": 0.007207033298799499,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.5964912280701754,
    "acc_stderr,none": 0.046151869625837054
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8181818181818182,
    "acc_stderr,none": 0.027479603010538797
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8808290155440415,
    "acc_stderr,none": 0.023381935348121437
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.782051282051282,
    "acc_stderr,none": 0.020932445774463185
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8529411764705882,
    "acc_stderr,none": 0.02300545944667396
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8715596330275229,
    "acc_stderr,none": 0.014344977542914313
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7633587786259542,
    "acc_stderr,none": 0.03727673575596914
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.7401960784313726,
    "acc_stderr,none": 0.017740899509177795
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6545454545454545,
    "acc_stderr,none": 0.04554619617541054
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.763265306122449,
    "acc_stderr,none": 0.02721283588407315
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8059701492537313,
    "acc_stderr,none": 0.027962677604768914
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.84,
    "acc_stderr,none": 0.0368452949177471
  },
  "mmlu_stem": {
    "acc,none": 0.6298763082778306,
    "acc_stderr,none": 0.008121162284874918,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.4,
    "acc_stderr,none": 0.04923659639173309
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6518518518518519,
    "acc_stderr,none": 0.041153246103369526
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.7828947368421053,
    "acc_stderr,none": 0.03355045304882924
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.7916666666666666,
    "acc_stderr,none": 0.03396116205845335
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.48,
    "acc_stderr,none": 0.050211673156867795
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.58,
    "acc_stderr,none": 0.049604496374885836
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.37,
    "acc_stderr,none": 0.048523658709390974
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.43137254901960786,
    "acc_stderr,none": 0.04928099597287534
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.8,
    "acc_stderr,none": 0.04020151261036846
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.7574468085106383,
    "acc_stderr,none": 0.028020226271200217
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.6758620689655173,
    "acc_stderr,none": 0.03900432069185554
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.6216931216931217,
    "acc_stderr,none": 0.024976954053155254
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8709677419354839,
    "acc_stderr,none": 0.019070889254792778
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6600985221674877,
    "acc_stderr,none": 0.033327690684107895
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.8,
    "acc_stderr,none": 0.04020151261036845
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.31851851851851853,
    "acc_stderr,none": 0.028406533090608463
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.4966887417218543,
    "acc_stderr,none": 0.04082393379449654
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.6388888888888888,
    "acc_stderr,none": 0.03275773486100999
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5714285714285714,
    "acc_stderr,none": 0.04697113923010212
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7840043525571273,
    "acc_stderr,none": 0.009601236303553553,
    "acc_norm,none": 0.7633297062023939,
    "acc_norm_stderr,none": 0.009916841655042809
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4646878198567042,
    "acc_stderr,none": 0.011285819120808789
  }
}