{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.41723549488054607,
    "acc_stderr,none": 0.01440982551840308,
    "acc_norm,none": 0.4453924914675768,
    "acc_norm_stderr,none": 0.014523987638344076
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6658249158249159,
    "acc_stderr,none": 0.009679106032919063,
    "acc_norm,none": 0.5648148148148148,
    "acc_norm_stderr,none": 0.010173216430370908
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8311926605504587,
    "acc_stderr,none": 0.006551474456801534
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.755117513267627,
    "exact_match_stderr,flexible-extract": 0.01184481902786366
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5116510655247959,
    "acc_stderr,none": 0.004988426528513013,
    "acc_norm,none": 0.6586337382991436,
    "acc_norm_stderr,none": 0.004731989816563666
  },
  "mmlu": {
    "acc,none": 0.6552485400940037,
    "acc_stderr,none": 0.0038038372432372546,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5645058448459086,
    "acc_stderr,none": 0.006815333695109889,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.49206349206349204,
    "acc_stderr,none": 0.044715725362943486
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7333333333333333,
    "acc_stderr,none": 0.03453131801885417
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7450980392156863,
    "acc_stderr,none": 0.03058759135160425
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7637130801687764,
    "acc_stderr,none": 0.02765215314415928
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.768595041322314,
    "acc_stderr,none": 0.0384985609879409
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7870370370370371,
    "acc_stderr,none": 0.03957835471980981
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.803680981595092,
    "acc_stderr,none": 0.031207970394709215
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6734104046242775,
    "acc_stderr,none": 0.025248264774242832
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.36089385474860336,
    "acc_stderr,none": 0.01606229067111047
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.7009646302250804,
    "acc_stderr,none": 0.02600330111788514
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7561728395061729,
    "acc_stderr,none": 0.023891879541959614
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.44198174706649285,
    "acc_stderr,none": 0.01268397251359882
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.783625730994152,
    "acc_stderr,none": 0.03158149539338733
  },
  "mmlu_other": {
    "acc,none": 0.6997103315094947,
    "acc_stderr,none": 0.007957246941533184,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.69,
    "acc_stderr,none": 0.046482319871173156
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.7169811320754716,
    "acc_stderr,none": 0.027724236492700918
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.6473988439306358,
    "acc_stderr,none": 0.03643037168958548
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.34,
    "acc_stderr,none": 0.04760952285695235
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6547085201793722,
    "acc_stderr,none": 0.03191100192835794
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7475728155339806,
    "acc_stderr,none": 0.04301250399690878
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8931623931623932,
    "acc_stderr,none": 0.020237149008990957
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.75,
    "acc_stderr,none": 0.04351941398892446
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7803320561941252,
    "acc_stderr,none": 0.014805384478371146
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7091503267973857,
    "acc_stderr,none": 0.02600480036395213
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.524822695035461,
    "acc_stderr,none": 0.02979071924382972
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.7463235294117647,
    "acc_stderr,none": 0.026431329870789548
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.5,
    "acc_stderr,none": 0.03892494720807614
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7679558011049724,
    "acc_stderr,none": 0.00751737170389516,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.5789473684210527,
    "acc_stderr,none": 0.046446020912223177
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8181818181818182,
    "acc_stderr,none": 0.0274796030105388
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8497409326424871,
    "acc_stderr,none": 0.025787723180723882
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7384615384615385,
    "acc_stderr,none": 0.022282141204204405
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8235294117647058,
    "acc_stderr,none": 0.024762902678057922
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8477064220183487,
    "acc_stderr,none": 0.015405084393157064
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7099236641221374,
    "acc_stderr,none": 0.03980066246467766
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.696078431372549,
    "acc_stderr,none": 0.018607552131279837
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6818181818181818,
    "acc_stderr,none": 0.04461272175910507
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7673469387755102,
    "acc_stderr,none": 0.02704925791589618
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.7960199004975125,
    "acc_stderr,none": 0.02849317624532607
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.83,
    "acc_stderr,none": 0.03775251680686371
  },
  "mmlu_stem": {
    "acc,none": 0.6368537900412305,
    "acc_stderr,none": 0.008261221684536283,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.49,
    "acc_stderr,none": 0.05024183937956911
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6814814814814815,
    "acc_stderr,none": 0.040247784019771096
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.7631578947368421,
    "acc_stderr,none": 0.034597776068105365
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8055555555555556,
    "acc_stderr,none": 0.03309615177059006
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.49,
    "acc_stderr,none": 0.05024183937956912
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.58,
    "acc_stderr,none": 0.04960449637488583
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.43,
    "acc_stderr,none": 0.049756985195624284
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.5196078431372549,
    "acc_stderr,none": 0.04971358884367406
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.73,
    "acc_stderr,none": 0.04461960433384739
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.7404255319148936,
    "acc_stderr,none": 0.028659179374292323
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.696551724137931,
    "acc_stderr,none": 0.038312260488503336
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.5873015873015873,
    "acc_stderr,none": 0.02535574126305526
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.832258064516129,
    "acc_stderr,none": 0.021255464065371342
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6650246305418719,
    "acc_stderr,none": 0.033208527423483104
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.82,
    "acc_stderr,none": 0.03861229196653694
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.4111111111111111,
    "acc_stderr,none": 0.029999923508706686
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.4966887417218543,
    "acc_stderr,none": 0.04082393379449654
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.6435185185185185,
    "acc_stderr,none": 0.032664783315272714
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5535714285714286,
    "acc_stderr,none": 0.047184714852195886
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.764417845484222,
    "acc_stderr,none": 0.009901067586473904,
    "acc_norm,none": 0.7578890097932536,
    "acc_norm_stderr,none": 0.009994371269104378
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.44421699078812693,
    "acc_stderr,none": 0.011243437088559818
  }
}