{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.49829351535836175,
    "acc_stderr,none": 0.014611305705056987,
    "acc_norm,none": 0.49658703071672355,
    "acc_norm_stderr,none": 0.014611050403244077
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.7192760942760943,
    "acc_stderr,none": 0.00922052617471136,
    "acc_norm,none": 0.6279461279461279,
    "acc_norm_stderr,none": 0.009918187193096475
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8376146788990826,
    "acc_stderr,none": 0.006450421045061457
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.2562547384382108,
    "exact_match_stderr,flexible-extract": 0.012025145867332845
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5093606851224856,
    "acc_stderr,none": 0.004988906901307726,
    "acc_norm,none": 0.6581358295160327,
    "acc_norm_stderr,none": 0.004733649274814532
  },
  "mmlu": {
    "acc,none": 0.588448938897593,
    "acc_stderr,none": 0.003931948814481387,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.542826780021254,
    "acc_stderr,none": 0.006866507160722435,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.3492063492063492,
    "acc_stderr,none": 0.04263906892795131
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7212121212121212,
    "acc_stderr,none": 0.03501438706296781
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7990196078431373,
    "acc_stderr,none": 0.028125972265654373
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.759493670886076,
    "acc_stderr,none": 0.027820781981149675
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7024793388429752,
    "acc_stderr,none": 0.04173349148083499
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7407407407407407,
    "acc_stderr,none": 0.04236511258094631
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.6993865030674846,
    "acc_stderr,none": 0.03602511318806771
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6647398843930635,
    "acc_stderr,none": 0.025416003773165555
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.3229050279329609,
    "acc_stderr,none": 0.015638440380241488
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.662379421221865,
    "acc_stderr,none": 0.026858825879488544
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.6666666666666666,
    "acc_stderr,none": 0.02622964917882116
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4517601043024772,
    "acc_stderr,none": 0.012710662233660247
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7894736842105263,
    "acc_stderr,none": 0.031267817146631786
  },
  "mmlu_other": {
    "acc,none": 0.6794335371741229,
    "acc_stderr,none": 0.008121069560844787,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.57,
    "acc_stderr,none": 0.04975698519562427
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.6981132075471698,
    "acc_stderr,none": 0.028254200344438662
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.5722543352601156,
    "acc_stderr,none": 0.03772446857518028
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.4,
    "acc_stderr,none": 0.049236596391733084
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6816143497757847,
    "acc_stderr,none": 0.03126580522513713
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.8058252427184466,
    "acc_stderr,none": 0.03916667762822585
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8247863247863247,
    "acc_stderr,none": 0.024904439098918246
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.67,
    "acc_stderr,none": 0.047258156262526066
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7943805874840357,
    "acc_stderr,none": 0.01445250045678583
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.673202614379085,
    "acc_stderr,none": 0.026857294663281413
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.48226950354609927,
    "acc_stderr,none": 0.02980873964223777
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.6727941176470589,
    "acc_stderr,none": 0.028501452860396563
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.5301204819277109,
    "acc_stderr,none": 0.03885425420866766
  },
  "mmlu_social_sciences": {
    "acc,none": 0.6753331166720832,
    "acc_stderr,none": 0.008200641712723663,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.41228070175438597,
    "acc_stderr,none": 0.04630653203366596
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.7222222222222222,
    "acc_stderr,none": 0.03191178226713547
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8290155440414507,
    "acc_stderr,none": 0.027171213683164528
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.5564102564102564,
    "acc_stderr,none": 0.025189149894764205
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.5630252100840336,
    "acc_stderr,none": 0.03221943636566197
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.7834862385321101,
    "acc_stderr,none": 0.017658710594443138
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.732824427480916,
    "acc_stderr,none": 0.038808483010823944
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.5996732026143791,
    "acc_stderr,none": 0.01982184368827177
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6090909090909091,
    "acc_stderr,none": 0.04673752333670238
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.6816326530612244,
    "acc_stderr,none": 0.029822533793982055
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8407960199004975,
    "acc_stderr,none": 0.02587064676616914
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.84,
    "acc_stderr,none": 0.03684529491774708
  },
  "mmlu_stem": {
    "acc,none": 0.48208055819854106,
    "acc_stderr,none": 0.008577085186858567,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.28,
    "acc_stderr,none": 0.04512608598542128
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.5259259259259259,
    "acc_stderr,none": 0.04313531696750574
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.5986842105263158,
    "acc_stderr,none": 0.039889037033362836
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.6875,
    "acc_stderr,none": 0.038760854559127644
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.38,
    "acc_stderr,none": 0.04878317312145633
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.54,
    "acc_stderr,none": 0.05009082659620332
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.34,
    "acc_stderr,none": 0.04760952285695235
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.37254901960784315,
    "acc_stderr,none": 0.04810840148082635
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.68,
    "acc_stderr,none": 0.04688261722621504
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.5234042553191489,
    "acc_stderr,none": 0.032650194750335815
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.5379310344827586,
    "acc_stderr,none": 0.04154659671707548
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.38095238095238093,
    "acc_stderr,none": 0.025010749116137595
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.7354838709677419,
    "acc_stderr,none": 0.02509189237885928
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.4433497536945813,
    "acc_stderr,none": 0.034953345821629324
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.6,
    "acc_stderr,none": 0.04923659639173309
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.3,
    "acc_stderr,none": 0.027940457136228405
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.33112582781456956,
    "acc_stderr,none": 0.038425817186598696
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.4351851851851852,
    "acc_stderr,none": 0.033812000056435254
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.45535714285714285,
    "acc_stderr,none": 0.04726835553719099
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7415669205658324,
    "acc_stderr,none": 0.010213971636773332,
    "acc_norm,none": 0.7295973884657236,
    "acc_norm_stderr,none": 0.010363167031620785
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4524053224155578,
    "acc_stderr,none": 0.011262695440459566
  }
}