{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.41552901023890787,
    "acc_stderr,none": 0.014401366641216391,
    "acc_norm,none": 0.41552901023890787,
    "acc_norm_stderr,none": 0.014401366641216398
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6115319865319865,
    "acc_stderr,none": 0.010001276044485223,
    "acc_norm,none": 0.4903198653198653,
    "acc_norm_stderr,none": 0.010257860554461125
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.7562691131498471,
    "acc_stderr,none": 0.007509067459407977
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.5822592873388931,
    "exact_match_stderr,flexible-extract": 0.013584820638504838
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5304720175263892,
    "acc_stderr,none": 0.004980506329407593,
    "acc_norm,none": 0.6496713802031467,
    "acc_norm_stderr,none": 0.004760978203023339
  },
  "mmlu": {
    "acc,none": 0.6419313488107107,
    "acc_stderr,none": 0.00383845723330411,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.5721572794899044,
    "acc_stderr,none": 0.006858853843464798,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5158730158730159,
    "acc_stderr,none": 0.044698818540726076
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7272727272727273,
    "acc_stderr,none": 0.0347769116216366
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.7696078431372549,
    "acc_stderr,none": 0.029554292605695066
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7890295358649789,
    "acc_stderr,none": 0.026558372502661923
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7768595041322314,
    "acc_stderr,none": 0.03800754475228733
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7037037037037037,
    "acc_stderr,none": 0.04414343666854934
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.7607361963190185,
    "acc_stderr,none": 0.03351953879521269
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.7023121387283237,
    "acc_stderr,none": 0.024617055388677006
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.376536312849162,
    "acc_stderr,none": 0.016204672385106603
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.639871382636656,
    "acc_stderr,none": 0.027264297599804012
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7561728395061729,
    "acc_stderr,none": 0.02389187954195961
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.46479791395045633,
    "acc_stderr,none": 0.012738547371303963
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7719298245614035,
    "acc_stderr,none": 0.032180937956023566
  },
  "mmlu_other": {
    "acc,none": 0.6910202767943354,
    "acc_stderr,none": 0.008046620010482567,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.7,
    "acc_stderr,none": 0.04605661864718381
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.6830188679245283,
    "acc_stderr,none": 0.02863723563980089
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.6473988439306358,
    "acc_stderr,none": 0.03643037168958548
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.41,
    "acc_stderr,none": 0.04943110704237102
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6860986547085202,
    "acc_stderr,none": 0.031146796482972465
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7281553398058253,
    "acc_stderr,none": 0.044052680241409216
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.9017094017094017,
    "acc_stderr,none": 0.019503444900757567
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.74,
    "acc_stderr,none": 0.044084400227680794
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7713920817369093,
    "acc_stderr,none": 0.015016884698539873
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7058823529411765,
    "acc_stderr,none": 0.02609016250427904
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5070921985815603,
    "acc_stderr,none": 0.02982449855912901
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.6875,
    "acc_stderr,none": 0.02815637344037142
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4819277108433735,
    "acc_stderr,none": 0.03889951252827216
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7530061748456288,
    "acc_stderr,none": 0.0076660822877224095,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.543859649122807,
    "acc_stderr,none": 0.046854730419077895
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8080808080808081,
    "acc_stderr,none": 0.028057791672989024
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8134715025906736,
    "acc_stderr,none": 0.028112091210117467
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7128205128205128,
    "acc_stderr,none": 0.02293992541853062
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8403361344537815,
    "acc_stderr,none": 0.023793353997528802
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8293577981651377,
    "acc_stderr,none": 0.016129271025099857
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7175572519083969,
    "acc_stderr,none": 0.03948406125768361
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.6879084967320261,
    "acc_stderr,none": 0.018745011201277657
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6545454545454545,
    "acc_stderr,none": 0.04554619617541054
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7020408163265306,
    "acc_stderr,none": 0.02927956741106567
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.8258706467661692,
    "acc_stderr,none": 0.026814951200421603
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.83,
    "acc_stderr,none": 0.03775251680686371
  },
  "mmlu_stem": {
    "acc,none": 0.589280050745322,
    "acc_stderr,none": 0.008284311253037288,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.36,
    "acc_stderr,none": 0.048241815132442176
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6666666666666666,
    "acc_stderr,none": 0.04072314811876837
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.7763157894736842,
    "acc_stderr,none": 0.03391160934343603
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8125,
    "acc_stderr,none": 0.032639560491693344
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.42,
    "acc_stderr,none": 0.049604496374885836
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.55,
    "acc_stderr,none": 0.049999999999999996
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.37,
    "acc_stderr,none": 0.04852365870939099
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.39215686274509803,
    "acc_stderr,none": 0.048580835742663434
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.75,
    "acc_stderr,none": 0.04351941398892446
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.6893617021276596,
    "acc_stderr,none": 0.03025123757921317
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.6344827586206897,
    "acc_stderr,none": 0.040131241954243856
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.5317460317460317,
    "acc_stderr,none": 0.025699352832131796
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.832258064516129,
    "acc_stderr,none": 0.02125546406537133
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6305418719211823,
    "acc_stderr,none": 0.03395970381998575
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.78,
    "acc_stderr,none": 0.04163331998932261
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.3,
    "acc_stderr,none": 0.02794045713622842
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.46357615894039733,
    "acc_stderr,none": 0.04071636065944216
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.5277777777777778,
    "acc_stderr,none": 0.0340470532865388
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5714285714285714,
    "acc_stderr,none": 0.04697113923010212
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7671381936887922,
    "acc_stderr,none": 0.009861236071080746,
    "acc_norm,none": 0.7595212187159956,
    "acc_norm_stderr,none": 0.009971345364651078
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.4263050153531218,
    "acc_stderr,none": 0.011190503463264747
  }
}