{
  "arc_challenge": {
    "alias": "arc_challenge",
    "acc,none": 0.4300341296928328,
    "acc_stderr,none": 0.014467631559138005,
    "acc_norm,none": 0.4462457337883959,
    "acc_norm_stderr,none": 0.014526705548539976
  },
  "arc_easy": {
    "alias": "arc_easy",
    "acc,none": 0.6864478114478114,
    "acc_stderr,none": 0.009519779157242262,
    "acc_norm,none": 0.5753367003367004,
    "acc_norm_stderr,none": 0.010142653687480421
  },
  "boolq": {
    "alias": "boolq",
    "acc,none": 0.8119266055045872,
    "acc_stderr,none": 0.006834623690939714
  },
  "gsm8k": {
    "alias": "gsm8k",
    "exact_match,strict-match": 0.0,
    "exact_match_stderr,strict-match": 0.0,
    "exact_match,flexible-extract": 0.7073540561031084,
    "exact_match_stderr,flexible-extract": 0.012532334368242897
  },
  "hellaswag": {
    "alias": "hellaswag",
    "acc,none": 0.5068711412069309,
    "acc_stderr,none": 0.00498931022827611,
    "acc_norm,none": 0.644991037641904,
    "acc_norm_stderr,none": 0.004775380866948024
  },
  "mmlu": {
    "acc,none": 0.660518444666002,
    "acc_stderr,none": 0.0037906952572867346,
    "alias": "mmlu"
  },
  "mmlu_humanities": {
    "acc,none": 0.571519659936238,
    "acc_stderr,none": 0.006812745837976235,
    "alias": " - humanities"
  },
  "mmlu_formal_logic": {
    "alias": "  - formal_logic",
    "acc,none": 0.5317460317460317,
    "acc_stderr,none": 0.04463112720677171
  },
  "mmlu_high_school_european_history": {
    "alias": "  - high_school_european_history",
    "acc,none": 0.7333333333333333,
    "acc_stderr,none": 0.03453131801885417
  },
  "mmlu_high_school_us_history": {
    "alias": "  - high_school_us_history",
    "acc,none": 0.75,
    "acc_stderr,none": 0.03039153369274154
  },
  "mmlu_high_school_world_history": {
    "alias": "  - high_school_world_history",
    "acc,none": 0.7763713080168776,
    "acc_stderr,none": 0.027123298205229966
  },
  "mmlu_international_law": {
    "alias": "  - international_law",
    "acc,none": 0.7603305785123967,
    "acc_stderr,none": 0.03896878985070416
  },
  "mmlu_jurisprudence": {
    "alias": "  - jurisprudence",
    "acc,none": 0.7870370370370371,
    "acc_stderr,none": 0.03957835471980981
  },
  "mmlu_logical_fallacies": {
    "alias": "  - logical_fallacies",
    "acc,none": 0.803680981595092,
    "acc_stderr,none": 0.031207970394709218
  },
  "mmlu_moral_disputes": {
    "alias": "  - moral_disputes",
    "acc,none": 0.6791907514450867,
    "acc_stderr,none": 0.0251310002336479
  },
  "mmlu_moral_scenarios": {
    "alias": "  - moral_scenarios",
    "acc,none": 0.3675977653631285,
    "acc_stderr,none": 0.01612554382355295
  },
  "mmlu_philosophy": {
    "alias": "  - philosophy",
    "acc,none": 0.7041800643086816,
    "acc_stderr,none": 0.025922371788818784
  },
  "mmlu_prehistory": {
    "alias": "  - prehistory",
    "acc,none": 0.7592592592592593,
    "acc_stderr,none": 0.023788583551658537
  },
  "mmlu_professional_law": {
    "alias": "  - professional_law",
    "acc,none": 0.4511082138200782,
    "acc_stderr,none": 0.012709037347346233
  },
  "mmlu_world_religions": {
    "alias": "  - world_religions",
    "acc,none": 0.7894736842105263,
    "acc_stderr,none": 0.031267817146631786
  },
  "mmlu_other": {
    "acc,none": 0.7006758931445124,
    "acc_stderr,none": 0.007940395589177866,
    "alias": " - other"
  },
  "mmlu_business_ethics": {
    "alias": "  - business_ethics",
    "acc,none": 0.72,
    "acc_stderr,none": 0.04512608598542129
  },
  "mmlu_clinical_knowledge": {
    "alias": "  - clinical_knowledge",
    "acc,none": 0.7132075471698113,
    "acc_stderr,none": 0.027834912527544064
  },
  "mmlu_college_medicine": {
    "alias": "  - college_medicine",
    "acc,none": 0.653179190751445,
    "acc_stderr,none": 0.036291466701596636
  },
  "mmlu_global_facts": {
    "alias": "  - global_facts",
    "acc,none": 0.34,
    "acc_stderr,none": 0.04760952285695235
  },
  "mmlu_human_aging": {
    "alias": "  - human_aging",
    "acc,none": 0.6591928251121076,
    "acc_stderr,none": 0.03181149747055359
  },
  "mmlu_management": {
    "alias": "  - management",
    "acc,none": 0.7572815533980582,
    "acc_stderr,none": 0.04245022486384495
  },
  "mmlu_marketing": {
    "alias": "  - marketing",
    "acc,none": 0.8888888888888888,
    "acc_stderr,none": 0.020588491316092358
  },
  "mmlu_medical_genetics": {
    "alias": "  - medical_genetics",
    "acc,none": 0.73,
    "acc_stderr,none": 0.044619604333847415
  },
  "mmlu_miscellaneous": {
    "alias": "  - miscellaneous",
    "acc,none": 0.7867177522349936,
    "acc_stderr,none": 0.014648172749593501
  },
  "mmlu_nutrition": {
    "alias": "  - nutrition",
    "acc,none": 0.7058823529411765,
    "acc_stderr,none": 0.02609016250427904
  },
  "mmlu_professional_accounting": {
    "alias": "  - professional_accounting",
    "acc,none": 0.5212765957446809,
    "acc_stderr,none": 0.029800481645628693
  },
  "mmlu_professional_medicine": {
    "alias": "  - professional_medicine",
    "acc,none": 0.7463235294117647,
    "acc_stderr,none": 0.026431329870789548
  },
  "mmlu_virology": {
    "alias": "  - virology",
    "acc,none": 0.4879518072289157,
    "acc_stderr,none": 0.0389136449583582
  },
  "mmlu_social_sciences": {
    "acc,none": 0.7741306467338317,
    "acc_stderr,none": 0.007445226955397513,
    "alias": " - social sciences"
  },
  "mmlu_econometrics": {
    "alias": "  - econometrics",
    "acc,none": 0.5964912280701754,
    "acc_stderr,none": 0.046151869625837054
  },
  "mmlu_high_school_geography": {
    "alias": "  - high_school_geography",
    "acc,none": 0.8232323232323232,
    "acc_stderr,none": 0.027178752639044915
  },
  "mmlu_high_school_government_and_politics": {
    "alias": "  - high_school_government_and_politics",
    "acc,none": 0.8497409326424871,
    "acc_stderr,none": 0.025787723180723882
  },
  "mmlu_high_school_macroeconomics": {
    "alias": "  - high_school_macroeconomics",
    "acc,none": 0.7384615384615385,
    "acc_stderr,none": 0.022282141204204405
  },
  "mmlu_high_school_microeconomics": {
    "alias": "  - high_school_microeconomics",
    "acc,none": 0.8277310924369747,
    "acc_stderr,none": 0.02452866497130543
  },
  "mmlu_high_school_psychology": {
    "alias": "  - high_school_psychology",
    "acc,none": 0.8605504587155963,
    "acc_stderr,none": 0.014852421490033059
  },
  "mmlu_human_sexuality": {
    "alias": "  - human_sexuality",
    "acc,none": 0.7022900763358778,
    "acc_stderr,none": 0.04010358942462203
  },
  "mmlu_professional_psychology": {
    "alias": "  - professional_psychology",
    "acc,none": 0.704248366013072,
    "acc_stderr,none": 0.018463154132632817
  },
  "mmlu_public_relations": {
    "alias": "  - public_relations",
    "acc,none": 0.6909090909090909,
    "acc_stderr,none": 0.044262946482000985
  },
  "mmlu_security_studies": {
    "alias": "  - security_studies",
    "acc,none": 0.7836734693877551,
    "acc_stderr,none": 0.026358916334904038
  },
  "mmlu_sociology": {
    "alias": "  - sociology",
    "acc,none": 0.7910447761194029,
    "acc_stderr,none": 0.028748298931728658
  },
  "mmlu_us_foreign_policy": {
    "alias": "  - us_foreign_policy",
    "acc,none": 0.83,
    "acc_stderr,none": 0.03775251680686371
  },
  "mmlu_stem": {
    "acc,none": 0.6428797970187123,
    "acc_stderr,none": 0.008223986866235022,
    "alias": " - stem"
  },
  "mmlu_abstract_algebra": {
    "alias": "  - abstract_algebra",
    "acc,none": 0.51,
    "acc_stderr,none": 0.05024183937956913
  },
  "mmlu_anatomy": {
    "alias": "  - anatomy",
    "acc,none": 0.6962962962962963,
    "acc_stderr,none": 0.039725528847851375
  },
  "mmlu_astronomy": {
    "alias": "  - astronomy",
    "acc,none": 0.756578947368421,
    "acc_stderr,none": 0.034923496688842384
  },
  "mmlu_college_biology": {
    "alias": "  - college_biology",
    "acc,none": 0.8125,
    "acc_stderr,none": 0.032639560491693344
  },
  "mmlu_college_chemistry": {
    "alias": "  - college_chemistry",
    "acc,none": 0.5,
    "acc_stderr,none": 0.050251890762960605
  },
  "mmlu_college_computer_science": {
    "alias": "  - college_computer_science",
    "acc,none": 0.57,
    "acc_stderr,none": 0.049756985195624284
  },
  "mmlu_college_mathematics": {
    "alias": "  - college_mathematics",
    "acc,none": 0.44,
    "acc_stderr,none": 0.04988876515698589
  },
  "mmlu_college_physics": {
    "alias": "  - college_physics",
    "acc,none": 0.5196078431372549,
    "acc_stderr,none": 0.04971358884367406
  },
  "mmlu_computer_security": {
    "alias": "  - computer_security",
    "acc,none": 0.74,
    "acc_stderr,none": 0.04408440022768078
  },
  "mmlu_conceptual_physics": {
    "alias": "  - conceptual_physics",
    "acc,none": 0.7404255319148936,
    "acc_stderr,none": 0.02865917937429232
  },
  "mmlu_electrical_engineering": {
    "alias": "  - electrical_engineering",
    "acc,none": 0.7034482758620689,
    "acc_stderr,none": 0.03806142687309992
  },
  "mmlu_elementary_mathematics": {
    "alias": "  - elementary_mathematics",
    "acc,none": 0.5925925925925926,
    "acc_stderr,none": 0.02530590624159063
  },
  "mmlu_high_school_biology": {
    "alias": "  - high_school_biology",
    "acc,none": 0.8483870967741935,
    "acc_stderr,none": 0.02040261665441674
  },
  "mmlu_high_school_chemistry": {
    "alias": "  - high_school_chemistry",
    "acc,none": 0.6748768472906403,
    "acc_stderr,none": 0.03295797566311271
  },
  "mmlu_high_school_computer_science": {
    "alias": "  - high_school_computer_science",
    "acc,none": 0.82,
    "acc_stderr,none": 0.03861229196653694
  },
  "mmlu_high_school_mathematics": {
    "alias": "  - high_school_mathematics",
    "acc,none": 0.4074074074074074,
    "acc_stderr,none": 0.02995824925008211
  },
  "mmlu_high_school_physics": {
    "alias": "  - high_school_physics",
    "acc,none": 0.5165562913907285,
    "acc_stderr,none": 0.0408024418562897
  },
  "mmlu_high_school_statistics": {
    "alias": "  - high_school_statistics",
    "acc,none": 0.6388888888888888,
    "acc_stderr,none": 0.03275773486100999
  },
  "mmlu_machine_learning": {
    "alias": "  - machine_learning",
    "acc,none": 0.5714285714285714,
    "acc_stderr,none": 0.04697113923010213
  },
  "piqa": {
    "alias": "piqa",
    "acc,none": 0.7622415669205659,
    "acc_stderr,none": 0.009932525779525487,
    "acc_norm,none": 0.7589771490750816,
    "acc_norm_stderr,none": 0.009979042717267312
  },
  "social_iqa": {
    "alias": "social_iqa",
    "acc,none": 0.45496417604913,
    "acc_stderr,none": 0.01126808211872984
  }
}