{
  "results": {
    "hendrycksTest-high_school_world_history": {
      "acc": 0.6962025316455697,
      "acc_stderr": 0.029936696387138598,
      "acc_norm": 0.569620253164557,
      "acc_norm_stderr": 0.032230171959375976
    },
    "hendrycksTest-formal_logic": {
      "acc": 0.42063492063492064,
      "acc_stderr": 0.04415438226743743,
      "acc_norm": 0.3968253968253968,
      "acc_norm_stderr": 0.043758884927270605
    },
    "hendrycksTest-human_aging": {
      "acc": 0.672645739910314,
      "acc_stderr": 0.03149384670994131,
      "acc_norm": 0.3632286995515695,
      "acc_norm_stderr": 0.032277904428505
    },
    "hendrycksTest-international_law": {
      "acc": 0.7024793388429752,
      "acc_stderr": 0.04173349148083499,
      "acc_norm": 0.768595041322314,
      "acc_norm_stderr": 0.03849856098794088
    },
    "hendrycksTest-security_studies": {
      "acc": 0.5714285714285714,
      "acc_stderr": 0.031680911612338825,
      "acc_norm": 0.40408163265306124,
      "acc_norm_stderr": 0.0314147080258659
    },
    "hendrycksTest-medical_genetics": {
      "acc": 0.6,
      "acc_stderr": 0.049236596391733084,
      "acc_norm": 0.54,
      "acc_norm_stderr": 0.05009082659620332
    },
    "hendrycksTest-econometrics": {
      "acc": 0.3508771929824561,
      "acc_stderr": 0.044895393502707,
      "acc_norm": 0.3157894736842105,
      "acc_norm_stderr": 0.043727482902780064
    },
    "hendrycksTest-high_school_macroeconomics": {
      "acc": 0.5153846153846153,
      "acc_stderr": 0.025339003010106515,
      "acc_norm": 0.4153846153846154,
      "acc_norm_stderr": 0.024985354923102332
    },
    "hendrycksTest-us_foreign_policy": {
      "acc": 0.79,
      "acc_stderr": 0.040936018074033256,
      "acc_norm": 0.59,
      "acc_norm_stderr": 0.049431107042371025
    },
    "hendrycksTest-logical_fallacies": {
      "acc": 0.6993865030674846,
      "acc_stderr": 0.03602511318806771,
      "acc_norm": 0.5398773006134969,
      "acc_norm_stderr": 0.039158572914369714
    },
    "hendrycksTest-prehistory": {
      "acc": 0.6635802469135802,
      "acc_stderr": 0.026289734945952926,
      "acc_norm": 0.42901234567901236,
      "acc_norm_stderr": 0.027538925613470867
    },
    "hendrycksTest-professional_psychology": {
      "acc": 0.5882352941176471,
      "acc_stderr": 0.019910377463105932,
      "acc_norm": 0.43300653594771243,
      "acc_norm_stderr": 0.02004544247332422
    },
    "hendrycksTest-professional_accounting": {
      "acc": 0.3971631205673759,
      "acc_stderr": 0.029189805673587105,
      "acc_norm": 0.33687943262411346,
      "acc_norm_stderr": 0.02819553487396673
    },
    "hendrycksTest-college_biology": {
      "acc": 0.6111111111111112,
      "acc_stderr": 0.04076663253918567,
      "acc_norm": 0.4236111111111111,
      "acc_norm_stderr": 0.04132125019723369
    },
    "hendrycksTest-high_school_biology": {
      "acc": 0.6709677419354839,
      "acc_stderr": 0.02672949906834996,
      "acc_norm": 0.5451612903225806,
      "acc_norm_stderr": 0.028327743091561074
    },
    "hendrycksTest-philosophy": {
      "acc": 0.6752411575562701,
      "acc_stderr": 0.02659678228769704,
      "acc_norm": 0.5016077170418006,
      "acc_norm_stderr": 0.02839794490780661
    },
    "hendrycksTest-high_school_european_history": {
      "acc": 0.696969696969697,
      "acc_stderr": 0.03588624800091707,
      "acc_norm": 0.5636363636363636,
      "acc_norm_stderr": 0.03872592983524754
    },
    "hendrycksTest-college_medicine": {
      "acc": 0.5144508670520231,
      "acc_stderr": 0.03810871630454764,
      "acc_norm": 0.43352601156069365,
      "acc_norm_stderr": 0.03778621079092055
    },
    "hendrycksTest-professional_medicine": {
      "acc": 0.5551470588235294,
      "acc_stderr": 0.03018753206032938,
      "acc_norm": 0.35661764705882354,
      "acc_norm_stderr": 0.02909720956841195
    },
    "hendrycksTest-moral_scenarios": {
      "acc": 0.34301675977653634,
      "acc_stderr": 0.015876912673057724,
      "acc_norm": 0.27262569832402234,
      "acc_norm_stderr": 0.014893391735249588
    },
    "hendrycksTest-high_school_chemistry": {
      "acc": 0.39901477832512317,
      "acc_stderr": 0.03445487686264716,
      "acc_norm": 0.3694581280788177,
      "acc_norm_stderr": 0.03395970381998573
    },
    "hendrycksTest-high_school_physics": {
      "acc": 0.31788079470198677,
      "acc_stderr": 0.038020397601079024,
      "acc_norm": 0.31125827814569534,
      "acc_norm_stderr": 0.03780445850526733
    },
    "hendrycksTest-high_school_government_and_politics": {
      "acc": 0.8082901554404145,
      "acc_stderr": 0.028408953626245282,
      "acc_norm": 0.6113989637305699,
      "acc_norm_stderr": 0.03517739796373132
    },
    "hendrycksTest-high_school_geography": {
      "acc": 0.7575757575757576,
      "acc_stderr": 0.030532892233932026,
      "acc_norm": 0.5505050505050505,
      "acc_norm_stderr": 0.0354413249194797
    },
    "hendrycksTest-global_facts": {
      "acc": 0.47,
      "acc_stderr": 0.05016135580465919,
      "acc_norm": 0.37,
      "acc_norm_stderr": 0.04852365870939099
    },
    "hendrycksTest-professional_law": {
      "acc": 0.4002607561929596,
      "acc_stderr": 0.012513582529136213,
      "acc_norm": 0.3435462842242503,
      "acc_norm_stderr": 0.012128961174190158
    },
    "hendrycksTest-college_mathematics": {
      "acc": 0.37,
      "acc_stderr": 0.048523658709391,
      "acc_norm": 0.3,
      "acc_norm_stderr": 0.046056618647183814
    },
    "hendrycksTest-college_physics": {
      "acc": 0.23529411764705882,
      "acc_stderr": 0.04220773659171452,
      "acc_norm": 0.29411764705882354,
      "acc_norm_stderr": 0.04533838195929774
    },
    "hendrycksTest-high_school_statistics": {
      "acc": 0.4351851851851852,
      "acc_stderr": 0.03381200005643525,
      "acc_norm": 0.35648148148148145,
      "acc_norm_stderr": 0.032664783315272714
    },
    "hendrycksTest-machine_learning": {
      "acc": 0.4017857142857143,
      "acc_stderr": 0.04653333146973646,
      "acc_norm": 0.30357142857142855,
      "acc_norm_stderr": 0.04364226155841044
    },
    "hendrycksTest-public_relations": {
      "acc": 0.6454545454545455,
      "acc_stderr": 0.045820048415054174,
      "acc_norm": 0.4090909090909091,
      "acc_norm_stderr": 0.047093069786618966
    },
    "hendrycksTest-high_school_computer_science": {
      "acc": 0.61,
      "acc_stderr": 0.04902071300001974,
      "acc_norm": 0.47,
      "acc_norm_stderr": 0.05016135580465919
    },
    "hendrycksTest-high_school_psychology": {
      "acc": 0.7706422018348624,
      "acc_stderr": 0.018025349724618684,
      "acc_norm": 0.5541284403669725,
      "acc_norm_stderr": 0.021311335009708582
    },
    "hendrycksTest-virology": {
      "acc": 0.4939759036144578,
      "acc_stderr": 0.03892212195333045,
      "acc_norm": 0.3433734939759036,
      "acc_norm_stderr": 0.03696584317010601
    },
    "hendrycksTest-marketing": {
      "acc": 0.8461538461538461,
      "acc_stderr": 0.023636873317489294,
      "acc_norm": 0.7649572649572649,
      "acc_norm_stderr": 0.027778835904935437
    },
    "hendrycksTest-human_sexuality": {
      "acc": 0.7022900763358778,
      "acc_stderr": 0.04010358942462203,
      "acc_norm": 0.46564885496183206,
      "acc_norm_stderr": 0.04374928560599738
    },
    "hendrycksTest-sociology": {
      "acc": 0.7611940298507462,
      "acc_stderr": 0.03014777593540922,
      "acc_norm": 0.6616915422885572,
      "acc_norm_stderr": 0.033455630703391914
    },
    "hendrycksTest-college_computer_science": {
      "acc": 0.43,
      "acc_stderr": 0.049756985195624284,
      "acc_norm": 0.34,
      "acc_norm_stderr": 0.04760952285695236
    },
    "hendrycksTest-conceptual_physics": {
      "acc": 0.5106382978723404,
      "acc_stderr": 0.03267862331014063,
      "acc_norm": 0.3276595744680851,
      "acc_norm_stderr": 0.030683020843231004
    },
    "hendrycksTest-anatomy": {
      "acc": 0.5185185185185185,
      "acc_stderr": 0.043163785995113245,
      "acc_norm": 0.4074074074074074,
      "acc_norm_stderr": 0.04244633238353228
    },
    "hendrycksTest-miscellaneous": {
      "acc": 0.8186462324393359,
      "acc_stderr": 0.013778693778464062,
      "acc_norm": 0.6143039591315453,
      "acc_norm_stderr": 0.017406476619212907
    },
    "hendrycksTest-jurisprudence": {
      "acc": 0.6666666666666666,
      "acc_stderr": 0.04557239513497751,
      "acc_norm": 0.5555555555555556,
      "acc_norm_stderr": 0.04803752235190193
    },
    "hendrycksTest-moral_disputes": {
      "acc": 0.6184971098265896,
      "acc_stderr": 0.026152198619726792,
      "acc_norm": 0.4595375722543353,
      "acc_norm_stderr": 0.026830805998952236
    },
    "hendrycksTest-high_school_us_history": {
      "acc": 0.7205882352941176,
      "acc_stderr": 0.031493281045079556,
      "acc_norm": 0.553921568627451,
      "acc_norm_stderr": 0.03488845451304974
    },
    "hendrycksTest-high_school_mathematics": {
      "acc": 0.25925925925925924,
      "acc_stderr": 0.026719240783712177,
      "acc_norm": 0.3148148148148148,
      "acc_norm_stderr": 0.02831753349606648
    },
    "hendrycksTest-high_school_microeconomics": {
      "acc": 0.5840336134453782,
      "acc_stderr": 0.032016501007396114,
      "acc_norm": 0.4831932773109244,
      "acc_norm_stderr": 0.03246013680375308
    },
    "hendrycksTest-astronomy": {
      "acc": 0.5723684210526315,
      "acc_stderr": 0.04026097083296564,
      "acc_norm": 0.5657894736842105,
      "acc_norm_stderr": 0.04033565667848319
    },
    "hendrycksTest-world_religions": {
      "acc": 0.8128654970760234,
      "acc_stderr": 0.029913127232368043,
      "acc_norm": 0.7660818713450293,
      "acc_norm_stderr": 0.03246721765117825
    },
    "hendrycksTest-clinical_knowledge": {
      "acc": 0.5320754716981132,
      "acc_stderr": 0.03070948699255654,
      "acc_norm": 0.4641509433962264,
      "acc_norm_stderr": 0.030693675018458003
    },
    "hendrycksTest-college_chemistry": {
      "acc": 0.31,
      "acc_stderr": 0.04648231987117316,
      "acc_norm": 0.32,
      "acc_norm_stderr": 0.046882617226215034
    },
    "hendrycksTest-abstract_algebra": {
      "acc": 0.26,
      "acc_stderr": 0.04408440022768078,
      "acc_norm": 0.29,
      "acc_norm_stderr": 0.04560480215720684
    },
    "hendrycksTest-business_ethics": {
      "acc": 0.67,
      "acc_stderr": 0.04725815626252609,
      "acc_norm": 0.48,
      "acc_norm_stderr": 0.050211673156867795
    },
    "hendrycksTest-elementary_mathematics": {
      "acc": 0.4417989417989418,
      "acc_stderr": 0.02557625706125384,
      "acc_norm": 0.37037037037037035,
      "acc_norm_stderr": 0.024870815251057075
    },
    "hendrycksTest-management": {
      "acc": 0.7184466019417476,
      "acc_stderr": 0.044532548363264673,
      "acc_norm": 0.5533980582524272,
      "acc_norm_stderr": 0.04922424153458933
    },
    "hendrycksTest-electrical_engineering": {
      "acc": 0.5172413793103449,
      "acc_stderr": 0.04164188720169375,
      "acc_norm": 0.38620689655172413,
      "acc_norm_stderr": 0.040573247344190336
    },
    "hendrycksTest-nutrition": {
      "acc": 0.6111111111111112,
      "acc_stderr": 0.02791405551046801,
      "acc_norm": 0.5032679738562091,
      "acc_norm_stderr": 0.028629305194003543
    },
    "hendrycksTest-computer_security": {
      "acc": 0.66,
      "acc_stderr": 0.04760952285695237,
      "acc_norm": 0.58,
      "acc_norm_stderr": 0.049604496374885836
    }
  },
  "versions": {
    "hendrycksTest-high_school_world_history": 0,
    "hendrycksTest-formal_logic": 0,
    "hendrycksTest-human_aging": 0,
    "hendrycksTest-international_law": 0,
    "hendrycksTest-security_studies": 0,
    "hendrycksTest-medical_genetics": 0,
    "hendrycksTest-econometrics": 0,
    "hendrycksTest-high_school_macroeconomics": 0,
    "hendrycksTest-us_foreign_policy": 0,
    "hendrycksTest-logical_fallacies": 0,
    "hendrycksTest-prehistory": 0,
    "hendrycksTest-professional_psychology": 0,
    "hendrycksTest-professional_accounting": 0,
    "hendrycksTest-college_biology": 0,
    "hendrycksTest-high_school_biology": 0,
    "hendrycksTest-philosophy": 0,
    "hendrycksTest-high_school_european_history": 0,
    "hendrycksTest-college_medicine": 0,
    "hendrycksTest-professional_medicine": 0,
    "hendrycksTest-moral_scenarios": 0,
    "hendrycksTest-high_school_chemistry": 0,
    "hendrycksTest-high_school_physics": 0,
    "hendrycksTest-high_school_government_and_politics": 0,
    "hendrycksTest-high_school_geography": 0,
    "hendrycksTest-global_facts": 0,
    "hendrycksTest-professional_law": 0,
    "hendrycksTest-college_mathematics": 0,
    "hendrycksTest-college_physics": 0,
    "hendrycksTest-high_school_statistics": 0,
    "hendrycksTest-machine_learning": 0,
    "hendrycksTest-public_relations": 0,
    "hendrycksTest-high_school_computer_science": 0,
    "hendrycksTest-high_school_psychology": 0,
    "hendrycksTest-virology": 0,
    "hendrycksTest-marketing": 0,
    "hendrycksTest-human_sexuality": 0,
    "hendrycksTest-sociology": 0,
    "hendrycksTest-college_computer_science": 0,
    "hendrycksTest-conceptual_physics": 0,
    "hendrycksTest-anatomy": 0,
    "hendrycksTest-miscellaneous": 0,
    "hendrycksTest-jurisprudence": 0,
    "hendrycksTest-moral_disputes": 0,
    "hendrycksTest-high_school_us_history": 0,
    "hendrycksTest-high_school_mathematics": 0,
    "hendrycksTest-high_school_microeconomics": 0,
    "hendrycksTest-astronomy": 0,
    "hendrycksTest-world_religions": 0,
    "hendrycksTest-clinical_knowledge": 0,
    "hendrycksTest-college_chemistry": 0,
    "hendrycksTest-abstract_algebra": 0,
    "hendrycksTest-business_ethics": 0,
    "hendrycksTest-elementary_mathematics": 0,
    "hendrycksTest-management": 0,
    "hendrycksTest-electrical_engineering": 0,
    "hendrycksTest-nutrition": 0,
    "hendrycksTest-computer_security": 0
  },
  "config": {
    "model": "hf-causal-experimental",
    "model_args": "pretrained=/gaueko1/hizkuntza-ereduak/LLaMA/lm/huggingface/30B,use_accelerate=True",
    "num_fewshot": 5,
    "batch_size": "auto",
    "device": "cuda:0",
    "no_cache": true,
    "limit": null,
    "bootstrap_iters": 100000,
    "description_dict": {}
  }
}
