Model,leaderboard_bbh,leaderboard_gpqa,leaderboard_ifeval,leaderboard_math_hard,leaderboard_mmlu_pro,leaderboard_musr
meta-llama/Llama-2-7b-hf,0.34750911300121506,0.26677852348993286,0.31894484412470026,0.012084592145015106,0.18608710106382978,0.36904761904761907
meta-llama/Llama-2-13b-hf,0.4113869119944454,0.28104026845637586,0.31534772182254195,0.010574018126888218,0.23778257978723405,0.3531746031746032
meta-llama/Llama-2-70b-hf,0.5460857490019094,0.3028523489932886,0.3057553956834532,0.024924471299093656,0.37175864361702127,0.4113756613756614
huggyllama/llama-13b,0.39802117687901406,0.2550335570469799,0.3009592326139089,0.012084592145015106,0.19522938829787234,0.34523809523809523
huggyllama/llama-7b,0.32407568130532893,0.2525167785234899,0.31534772182254195,0.006797583081570997,0.13131648936170212,0.33465608465608465
huggyllama/llama-65b,0.4686686339177226,0.276006711409396,0.3129496402877698,0.02190332326283988,0.3077626329787234,0.3584656084656085
meta-llama/Meta-Llama-3.1-70B,0.6271480645721229,0.3875838926174497,0.2278177458033573,0.16691842900302115,0.4654255319148936,0.45634920634920634
meta-llama/Meta-Llama-3.1-8B,0.46380836660302033,0.2961409395973154,0.17266187050359713,0.04607250755287009,0.32455119680851063,0.38227513227513227
meta-llama/Meta-Llama-3-70B,0.6465891338309321,0.3976510067114094,0.21342925659472423,0.16540785498489427,0.4709109042553192,0.45105820105820105
meta-llama/Meta-Llama-3-8B,0.45738587050859225,0.3053691275167785,0.19304556354916066,0.0324773413897281,0.32097739361702127,0.3611111111111111
Qwen/Qwen2-72B,0.6592605450442631,0.39429530201342283,0.4412470023980815,0.29154078549848944,0.5730551861702128,0.4695767195767196
Qwen/Qwen2-57B-A14B,0.5589307411907655,0.3062080536912752,0.36930455635491605,0.1865558912386707,0.4916057180851064,0.4166666666666667
Qwen/Qwen2-7B,0.5287276514494011,0.30453020134228187,0.3764988009592326,0.18806646525679757,0.41830119680851063,0.44312169312169314
Qwen/Qwen2-1.5B,0.3561881617774692,0.26426174496644295,0.2673860911270983,0.06268882175226587,0.2551529255319149,0.36507936507936506
Qwen/Qwen2-0.5B,0.3239021003298039,0.2558724832214765,0.2422062350119904,0.0256797583081571,0.1697972074468085,0.37433862433862436
Qwen/Qwen1.5-32B,0.56865127582017,0.3296979865771812,0.3932853717026379,0.26661631419939574,0.4499667553191489,0.42724867724867727
Qwen/Qwen1.5-14B,0.5037319909737893,0.29446308724832215,0.3537170263788969,0.1646525679758308,0.36436170212765956,0.41798941798941797
Qwen/Qwen1.5-7B,0.4542614129491408,0.2986577181208054,0.3261390887290168,0.0445619335347432,0.29163896276595747,0.41005291005291006
Qwen/Qwen1.5-110B,0.6070126714112133,0.3523489932885906,0.4052757793764988,0.23036253776435045,0.5360704787234043,0.44047619047619047
Qwen/Qwen1.5-0.5B,0.31314007984724873,0.25419463087248323,0.2302158273381295,0.004531722054380665,0.1307347074468085,0.3611111111111111
Qwen/Qwen1.5-1.8B,0.34490539836833883,0.3053691275167785,0.2829736211031175,0.022658610271903322,0.18816489361702127,0.35978835978835977
Qwen/Qwen1.5-4B,0.4035757680958167,0.27684563758389263,0.29856115107913667,0.02416918429003021,0.24601063829787234,0.35978835978835977
mistralai/Mistral-7B-v0.1,0.4422843256379101,0.29194630872483224,0.30335731414868106,0.024924471299093656,0.30127992021276595,0.4126984126984127
mistralai/Mistral-Nemo-Base-2407,0.5023433431695886,0.2936241610738255,0.22062350119904076,0.04984894259818731,0.34715757978723405,0.3915343915343915
mistralai/Mixtral-8x22B-v0.1,0.6233292831105711,0.37583892617449666,0.3261390887290168,0.16842900302114805,0.46392952127659576,0.40343915343915343
mistralai/Mixtral-8x7B-v0.1,0.507377191459816,0.313758389261745,0.28896882494004794,0.08761329305135952,0.3849734042553192,0.4312169312169312
01-ai/Yi-6B,0.42666203784065265,0.26929530201342283,0.3513189448441247,0.015105740181268883,0.29911901595744683,0.39285714285714285
01-ai/Yi-34B,0.5450442631487589,0.36661073825503354,0.36330935251798563,0.0445619335347432,0.441156914893617,0.4113756613756614
01-ai/Yi-1.5-34B,0.5953827460510328,0.36577181208053694,0.3501199040767386,0.1404833836858006,0.4665890957446808,0.42328042328042326
01-ai/Yi-1.5-9B,0.513626106578719,0.37919463087248323,0.35611510791366907,0.10196374622356495,0.3916223404255319,0.43253968253968256
01-ai/Yi-1.5-6B,0.4481860788057629,0.313758389261745,0.30335731414868106,0.05362537764350453,0.31441156914893614,0.4365079365079365
google/gemma-7b,0.4339524388127061,0.28691275167785235,0.328537170263789,0.06419939577039276,0.2947972074468085,0.4060846560846561
google/gemma-2b,0.3351848637389342,0.2550335570469799,0.2577937649880096,0.027190332326283987,0.13655252659574468,0.3968253968253968
google/gemma-2-27b,0.562228779725742,0.35067114093959734,0.3009592326139089,0.1487915407854985,0.4370844414893617,0.43915343915343913
google/gemma-2-2b,0.36764450616212463,0.2625838926174497,0.2685851318944844,0.02416918429003021,0.22165890957446807,0.42063492063492064
google/gemma-2-9b,0.5374067002256553,0.3288590604026846,0.2637889688249401,0.11782477341389729,0.4103224734042553,0.44576719576719576
tiiuae/falcon-7b,0.3249435861829543,0.24496644295302014,0.24580335731414868,0.005287009063444109,0.11253324468085106,0.376984126984127
tiiuae/falcon-40b,0.40097205346294046,0.27348993288590606,0.30335731414868106,0.013595166163141994,0.25049867021276595,0.36243386243386244
microsoft/phi-2,0.4868946363478563,0.27181208053691275,0.3333333333333333,0.02416918429003021,0.26279920212765956,0.41005291005291006
microsoft/phi-1_5,0.33310189203263324,0.2676174496644295,0.2697841726618705,0.011329305135951661,0.16913231382978725,0.33994708994708994
bigscience/bloom-560m,0.29925360180524213,0.26174496644295303,0.0815347721822542,0.0007552870090634441,0.11643949468085106,0.4021164021164021
bigscience/bloom-1b1,0.3081062315570213,0.25922818791946306,0.18225419664268586,0.0015105740181268882,0.1107878989361702,0.36904761904761907
bigscience/bloom-3b,0.3035931261933692,0.23993288590604026,0.16546762589928057,0.0007552870090634441,0.11328125,0.3968253968253968
bigscience/bloom-7b1,0.3086269744835966,0.26426174496644295,0.17386091127098321,0.0,0.11045545212765957,0.3478835978835979
EleutherAI/gpt-neo-1.3B,0.3008158305849679,0.2558724832214765,0.2697841726618705,0.006797583081570997,0.1163563829787234,0.38095238095238093
EleutherAI/gpt-neox-20b,0.313660822773824,0.24328859060402686,0.31774580335731417,0.006042296072507553,0.1155252659574468,0.3637566137566138
EleutherAI/gpt-neo-2.7B,0.3105363652143725,0.26593959731543626,0.32014388489208634,0.005287009063444109,0.11627327127659574,0.3544973544973545
EleutherAI/gpt-neo-125m,0.30914771741017183,0.2533557046979866,0.25539568345323743,0.004531722054380665,0.10255984042553191,0.3584656084656085
EleutherAI/gpt-j-6b,0.31678528033327547,0.24580536912751677,0.31774580335731417,0.012084592145015106,0.12408577127659574,0.36507936507936506
facebook/opt-30b,0.30480819302204476,0.26929530201342283,0.3057553956834532,0.006042296072507553,0.1163563829787234,0.35978835978835977
facebook/opt-1.3b,0.3075854886304461,0.2424496644295302,0.2973621103117506,0.0075528700906344415,0.11070478723404255,0.3412698412698413
mosaicml/mpt-7b,0.32702655788925533,0.2600671140939597,0.27697841726618705,0.01283987915407855,0.12059507978723404,0.3664021164021164
bigcode/starcoder2-3b,0.3490713417809408,0.24412751677852348,0.26139088729016785,0.014350453172205438,0.1636469414893617,0.3425925925925926
bigcode/starcoder2-15b,0.4438465544176358,0.27348993288590606,0.34532374100719426,0.05362537764350453,0.23528922872340424,0.3492063492063492
bigcode/starcoder2-7b,0.36226349592084706,0.2516778523489933,0.2865707434052758,0.0256797583081571,0.16422872340425532,0.3783068783068783
ai21labs/Jamba-v0.1,0.35827113348377015,0.2684563758389262,0.25539568345323743,0.009818731117824773,0.24800531914893617,0.3584656084656085
HuggingFaceTB/SmolLM-1.7B,0.3155702135045999,0.24161073825503357,0.29856115107913667,0.0075528700906344415,0.11477726063829788,0.3412698412698413
HuggingFaceTB/SmolLM-360M,0.3027252213157438,0.2676174496644295,0.27697841726618705,0.004531722054380665,0.11236702127659574,0.4007936507936508
HuggingFaceTB/SmolLM-135M,0.3008158305849679,0.25838926174496646,0.2733812949640288,0.006797583081570997,0.11220079787234043,0.4351851851851852
openai-community/gpt2-xl,0.2976913730255164,0.25838926174496646,0.25059952038369304,0.0030211480362537764,0.11311502659574468,0.37037037037037035
openai-community/gpt2-large,0.30324596424231903,0.25922818791946306,0.25059952038369304,0.006797583081570997,0.11419547872340426,0.3783068783068783
openai-community/gpt2-medium,0.3016837354625933,0.2625838926174497,0.2697841726618705,0.0022658610271903325,0.11818484042553191,0.38756613756613756
openai-community/gpt2,0.29803853497656657,0.25838926174496646,0.2302158273381295,0.0030211480362537764,0.11652260638297872,0.43783068783068785
h2oai/h2o-danube3-4b-base,0.3567089047040444,0.2911073825503356,0.290167865707434,0.016616314199395767,0.2109375,0.376984126984127
deepseek-ai/deepseek-llm-7b-base,0.34889776080541574,0.27348993288590606,0.2841726618705036,0.012084592145015106,0.18060172872340424,0.373015873015873
allenai/OLMo-7B-hf,0.3249435861829543,0.2726510067114094,0.3405275779376499,0.006797583081570997,0.11727061170212766,0.3478835978835979
allenai/OLMo-1B-hf,0.30185731643811836,0.26174496644295303,0.2829736211031175,0.0075528700906344415,0.11735372340425532,0.4087301587301587
TinyLlama/TinyLlama_v1.1,0.29942718278076724,0.24580536912751677,0.2577937649880096,0.004531722054380665,0.10488696808510638,0.36904761904761907
Deci/DeciLM-7B,0.43950703002950875,0.2953020134228188,0.3537170263788969,0.02416918429003021,0.26919880319148937,0.4351851851851852
stabilityai/stablelm-2-1_6b,0.3351848637389342,0.2483221476510067,0.14268585131894485,0.0015105740181268882,0.1463597074468085,0.38756613756613756
RWKV/rwkv-raven-14b,0.3294566915466065,0.22902684563758388,0.10191846522781775,0.0,0.11502659574468085,0.3941798941798942
togethercomputer/RedPajama-INCITE-Base-3B-v1,0.303072383266794,0.24328859060402686,0.2997601918465228,0.00906344410876133,0.11112034574468085,0.373015873015873
