Model,parsinlu_qa_2_acc,word_unscrambling_2_exact_match,ipa_transliterate_2_exact_match,ipa_transliterate_2_bleu,arithmetic_3ds_2_acc,arithmetic_3da_2_acc,arithmetic_2dm_2_acc,arithmetic_2da_2_acc
meta-llama/Llama-2-7b-hf,0.2514285714285714,0.089,0.0717846460618145,27.23879164406593,0.859,0.9315,0.4155,0.977
meta-llama/Llama-2-13b-hf,0.2666666666666666,0.14,0.1326021934197407,38.95820914837535,0.9045,0.9325,0.5475,0.978
meta-llama/Llama-2-70b-hf,0.359047619047619,0.3685,0.2811565304087737,53.06103866311969,0.975,0.9885,0.8435,0.999
huggyllama/llama-65b,0.2866666666666667,0.276,0.1784646061814556,42.82518044503804,0.9715,0.9765,0.752,0.995
huggyllama/llama-30b,0.2685714285714285,0.1525,0.1365902293120638,37.67676562692426,0.957,0.9735,0.6355,0.9955
huggyllama/llama-13b,0.2676190476190476,0.096,0.0558325024925224,26.834892702014777,0.899,0.905,0.477,0.9705
huggyllama/llama-7b,0.259047619047619,0.0445,0.0269192422731804,12.818938421237382,0.753,0.9135,0.3265,0.9655
meta-llama/Meta-Llama-3.1-8B,0.4028571428571428,0.3785,0.135593220338983,39.57408243033752,1.0,1.0,0.8785,1.0
meta-llama/Meta-Llama-3.1-405B-FP8,0.6476190476190476,0.803,0.4107676969092722,65.15831366997278,1.0,1.0,0.999,1.0
meta-llama/Meta-Llama-3.1-70B,0.6057142857142858,0.651,0.3399800598205383,58.37456269991652,1.0,1.0,0.9925,1.0
meta-llama/Meta-Llama-3-70B,0.5904761904761905,0.6425,0.3389830508474576,59.30508535425739,1.0,1.0,0.991,1.0
meta-llama/Meta-Llama-3-8B,0.3952380952380952,0.3695,0.1335992023928215,39.66969652716684,1.0,1.0,0.8545,1.0
Qwen/Qwen2-7B,0.4085714285714286,0.1985,,,0.996,,0.9295,
Qwen/Qwen2-57B-A14B,0.4828571428571429,0.281,0.2771684945164506,50.93148134058548,0.9995,0.9965,0.9855,1.0
Qwen/Qwen2-1.5B,0.2904761904761905,0.033,,,0.9215,,0.599,
Qwen/Qwen2-72B,0.5961904761904762,0.465,,,0.999,,0.988,
Qwen/Qwen2-0.5B,0.2742857142857143,0.007,,,0.6915,,0.3775,
Qwen/Qwen1.5-110B,0.4895238095238095,0.48,,,0.999,,0.9835,
Qwen/Qwen1.5-1.8B,0.2838095238095238,0.0145,0.0029910269192422,,0.64,0.89,0.5435,0.9575
Qwen/Qwen1.5-4B,0.299047619047619,0.073,0.0398803589232303,,0.97,0.967,0.756,0.9915
Qwen/Qwen1.5-7B,0.3085714285714285,0.121,0.1036889332003988,,0.98,0.9875,0.906,0.9995
Qwen/Qwen1.5-14B,0.34,0.197,0.2063808574277168,,0.996,0.988,0.9715,1.0
Qwen/Qwen1.5-32B,0.4142857142857143,0.3485,0.2861415752741774,,0.9985,0.9935,0.9895,1.0
Qwen/Qwen1.5-72B,0.4238095238095238,0.416,0.008,,0.994,1.0,0.982,1.0
Qwen/Qwen1.5-0.5B,0.2771428571428571,0.008,0.0,,0.2995,0.678,0.2855,0.875
Qwen/Qwen-72B,0.4228571428571429,0.452,0.014,19.42664553133737,0.996,1.0,0.988,1.0
Qwen/Qwen-7B,0.2876190476190476,0.1375,0.0648055832502492,0.5270559542321789,0.9725,0.9855,0.888,1.0
Qwen/Qwen-14B,0.3266666666666666,0.2385,0.1226321036889332,0.5032063945984224,0.996,0.994,0.971,1.0
mistralai/Mistral-7B-v0.1,0.2914285714285714,0.2065,0.1076769690927218,35.315672504390825,0.99,0.982,0.676,0.9975
mistralai/Mistral-Nemo-Base-2407,0.4295238095238095,0.36,0.1286141575274177,39.31439329386384,0.984,0.989,0.8745,0.999
mistralai/Mixtral-8x7B-v0.1,0.3552380952380952,0.3655,0.1655034895314057,44.86427536227012,0.9975,0.991,0.926,1.0
mistralai/Mixtral-8x22B-v0.1,0.4276190476190476,0.527,,,0.9985,,0.989,
01-ai/Yi-6B,0.2723809523809524,0.075,0.0279162512462612,4.724353334068296,0.974,0.9875,0.6945,0.9915
01-ai/Yi-34B,0.3533333333333333,0.2955,0.2492522432701894,48.78881592079304,0.996,0.9905,0.9425,0.9995
01-ai/Yi-1.5-6B,,0.156,,,0.994,,0.9355,
01-ai/Yi-1.5-9B,,0.2305,,,0.9925,,0.979,
01-ai/Yi-1.5-34B,,0.333,0.2153539381854436,46.56135713791198,0.9975,0.995,0.9895,1.0
google/gemma-7b,,0.353,,,0.919,0.9775,0.7625,0.9955
google/gemma-2b,,0.0745,,,0.9405,0.9395,0.591,0.9935
google/gemma-2-9b,0.4847619047619048,0.4065,0.3180458624127617,54.72879286744799,0.992,0.9955,0.913,1.0
google/gemma-2-27b,0.5514285714285714,0.6365,0.3668993020937188,62.38546652125766,0.9975,0.9985,0.9825,0.9995
tiiuae/falcon-40b,0.2780952380952381,0.36,0.1146560319042871,33.05850602894047,0.982,0.8795,0.57,0.9965
tiiuae/falcon-7b,0.2542857142857143,0.1015,0.0059820538384845,3.2884811394303606,0.76,0.4645,0.3585,0.988
tiiuae/falcon-rw-1b,0.2533333333333333,0.0065,0.0,0.0918347687896272,0.003,0.0015,0.0245,0.0215
microsoft/phi-2,,0.082,0.0,0.2653288929048503,0.63,0.636,0.7135,0.9995
microsoft/phi-1_5,,0.01,0.0,0.0962673641587057,0.064,0.064,0.3125,0.786
EleutherAI/pythia-1b-deduped,0.2438095238095238,0.0045,0.0,0.0705526078992909,0.002,0.0005,0.03,0.0165
EleutherAI/pythia-410m-deduped,0.2371428571428571,0.003,0.0,0.1134844707754764,0.002,0.0,0.0255,0.009
EleutherAI/pythia-6.9b-deduped,0.2542857142857143,0.0145,0.0,0.1865409994287415,0.092,0.082,0.178,0.486
EleutherAI/pythia-2.8b-deduped,0.2476190476190476,0.0075,0.0,0.1513354071978045,0.2365,0.139,0.173,0.795
EleutherAI/pythia-12b-deduped,0.2733333333333333,0.021,0.0,0.5791710644567151,0.124,0.12,0.186,0.446
EleutherAI/pythia-70m-deduped,0.238095238095238,0.0,0.0,0.0299829647560212,0.001,0.0005,0.007,0.009
EleutherAI/pythia-1.4b-deduped,0.2571428571428571,0.0055,0.0,0.058314418740019,0.004,0.006,0.0485,0.067
EleutherAI/pythia-160m-deduped,0.2504761904761904,0.0,0.0,0.0544514771055789,0.0025,0.0,0.0245,0.005
bigscience/bloom-560m,0.2514285714285714,0.0015,0.0,0.0525140892367206,0.0005,0.0,0.019,0.004
bigscience/bloom-1b1,0.24,0.004,0.0,0.0647893885977694,0.0015,0.0005,0.022,0.007
bigscience/bloom-3b,0.2314285714285714,0.006,0.0,0.1326621705983472,0.0015,0.0005,0.0255,0.021
bigscience/bloom-7b1,0.2476190476190476,0.0085,0.0019940179461615,1.3150181836811448,0.0095,0.0035,0.0475,0.12
EleutherAI/gpt-neox-20b,0.2552380952380952,0.0315,0.003988035892323,1.5694847713332627,0.6515,0.504,0.4175,0.969
EleutherAI/gpt-neo-2.7B,0.2533333333333333,0.006,0.0,0.1463103338009975,0.0065,0.002,0.057,0.054
EleutherAI/gpt-neo-1.3B,0.2638095238095238,0.004,0.0,0.0960039187617264,0.003,0.0005,0.0335,0.0165
EleutherAI/gpt-neo-125m,0.2447619047619047,0.0,0.0,0.0746424380034945,0.0025,0.0,0.017,0.006
EleutherAI/gpt-j-6b,0.2571428571428571,0.016,0.0,0.4484500244487227,0.4425,0.323,0.272,0.8305
facebook/opt-6.7b,0.2447619047619047,0.009,0.0,0.1830997899627163,0.019,0.0065,0.135,0.158
facebook/opt-1.3b,0.2523809523809524,0.002,0.0,0.0725285204286882,0.003,0.0005,0.0245,0.03
facebook/opt-350m,0.2352380952380952,0.0005,0.0,0.0972419896999673,0.0,0.0005,0.0195,0.0065
facebook/opt-13b,0.2476190476190476,0.0115,0.0,0.5875738673515706,0.013,0.0075,0.1015,0.133
facebook/opt-2.7b,0.2552380952380952,0.0075,0.0,0.0915527615420791,0.0045,0.0015,0.036,0.0235
facebook/opt-30b,0.2447619047619047,0.015,0.0019940179461615,0.8414574834351459,0.015,0.0145,0.2075,0.2325
facebook/opt-125m,0.2438095238095238,0.0,0.0,0.0714230122368858,0.0015,0.0,0.023,0.01
facebook/opt-66b,0.259047619047619,0.0135,0.0069790628115653,2.3625677993485605,0.0805,0.054,0.323,0.499
mosaicml/mpt-30b,0.2723809523809524,0.1475,0.0538384845463609,24.702202702762005,0.858,0.7045,0.3525,0.9925
mosaicml/mpt-7b,0.2695238095238095,0.0585,0.0109670987038883,2.338332430291938,0.362,0.2725,0.2515,0.9195
facebook/xglm-564M,,0.0,0.0,0.003808800490808,0.0,0.0,0.002,0.0045
facebook/xglm-1.7B,0.2657142857142857,0.0,0.0,0.0031923476558301,0.0005,0.0005,0.0085,0.005
facebook/xglm-4.5B,0.2619047619047619,0.0,0.0,0.003562787562497,0.0,0.0015,0.0025,0.0025
facebook/xglm-7.5B,0.2647619047619047,0.0,0.0,0.0032020869016776,0.0,0.0005,0.0155,0.0055
bigcode/starcoderbase-1b,0.2571428571428571,0.006,0.0,0.13424660949487,0.0085,0.0045,0.0545,0.126
bigcode/starcoderbase-3b,0.2438095238095238,0.011,0.0,1.5732615849431866,0.31,0.6065,0.1735,0.867
bigcode/starcoderbase-7b,0.24,0.03,0.003988035892323,3.6481422157068097,0.4025,0.8515,0.2495,0.936
bigcode/starcoderbase,0.2609523809523809,0.046,0.003988035892323,5.086507417628581,0.7795,0.9535,0.358,0.99
deepseek-ai/DeepSeek-V2,0.439047619047619,0.5495,,,0.9995,,0.997,
deepseek-ai/DeepSeek-Coder-V2-Base,0.4885714285714285,0.6375,,,0.999,,1.0,
ai21labs/Jamba-v0.1,0.2733333333333333,0.1465,0.1286141575274177,20.06252088356779,0.8215,0.984,0.8965,1.0