Model,Family,Parameters (b),Tokens (T)
decilm-7b,decilm,7,
gemma-1.1-2b-it,gemma-1.1-it,2,6
gemma-1.1-7b-it,gemma-1.1-it,8,6
gemma-2-2b,gemma-2,2,8
gemma-2-9b,gemma-2,9,13
gpt2,gpt-2,0.124,
gpt2-large,gpt-2,0.774,
internlm2-20b,internlm2,20,2.15
internlm2-7b,internlm2,7,2.15
meta-llama-3.1-70b,meta-llama-3.1,70,15
meta-llama-3.1-8b,meta-llama-3.1,8,15
mistral-7b-v0.1,mistral,7.3,
mixtral-8x22b-v0.1,mixtral-8x-v0.1,140,
mixtral-8x7b-v0.1,mixtral-8x-v0.1,46,
olmo-1b,olmo,1,3
olmo-7b,olmo,6,2.5
pythia-12b,pythia,12,0.25
pythia-160m,pythia,0.16,0.25
pythia-2.8b,pythia,2.8,0.25
pythia-410m,pythia,0.41,0.25
pythia-6.9b,pythia,6.9,0.25
qwen1.5-110b,qwen1.5,111,
qwen2-0.5b,qwen2,0.5,12
qwen2-1.5b,qwen2,1.5,7
qwen2-72b,qwen2,72,7
qwen2-7b,qwen2,7,7
recurrentgemma-2b,recurrentgemma,2,2
recurrentgemma-9b,recurrentgemma,9,
rwkv-4-14b-pile,rwkv,14,0.33
rwkv-4-169m-pile,rwkv,0.169,0.33
rwkv-4-1b5-pile,rwkv,1.5,0.33
rwkv-4-3b-pile,rwkv,3,0.33
rwkv-4-430m-pile,rwkv,0.43,0.33
rwkv-4-7b-pile,rwkv,7,0.33
rwkv-raven-14b,rwkv,14,0.33
smollm-1.7b,smollm,1.7,1
smollm-135m,smollm,0.135,0.6
smollm-360m,smollm,0.36,0.6
yi-1.5-34b,yi-1.5,34,3.5
yi-1.5-6b,yi-1.5,6,3.5
yi-1.5-9b,yi-1.5,8,3.5
yi-9b,yi,8,3.8