model,name.huggingface,n_params.as_str,n_params.as_int,cfg.n_params,cfg.n_layers,cfg.n_heads,cfg.d_model,cfg.d_vocab,cfg.act_fn,cfg.positional_embedding_type,cfg.parallel_attn_mlp,cfg.original_architecture,cfg.normalization_type,tokenizer.name,tokenizer.class,tokenizer.vocab_size,tokenizer.vocab_hash,model_family,model_size
gpt2-small,gpt2,85M,84934656,84934656,12,12,768,50257,gelu_new,standard,False,GPT2LMHeadModel,LN,gpt2,GPT2TokenizerFast,50257.0,v8xfIj5kwZX5RwgLU66lZNZUlE4=,gpt2,0.085
gpt2-medium,gpt2-medium,302M,301989888,301989888,24,16,1024,50257,gelu_new,standard,False,GPT2LMHeadModel,LN,gpt2-medium,GPT2TokenizerFast,50257.0,v8xfIj5kwZX5RwgLU66lZNZUlE4=,gpt2,0.302
gpt2-large,gpt2-large,708M,707788800,707788800,36,20,1280,50257,gelu_new,standard,False,GPT2LMHeadModel,LN,gpt2-large,GPT2TokenizerFast,50257.0,v8xfIj5kwZX5RwgLU66lZNZUlE4=,gpt2,0.708
gpt2-xl,gpt2-xl,1.5B,1474560000,1474560000,48,25,1600,50257,gelu_new,standard,False,GPT2LMHeadModel,LN,gpt2-xl,GPT2TokenizerFast,50257.0,v8xfIj5kwZX5RwgLU66lZNZUlE4=,gpt2,1.5
opt-125m,facebook/opt-125m,85M,84934656,84934656,12,12,768,50272,relu,standard,False,OPTForCausalLM,LN,facebook/opt-125m,GPT2TokenizerFast,50265.0,f1FIzqnRiMYzke1CU0hp8TDxq7k=,opt,0.085
opt-1.3b,facebook/opt-1.3b,1.2B,1207959552,1207959552,24,32,2048,50272,relu,standard,False,OPTForCausalLM,LN,facebook/opt-1.3b,GPT2TokenizerFast,50265.0,f1FIzqnRiMYzke1CU0hp8TDxq7k=,opt,1.2
opt-2.7b,facebook/opt-2.7b,2.5B,2516582400,2516582400,32,32,2560,50272,relu,standard,False,OPTForCausalLM,LN,facebook/opt-2.7b,GPT2TokenizerFast,50265.0,f1FIzqnRiMYzke1CU0hp8TDxq7k=,opt,2.5
opt-6.7b,facebook/opt-6.7b,6.4B,6442450944,6442450944,32,32,4096,50272,relu,standard,False,OPTForCausalLM,LN,facebook/opt-6.7b,GPT2TokenizerFast,50265.0,f1FIzqnRiMYzke1CU0hp8TDxq7k=,opt,6.4
opt-13b,facebook/opt-13b,13B,12582912000,12582912000,40,40,5120,50272,relu,standard,False,OPTForCausalLM,LN,facebook/opt-13b,GPT2TokenizerFast,50265.0,f1FIzqnRiMYzke1CU0hp8TDxq7k=,opt,13.0
opt-30b,facebook/opt-30b,30B,29595009024,29595009024,48,56,7168,50272,relu,standard,False,OPTForCausalLM,LN,facebook/opt-30b,GPT2TokenizerFast,50265.0,f1FIzqnRiMYzke1CU0hp8TDxq7k=,opt,30.0
opt-66b,facebook/opt-66b,65B,65229815808,65229815808,64,72,9216,50272,relu,standard,False,OPTForCausalLM,LN,facebook/opt-66b,GPT2TokenizerFast,50265.0,f1FIzqnRiMYzke1CU0hp8TDxq7k=,opt,65.0
pythia-14m,EleutherAI/pythia-14m,1.2M,1179648,1179648,6,4,128,50304,gelu,rotary,True,GPTNeoXForCausalLM,LN,EleutherAI/pythia-14m,GPTNeoXTokenizerFast,50254.0,96EawM8Lij99W7OBTk0KW2ELUrQ=,pythia,0.0012
pythia-31m,EleutherAI/pythia-31m,4.7M,4718592,4718592,6,8,256,50304,gelu,rotary,True,GPTNeoXForCausalLM,LN,EleutherAI/pythia-31m,GPTNeoXTokenizerFast,50254.0,96EawM8Lij99W7OBTk0KW2ELUrQ=,pythia,0.0047
pythia-70m,EleutherAI/pythia-70m,19M,18874368,18874368,6,8,512,50304,gelu,rotary,True,GPTNeoXForCausalLM,LN,EleutherAI/pythia-70m,GPTNeoXTokenizerFast,50254.0,96EawM8Lij99W7OBTk0KW2ELUrQ=,pythia,0.019
pythia-160m,EleutherAI/pythia-160m,85M,84934656,84934656,12,12,768,50304,gelu,rotary,True,GPTNeoXForCausalLM,LN,EleutherAI/pythia-160m,GPTNeoXTokenizerFast,50254.0,96EawM8Lij99W7OBTk0KW2ELUrQ=,pythia,0.085
pythia-410m,EleutherAI/pythia-410m,302M,301989888,301989888,24,16,1024,50304,gelu,rotary,True,GPTNeoXForCausalLM,LN,EleutherAI/pythia-410m,GPTNeoXTokenizerFast,50254.0,96EawM8Lij99W7OBTk0KW2ELUrQ=,pythia,0.302
pythia-1b,EleutherAI/pythia-1b,805M,805306368,805306368,16,8,2048,50304,gelu,rotary,True,GPTNeoXForCausalLM,LN,EleutherAI/pythia-1b,GPTNeoXTokenizerFast,50254.0,96EawM8Lij99W7OBTk0KW2ELUrQ=,pythia,0.805
pythia-1.4b,EleutherAI/pythia-1.4b,1.2B,1207959552,1207959552,24,16,2048,50304,gelu,rotary,True,GPTNeoXForCausalLM,LN,EleutherAI/pythia-1.4b,GPTNeoXTokenizerFast,50254.0,96EawM8Lij99W7OBTk0KW2ELUrQ=,pythia,1.2
pythia-2.8b,EleutherAI/pythia-2.8b,2.5B,2516582400,2516582400,32,32,2560,50304,gelu,rotary,True,GPTNeoXForCausalLM,LN,EleutherAI/pythia-2.8b,GPTNeoXTokenizerFast,50254.0,96EawM8Lij99W7OBTk0KW2ELUrQ=,pythia,2.5
pythia-6.9b,EleutherAI/pythia-6.9b,6.4B,6442450944,6442450944,32,32,4096,50432,gelu,rotary,True,GPTNeoXForCausalLM,LN,EleutherAI/pythia-6.9b,GPTNeoXTokenizerFast,50254.0,96EawM8Lij99W7OBTk0KW2ELUrQ=,pythia,6.4
pythia-12b,EleutherAI/pythia-12b,11B,11324620800,11324620800,36,40,5120,50688,gelu,rotary,True,GPTNeoXForCausalLM,LN,EleutherAI/pythia-12b,GPTNeoXTokenizerFast,50254.0,96EawM8Lij99W7OBTk0KW2ELUrQ=,pythia,11.0
meta-llama/Meta-Llama-3-8B,,7.8B,7784628224,7784628224,32,32,4096,128256,silu,rotary,False,LlamaForCausalLM,RMS,meta-llama/Meta-Llama-3-8B,PreTrainedTokenizerFast,128000.0,RnzNv9w_ITBp6b2dcibKR7_l85I=,llama-3,7.8
meta-llama/Meta-Llama-3-8B-Instruct,,7.8B,7784628224,7784628224,32,32,4096,128256,silu,rotary,False,LlamaForCausalLM,RMS,meta-llama/Meta-Llama-3-8B-Instruct,PreTrainedTokenizerFast,128000.0,RnzNv9w_ITBp6b2dcibKR7_l85I=,llama-3,7.8
meta-llama/Meta-Llama-3-70B,,78B,77846282240,77846282240,80,64,8192,128256,silu,rotary,False,LlamaForCausalLM,RMS,meta-llama/Meta-Llama-3-70B,PreTrainedTokenizerFast,128000.0,RnzNv9w_ITBp6b2dcibKR7_l85I=,llama-3,78.0
meta-llama/Meta-Llama-3-70B-Instruct,,78B,77846282240,77846282240,80,64,8192,128256,silu,rotary,False,LlamaForCausalLM,RMS,meta-llama/Meta-Llama-3-70B-Instruct,PreTrainedTokenizerFast,128000.0,RnzNv9w_ITBp6b2dcibKR7_l85I=,llama-3,78.0
meta-llama/Llama-3.1-70B,,78B,77846282240,77846282240,80,64,8192,128256,silu,rotary,False,LlamaForCausalLM,RMS,meta-llama/Llama-3.1-70B,PreTrainedTokenizerFast,128000.0,j9N50ddC7mjCgS4GseU9LmKZDKk=,llama-3.1,78.0
meta-llama/Llama-3.1-8B,,7.8B,7784628224,7784628224,32,32,4096,128256,silu,rotary,False,LlamaForCausalLM,RMS,meta-llama/Llama-3.1-8B,PreTrainedTokenizerFast,128000.0,j9N50ddC7mjCgS4GseU9LmKZDKk=,llama-3.1,7.8
meta-llama/Llama-3.1-8B-Instruct,,7.8B,7784628224,7784628224,32,32,4096,128256,silu,rotary,False,LlamaForCausalLM,RMS,meta-llama/Llama-3.1-8B-Instruct,PreTrainedTokenizerFast,128000.0,j9N50ddC7mjCgS4GseU9LmKZDKk=,llama-3.1,7.8
meta-llama/Llama-3.1-70B-Instruct,,78B,77846282240,77846282240,80,64,8192,128256,silu,rotary,False,LlamaForCausalLM,RMS,meta-llama/Llama-3.1-70B-Instruct,PreTrainedTokenizerFast,128000.0,j9N50ddC7mjCgS4GseU9LmKZDKk=,llama-3.1,78.0
meta-llama/Llama-3.2-1B,,1.1B,1073741824,1073741824,16,32,2048,128256,silu,rotary,False,LlamaForCausalLM,RMS,meta-llama/Llama-3.2-1B,PreTrainedTokenizerFast,128000.0,j9N50ddC7mjCgS4GseU9LmKZDKk=,llama-3.2,1.1
meta-llama/Llama-3.2-3B,,3.2B,3170893824,3170893824,28,24,3072,128256,silu,rotary,False,LlamaForCausalLM,RMS,meta-llama/Llama-3.2-3B,PreTrainedTokenizerFast,128000.0,j9N50ddC7mjCgS4GseU9LmKZDKk=,llama-3.2,3.2
meta-llama/Llama-3.2-1B-Instruct,,1.1B,1073741824,1073741824,16,32,2048,128256,silu,rotary,False,LlamaForCausalLM,RMS,meta-llama/Llama-3.2-1B-Instruct,PreTrainedTokenizerFast,128000.0,j9N50ddC7mjCgS4GseU9LmKZDKk=,llama-3.2,1.1
meta-llama/Llama-3.2-3B-Instruct,,3.2B,3170893824,3170893824,28,24,3072,128256,silu,rotary,False,LlamaForCausalLM,RMS,meta-llama/Llama-3.2-3B-Instruct,PreTrainedTokenizerFast,128000.0,j9N50ddC7mjCgS4GseU9LmKZDKk=,llama-3.2,3.2
meta-llama/Llama-3.3-70B-Instruct,,78B,77846282240,77846282240,80,64,8192,128256,silu,rotary,False,LlamaForCausalLM,RMS,meta-llama/Llama-3.3-70B-Instruct,PreTrainedTokenizerFast,128000.0,j9N50ddC7mjCgS4GseU9LmKZDKk=,meta,78.0
bloom-560m,bigscience/bloom-560m,302M,301989888,301989888,24,16,1024,250880,gelu_fast,alibi,False,BloomForCausalLM,LN,bigscience/bloom-560m,BloomTokenizerFast,250680.0,OO9NZoesMCpWsijo1O2DAbq9GqI=,bloom,0.302
bloom-1b1,bigscience/bloom-1b1,679M,679477248,679477248,24,16,1536,250880,gelu_fast,alibi,False,BloomForCausalLM,LN,bigscience/bloom-1b1,BloomTokenizerFast,250680.0,OO9NZoesMCpWsijo1O2DAbq9GqI=,bloom,0.679
bloom-1b7,bigscience/bloom-1b7,1.2B,1207959552,1207959552,24,16,2048,250880,gelu_fast,alibi,False,BloomForCausalLM,LN,bigscience/bloom-1b7,BloomTokenizerFast,250680.0,OO9NZoesMCpWsijo1O2DAbq9GqI=,bloom,1.2
bloom-3b,bigscience/bloom-3b,2.4B,2359296000,2359296000,30,32,2560,250880,gelu_fast,alibi,False,BloomForCausalLM,LN,bigscience/bloom-3b,BloomTokenizerFast,250680.0,OO9NZoesMCpWsijo1O2DAbq9GqI=,bloom,2.4
bloom-7b1,bigscience/bloom-7b1,6.0B,6039797760,6039797760,30,32,4096,250880,gelu_fast,alibi,False,BloomForCausalLM,LN,bigscience/bloom-7b1,BloomTokenizerFast,250680.0,OO9NZoesMCpWsijo1O2DAbq9GqI=,bloom,6.0
qwen-1.8b,Qwen/Qwen-1_8B,1.2B,1214251008,1214251008,24,16,2048,151936,silu,rotary,False,QWenLMHeadModel,RMS,Qwen/Qwen-1_8B,QWenTokenizer,151851.0,LXUZBV-DGPX2Ty50XH848Cn_umU=,qwen,1.2
qwen-7b,Qwen/Qwen-7B,6.5B,6476005376,6476005376,32,32,4096,151936,silu,rotary,False,QWenLMHeadModel,RMS,Qwen/Qwen-7B,QWenTokenizer,151851.0,LXUZBV-DGPX2Ty50XH848Cn_umU=,qwen,6.5
qwen-14b,Qwen/Qwen-14B,13B,12609126400,12609126400,40,40,5120,152064,silu,rotary,False,QWenLMHeadModel,RMS,Qwen/Qwen-14B,QWenTokenizer,151851.0,LXUZBV-DGPX2Ty50XH848Cn_umU=,qwen,13.0
qwen-1.8b-chat,Qwen/Qwen-1_8B-Chat,1.2B,1214251008,1214251008,24,16,2048,151936,silu,rotary,False,QWenLMHeadModel,RMS,Qwen/Qwen-1_8B-Chat,QWenTokenizer,151851.0,LXUZBV-DGPX2Ty50XH848Cn_umU=,qwen,1.2
qwen-7b-chat,Qwen/Qwen-7B-Chat,6.5B,6476005376,6476005376,32,32,4096,151936,silu,rotary,False,QWenLMHeadModel,RMS,Qwen/Qwen-7B-Chat,QWenTokenizer,151851.0,LXUZBV-DGPX2Ty50XH848Cn_umU=,qwen,6.5
qwen-14b-chat,Qwen/Qwen-14B-Chat,13B,12609126400,12609126400,40,40,5120,152064,silu,rotary,False,QWenLMHeadModel,RMS,Qwen/Qwen-14B-Chat,QWenTokenizer,151851.0,LXUZBV-DGPX2Ty50XH848Cn_umU=,qwen,13.0
qwen2.5-0.5b,Qwen/Qwen2.5-0.5B,391M,390856704,390856704,24,14,896,151936,silu,rotary,False,Qwen2ForCausalLM,RMS,Qwen/Qwen2.5-0.5B,Qwen2TokenizerFast,151643.0,NI384GYDfJidzgXg_-9habj8lOk=,qwen2.5,0.391
qwen2.5-0.5b-instruct,Qwen/Qwen2.5-0.5B-Instruct,391M,390856704,390856704,24,14,896,151936,silu,rotary,False,Qwen2ForCausalLM,RMS,Qwen/Qwen2.5-0.5B-Instruct,Qwen2TokenizerFast,151643.0,NI384GYDfJidzgXg_-9habj8lOk=,qwen2.5,0.391
qwen2.5-1.5b,Qwen/Qwen2.5-1.5B,1.4B,1420296192,1420296192,28,12,1536,151936,silu,rotary,False,Qwen2ForCausalLM,RMS,Qwen/Qwen2.5-1.5B,Qwen2TokenizerFast,151643.0,NI384GYDfJidzgXg_-9habj8lOk=,qwen2.5,1.4
qwen2.5-1.5b-instruct,Qwen/Qwen2.5-1.5B-Instruct,1.4B,1420296192,1420296192,28,12,1536,151936,silu,rotary,False,Qwen2ForCausalLM,RMS,Qwen/Qwen2.5-1.5B-Instruct,Qwen2TokenizerFast,151643.0,NI384GYDfJidzgXg_-9habj8lOk=,qwen2.5,1.4
qwen2.5-3b,Qwen/Qwen2.5-3B,3.0B,3038773248,3038773248,36,16,2048,151936,silu,rotary,False,Qwen2ForCausalLM,RMS,Qwen/Qwen2.5-3B,Qwen2TokenizerFast,151643.0,NI384GYDfJidzgXg_-9habj8lOk=,qwen2.5,3.0
qwen2.5-3b-instruct,Qwen/Qwen2.5-3B-Instruct,3.0B,3038773248,3038773248,36,16,2048,151936,silu,rotary,False,Qwen2ForCausalLM,RMS,Qwen/Qwen2.5-3B-Instruct,Qwen2TokenizerFast,151643.0,NI384GYDfJidzgXg_-9habj8lOk=,qwen2.5,3.0
qwen2.5-7b,Qwen/Qwen2.5-7B,7.1B,7141851136,7141851136,28,28,3584,152064,silu,rotary,False,Qwen2ForCausalLM,RMS,Qwen/Qwen2.5-7B,Qwen2TokenizerFast,151643.0,NI384GYDfJidzgXg_-9habj8lOk=,qwen2.5,7.1
qwen2.5-7b-instruct,Qwen/Qwen2.5-7B-Instruct,7.1B,7141851136,7141851136,28,28,3584,152064,silu,rotary,False,Qwen2ForCausalLM,RMS,Qwen/Qwen2.5-7B-Instruct,Qwen2TokenizerFast,151643.0,NI384GYDfJidzgXg_-9habj8lOk=,qwen2.5,7.1
qwen2.5-14b,Qwen/Qwen2.5-14B,15B,15225323520,15225323520,48,40,5120,152064,silu,rotary,False,Qwen2ForCausalLM,RMS,Qwen/Qwen2.5-14B,Qwen2TokenizerFast,151643.0,NI384GYDfJidzgXg_-9habj8lOk=,qwen2.5,15.0
qwen2.5-14b-instruct,Qwen/Qwen2.5-14B-Instruct,15B,15225323520,15225323520,48,40,5120,152064,silu,rotary,False,Qwen2ForCausalLM,RMS,Qwen/Qwen2.5-14B-Instruct,Qwen2TokenizerFast,151643.0,NI384GYDfJidzgXg_-9habj8lOk=,qwen2.5,15.0
qwen2.5-32b,Qwen/Qwen2.5-32B,34B,33889976320,33889976320,64,40,5120,152064,silu,rotary,False,Qwen2ForCausalLM,RMS,Qwen/Qwen2.5-32B,Qwen2TokenizerFast,151643.0,NI384GYDfJidzgXg_-9habj8lOk=,qwen2.5,34.0
qwen2.5-32b-instruct,Qwen/Qwen2.5-32B-Instruct,34B,33889976320,33889976320,64,40,5120,152064,silu,rotary,False,Qwen2ForCausalLM,RMS,Qwen/Qwen2.5-32B-Instruct,Qwen2TokenizerFast,151643.0,NI384GYDfJidzgXg_-9habj8lOk=,qwen2.5,34.0
qwen2.5-72b,Qwen/Qwen2.5-72B,80B,79607889920,79607889920,80,64,8192,152064,silu,rotary,False,Qwen2ForCausalLM,RMS,Qwen/Qwen2.5-72B,Qwen2TokenizerFast,151643.0,NI384GYDfJidzgXg_-9habj8lOk=,qwen2.5,80.0
qwen2.5-72b-instruct,Qwen/Qwen2.5-72B-Instruct,80B,79607889920,79607889920,80,64,8192,152064,silu,rotary,False,Qwen2ForCausalLM,RMS,Qwen/Qwen2.5-72B-Instruct,Qwen2TokenizerFast,151643.0,NI384GYDfJidzgXg_-9habj8lOk=,qwen2.5,80.0
qwen-32b-preview,Qwen/QwQ-32B-Preview,34B,33889976320,33889976320,64,40,5120,152064,silu,rotary,False,Qwen2ForCausalLM,RMS,Qwen/QwQ-32B-Preview,Qwen2TokenizerFast,151643.0,NI384GYDfJidzgXg_-9habj8lOk=,qwen,34.0
qwen3-0.6b,Qwen/Qwen3-0.6B,499M,499122176,499122176,28,16,1024,151936,silu,rotary,False,Qwen3ForCausalLM,RMS,Qwen/Qwen3-0.6B,Qwen2TokenizerFast,151643.0,OfOK7SjmrN4KFUqlDylX-Up77SM=,qwen3,0.499
qwen3-1.7b,Qwen/Qwen3-1.7B,1.5B,1526726656,1526726656,28,16,2048,151936,silu,rotary,False,Qwen3ForCausalLM,RMS,Qwen/Qwen3-1.7B,Qwen2TokenizerFast,151643.0,OfOK7SjmrN4KFUqlDylX-Up77SM=,qwen3,1.5
qwen3-4b,Qwen/Qwen3-4B,4.2B,4199546880,4199546880,36,32,2560,151936,silu,rotary,False,Qwen3ForCausalLM,RMS,Qwen/Qwen3-4B,Qwen2TokenizerFast,151643.0,OfOK7SjmrN4KFUqlDylX-Up77SM=,qwen3,4.2
qwen3-8b,Qwen/Qwen3-8B,7.9B,7851737088,7851737088,36,32,4096,151936,silu,rotary,False,Qwen3ForCausalLM,RMS,Qwen/Qwen3-8B,Qwen2TokenizerFast,151643.0,OfOK7SjmrN4KFUqlDylX-Up77SM=,qwen3,7.9
qwen3-14b,Qwen/Qwen3-14B,15B,14889779200,14889779200,40,40,5120,151936,silu,rotary,False,Qwen3ForCausalLM,RMS,Qwen/Qwen3-14B,Qwen2TokenizerFast,151643.0,OfOK7SjmrN4KFUqlDylX-Up77SM=,qwen3,15.0
phi-1,microsoft/phi-1,1.2B,1207959552,1207959552,24,32,2048,51200,gelu_new,rotary,True,PhiForCausalLM,LN,microsoft/phi-1,CodeGenTokenizer,50257.0,TYk6J3OrqdU2F7JYiSfFXtd-vB4=,phi,1.2
phi-1_5,microsoft/phi-1_5,1.2B,1207959552,1207959552,24,32,2048,51200,gelu_new,rotary,True,PhiForCausalLM,LN,microsoft/phi-1_5,CodeGenTokenizer,50257.0,TYk6J3OrqdU2F7JYiSfFXtd-vB4=,phi,1.2
phi-2,microsoft/phi-2,2.5B,2516582400,2516582400,32,32,2560,51200,gelu_new,rotary,True,PhiForCausalLM,LN,microsoft/phi-2,CodeGenTokenizer,50257.0,TYk6J3OrqdU2F7JYiSfFXtd-vB4=,phi,2.5
phi-3,microsoft/Phi-3-mini-4k-instruct,3.6B,3623878656,3623878656,32,32,3072,32064,silu,rotary,False,Phi3ForCausalLM,RMS,microsoft/Phi-3-mini-4k-instruct,LlamaTokenizer,32000.0,2BcGXsWoZjuOkMtb6uTbGL68fbc=,phi,3.6
phi-4,microsoft/phi-4,15B,15204352000,15204352000,40,40,5120,100352,silu,rotary,False,Phi3ForCausalLM,RMS,microsoft/phi-4,GPT2Tokenizer,100352.0,uJZqWk6gqn6tO_nlSJEZsP9MITQ=,phi,15.0
gemma-2b,google/gemma-2b,2.1B,2113929216,2113929216,18,8,2048,256000,gelu_new,rotary,False,Gemma2ForCausalLM,RMS,google/gemma-2b,GemmaTokenizerFast,256000.0,87mmm7o-5SoGMD05LzhcJdB_XBk=,gemma,2.1
gemma-7b,google/gemma-7b,7.8B,7751073792,7751073792,28,16,3072,256000,gelu_new,rotary,False,GemmaForCausalLM,RMS,google/gemma-7b,GemmaTokenizerFast,256000.0,87mmm7o-5SoGMD05LzhcJdB_XBk=,gemma,7.8
gemma-2b-it,google/gemma-2b-it,2.1B,2113929216,2113929216,18,8,2048,256000,gelu_new,rotary,False,Gemma2ForCausalLM,RMS,google/gemma-2b-it,GemmaTokenizerFast,256000.0,87mmm7o-5SoGMD05LzhcJdB_XBk=,gemma,2.1
gemma-7b-it,google/gemma-7b-it,7.8B,7751073792,7751073792,28,16,3072,256000,gelu_new,rotary,False,GemmaForCausalLM,RMS,google/gemma-7b-it,GemmaTokenizerFast,256000.0,87mmm7o-5SoGMD05LzhcJdB_XBk=,gemma,7.8
gemma-2-2b,google/gemma-2-2b,2.1B,2146959360,2146959360,26,8,2304,256000,gelu_pytorch_tanh,rotary,False,Gemma2ForCausalLM,RMS,google/gemma-2-2b,GemmaTokenizerFast,256000.0,87mmm7o-5SoGMD05LzhcJdB_XBk=,gemma-2,2.1
gemma-2-2b-it,google/gemma-2-2b-it,2.1B,2146959360,2146959360,26,8,2304,256000,gelu_pytorch_tanh,rotary,False,Gemma2ForCausalLM,RMS,google/gemma-2-2b-it,GemmaTokenizerFast,256000.0,87mmm7o-5SoGMD05LzhcJdB_XBk=,gemma-2,2.1
gemma-2-9b,google/gemma-2-9b,8.9B,8940158976,8940158976,42,16,3584,256000,gelu_pytorch_tanh,rotary,False,Gemma2ForCausalLM,RMS,google/gemma-2-9b,GemmaTokenizerFast,256000.0,87mmm7o-5SoGMD05LzhcJdB_XBk=,gemma-2,8.9
gemma-2-9b-it,google/gemma-2-9b-it,8.9B,8940158976,8940158976,42,16,3584,256000,gelu_pytorch_tanh,rotary,False,Gemma2ForCausalLM,RMS,google/gemma-2-9b-it,GemmaTokenizerFast,256000.0,87mmm7o-5SoGMD05LzhcJdB_XBk=,gemma-2,8.9
gemma-2-27b,google/gemma-2-27b,27B,26914848768,26914848768,46,32,4608,256000,gelu_pytorch_tanh,rotary,False,Gemma2ForCausalLM,RMS,google/gemma-2-27b,GemmaTokenizerFast,256000.0,87mmm7o-5SoGMD05LzhcJdB_XBk=,gemma-2,27.0
gemma-2-27b-it,google/gemma-2-27b-it,27B,26914848768,26914848768,46,32,4608,256000,gelu_pytorch_tanh,rotary,False,Gemma2ForCausalLM,RMS,google/gemma-2-27b-it,GemmaTokenizerFast,256000.0,87mmm7o-5SoGMD05LzhcJdB_XBk=,gemma-2,27.0
