# Six integer settings, written as plain unquoted scalars so they load as ints.
# NOTE(review): key names suggest transformer model-shape hyperparameters; the
# consuming code is not visible here, so confirm each meaning against it.
# 1536 / num_attention_heads (12) = 128; presumably the per-head width - confirm.
hidden_size: 1536
# 4128 = 2.6875 x hidden_size (1536); not a round multiple - confirm intended.
ffn_hidden_size: 4128
num_layers: 12
num_attention_heads: 12
# NOTE(review): the unit (tokens?) is not stated here - confirm with consumer.
seq_length: 2048
# Same value as num_attention_heads; presumably no key/value head sharing.
# TODO confirm how the consumer interprets equal values.
num_kv_heads: 12