# Model-shape hyperparameters (all plain integers).
# NOTE(review): the code that loads this file is not visible from here; the
# per-key notes below are inferred from key names only -- confirm them
# against the consumer before relying on them.
#
# Presumably the model / residual-stream width -- TODO confirm.
hidden_size: 2048
# Presumably the inner width of the feed-forward block -- TODO confirm.
# The value is about 2.69x hidden_size (5504 / 2048).
ffn_hidden_size: 5504
# Presumably the number of stacked transformer layers -- TODO confirm.
num_layers: 24
# Presumably the number of attention (query) heads -- TODO confirm.
# 2048 / 16 = 128 per head, if hidden_size is split evenly across heads.
num_attention_heads: 16
# Presumably the maximum sequence length in tokens -- TODO confirm.
seq_length: 2048
# Presumably the number of key/value heads -- TODO confirm.
# The value equals num_attention_heads here, which would mean no key/value
# head sharing if the consumer supports grouped-query attention -- verify.
num_kv_heads: 16