# Hyperparameter preset stored under the key `llama32-1b`.
# NOTE(review): the key names look like training-framework argument names;
# confirm the exact meaning of each against the code that loads this file.
llama32-1b:
  # --- Rotary position embedding (RoPE) frequency scaling ---
  rope_scaling_type: llama3
  rope_scaling_factor: 32.0
  low_freq_factor: 1.0
  high_freq_factor: 4.0
  original_max_position_embeddings: 8192
  # NOTE(review): identical to original_max_position_embeddings even though
  # a scaling factor of 32.0 is configured above; confirm this is intended.
  max_position_embeddings: 8192

  # --- Network shape ---
  num_layers: 16
  hidden_size: 2048
  ffn_hidden_size: 8192  # 4 x hidden_size
  num_attention_heads: 32  # 2048 / 32 = 64 hidden units per head
  group_query_attention: true
  num_query_groups: 8  # 32 heads / 8 groups = 4 heads per group

  # --- Vocabulary ---
  # NOTE(review): a divisor of 1 presumably means no extra padding is added
  # on top of padded_vocab_size; verify against the consumer.
  make_vocab_size_divisible_by: 1
  padded_vocab_size: 128256

  # --- Linear layers, dropout, initialisation ---
  disable_bias_linear: true
  attention_dropout: 0.0
  init_method_std: 0.01
  hidden_dropout: 0.0

  # --- Position embedding ---
  position_embedding_type: rope
  rotary_base: 500000

  # --- Normalisation and activation ---
  normalization: RMSNorm
  # Written with an explicit decimal point on purpose: YAML 1.1 loaders
  # (e.g. PyYAML) resolve the bare form `1e-5` as the STRING '1e-5', not a
  # float. `1.0e-5` is a float under both YAML 1.1 and YAML 1.2 resolvers.
  norm_epsilon: 1.0e-5
  swiglu: true
