# Layer entry identified by `_name_: pmha_gelu`.
# NOTE(review): the key names below (dropout, bias, add_bias_kv,
# add_zero_attn, kdim, vdim) match torch.nn.MultiheadAttention's constructor
# arguments; presumably they are forwarded to an attention module -- confirm
# against the code that consumes this entry.
- _name_: pmha_gelu
  causal: true
  n_heads: 8
  # Explicit null; what the consumer substitutes for it is not visible here.
  dropout: null
  # Booleans use canonical lowercase, consistent with `causal` above.
  bias: true
  add_bias_kv: false
  add_zero_attn: false
  # Explicit nulls; presumably "use the consumer's default" -- confirm.
  kdim: null
  vdim: null
# Layer entry identified by `_name_: ffn`.
- _name_: ffn
  expand: 4
  activation: gelu
  # Relative interpolation (OmegaConf-style leading dots): the value is taken
  # from a `dropout` key higher up the config tree -- presumably on the mapping
  # that holds this list; confirm against the composing config.
  # Original author's note: "Same as null" -- presumably a null here would end
  # up with the same effective value; verify in the consuming code.
  dropout: '${...dropout}'
