---
# Flat set of layer hyperparameters. The consumer is not visible from this
# file. NOTE(review): the key names suggest a gated attention-style layer;
# confirm against the code that loads this config.

# Size settings.
# NOTE(review): expand_k / expand_v look like multipliers applied to `dim`
# for the key and value projections -- confirm with the consumer.
dim: 128
num_heads: 1
expand_k: 1
expand_v: 1

# Gate settings.
# `swish` is a plain string; it needs no quotes because it cannot be
# mistaken for a boolean, null, or number.
gate_fn: swish
gate_logit_normalizer: 16
gate_low_rank_dim: 16

# Normalization settings.
# 1.0e-5 is the same float as 0.00001; the dot and signed exponent keep it
# a float under both YAML 1.1 and YAML 1.2 resolvers.
layernorm_eps: 1.0e-5
fuse_norm: false

# Mode selector, passed through as the plain string "chunk".
# NOTE(review): the set of accepted values is defined by the consumer.
mode: chunk