# GatedDeltaNet config for MAD-lab
# Based on NVIDIA's Gated Delta Networks (ICLR '25)
# Paper: "Gated Delta Networks: Improving Mamba2 with Delta Rule"

# Flat mapping of hyperparameters for one GatedDeltaNet layer.
# NOTE(review): the consumer of this file is not visible here; the
# per-key notes below are inferred from key names only and must be
# confirmed against the layer's constructor before being relied on.
#
# --- Sizing ---
# Presumably the model width, the key/value widths expressed as
# multiples of `dim`, and the number of heads — TODO confirm.
# If expand_k / expand_v do scale `dim`, the resulting widths would be
# 0.75 * 128 = 96 and 1.5 * 128 = 192, both divisible by num_heads (4).
dim: 128
expand_k: 0.75
expand_v: 1.5
num_heads: 4
# --- Query/key normalization and convolution ---
# String selector; kept quoted so it is always loaded as a string.
qk_norm: 'l2'
# Presumably the kernel width and bias switch of a short convolution
# applied inside the layer — TODO confirm.
conv_size: 4
conv_bias: false
# --- Gating and output normalization ---
# String selector naming an activation; valid choices are defined by
# the consumer, not by this file.
gate_fn: 'swish'
elementwise_affine: true
# Keep the decimal point and the signed exponent: some YAML 1.1 loaders
# (e.g. PyYAML) read a form such as `1e-5` as a string, not a float.
norm_eps: 1.0e-5
# Presumably a scaling constant applied to gate logits — TODO confirm.
gate_logit_normalizer: 16
fuse_norm: true
# --- Feature toggles ---
# Canonical lowercase YAML booleans. What each flag enables is decided
# by the consuming layer; verify there before flipping any of them.
use_mamba_gate: true
use_residual: false
use_input_gate: false
