---
# Gauss Legacy configuration, kept aligned with gauss_core.
# Entries marked "added for alignment" were introduced to match gauss_core.

# == Core dimensions ==
dim: 256
d_state: 16
# Token embedding dimension (added for alignment).
embed_dim: 16

# == Feature map configuration (used by selective_h) ==
# Feature map mode (added for alignment).
h_feature_map_mode: 'repeat'

# == State projection (added for alignment) ==
# 'equal': state_dim = d_model, so there is no projection overhead.
state_projection_mode: 'equal'

# == Head combination strategy (added for alignment) ==
# Controls how the expansion dimension is contracted after the SSM.
multi_head_combine: 'projected_linear'

# == Discretization ==
discretize: true
dt_min: 0.001
dt_max: 0.1
dt_init_floor: 0.0001

# == SSM parameters ==
a_parameterization: 'log_space'
process_noise_scale: 0.01

# == Selective features ==
selective_h: true
selective_process: false
selective_delta: false

# == Gating ==
gating_mode: 'multiplicative'
# The MLP was removed for alignment with gauss_core, hence null.
mlp_hidden_units: null

# == Causal convolution ==
use_causal_conv: true
conv_kernel_size: 4
conv_activation: 'silu'
conv_groups: null

# == Gating activation ==
use_gating: true
gating_activation: 'silu'
# Intentionally left true for legacy compatibility; gauss_core uses false.
use_lambda_skip: true

# == MIMO rank ==
mimo_rank: 1

# == QK norm and learnable biases ==
# QK normalization for H and Q.
use_qk_norm: false
# Learnable bias for C (q_projected), init=1.
use_c_bias: false
# Learnable bias for B (h_projected), init=1.
use_b_bias: false

# == Misc ==
bias: false
return_variance: false
