# KLA (Kalman Linear Attention) - Base configuration (no variance output)
# Core dimensions
# All three values are plain integers; embed_dim is twice dim in this config.
# NOTE(review): presumably dim is the model width, d_state the size of the
# recurrent state and embed_dim the embedding width -- inferred from the key
# names only; confirm against the code that loads this file.
dim: 128
d_state: 16
embed_dim: 256

# MLP encoder/decoder hidden units
# Each value is a list (flow sequence), here with a single entry.
# NOTE(review): presumably one list entry per hidden layer, so each MLP has
# one hidden layer of the given width -- confirm how the consumer reads it.
encoder_hidden_units: [120]
decoder_hidden_units: [240]

# Feature map configuration
# String selector. The set of accepted values is defined by the consuming
# code, which is not part of this file.
h_feature_map_mode: 'repeat'

# State projection
# String selector; see the consuming code for the accepted values.
state_projection_mode: 'equal'

# Head combination strategy
# String selector; see the consuming code for the accepted values.
# NOTE(review): 'projected_linear' presumably merges heads through a learned
# linear projection -- inferred from the name; confirm.
multi_head_combine: 'projected_linear'

# Discretization
# The values below satisfy dt_init_floor < dt_min < dt_max.
# NOTE(review): presumably the step size dt is initialised within
# [dt_min, dt_max] and clamped from below by dt_init_floor, and the dt_* keys
# only matter while discretize is true -- confirm in the model code.
discretize: true
dt_min: 0.001
dt_max: 0.1
dt_init_floor: 0.0001

# SSM parameters
# a_parameterization is a string selector; process_noise_scale is a float.
# NOTE(review): 'log_space' presumably means the state-transition parameter
# is stored as a logarithm, and process_noise_scale presumably scales the
# Kalman process noise -- both inferred from the names; confirm.
a_parameterization: 'log_space'
process_noise_scale: 0.01

# Selective features
# Two independent boolean switches; in this base config selective_h is on
# and selective_process is off.
# NOTE(review): "selective" presumably means input-dependent -- confirm.
selective_h: true
selective_process: false

# Gating
# gating_mode is a string selector. The on/off switch (use_gating) and the
# gate activation are set in a separate stanza further down this file.
# mlp_hidden_units is an explicit YAML null (not an empty list).
# NOTE(review): whether null means "no MLP" or "use a built-in default" is
# decided by the consumer -- confirm before changing it.
gating_mode: 'multiplicative'
mlp_hidden_units: null

# Causal convolution
# use_causal_conv is the boolean switch; the conv_* keys set kernel size (4),
# activation name ('silu') and group count.
# conv_groups is an explicit YAML null.
# NOTE(review): presumably null lets the consumer pick a default group count
# and the conv_* keys are ignored when use_causal_conv is false -- confirm.
use_causal_conv: true
conv_kernel_size: 4
conv_activation: 'silu'
conv_groups: null

# Gating activation and lambda skip
# This stanza holds two groups of keys: the gating switch and its activation
# name (use_gating, gating_activation), and the lambda-skip settings
# (use_lambda_skip, lambda_skip_mode, lambda_init).
# NOTE(review): lambda_init is negative (-1.0), so it is presumably a raw
# value that the model passes through a squashing function rather than the
# skip weight itself -- confirm in the model code.
use_gating: true
gating_activation: 'silu'
use_lambda_skip: true
lambda_skip_mode: 'scalar'
lambda_init: -1.0

# MIMO rank
# Plain integer.
# NOTE(review): a rank of 1 presumably corresponds to the simplest (non-MIMO)
# setting -- inferred from the name; confirm.
mimo_rank: 1

# QK Norm
# Boolean switch.
# NOTE(review): presumably enables normalisation of the query/key
# projections -- confirm which norm the consumer applies.
use_qk_norm: true

# Variance (disabled for base config)
# return_variance is false, matching the "no variance output" note in the
# file header. scale_variance_through_gating is nevertheless left at true.
# NOTE(review): presumably scale_variance_through_gating has no effect while
# return_variance is false -- confirm it is not read on another code path.
scale_variance_through_gating: true
return_variance: false

# Misc
# bias is a boolean, max_length a plain integer.
# NOTE(review): bias presumably toggles bias terms in the model's linear
# layers and max_length presumably caps the sequence length -- both inferred
# from the names; confirm against the consumer.
bias: false
max_length: 1280
