# Layer hyperparameter configuration (flat mapping of scalar options plus a
# nested `lr` mapping).
# NOTE(review): the commented-out key below suggests this config targets an
# "s4nd" layer class; the consumer is not visible from this file — confirm.
#class_name: s4nd

# --- Model size / structure ---
# NOTE(review): presumably the state dimension of each SSM — confirm.
d_state: 16
channels: 1
bidirectional: true

# --- Nonlinearities and parameterization ---
activation: gelu
final_act: glu
# null leaves the initializer unset; the fallback behavior is defined by the
# consumer — TODO confirm.
initializer: null
weight_norm: false
# NOTE(review): `trank` and `rank` are two distinct keys with the same value;
# confirm that both are read by the consumer and that `trank` is not a typo.
trank: 1

# --- Regularization ---
dropout: 0.1
tie_dropout: false

# --- Kernel initialization ---
# NOTE(review): `legs` presumably selects a named initialization scheme —
# confirm the accepted values against the consumer.
init: legs
rank: 1
# NOTE(review): presumably lower/upper bounds used when initializing the
# step size `dt` — confirm.
dt_min: 0.001
dt_max: 0.1

# Values keyed by parameter name (dt, A, B).
# NOTE(review): presumably per-parameter learning-rate overrides — confirm.
lr:
  dt: 0.001
  A: 0.001
  B: 0.001
n_ssm: 2
# Original note: "Special C init".
# NOTE(review): presumably toggles a special initialization of a parameter
# named C — confirm what `true` changes.
deterministic: false
# Disabled option: per the original note, take the dataset length if it
# exists; otherwise set to null and the kernel will resize automatically.
#l_max: 16

# --- Miscellaneous flags ---
verbose: true
linear: true