# Hydra defaults list: the configs below are composed in the order given.
defaults:
  # Config `s4base`, merged first.
  - s4base
  # Option `s4block_base` from group `s4block`, placed under the
  # `params.s4block_args` package of this config.
  - s4block/s4block_base@params.s4block_args
  # Option `sequential` from the absolute (root-level) `step` group.
  - /step: sequential
  # This file's own keys are merged last, so they override the entries above.
  - _self_

# Identifier for this model configuration.
# NOTE(review): presumably used to label runs/outputs — confirm against the consumer.
name: s4dualseq

# Arguments for the model. `_target_` is the dotted path of the class to build
# (presumably resolved through Hydra instantiation — confirm against the caller).
params:
  _target_: models.s4seq_model.S4DualSeqModel
  d_model: 128
  n_layers: 4

  # Processors applied around the whole model and around each step.
  input_processor: ConcatTrans
  output_processor: Trans
  step_input_processor: Concat
  step_output_processor: identity
  step_layer_input_processor: [identity]

  # Per-layer processors; the input and output lists are paired entry by entry.
  # NOTE(review): these lists have 2 entries while n_layers is 4 — presumably
  # the model repeats/cycles them across layers; confirm in S4DualSeqModel.
  layer_input_processors: [BatchTime, BatchSpace]
  layer_output_processors: [UnbatchTime, UnbatchSpace]
  # Alternative layouts kept for reference (uncomment one input/output pair):
  # layer_input_processors: ["BatchTime","BatchTime","BatchTime","BatchTime"]
  # layer_output_processors: ["UnbatchTime","UnbatchTime","UnbatchTime","UnbatchTime"]
  # layer_input_processors: ["BatchSpace","BatchTime","BatchSpace","BatchTime"]
  # layer_output_processors: ["UnbatchSpace","UnbatchTime","UnbatchSpace", "UnbatchTime"]
  # layer_input_processors: ["BatchTime","BatchTime"]
  # layer_output_processors: ["UnbatchTime","UnbatchTime"]
  # layer_input_processors: ["BatchTime","SpaceToHidden","BatchTime","SpaceToHidden"]
  # layer_output_processors: ["UnbatchTime","SpaceFromHidden","UnbatchTime","SpaceFromHidden"]

  use_spatial_batch: true
  final_mlp_hidden_expansion: 2

  # Overrides layered on top of the `s4block/s4block_base` defaults, which the
  # defaults list places at `params.s4block_args`.
  s4block_args:
    # SSM kernel variant: `s4d` or `dplr`.
    kernel: s4d
    # One flag per entry, same length as the active per-layer lists above.
    bidirectional: [true, false]
    # Alternatives kept for reference:
    # bidirectional: [True, True, True, True]
    # bidirectional: [False, True, False, True]
    # bidirectional: [True, True]
    # d_model: [32, 8192, 32, 8192]
    # Arguments for s4.ssm (`legs` is the default init):
    # init: diag-lin
    # init: ['legs','diag-lin','legs','diag-lin']


# Optimizer settings. `_target_` is the dotted path of the callable that sets
# up the optimizer (presumably resolved through Hydra instantiation — confirm).
optimizer:
  _target_: optimizers.setup_s4_optimizer
  lr: 0.001
  # Weight decay is currently set to zero.
  weight_decay: 0.0

batch_size: 16

# Learning-rate scheduler selection. Previously tried alternatives:
# scheduler: None    (NOTE(review): a bare `None` loads as the string "None", not null)
# scheduler: cosine
scheduler: step
# NOTE(review): step_size and gamma presumably parameterize the `step`
# scheduler (scale LR by `gamma` every `step_size` epochs) — confirm in the
# training code.
step_size: 150
gamma: 0.5

warmup_epochs: 1


