
# Hyperparameter overrides for the `multi` sub-configuration of `model`.
# NOTE(review): the per-key remarks below are inferred from key names only;
# the consuming code is not visible here — confirm before relying on them.
model:
  multi:
    optimizer:
      learning_rate: 0.0008  # presumably the optimizer step size — confirm
    batch_size: 128
    # Layer-width settings (presumably sequence, representation and
    # fully-connected widths respectively — TODO confirm against the model).
    seq_hidden_units: 14
    br_size: 12
    fc_hidden_units: 14
    dropout_rate: 0.1  # presumably a probability in [0, 1] — confirm
    num_layer: 2
    # NOTE(review): seq_hidden_units (14) is not an integer multiple of
    # num_heads (3). If the consumer splits the hidden width evenly across
    # heads, verify how the remainder is handled.
    num_heads: 3

    # Settings for the self positional encoding.
    self_positional_encoding:
      # presumably the largest relative offset that is encoded — confirm
      max_relative_position: 20
