# Network architecture.
# Only encoder and attention sizes are set here; the decoder entries are left
# commented out, so this file supplies no dlayers/dunits values.
# Encoder related
# NOTE(review): presumably the number of encoder layers and the width of each
# layer's hidden/feed-forward units — confirm against the model's arguments.
elayers: 18
eunits: 1024
# Decoder related (disabled in this config; kept for reference)
# dlayers: 6
# dunits: 2048
# Attention related
# NOTE(review): presumably the attention/model dimension and the number of
# attention heads — confirm against the model's arguments.
adim: 256
aheads: 4

# Hybrid CTC/attention multitask weight.
# NOTE(review): presumably the weight given to the CTC loss term, in which
# case 1.0 means CTC-only training; that would be consistent with the decoder
# settings (dlayers/dunits) being commented out in this file — confirm
# against the training code.
mtlalpha: 1.0

# Label smoothing weight.
# NOTE(review): if label smoothing is applied only to an attention-decoder
# loss, this value may have no effect while mtlalpha is 1.0 — verify against
# the model implementation.
lsm-weight: 0.1

# Minibatch related.
# batch-size is the nominal batch size; per the notes on the two maxlen-*
# keys, it is reduced automatically for batches containing long sequences.
batch-size: 64
maxlen-in: 512  # if input length > maxlen-in, the batch size is automatically reduced
maxlen-out: 150 # if output length > maxlen-out, the batch size is automatically reduced

# Optimization related.
# NOTE(review): the meanings below are inferred from the key names only —
# confirm against the trainer's argument definitions:
#   accum-grad  presumably the number of minibatches whose gradients are
#               accumulated before each optimizer step
#   grad-clip   presumably the gradient clipping threshold
#   patience    presumably the early-stopping patience (0 likely disables it)
sortagrad: 0 # feed samples from shortest to longest; -1: enabled for all epochs, 0: disabled, N: enabled for N epochs
opt: noam
accum-grad: 2
grad-clip: 5
patience: 0
epochs: 50
dropout-rate: 0.1

# Transformer-specific settings.
# model-module names the model implementation as "<python module>:<class>".
# NOTE(review): transformer-lr and transformer-warmup-steps presumably
# parameterize the "noam" optimizer schedule selected by `opt` — confirm
# against the optimizer setup code.
backend: pytorch
model-module: "espnet.nets.pytorch_backend.e2e_asr_vari_mlp:E2E"
transformer-input-layer: conv2d     # encoder architecture type
transformer-lr: 5.0
transformer-warmup-steps: 25000
transformer-attn-dropout-rate: 0.0
transformer-length-normalized-loss: false
transformer-init: pytorch

# MLP module related.
# NOTE(review): these keys use snake_case while the rest of this file uses
# kebab-case; they are left unchanged because they must match whatever option
# names the model module registers — confirm before renaming.
# NOTE(review): time_shift presumably only applies to the mlp_type selected
# here ("tsmlp") — verify against the model implementation.
tiny_attn_dim: 128
causal: false
act: "gelu"
act_in: "gelu"
mlp_type: "tsmlp"
time_shift: 2
mlp_module_dropout_rate: 0.1

# Intermediate CTC (interCTC) related.
# NOTE(review): multi-position presumably lists the encoder layer position(s)
# where an intermediate CTC loss is attached; with elayers set to 18, confirm
# whether 17 is 0- or 1-based and whether it coincides with the top layer.
# NOTE(review): toplayer-w is presumably the weight on the top-layer loss —
# confirm against the model implementation.
multi-position: [17]
toplayer-w: 1.0
