# Embedding dimensions chosen for the encoder and the decoder of this
# sub-transformer configuration (one scalar each; both are 512 here).
encoder-embed-dim-subtransformer: 512
decoder-embed-dim-subtransformer: 512

# Feed-forward network (FFN) embedding dimensions, given as lists.
# The encoder list has 6 entries and the decoder list has 1, the same as the
# values of encoder-layer-num-subtransformer and
# decoder-layer-num-subtransformer in this file.
# NOTE(review): presumably one value per layer, in layer order; confirm
# against the code that consumes this config.
encoder-ffn-embed-dim-all-subtransformer: [1024, 3072, 3072, 2048, 2048, 2048]
decoder-ffn-embed-dim-all-subtransformer: [3072]

# Number of encoder and decoder layers in this sub-transformer configuration.
# Every "...-all-subtransformer" list in this file has a length equal to the
# matching count here (6 for the encoder lists, 1 for the decoder lists).
# NOTE(review): keep those list lengths in sync with these counts when
# editing; whether the consumer enforces this is not visible from this file.
encoder-layer-num-subtransformer: 6
decoder-layer-num-subtransformer: 1

# Attention head counts, given as lists:
#   - encoder self-attention: 6 entries
#   - decoder self-attention: 1 entry
#   - decoder "ende" attention: 1 entry
# NOTE(review): "ende" presumably stands for encoder-decoder attention, going
# by the comment on decoder-arbitrary-ende-attn-all-subtransformer; the list
# lengths match the layer counts in this file, so these are presumably
# per-layer values. Confirm both against the consumer.
# NOTE(review): attention implementations commonly require the embedding
# dimension (512 in this file) to be divisible by the head count; the values
# used here (4 and 8) both divide 512.
encoder-self-attention-heads-all-subtransformer: [4, 8, 4, 4, 4, 8]
decoder-self-attention-heads-all-subtransformer: [8]
decoder-ende-attention-heads-all-subtransformer: [8]

# Arbitrary encoder-decoder attention: selects how many of the final encoder
# layers the decoder attends to.
#   -1 -> only the last encoder layer
#    1 -> the last two encoder layers
#    2 -> the last three encoder layers
# NOTE(review): the list has a single entry, the same as the value of
# decoder-layer-num-subtransformer in this file; presumably one value per
# decoder layer. Confirm against the code that consumes this config.
decoder-arbitrary-ende-attn-all-subtransformer: [-1]
