# Flat mapping of numeric hyperparameters. The code that consumes this file is
# not visible here, so the per-key notes below are inferred from the key names
# only and are marked as assumptions to confirm against the loader.
#
# NOTE(review): with the current values n_head * d_qk == n_head * d_v == 64,
# which equals d_hidden. This looks intentional; confirm before changing any
# one of these values independently.

# Presumably the hidden/model width - TODO confirm.
d_hidden: 64
# Presumably the per-step input feature dimension - TODO confirm.
d_input: 16
# Presumably the per-head query/key dimension - TODO confirm.
d_qk: 16
# Presumably the recurrent-layer state size - TODO confirm.
d_rnn: 32
# Presumably the per-head value dimension - TODO confirm.
d_v: 16
# Presumably a dropout probability (0.1 = 10%) - TODO confirm.
dropout: 0.1
# Presumably the number of attention heads - TODO confirm.
n_head: 4
# Presumably the number of stacked layers - TODO confirm.
n_layers: 3
# Presumably a small numerical-stability constant - TODO confirm usage.
# Keep the decimal point and the signed exponent: YAML 1.1 loaders such as
# PyYAML resolve `1.0e-20` as a float, whereas `1e-20` would load as a string.
epsilon: 1.0e-20
# NOTE(review): meaning cannot be determined from this file; verify against
# the code that reads `mae_step`.
mae_step: 32