LAYER: 6
HIDDEN_SIZE: 512
MULTI_HEAD: 8
DROPOUT_R: 0.1
FLAT_MLP_SIZE: 512
FLAT_GLIMPSES: 1
FLAT_OUT_SIZE: 1024
LR_BASE: 0.0001
LR_DECAY_R: 0.2
GRAD_ACCU_STEPS: 1
CKPT_VERSION: 'small'
CKPT_EPOCH: 13