---
# Flat mapping of upper-case setting names to scalar values.
# The code that consumes this file is not visible from here, so notes about
# what a setting means are reviewer assumptions to be confirmed.

# Network size settings.
# NOTE(review): presumably layer count, hidden width, and attention-head
# count for the model — confirm against the consumer.
LAYER: 6
HIDDEN_SIZE: 1024
MULTI_HEAD: 8
DROPOUT_R: 0.1

# FLAT_* settings.
# NOTE(review): names suggest a flatten/pooling stage (MLP width, number of
# glimpses, output width) — confirm against the consumer.
FLAT_MLP_SIZE: 512
FLAT_GLIMPSES: 1
FLAT_OUT_SIZE: 2048

# Optimisation settings.
# LR_BASE is written in plain decimal on purpose: YAML 1.1 loaders such as
# PyYAML read an exponent form without a dot and signed exponent (e.g. 5e-5)
# as a string rather than a float.
LR_BASE: 0.00005
LR_DECAY_R: 0.2
GRAD_ACCU_STEPS: 2

# Checkpoint selection.
# CKPT_VERSION is a quoted string; CKPT_EPOCH is an integer.
# NOTE(review): presumably identifies which saved checkpoint to load — confirm.
CKPT_VERSION: 'large'
CKPT_EPOCH: 13