# Flat mapping of scalar hyperparameters.
# NOTE(review): groupings and descriptions below are inferred from the key
# names only; confirm each against the code that loads this file.

# Size settings (keys prefixed "d_"); presumably feature/layer widths.
d_input: 32
d_rnn: 32
d_hidden: 64

# Layer counts; presumably the depth of the encoder and decoder stacks.
n_layers_encoder: 4
n_layers_decoder: 4

# Presumably attention settings: head count plus query/key and value widths.
n_head: 4
d_qk: 32
d_v: 32

# Presumably a dropout rate, with 0.0 meaning "disabled" -- TODO confirm.
dropout: 0.0

# Gap and weighting terms; the "_reversed" variants presumably apply only
# when reverse_loss is enabled -- TODO confirm against the consumer.
probability_gap: 0.5
reversed_probability_gap: 0.9
epsilon_l_c: 1.0
epsilon_l_p: 1.0
epsilon_l_p_reversed: 0.5

# Keep both the ".0" and the explicit exponent sign: YAML 1.1 resolvers
# (e.g. PyYAML) only treat this spelling as a float; "1e-20" would be
# loaded as a string.
epsilon: 1.0e-20

# Feature switches, written as canonical lowercase YAML booleans so that
# every YAML schema/parser resolves them to a boolean.
inject_x_o: true
reverse_loss: true

probability_threshold: 0.5
# NOTE(review): same value as probability_gap above; confirm both keys are
# actually read by the consumer and are not an accidental duplicate.
expected_probability_gap: 0.5