# Training hyperparameters.
# NOTE(review): the key names (gae_lambda, ratio_clip, epochs, minibatches)
# suggest a PPO-style policy-gradient trainer, but the code that consumes
# this file is not visible here — confirm each meaning against the loader.

# Presumably the reward discount factor (gamma) — TODO confirm.
discount: 0.99
# Presumably the lambda of Generalized Advantage Estimation — TODO confirm.
gae_lambda: 0.95
# Boolean flag, written in canonical lowercase form.
# Presumably toggles advantage normalization — TODO confirm.
normalize_advantage: true
# Presumably the clipping range applied to the policy probability ratio —
# TODO confirm.
ratio_clip: 0.1
# Presumably the weight of the value-function loss term — TODO confirm.
# Written with an explicit fractional digit so every parser reads a float.
value_loss_coeff: 1.0
# Presumably the weight of the entropy term in the loss — TODO confirm.
entropy_loss_coeff: 0.01
# Presumably the number of optimization passes per collected batch —
# TODO confirm.
epochs: 4
# Presumably the number of minibatches each batch is split into —
# TODO confirm.
minibatches: 4
# Presumably the optimizer step size — TODO confirm.
learning_rate: 0.001
