# Flat mapping of numeric training hyperparameters.
# NOTE(review): the code that loads this file is not visible from here. The
# key names suggest an off-policy, entropy-regularised RL agent with a replay
# buffer and a soft-updated target network; confirm each key's exact meaning
# and units against the consumer before changing a value.
#
# Presumably the reward discount factor (gamma) - TODO confirm.
discount: 0.99
# Presumably the amount of experience collected before updates begin; 0 would
# then mean learning starts immediately - TODO confirm.
learning_starts: 0
# NOTE(review): same value as batch_size (256). How the ratio is defined
# (updates per step, sampled items per step, ...) depends on the consumer.
replay_ratio: 256
# Presumably the soft (Polyak) mixing coefficient for target updates - confirm.
target_update_tau: 0.005
# Presumably how often the target is updated; units (steps vs. updates) are
# not shown here - verify against the consumer.
target_update_interval: 1
# Presumably the entropy coefficient. The separate ent_coeff_lr key suggests
# it may be learned, making this an initial value - TODO confirm.
ent_coeff: 0.1
# Presumably the number of samples per gradient update - confirm.
batch_size: 256
# Presumably the optimiser step size for the main model parameters - confirm
# which networks it applies to.
learning_rate: 0.001
# Presumably the step size used when adapting ent_coeff; currently the same
# value as learning_rate.
ent_coeff_lr: 0.001
# Presumably the replay buffer capacity; units (transitions vs. episodes) are
# not shown here - verify.
replay_size: 100000
