# Flat mapping of run settings; every value is a plain scalar.
# NOTE(review): the notes below are inferred from the key names only.
# Confirm each against the code that loads this file.

# presumably an upper bound on a per-rollout step count -- TODO confirm
max_adm_step: 4
# presumably the number of starting points/restarts used -- TODO confirm
n_starts: 4
# presumably the number of items processed per rollout batch -- TODO confirm
rollout_batch_size: 512
# 30,000,000; presumably a maximum entry count for a buffer -- TODO confirm
buffer_size: 30000000
# presumably a weight applied to a penalty term -- TODO confirm
penalty_coef: 25
# Boolean written in canonical lowercase form (was `False`); it resolves to
# the same boolean value under YAML 1.1 and the YAML 1.2 core schema.
auto_alpha: false
# presumably the fixed value used while auto_alpha is disabled -- verify
alpha: 0.5