---
# Flat set of run hyperparameters. The file itself carries no documentation,
# so the per-key notes below are inferred from the key names only.
# NOTE(review): confirm each meaning against the code that loads this file.

# Presumably an upper bound on a step count for the "adm" component
# — TODO confirm what "adm" refers to and whether the bound is inclusive.
max_adm_step: 6

# Presumably the number of start points/states used per round — TODO confirm.
n_starts: 6

# Presumably the number of samples processed per rollout batch — TODO confirm.
rollout_batch_size: 512

# Presumably the capacity of a data/replay buffer, in entries — TODO confirm
# the unit (transitions vs. bytes).
buffer_size: 30000000

# Presumably the fraction of real (vs. generated) data in each batch;
# likely expected to lie in [0, 1] — TODO confirm.
real_ratio: 0.2

# Presumably the weight applied to a penalty term — TODO confirm.
penalty_coef: 20

# Canonical lowercase boolean: resolved as a bool by YAML 1.1 and every
# YAML 1.2 schema alike, unlike the capitalised spelling.
# Presumably toggles automatic tuning of `alpha` — TODO confirm.
auto_alpha: false

# Presumably the fixed coefficient used when `auto_alpha` is false
# — TODO confirm.
alpha: 0.1