# Training hyperparameters (flat mapping of scalar settings).
# The meaning of each key is defined by the consuming code, which is not
# visible here; the notes below are assumptions to confirm, not facts.

# NOTE(review): presumably an upper bound on a step count — confirm.
max_adm_step: 2
# NOTE(review): presumably a number of start points/states — confirm.
n_starts: 2
# NOTE(review): presumably the number of samples per rollout batch — confirm.
rollout_batch_size: 2048
# NOTE(review): looks like a fraction in [0, 1] of "real" data — confirm.
real_ratio: 0.2
# NOTE(review): presumably a weight on a penalty term; written as an
# integer, so the consumer receives an int rather than a float.
penalty_coef: 100
# Canonical lowercase boolean; resolves to the same value as `False`.
# NOTE(review): presumably toggles automatic tuning of `alpha` — confirm.
auto_alpha: false
# NOTE(review): presumably the fixed value used when `auto_alpha` is
# false — verify against the consumer.
alpha: 0.05
# NOTE(review): presumably the total number of training epochs — confirm.
n_epochs: 3000