# Hyperparameter settings (flat key/value mapping).
# NOTE(review): the per-key descriptions below are inferred from the key names
# only; confirm each against the code that loads this file.

# Presumably an upper bound on the number of steps per arm - TODO confirm.
max_arm_step: 2
# Presumably the number of steps in each rollout - TODO confirm.
rollout_length: 5
# Presumably the weight applied to a penalty term - TODO confirm.
penalty_coef: 20
# Boolean switch; presumably enables automatic tuning of `alpha` - TODO confirm.
# Spelled as canonical lowercase `false` so every YAML loader/schema reads it
# as a boolean rather than as the string "False".
auto_alpha: false
# Presumably the fixed coefficient used while `auto_alpha` is false - TODO confirm.
alpha: 0.1