# Flat mapping of scalar hyperparameters. How each value is interpreted is
# decided by the code that loads this file, which is not visible here; notes
# marked "presumably" are inferred from key names only and should be confirmed
# against the consumer before being relied on.

# Integer step limit (presumably an upper bound -- TODO confirm meaning).
max_arm_step: 3
# Integer length (presumably of a rollout, in steps -- TODO confirm).
rollout_length: 2
# Fraction (presumably the share of real vs. generated data -- TODO confirm).
real_ratio: 0.5
# Integer coefficient (presumably scales a penalty term -- TODO confirm).
penalty_coef: 6
# Boolean flag, written in canonical lowercase so that every YAML schema
# (1.1, 1.2 core, 1.2 JSON) resolves it as a boolean rather than a string.
auto_alpha: false
# Float (presumably the fixed alpha used while auto_alpha is off -- TODO
# confirm against the consumer).
alpha: 0.011