# Training/rollout hyperparameters. The meaning of each key is defined by the
# consuming code, not by this file; notes marked NOTE(review) are assumptions
# to be confirmed there.

# NOTE(review): presumably a cap on how far the arm may move per step; units
# are not stated here -- confirm against the consumer.
max_arm_step: 2

# Four-entry schedule. The second entry (200000) equals n_steps below.
# NOTE(review): looks like [start_step, end_step, start_value, end_value] --
# confirm the element order with the code that reads it.
rollout_schedule:
  - 0
  - 200000
  - 1
  - 2

# Parsed as the integer -1 (not a float).
# NOTE(review): presumably the policy's entropy target -- confirm.
target_entropy: -1

# Same value as the second entry of rollout_schedule; if that coupling is
# intentional, change both together (TODO confirm).
n_steps: 200000

# NOTE(review): presumably the actor update interval, in steps -- confirm.
actor_freq: 1