---
# Flat mapping of integer settings plus one integer list.
# The code that reads this file is not visible from here, so notes on meaning
# are marked as review assumptions rather than facts.

# NOTE(review): presumably an upper bound on arm movement per step; units are
# not shown here. Confirm against the consumer.
max_arm_step: 5

# Four integers. NOTE(review): the meaning of each position is not visible
# here. The second entry (100000) has the same value as n_steps below; confirm
# whether the two must be kept in sync when either is changed.
rollout_schedule:
  - 0
  - 100000
  - 1
  - 15

# Negative integer (-1), not a string or null.
# NOTE(review): confirm how the consumer interprets this value.
target_entropy: -1

# NOTE(review): presumably the total number of steps to run. Confirm.
n_steps: 100000

# NOTE(review): presumably how often (in steps) the actor is updated. Confirm.
actor_freq: 1
