---
# Flat mapping of run settings. Every value is a plain integer except
# rollout_schedule, which is a sequence of four integers.
# NOTE(review): the code that reads these keys is not part of this file, so
# the per-key notes below are inferred from key names only — confirm each
# one against the loader before relying on it.

# presumably settings for an "arm" component (the term is not defined
# here) — TODO confirm what a "step" and the hidden dimension refer to
max_arm_step: 2
arm_hidden_dim: 400

# presumably update / rollout cadence and data retention for a learned
# model — verify the unit (environment steps vs. gradient updates)
model_update_interval: 1000
model_rollout_interval: 1000
model_retain_steps: 5000

# Four integers, order preserved.
# NOTE(review): looks like [start_step, end_step, min_length, max_length],
# but the element meaning is an assumption — confirm with the consumer
rollout_schedule:
  - 10000
  - 100000
  - 1
  - 10

# Parsed as the integer -8, not a float; presumably an entropy target for
# the policy — confirm whether the consumer expects int or float
target_entropy: -8

# presumably how often the actor is updated — unit not shown, TODO confirm
actor_freq: 20

# presumably the total step budget for the run — verify against caller
n_steps: 300000