
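# === MuJoCo Envs ===
# Hyperparameters follow the original paper, except that n_timesteps is capped
# at 1e6 steps for every env; the larger per-env budgets are kept as
# commented-out n_timesteps lines under each entry.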

# Shared baseline settings. The `&mujoco` anchor lets the env entries in this
# file pull these keys in with `<<: *mujoco` and then override individual ones.
# NOTE(review): this key is not an environment id itself — confirm the loader
# ignores or tolerates it.
MuJoCo-Defaults: &mujoco
  n_envs: 1
  policy: 'LinearPolicy'
  # Training budget inherited by every env that does not set its own
  # n_timesteps. The explicit !!float tag forces a float; some YAML 1.1
  # loaders would otherwise read a bare `1e6` as a string (TODO confirm for
  # the loader used here).
  n_timesteps: !!float 1e6
  learning_rate: !!float 0.02
  delta_std: !!float 0.01
  n_delta: 1
  n_top: 1
  # 0 here; several envs in this file override it with -1.
  # NOTE(review): presumably a constant added to the per-step reward to cancel
  # the env's alive bonus — confirm against the algorithm documentation.
  alive_bonus_offset: 0
  # Stored as a string, not a YAML mapping.
  # NOTE(review): presumably evaluated by the loader into normalization
  # wrapper kwargs — confirm before changing the quoting.
  normalize: "dict(norm_obs=True, norm_reward=False)" 

# Swimmer-v4: merges the shared `*mujoco` mapping. The four keys listed
# explicitly below carry the same values as that shared mapping, so they are
# spelled out for readability rather than to change anything.
Swimmer-v4:
  <<: *mujoco
  # Disabled larger budget (presumably the uncapped value from the paper);
  # while commented out, n_timesteps comes from the merged mapping.
  # n_timesteps: !!float 2e6
  learning_rate: !!float 0.02
  delta_std: !!float 0.01
  n_delta: 1
  n_top: 1

# Hopper-v4: merges the shared `*mujoco` mapping and overrides learning_rate,
# delta_std, n_delta, n_top and alive_bonus_offset. Keys not listed here
# (n_envs, policy, n_timesteps, normalize) come from the merged mapping.
Hopper-v4:
  <<: *mujoco
  # Disabled larger budget (presumably the uncapped value from the paper);
  # while commented out, n_timesteps comes from the merged mapping.
  # n_timesteps: !!float 7e6
  learning_rate: !!float 0.01
  delta_std: !!float 0.025
  n_delta: 8
  n_top: 4
  # NOTE(review): presumably cancels a +1 per-step alive bonus in this env's
  # reward — confirm against the algorithm and environment documentation.
  alive_bonus_offset: -1

# HalfCheetah-v4: merges the shared `*mujoco` mapping and overrides
# learning_rate, delta_std, n_delta and n_top. alive_bonus_offset is not
# overridden here, so it comes from the merged mapping along with the other
# unlisted keys.
HalfCheetah-v4:
  <<: *mujoco
  # Disabled larger budget (presumably the uncapped value from the paper);
  # while commented out, n_timesteps comes from the merged mapping.
  # n_timesteps: !!float 1.25e7
  learning_rate: !!float 0.02
  delta_std: !!float 0.03
  n_delta: 32
  n_top: 4

# Walker2d-v4: merges the shared `*mujoco` mapping and overrides
# learning_rate, delta_std, n_delta, n_top and alive_bonus_offset. Keys not
# listed here (n_envs, policy, n_timesteps, normalize) come from the merged
# mapping.
Walker2d-v4:
  <<: *mujoco
  # Disabled larger budget (presumably the uncapped value from the paper);
  # while commented out, n_timesteps comes from the merged mapping.
  # n_timesteps: !!float 7.5e7
  learning_rate: !!float 0.03
  delta_std: !!float 0.025
  n_delta: 40
  n_top: 30
  # NOTE(review): presumably cancels a +1 per-step alive bonus in this env's
  # reward — confirm against the algorithm and environment documentation.
  alive_bonus_offset: -1

# Ant-v4: merges the shared `*mujoco` mapping and overrides learning_rate,
# delta_std, n_delta, n_top and alive_bonus_offset. Keys not listed here
# (n_envs, policy, n_timesteps, normalize) come from the merged mapping.
Ant-v4:
  <<: *mujoco
  # Disabled larger budget (presumably the uncapped value from the paper);
  # while commented out, n_timesteps comes from the merged mapping.
  # n_timesteps: !!float 7.5e7
  learning_rate: !!float 0.015
  delta_std: !!float 0.025
  n_delta: 60
  n_top: 20
  # NOTE(review): presumably cancels a +1 per-step alive bonus in this env's
  # reward — confirm against the algorithm and environment documentation.
  alive_bonus_offset: -1
