atari:
  env_wrapper:
    - stable_baselines3.common.atari_wrappers.AtariWrapper
  # Equivalent to
  # vec_env_wrapper:
  #   - stable_baselines3.common.vec_env.VecFrameStack:
  #         n_stack: 4
  frame_stack: 4
  policy: 'CnnPolicy'
  n_envs: 16
  n_timesteps: !!float 1e7
  ent_coef: 0.01
  vf_coef: 0.25
  policy_kwargs: "dict(optimizer_class=RMSpropTFLike, optimizer_kwargs=dict(eps=1e-5))"

CartPole-v1:
  n_envs: 8
  n_timesteps: !!float 5e5
  policy: 'MlpPolicy'
  ent_coef: 0.0

LunarLander-v2:
  n_envs: 8
  n_timesteps: !!float 2e5
  policy: 'MlpPolicy'
  gamma: 0.995
  n_steps: 5
  learning_rate: lin_0.00083
  ent_coef: 0.00001

MountainCar-v0:
  normalize: true
  n_envs: 16
  n_timesteps: !!float 1e6
  policy: 'MlpPolicy'
  ent_coef: .0

Acrobot-v1:
  normalize: true
  n_envs: 16
  n_timesteps: !!float 5e5
  policy: 'MlpPolicy'
  ent_coef: .0

# Tuned
Pendulum-v1:
  normalize: True
  n_envs: 8
  n_timesteps: !!float 1e6
  policy: 'MlpPolicy'
  ent_coef: 0.0
  max_grad_norm: 0.5
  n_steps: 8
  gae_lambda: 0.9
  vf_coef: 0.4
  gamma: 0.9
  use_rms_prop: True
  normalize_advantage: False
  learning_rate: lin_7e-4
  use_sde: True
  policy_kwargs: "dict(log_std_init=-2, ortho_init=False)"

# Tuned
LunarLanderContinuous-v2:
  normalize: true
  n_envs: 4
  n_timesteps: !!float 5e6
  policy: 'MlpPolicy'
  ent_coef: 0.0
  max_grad_norm: 0.5
  n_steps: 8
  gae_lambda: 0.9
  vf_coef: 0.4
  gamma: 0.99
  use_rms_prop: True
  normalize_advantage: False
  learning_rate: lin_7e-4
  use_sde: True
  policy_kwargs: "dict(log_std_init=-2, ortho_init=False)"

# Tuned
MountainCarContinuous-v0:
  normalize: true
  n_envs: 4
  n_steps: 100
  n_timesteps: !!float 1e5
  policy: 'MlpPolicy'
  ent_coef: 0.0
  use_sde: True
  sde_sample_freq: 16
  policy_kwargs: "dict(log_std_init=0.0, ortho_init=False)"

# Tuned
BipedalWalker-v3:
  normalize: true
  n_envs: 16
  n_timesteps: !!float 5e6
  policy: 'MlpPolicy'
  ent_coef: 0.0
  max_grad_norm: 0.5
  n_steps: 8
  gae_lambda: 0.9
  vf_coef: 0.4
  gamma: 0.99
  use_rms_prop: True
  normalize_advantage: False
  learning_rate: lin_0.00096
  use_sde: True
  policy_kwargs: "dict(log_std_init=-2, ortho_init=False)"

# Tuned
BipedalWalkerHardcore-v3:
  normalize: true
  n_envs: 32
  n_timesteps: !!float 20e7
  policy: 'MlpPolicy'
  ent_coef: 0.001
  max_grad_norm: 0.5
  n_steps: 8
  gae_lambda: 0.9
  vf_coef: 0.4
  gamma: 0.99
  use_rms_prop: True
  normalize_advantage: False
  learning_rate: lin_0.0008
  use_sde: True
  policy_kwargs: "dict(log_std_init=-2, ortho_init=False)"

# Tuned
HalfCheetahBulletEnv-v0: &pybullet-defaults
  normalize: true
  n_envs: 4
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'
  ent_coef: 0.0
  max_grad_norm: 0.5
  n_steps: 8
  gae_lambda: 0.9
  vf_coef: 0.4
  gamma: 0.99
  use_rms_prop: True
  normalize_advantage: False
  # Both works
  learning_rate: lin_0.00096
  # learning_rate: !!float 3e-4
  use_sde: True
  policy_kwargs: "dict(log_std_init=-2, ortho_init=False)"

Walker2DBulletEnv-v0:
  <<: *pybullet-defaults

# Tuned
AntBulletEnv-v0:
  <<: *pybullet-defaults

# Tuned
HopperBulletEnv-v0:
  <<: *pybullet-defaults

# Tuned but unstable
# Not working without SDE?
ReacherBulletEnv-v0:
  <<: *pybullet-defaults
  learning_rate: lin_0.0008

# === Mujoco Envs ===

HalfCheetah-v3: &mujoco-defaults
  normalize: true
  n_timesteps: !!float 1e6
  policy: 'MlpPolicy'

Ant-v3:
  <<: *mujoco-defaults

Hopper-v3:
  <<: *mujoco-defaults

Walker2d-v3:
  <<: *mujoco-defaults

Humanoid-v3:
  <<: *mujoco-defaults
  n_timesteps: !!float 2e6

Swimmer-v3:
  <<: *mujoco-defaults
  gamma: 0.9999
