Pendulum-v1:
  n_timesteps: 50000
  policy: 'MlpPolicy'
  learning_rate: !!float 1e-3
  learning_starts: 1000
  n_components: 3

LunarLanderContinuous-v2:
  n_timesteps: !!float 3e5  # !!float 5e5
  policy: 'MlpPolicy'
  batch_size: 256
  learning_rate: lin_7.3e-4
  buffer_size: 1000000
  ent_coef: 'auto'
  gamma: 0.99
  tau: 0.01
  train_freq: 1
  gradient_steps: 1
  learning_starts: 10000
  policy_kwargs: "dict(net_arch=[400, 300])"
  n_components: 3

# === Mujoco Envs ===

HalfCheetah-v3: &mujoco-defaults
  n_timesteps: !!float 1e6
  policy: 'MlpPolicy'
  learning_starts: 10000
  n_components: 3

Ant-v3:
  <<: *mujoco-defaults

Hopper-v3:
  <<: *mujoco-defaults

Walker2d-v3:
  <<: *mujoco-defaults

Humanoid-v3:
  <<: *mujoco-defaults
  n_timesteps: !!float 2e6

Swimmer-v3:
  <<: *mujoco-defaults
  gamma: 0.9999

maze2d-v0:
  n_timesteps: !!float 2e4
  policy: 'MlpPolicy'
  learning_starts: 200
  n_components: 3

FetchPush-v1: &her-defaults
  n_timesteps: !!float 1e6
  policy: 'MultiInputPolicy'
  buffer_size: 1000000
  ent_coef: 'auto'
  batch_size: 2048
  gamma: 0.95
  learning_rate: !!float 1e-3
  replay_buffer_class: HerReplayBuffer
  replay_buffer_kwargs: "dict(
    online_sampling=True,
    goal_selection_strategy='future',
    n_sampled_goal=4
  )"
  policy_kwargs: "dict(net_arch=[512, 512, 512])"
  n_components: 3

FetchSlide-v1:
  n_timesteps: !!float 1e6
  policy: 'MultiInputPolicy'
  buffer_size: 1000000
  ent_coef: 'auto'
  batch_size: 2048
  gamma: 0.95
  learning_rate: !!float 1e-3
  replay_buffer_class: HerReplayBuffer
  replay_buffer_kwargs: "dict(
    online_sampling=True,
    goal_selection_strategy='future',
    n_sampled_goal=4
  )"
  policy_kwargs: "dict(net_arch=[512, 512, 512])"
  n_components: 3

FetchPickAndPlace-v1:
  n_timesteps: !!float 1e6
  policy: 'MultiInputPolicy'
  buffer_size: 1000000
  ent_coef: 'auto'
  batch_size: 1024
  gamma: 0.95
  learning_rate: !!float 1e-3
  replay_buffer_class: HerReplayBuffer
  replay_buffer_kwargs: "dict(
    online_sampling=True,
    goal_selection_strategy='future',
    n_sampled_goal=4
  )"
  policy_kwargs: "dict(net_arch=[512, 512, 512])"
  n_components: 3
