# MountainCarContinuous-v0: sets only the policy, the step budget and the
# noise; no other key is given by this entry.
MountainCarContinuous-v0:
  policy: 'MlpPolicy'
  # Step budget, written as a plain integer.
  n_timesteps: 300000
  # Noise selection (type name plus std value).
  noise_std: 0.5
  noise_type: 'ornstein-uhlenbeck'

# Pendulum-v0: policy, step counts and noise only; no other key is given by
# this entry.
Pendulum-v0:
  policy: 'MlpPolicy'
  # Step counts, written as plain integers.
  n_timesteps: 100000
  learning_starts: 1000
  # Noise selection (type name plus std value).
  noise_std: 0.1
  noise_type: 'normal'

# LunarLanderContinuous-v2: 3e5-step budget with a batch size of 256.
LunarLanderContinuous-v2:
  policy: 'MlpPolicy'
  # Step counts. The !!float tag forces a float: a dot-less "3e5" is read as
  # a string by YAML 1.1 loaders such as PyYAML.
  n_timesteps: !!float 3e5
  learning_starts: 1000
  batch_size: 256
  # Noise selection (type name plus std value).
  noise_std: 0.1
  noise_type: 'ornstein-uhlenbeck'

# HalfCheetah-v2: 1e6-step budget, TimeFeatureWrapper, layers=[400, 300].
HalfCheetah-v2:
  # Wrapper class, given as a dotted path.
  env_wrapper: utils.wrappers.TimeFeatureWrapper
  # policy_kwargs is a quoted string holding a dict(...) expression, not a
  # YAML mapping -- presumably evaluated by the loader; confirm.
  policy: 'MlpPolicy'
  policy_kwargs: 'dict(layers=[400, 300])'
  # Step counts. The !!float tag forces a float: a dot-less "1e6" is read as
  # a string by YAML 1.1 loaders such as PyYAML.
  n_timesteps: !!float 1e6
  learning_starts: 10000
  # Noise selection (type name plus std value).
  noise_std: 0.1
  noise_type: 'normal'
  # Optimisation settings.
  learning_rate: !!float 1e-3
  gamma: 0.99
  batch_size: 100
  buffer_size: 1000000
  # train_freq and gradient_steps are set to the same value in this entry.
  gradient_steps: 1000
  train_freq: 1000

# HalfCheetahBulletEnv-v0: 2e6-step budget, TimeFeatureWrapper,
# layers=[400, 300].
HalfCheetahBulletEnv-v0:
  # Wrapper class, given as a dotted path.
  env_wrapper: utils.wrappers.TimeFeatureWrapper
  # policy_kwargs is a quoted string holding a dict(...) expression, not a
  # YAML mapping -- presumably evaluated by the loader; confirm.
  policy: 'MlpPolicy'
  policy_kwargs: 'dict(layers=[400, 300])'
  # Step counts. The !!float tag forces a float: a dot-less "2e6" is read as
  # a string by YAML 1.1 loaders such as PyYAML.
  n_timesteps: !!float 2e6
  learning_starts: 10000
  # Noise selection (type name plus std value).
  noise_std: 0.1
  noise_type: 'normal'
  # Optimisation settings.
  learning_rate: !!float 1e-3
  gamma: 0.99
  batch_size: 100
  buffer_size: 1000000
  # train_freq and gradient_steps are set to the same value in this entry.
  gradient_steps: 1000
  train_freq: 1000

# BipedalWalker-v3: 2e6-step budget, no env_wrapper, layers=[400, 300].
BipedalWalker-v3:
  # policy_kwargs is a quoted string holding a dict(...) expression, not a
  # YAML mapping -- presumably evaluated by the loader; confirm.
  policy: 'MlpPolicy'
  policy_kwargs: 'dict(layers=[400, 300])'
  # Step counts. The !!float tag forces a float: a dot-less "2e6" is read as
  # a string by YAML 1.1 loaders such as PyYAML.
  n_timesteps: !!float 2e6
  learning_starts: 10000
  # Noise selection (type name plus std value).
  noise_std: 0.1
  noise_type: 'normal'
  # Optimisation settings.
  learning_rate: !!float 1e-3
  gamma: 0.99
  batch_size: 100
  buffer_size: 1000000
  # train_freq and gradient_steps are set to the same value in this entry.
  gradient_steps: 1000
  train_freq: 1000

# To be tuned: the BipedalWalkerHardcore-v3 values below have not been tuned yet.
BipedalWalkerHardcore-v3:
  # policy_kwargs is a quoted string holding a dict(...) expression, not a
  # YAML mapping -- presumably evaluated by the loader; confirm.
  policy: 'MlpPolicy'
  policy_kwargs: 'dict(layers=[400, 300])'
  # Step counts. The !!float tag forces a float: a dot-less "5e7" is read as
  # a string by YAML 1.1 loaders such as PyYAML.
  n_timesteps: !!float 5e7
  learning_starts: 10000
  # Noise selection (type name plus std value).
  noise_std: 0.2
  noise_type: 'normal'
  # Optimisation settings.
  learning_rate: !!float 1e-3
  gamma: 0.99
  batch_size: 100
  buffer_size: 1000000
  # train_freq and gradient_steps are set to the same value in this entry.
  gradient_steps: 1000
  train_freq: 1000

# AntBulletEnv-v0: 2e6-step budget, TimeFeatureWrapper, noise_std 0.2.
AntBulletEnv-v0:
  # Wrapper class, given as a dotted path.
  env_wrapper: utils.wrappers.TimeFeatureWrapper
  # policy_kwargs is a quoted string holding a dict(...) expression, not a
  # YAML mapping -- presumably evaluated by the loader; confirm.
  policy: 'MlpPolicy'
  policy_kwargs: 'dict(layers=[400, 300])'
  # Step counts. The !!float tag forces a float: a dot-less "2e6" is read as
  # a string by YAML 1.1 loaders such as PyYAML.
  n_timesteps: !!float 2e6
  learning_starts: 10000
  # Noise selection (type name plus std value).
  noise_std: 0.2
  noise_type: 'normal'
  # Optimisation settings.
  learning_rate: !!float 1e-3
  gamma: 0.99
  batch_size: 100
  buffer_size: 1000000
  # train_freq and gradient_steps are set to the same value in this entry.
  gradient_steps: 1000
  train_freq: 1000

# HopperBulletEnv-v0: 2e6-step budget, TimeFeatureWrapper, noise_std 0.2.
HopperBulletEnv-v0:
  # Wrapper class, given as a dotted path.
  env_wrapper: utils.wrappers.TimeFeatureWrapper
  # policy_kwargs is a quoted string holding a dict(...) expression, not a
  # YAML mapping -- presumably evaluated by the loader; confirm.
  policy: 'MlpPolicy'
  policy_kwargs: 'dict(layers=[400, 300])'
  # Step counts. The !!float tag forces a float: a dot-less "2e6" is read as
  # a string by YAML 1.1 loaders such as PyYAML.
  n_timesteps: !!float 2e6
  learning_starts: 10000
  # Noise selection (type name plus std value).
  noise_std: 0.2
  noise_type: 'normal'
  # Optimisation settings.
  learning_rate: !!float 1e-3
  gamma: 0.99
  batch_size: 100
  buffer_size: 1000000
  # train_freq and gradient_steps are set to the same value in this entry.
  gradient_steps: 1000
  train_freq: 1000

# Walker2DBulletEnv-v0: 2e6-step budget, TimeFeatureWrapper; the only entry
# visible here that also sets noise_std_final.
Walker2DBulletEnv-v0:
  # Wrapper class, given as a dotted path.
  env_wrapper: utils.wrappers.TimeFeatureWrapper
  # policy_kwargs is a quoted string holding a dict(...) expression, not a
  # YAML mapping -- presumably evaluated by the loader; confirm.
  policy: 'MlpPolicy'
  policy_kwargs: 'dict(layers=[400, 300])'
  # Step counts. The !!float tag forces a float: a dot-less "2e6" is read as
  # a string by YAML 1.1 loaders such as PyYAML.
  n_timesteps: !!float 2e6
  learning_starts: 10000
  # Noise selection (type name plus std values).
  # NOTE(review): noise_std_final is paired with the plain 'normal' noise
  # type; confirm the loader actually reads it for this noise_type.
  noise_std: 0.1
  noise_std_final: 0.05
  noise_type: 'normal'
  # Optimisation settings.
  learning_rate: !!float 1e-3
  gamma: 0.99
  batch_size: 100
  buffer_size: 1000000
  # train_freq and gradient_steps are set to the same value in this entry.
  gradient_steps: 1000
  train_freq: 1000

# HumanoidBulletEnv-v0: 2e7-step budget, TimeFeatureWrapper,
# layers=[400, 300].
HumanoidBulletEnv-v0:
  # Wrapper class, given as a dotted path.
  env_wrapper: utils.wrappers.TimeFeatureWrapper
  # policy_kwargs is a quoted string holding a dict(...) expression, not a
  # YAML mapping -- presumably evaluated by the loader; confirm.
  policy: 'MlpPolicy'
  policy_kwargs: 'dict(layers=[400, 300])'
  # Step counts. The !!float tag forces a float: a dot-less "2e7" is read as
  # a string by YAML 1.1 loaders such as PyYAML.
  n_timesteps: !!float 2e7
  learning_starts: 10000
  # Noise selection (type name plus std value).
  noise_std: 0.1
  noise_type: 'normal'
  # Optimisation settings.
  learning_rate: !!float 1e-3
  gamma: 0.99
  batch_size: 100
  buffer_size: 1000000
  # train_freq and gradient_steps are set to the same value in this entry.
  gradient_steps: 1000
  train_freq: 1000

# ReacherBulletEnv-v0: 1e6-step budget, TimeFeatureWrapper,
# layers=[400, 300].
ReacherBulletEnv-v0:
  # Wrapper class, given as a dotted path.
  env_wrapper: utils.wrappers.TimeFeatureWrapper
  # policy_kwargs is a quoted string holding a dict(...) expression, not a
  # YAML mapping -- presumably evaluated by the loader; confirm.
  policy: 'MlpPolicy'
  policy_kwargs: 'dict(layers=[400, 300])'
  # Step counts. The !!float tag forces a float: a dot-less "1e6" is read as
  # a string by YAML 1.1 loaders such as PyYAML.
  n_timesteps: !!float 1e6
  learning_starts: 10000
  # Noise selection (type name plus std value).
  noise_std: 0.1
  noise_type: 'normal'
  # Optimisation settings.
  learning_rate: !!float 1e-3
  gamma: 0.99
  batch_size: 100
  buffer_size: 1000000
  # train_freq and gradient_steps are set to the same value in this entry.
  gradient_steps: 1000
  train_freq: 1000

# InvertedDoublePendulumBulletEnv-v0: 1e6-step budget, no env_wrapper,
# layers=[400, 300].
InvertedDoublePendulumBulletEnv-v0:
  # policy_kwargs is a quoted string holding a dict(...) expression, not a
  # YAML mapping -- presumably evaluated by the loader; confirm.
  policy: 'MlpPolicy'
  policy_kwargs: 'dict(layers=[400, 300])'
  # Step counts. The !!float tag forces a float: a dot-less "1e6" is read as
  # a string by YAML 1.1 loaders such as PyYAML.
  n_timesteps: !!float 1e6
  learning_starts: 10000
  # Noise selection (type name plus std value).
  noise_std: 0.1
  noise_type: 'normal'
  # Optimisation settings.
  learning_rate: !!float 1e-3
  gamma: 0.99
  batch_size: 100
  buffer_size: 1000000
  # train_freq and gradient_steps are set to the same value in this entry.
  gradient_steps: 1000
  train_freq: 1000

# InvertedPendulumSwingupBulletEnv-v0: 1e6-step budget, TimeFeatureWrapper,
# layers=[400, 300].
InvertedPendulumSwingupBulletEnv-v0:
  # Wrapper class, given as a dotted path.
  env_wrapper: utils.wrappers.TimeFeatureWrapper
  # policy_kwargs is a quoted string holding a dict(...) expression, not a
  # YAML mapping -- presumably evaluated by the loader; confirm.
  policy: 'MlpPolicy'
  policy_kwargs: 'dict(layers=[400, 300])'
  # Step counts. The !!float tag forces a float: a dot-less "1e6" is read as
  # a string by YAML 1.1 loaders such as PyYAML.
  n_timesteps: !!float 1e6
  learning_starts: 10000
  # Noise selection (type name plus std value).
  noise_std: 0.1
  noise_type: 'normal'
  # Optimisation settings.
  learning_rate: !!float 1e-3
  gamma: 0.99
  batch_size: 100
  buffer_size: 1000000
  # train_freq and gradient_steps are set to the same value in this entry.
  gradient_steps: 1000
  train_freq: 1000

# MinitaurBulletEnv-v0: 1e6-step budget, TimeFeatureWrapper,
# layers=[400, 300].
MinitaurBulletEnv-v0:
  # Wrapper class, given as a dotted path.
  env_wrapper: utils.wrappers.TimeFeatureWrapper
  # policy_kwargs is a quoted string holding a dict(...) expression, not a
  # YAML mapping -- presumably evaluated by the loader; confirm.
  policy: 'MlpPolicy'
  policy_kwargs: 'dict(layers=[400, 300])'
  # Step counts. The !!float tag forces a float: a dot-less "1e6" is read as
  # a string by YAML 1.1 loaders such as PyYAML.
  n_timesteps: !!float 1e6
  learning_starts: 10000
  # Noise selection (type name plus std value).
  noise_std: 0.1
  noise_type: 'normal'
  # Optimisation settings.
  learning_rate: !!float 1e-3
  gamma: 0.99
  batch_size: 100
  buffer_size: 1000000
  # train_freq and gradient_steps are set to the same value in this entry.
  gradient_steps: 1000
  train_freq: 1000
