commonroad-v1:
  # NOTE(review): no n_timesteps is set here, unlike the neighbouring
  # entries -- confirm the consumer obtains the step budget elsewhere.
  policy: 'MlpPolicy'
  memory_limit: 5000
  # Noise settings: type and standard deviation.
  noise_type: 'ornstein-uhlenbeck'
  noise_std: 0.5

MountainCarContinuous-v0:
  # Plain integer step count (300 thousand).
  n_timesteps: 300000
  policy: 'MlpPolicy'
  memory_limit: 5000
  # Noise settings: type and standard deviation.
  noise_type: 'ornstein-uhlenbeck'
  noise_std: 0.5

LunarLanderContinuous-v2:
  # 6e5 = 600000. The explicit !!float tag is kept: some loaders read an
  # untagged 6e5 (no dot, unsigned exponent) as a string, not a number.
  n_timesteps: !!float 6e5
  policy: 'MlpPolicy'
  memory_limit: 50000
  # Noise settings: type and standard deviation.
  noise_type: 'ornstein-uhlenbeck'
  noise_std: 0.1

Pendulum-v0:
  # 2e5 = 200000. The explicit !!float tag is kept: some loaders read an
  # untagged 2e5 (no dot, unsigned exponent) as a string, not a number.
  n_timesteps: !!float 2e5
  policy: 'MlpPolicy'
  memory_limit: 50000
  # Noise settings: type and standard deviation.
  noise_type: 'ornstein-uhlenbeck'
  noise_std: 0.1

# Tuned
BipedalWalker-v3:
  # 1e6 = 1000000; tagged so loaders that treat a bare 1e6 as a string
  # still produce a number.
  n_timesteps: !!float 1e6
  policy: 'MlpPolicy'
  # Stored as a quoted string, not a YAML mapping; presumably evaluated by
  # the consumer -- verify before changing its form.
  policy_kwargs: 'dict(layer_norm=True)'
  gamma: 0.999
  # A plain decimal already resolves to a float, so no tag is needed.
  actor_lr: 0.000527
  batch_size: 256
  memory_limit: 100000
  # Noise settings: type and standard deviation.
  noise_type: 'adaptive-param'
  noise_std: 0.287
  random_exploration: 0.0
  # Normalization switches, written as canonical lowercase booleans.
  normalize_observations: true
  normalize_returns: false

# Tuned
HalfCheetahBulletEnv-v0:
  # 2e6 = 2000000; tagged so loaders that treat a bare 2e6 as a string
  # still produce a number.
  n_timesteps: !!float 2e6
  policy: 'LnMlpPolicy'
  gamma: 0.95
  batch_size: 256
  memory_limit: 1000000
  # Noise settings: type and standard deviation.
  noise_type: 'normal'
  noise_std: 0.22
  # Normalization switches, written as canonical lowercase booleans.
  normalize_observations: true
  normalize_returns: false

# Tuned
Walker2DBulletEnv-v0:
  # 2e6 = 2000000; tagged so loaders that treat a bare 2e6 as a string
  # still produce a number.
  n_timesteps: !!float 2e6
  policy: 'LnMlpPolicy'
  gamma: 0.95
  batch_size: 128
  memory_limit: 1000000
  # Noise settings: type and standard deviation.
  noise_type: 'normal'
  noise_std: 0.15
  # Both normalization switches are on for this entry.
  normalize_observations: true
  normalize_returns: true

# To be tuned
AntBulletEnv-v0:
  # Dotted path to a wrapper class; quoted like the other string values.
  env_wrapper: 'src.utils_run.wrappers.TimeFeatureWrapper'
  # 2e6 = 2000000; tagged so loaders that treat a bare 2e6 as a string
  # still produce a number.
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'
  gamma: 0.99
  batch_size: 256
  memory_limit: 1000000
  # Noise settings: type and standard deviation.
  noise_type: 'normal'
  noise_std: 0.22
  # Normalization switches, written as canonical lowercase booleans.
  normalize_observations: true
  normalize_returns: false

# To be tuned
HopperBulletEnv-v0:
  # 2e6 = 2000000; tagged so loaders that treat a bare 2e6 as a string
  # still produce a number.
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'
  gamma: 0.98
  # Actor and critic learning rates are set to the same value here.
  actor_lr: 0.00156
  critic_lr: 0.00156
  batch_size: 256
  memory_limit: 1000000
  # Noise settings: type and standard deviation.
  noise_type: 'ornstein-uhlenbeck'
  noise_std: 0.652
  # Normalization switches, written as canonical lowercase booleans.
  normalize_observations: true
  normalize_returns: false
