# Tuned
HalfCheetahBulletEnv-v0:
  # NOTE(review): keys are grouped by apparent purpose, inferred from their
  # names only; confirm against whatever consumes this file.

  # Environment wrapper and policy selection.
  env_wrapper: utils.wrappers.TimeFeatureWrapper
  policy: 'MlpPolicy'
  # Stored as a plain string; presumably evaluated by the loader - TODO confirm.
  policy_kwargs: 'dict(net_arch=[400, 300], log_std_init=-3.62, lr_sde=1.5e-3)'
  use_sde: true

  # Run length and data collection.
  # `!!float` forces a float; a bare 1e6 may be read as a string by some parsers.
  n_timesteps: !!float 1e6
  buffer_size: 200000
  learning_starts: 10000
  train_freq: 64
  n_episodes_rollout: -1

  # Update settings.
  gradient_steps: 64
  # `!!float` is needed here for the same reason as on n_timesteps.
  learning_rate: !!float 6e-4
  gamma: 0.98

AntBulletEnv-v0:
  # NOTE(review): keys are grouped by apparent purpose, inferred from their
  # names only; confirm against whatever consumes this file.

  # Environment wrapper and policy selection.
  env_wrapper: utils.wrappers.TimeFeatureWrapper
  policy: 'MlpPolicy'
  # Stored as a plain string; presumably evaluated by the loader - TODO confirm.
  policy_kwargs: 'dict(net_arch=[400, 300], log_std_init=-3.62, lr_sde=1.5e-3)'
  use_sde: true

  # Run length and data collection.
  # `!!float` forces a float; a bare 1e6 may be read as a string by some parsers.
  n_timesteps: !!float 1e6
  buffer_size: 200000
  learning_starts: 10000
  train_freq: 64
  n_episodes_rollout: -1

  # Update settings.
  gradient_steps: 64
  # `!!float` is needed here for the same reason as on n_timesteps.
  learning_rate: !!float 6e-4
  gamma: 0.98

HopperBulletEnv-v0:
  # NOTE(review): keys are grouped by apparent purpose, inferred from their
  # names only; confirm against whatever consumes this file.

  # Environment wrapper and policy selection.
  env_wrapper: utils.wrappers.TimeFeatureWrapper
  policy: 'MlpPolicy'
  # Stored as a plain string; presumably evaluated by the loader - TODO confirm.
  policy_kwargs: 'dict(net_arch=[400, 300], log_std_init=-3.62, lr_sde=1.5e-3)'
  use_sde: true

  # Run length and data collection.
  # `!!float` forces a float; a bare 1e6 may be read as a string by some parsers.
  n_timesteps: !!float 1e6
  buffer_size: 200000
  learning_starts: 10000
  train_freq: 64
  n_episodes_rollout: -1

  # Update settings.
  gradient_steps: 64
  # `!!float` is needed here for the same reason as on n_timesteps.
  learning_rate: !!float 6e-4
  gamma: 0.98

Walker2DBulletEnv-v0:
  # NOTE(review): keys are grouped by apparent purpose, inferred from their
  # names only; confirm against whatever consumes this file.

  # Environment wrapper and policy selection.
  env_wrapper: utils.wrappers.TimeFeatureWrapper
  policy: 'MlpPolicy'
  # Stored as a plain string; presumably evaluated by the loader - TODO confirm.
  policy_kwargs: 'dict(net_arch=[400, 300], log_std_init=-3.62, lr_sde=1.5e-3)'
  use_sde: true

  # Run length and data collection.
  # `!!float` forces a float; a bare 1e6 may be read as a string by some parsers.
  n_timesteps: !!float 1e6
  buffer_size: 200000
  learning_starts: 10000
  train_freq: 64
  n_episodes_rollout: -1

  # Update settings.
  gradient_steps: 64
  # `!!float` is needed here for the same reason as on n_timesteps.
  learning_rate: !!float 6e-4
  gamma: 0.98
