# === Bullet envs ===

# Tuned
HalfCheetahBulletEnv-v0:
  env_wrapper: utils.wrappers.TimeFeatureWrapper
  n_timesteps: !!float 1e6
  policy: 'MlpPolicy'
  learning_rate: !!float 7.3e-4
  buffer_size: 300000
  batch_size: 256
  ent_coef: 'auto'
  gamma: 0.98
  tau: 0.02
  train_freq: 64
  gradient_steps: 64
  learning_starts: 10000
  use_sde: False
  policy_kwargs: "dict(net_arch=[400, 300])"

# Tuned
AntBulletEnv-v0:
  env_wrapper: utils.wrappers.TimeFeatureWrapper
  n_timesteps: !!float 1e6
  policy: 'MlpPolicy'
  learning_rate: !!float 7.3e-4
  buffer_size: 300000
  batch_size: 256
  ent_coef: 'auto'
  gamma: 0.98
  tau: 0.02
  train_freq: 64
  gradient_steps: 64
  learning_starts: 10000
  use_sde: False
  policy_kwargs: "dict(net_arch=[400, 300])"

HopperBulletEnv-v0:
  env_wrapper: utils.wrappers.TimeFeatureWrapper
  n_timesteps: !!float 1e6
  policy: 'MlpPolicy'
  learning_rate: lin_7.3e-4
  buffer_size: 300000
  batch_size: 256
  ent_coef: 'auto'
  gamma: 0.98
  tau: 0.02
  train_freq: 64
  gradient_steps: 64
  learning_starts: 10000
  use_sde: False
  policy_kwargs: "dict(net_arch=[400, 300])"

Walker2DBulletEnv-v0:
  env_wrapper: utils.wrappers.TimeFeatureWrapper
  n_timesteps: !!float 1e6
  policy: 'MlpPolicy'
  learning_rate: lin_7.3e-4
  buffer_size: 300000
  batch_size: 256
  ent_coef: 'auto'
  gamma: 0.98
  tau: 0.02
  train_freq: 64
  gradient_steps: 64
  learning_starts: 10000
  use_sde: False
  policy_kwargs: "dict(net_arch=[400, 300])"
