# Hyperparameters used for Fig. 1.
MountainCarContinuous-v0:
  # --- Run setup ---
  policy: "MlpPolicy"
  n_timesteps: 50000.0  # kept as a float, as in the original explicit tag
  # Kept as a string; presumably evaluated by the consumer — confirm.
  policy_kwargs: 'dict(log_std_init=-3.67, net_arch=[64, 64])'

  # --- Optimisation ---
  learning_rate: !!float 3.0e-4  # tag forces a float on YAML 1.1 loaders
  batch_size: 512
  gamma: 0.9999
  tau: 0.01
  ent_coef: 0.1

  # --- Replay buffer / update schedule ---
  buffer_size: 50000
  learning_starts: 0
  # (count, unit) pair: one "episode".
  train_freq:
    - 1
    - "episode"
  # NOTE(review): -1 looks like a sentinel value; confirm its meaning
  # against the training library's documentation.
  gradient_steps: -1

  # --- Exploration ---
  use_sde: true
  sde_sample_freq: 32

  # --- Callbacks ---
  # Each list item maps a dotted callback path to its keyword arguments.
  callback:
    - utils.callbacks.PlotNoiseRatioCallback:
        display_freq: 500

# === Bullet envs ===

# Tuned
HalfCheetahBulletEnv-v0: &pybullet-defaults
  # This mapping is anchored as `pybullet-defaults`; other entries in this
  # file merge it via `<<`, so any change here propagates to them.

  # --- Run setup ---
  policy: "MlpPolicy"
  n_timesteps: !!float 1.0e6  # tag forces a float on YAML 1.1 loaders
  # Kept as a string; presumably evaluated by the consumer — confirm.
  policy_kwargs: 'dict(log_std_init=-3, net_arch=[400, 300])'

  # --- Environment wrappers ---
  # Each list item maps a dotted wrapper path to its keyword arguments.
  env_wrapper:
    # Log continuity cost during training (the weight is set to 0.0).
    - utils.wrappers.ContinuityCostWrapper:
        weight_continuity: 0.0

  # --- Optimisation ---
  learning_rate: !!float 7.3e-4
  batch_size: 256
  gamma: 0.98
  tau: 0.02
  ent_coef: "auto"

  # --- Replay buffer / update schedule ---
  buffer_size: 300000
  learning_starts: 10000
  # (count, unit) pair: one "episode".
  train_freq:
    - 1
    - "episode"
  # NOTE(review): -1 looks like a sentinel value; confirm its meaning
  # against the training library's documentation.
  gradient_steps: -1

  # --- Exploration ---
  use_sde: true

AntBulletEnv-v0:
  # Reuses the mapping anchored as `pybullet-defaults` without any overrides.
  # The merge key (`<<`) is a shallow merge.
  <<: *pybullet-defaults

HopperBulletEnv-v0:
  # Start from the mapping anchored as `pybullet-defaults`; keys written
  # explicitly below take precedence over the merged-in ones.
  <<: *pybullet-defaults
  # A string, not a float. The `lin_` prefix is presumably interpreted by
  # the consumer as a schedule specifier — confirm.
  learning_rate: 'lin_7.3e-4'

Walker2DBulletEnv-v0:
  # Start from the mapping anchored as `pybullet-defaults`; keys written
  # explicitly below take precedence over the merged-in ones.
  <<: *pybullet-defaults
  # A string, not a float. The `lin_` prefix is presumably interpreted by
  # the consumer as a schedule specifier — confirm.
  learning_rate: 'lin_7.3e-4'
