# Settings for the `MountainCarContinuous-v0` entry.
# Keys are listed alphabetically; mapping order carries no meaning in YAML.
MountainCarContinuous-v0:
  batch_size: 256
  epsilon: 8.991897599706103
  gae_lambda: 0.95
  gamma: 0.999
  learning_rate: 0.002923361250695006
  max_grad_norm: 0.7
  n_envs: 2
  n_epochs: 10
  n_steps: 512
  # The explicit !!float tag forces a float however the loader would
  # otherwise resolve a bare `1e5`.
  n_timesteps: !!float 1e5
  normalize: true
  policy: 'MlpPolicy'
  # Stored as a string, not a mapping. It holds a Python-style `dict(...)`
  # expression that references `nn.ReLU`; presumably the consumer evaluates
  # it with `nn` in scope — TODO confirm.
  policy_kwargs: 'dict(net_arch=[dict(pi=[64, 64], vf=[64, 64])], activation_fn=nn.ReLU)'
  sde_sample_freq: 128
  use_sde: true
  vf_coef: 0.6143251028717782

# Settings for the `HalfCheetah-v3` entry.
# Keys are listed alphabetically; mapping order carries no meaning in YAML.
HalfCheetah-v3:
  batch_size: 256
  epsilon: 0.05482487880015605
  gae_lambda: 0.9
  gamma: 0.99
  learning_rate: 0.00027514124488501023
  max_grad_norm: 0.8
  n_epochs: 20
  n_steps: 1024
  # The explicit !!float tag forces a float however the loader would
  # otherwise resolve a bare `1e6`.
  n_timesteps: !!float 1e6
  normalize: true
  policy: 'MlpPolicy'
  # Stored as a string holding a Python-style `dict(...)` expression;
  # presumably evaluated by the consumer with `nn` in scope — TODO confirm.
  # Folded scalar (`>-`): keep every line at the same indentation so the
  # lines are joined with single spaces into one line, with no trailing
  # newline.
  policy_kwargs: >-
    dict(
    log_std_init=-2.029110479243448,
    ortho_init=False,
    activation_fn=nn.LeakyReLU,
    net_arch=[dict(pi=[64, 64], vf=[64, 64])])
  # NOTE(review): this entry sets no `use_sde` key, whereas the
  # MountainCarContinuous-v0 entry sets it alongside `sde_sample_freq`.
  # Confirm that `sde_sample_freq` has an effect here.
  sde_sample_freq: 128
  vf_coef: 0.0070355748199713575

# Settings for the `Hopper-v3` entry.
# Keys are listed alphabetically; mapping order carries no meaning in YAML.
Hopper-v3:
  batch_size: 512
  epsilon: 0.4
  gae_lambda: 0.92
  gamma: 0.995
  learning_rate: 0.0008684495273775452
  max_grad_norm: 0.1
  n_epochs: 10
  n_steps: 4096
  # The explicit !!float tag forces a float however the loader would
  # otherwise resolve a bare `1e6`.
  n_timesteps: !!float 1e6
  normalize: true
  policy: 'MlpPolicy'
  # Stored as a string holding a Python-style `dict(...)` expression;
  # presumably evaluated by the consumer with `nn` in scope — TODO confirm.
  # Folded scalar (`>-`): keep every line at the same indentation so the
  # lines are joined with single spaces into one line, with no trailing
  # newline.
  policy_kwargs: >-
    dict(
    log_std_init=-0.36197485369594307,
    activation_fn=nn.Tanh,
    net_arch=[dict(pi=[64, 64], vf=[64, 64])]
    )
  # NOTE(review): this entry sets no `use_sde` key, whereas the
  # MountainCarContinuous-v0 entry sets it alongside `sde_sample_freq`.
  # Confirm that `sde_sample_freq` has an effect here.
  sde_sample_freq: 16
  vf_coef: 0.6349328131047219

# Settings for the `Swimmer-v3` entry (the key is presumably an environment
# id — confirm against the consumer).
Swimmer-v3:
  epsilon: 0.2
  n_envs: 16
  # The explicit !!float tag forces a float however the loader would
  # otherwise resolve a bare `1e6`.
  n_timesteps: !!float 1e6
  policy: 'MlpPolicy'
  n_steps: 1024
  batch_size: 64
  gae_lambda: 0.98
  gamma: 0.999
  n_epochs: 4