# Tuned
HalfCheetahBulletEnv-v0:
  # Settings applied when the selected environment id is
  # HalfCheetahBulletEnv-v0.

  # Environment setup.
  env_wrapper: utils.wrappers.TimeFeatureWrapper
  normalize: true
  n_envs: 1

  # The explicit !!float tag forces a float; some YAML 1.1 loaders would
  # otherwise read exponent notation without a decimal point as a string.
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'

  # Optimisation settings.
  batch_size: 64
  n_steps: 2048
  gamma: 0.99
  gae_lambda: 0.95
  n_epochs: 10
  ent_coef: 0.0
  max_grad_norm: 0.5
  vf_coef: 0.5
  learning_rate: !!float 2.5e-4
  use_sde: false
  clip_range: 0.2

  # Folded block scalar (>-): the lines below are joined with single spaces
  # into one string without a trailing newline. The value is a Python-style
  # dict(...) expression kept as text; presumably the consumer evaluates it
  # with `nn` in scope -- TODO confirm against the loader.
  policy_kwargs: >-
    dict(log_std_init=0.0,
    ortho_init=True,
    activation_fn=nn.Tanh,
    net_arch=[dict(pi=[64, 64], vf=[64, 64])]
    )

AntBulletEnv-v0:
  # Settings applied when the selected environment id is AntBulletEnv-v0.

  # Environment setup.
  env_wrapper: utils.wrappers.TimeFeatureWrapper
  normalize: true
  n_envs: 1

  # The explicit !!float tag forces a float; some YAML 1.1 loaders would
  # otherwise read exponent notation without a decimal point as a string.
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'

  # Optimisation settings.
  batch_size: 64
  n_steps: 2048
  gamma: 0.99
  gae_lambda: 0.95
  n_epochs: 10
  ent_coef: 0.0
  max_grad_norm: 0.5
  vf_coef: 0.5
  learning_rate: !!float 2.5e-4
  use_sde: false
  clip_range: 0.2

  # Folded block scalar (>-): the lines below are joined with single spaces
  # into one string without a trailing newline. The value is a Python-style
  # dict(...) expression kept as text; presumably the consumer evaluates it
  # with `nn` in scope -- TODO confirm against the loader.
  policy_kwargs: >-
    dict(log_std_init=0.0,
    ortho_init=True,
    activation_fn=nn.Tanh,
    net_arch=[dict(pi=[64, 64], vf=[64, 64])]
    )


HopperBulletEnv-v0:
  # Settings applied when the selected environment id is HopperBulletEnv-v0.

  # Environment setup.
  env_wrapper: utils.wrappers.TimeFeatureWrapper
  normalize: true
  n_envs: 1

  # The explicit !!float tag forces a float; some YAML 1.1 loaders would
  # otherwise read exponent notation without a decimal point as a string.
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'

  # Optimisation settings.
  batch_size: 64
  n_steps: 2048
  gamma: 0.99
  gae_lambda: 0.95
  n_epochs: 10
  ent_coef: 0.0
  max_grad_norm: 0.5
  vf_coef: 0.5
  learning_rate: !!float 2.5e-4
  use_sde: false
  clip_range: 0.2

  # Folded block scalar (>-): the lines below are joined with single spaces
  # into one string without a trailing newline. The value is a Python-style
  # dict(...) expression kept as text; presumably the consumer evaluates it
  # with `nn` in scope -- TODO confirm against the loader.
  policy_kwargs: >-
    dict(log_std_init=0.0,
    ortho_init=True,
    activation_fn=nn.Tanh,
    net_arch=[dict(pi=[64, 64], vf=[64, 64])]
    )


Walker2DBulletEnv-v0:
  # Settings applied when the selected environment id is
  # Walker2DBulletEnv-v0.

  # Environment setup.
  env_wrapper: utils.wrappers.TimeFeatureWrapper
  normalize: true
  n_envs: 1

  # The explicit !!float tag forces a float; some YAML 1.1 loaders would
  # otherwise read exponent notation without a decimal point as a string.
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'

  # Optimisation settings.
  batch_size: 64
  n_steps: 2048
  gamma: 0.99
  gae_lambda: 0.95
  n_epochs: 10
  ent_coef: 0.0
  max_grad_norm: 0.5
  vf_coef: 0.5
  # A string, not a float: the `lin_` prefix is presumably a schedule marker
  # interpreted by the consumer -- TODO confirm. Quoted so the string type is
  # explicit rather than an accident of implicit typing.
  learning_rate: 'lin_2.5e-4'
  use_sde: false
  clip_range: 0.2

  # Folded block scalar (>-): the lines below are joined with single spaces
  # into one string without a trailing newline. The value is a Python-style
  # dict(...) expression kept as text; presumably the consumer evaluates it
  # with `nn` in scope -- TODO confirm against the loader.
  policy_kwargs: >-
    dict(log_std_init=0.0,
    ortho_init=True,
    activation_fn=nn.Tanh,
    net_arch=[dict(pi=[64, 64], vf=[64, 64])]
    )
