atari:
  env_wrapper:
    - stable_baselines3.common.atari_wrappers.AtariWrapper
  frame_stack: 4
  policy: 'CnnPolicy'
  n_envs: 8
  n_steps: 128
  n_epochs: 4
  batch_size: 256
  n_timesteps: !!float 1e7
  learning_rate: lin_2.5e-4
  clip_range: lin_0.1
  vf_coef: 0.5
  ent_coef: 0.01

Pendulum-v0:
  n_envs: 8
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'
  n_steps: 2048
  batch_size: 64
  gae_lambda: 0.95
  gamma: 0.99
  n_epochs: 10
  ent_coef: 0.0
  learning_rate: !!float 3e-4
  clip_range: 0.2

pointMass-v0:
  n_envs: 2
  n_timesteps: !!float 2e5
  policy: 'MlpPolicy'
  n_steps: 64
  batch_size: 64
  gae_lambda: 0.95
  gamma: 0.99
  n_epochs: 10
  ent_coef: 0.0
  learning_rate: !!float 3e-4
  clip_range: 0.2

pointMassMaze-v0:
  n_envs: 8
  n_timesteps: !!float 1e6
  policy: 'MlpPolicy'
  n_steps: 10
  batch_size: 16
  gae_lambda: 0.95
  gamma: 0.99
  n_epochs: 4
  ent_coef: 0.0
  learning_rate: !!float 3e-4
  clip_range: 0.2

# Tuned
CartPole-v1:
  n_envs: 8
  n_timesteps: !!float 1e5
  policy: 'MlpPolicy'
  n_steps: 32
  batch_size: 256
  gae_lambda: 0.8
  gamma: 0.98
  n_epochs: 20
  ent_coef: 0.0
  learning_rate: lin_0.001
  clip_range: lin_0.2

MountainCar-v0:
  normalize: true
  n_envs: 16
  n_timesteps: !!float 1e6
  policy: 'MlpPolicy'
  n_steps: 16
  gae_lambda: 0.98
  gamma: 0.99
  n_epochs: 4
  ent_coef: 0.0

# Tuned
MountainCarContinuous-v0:
  normalize: true
  n_envs: 1
  n_timesteps: !!float 20000
  policy: 'MlpPolicy'
  batch_size: 256
  n_steps: 8
  gamma: 0.9999
  learning_rate: !!float 7.77e-05
  ent_coef: 0.00429
  clip_range: 0.1
  n_epochs: 10
  gae_lambda: 0.9
  max_grad_norm: 5
  vf_coef: 0.19
  use_sde: True
  policy_kwargs: "dict(log_std_init=-3.29, ortho_init=False)"

Acrobot-v1:
  normalize: true
  n_envs: 16
  n_timesteps: !!float 1e6
  policy: 'MlpPolicy'
  n_steps: 256
  gae_lambda: 0.94
  gamma: 0.99
  n_epochs: 4
  ent_coef: 0.0

BipedalWalker-v3:
  normalize: true
  n_envs: 16
  n_timesteps: !!float 5e6
  policy: 'MlpPolicy'
  n_steps: 2048
  batch_size: 64
  gae_lambda: 0.95
  gamma: 0.99
  n_epochs: 10
  ent_coef: 0.001
  learning_rate: !!float 2.5e-4
  clip_range: 0.2

BipedalWalkerHardcore-v3:
  normalize: true
  n_envs: 16
  n_timesteps: !!float 10e7
  policy: 'MlpPolicy'
  n_steps: 2048
  batch_size: 64
  gae_lambda: 0.95
  gamma: 0.99
  n_epochs: 10
  ent_coef: 0.001
  learning_rate: lin_2.5e-4
  clip_range: lin_0.2

CustomBipedalWalker-v3:
  normalize: true
  n_envs: 16
  n_timesteps: !!float 5e6
  policy: 'MlpPolicy'
  n_steps: 2048
  batch_size: 64
  gae_lambda: 0.95
  gamma: 0.99
  n_epochs: 10
  ent_coef: 0.001
  learning_rate: !!float 2.5e-4
  clip_range: 0.2

LunarLander-v2:
  n_envs: 16
  n_timesteps: !!float 1e6
  policy: 'MlpPolicy'
  n_steps: 1024
  batch_size: 64
  gae_lambda: 0.98
  gamma: 0.999
  n_epochs: 4
  ent_coef: 0.01

LunarLanderContinuous-v2:
  n_envs: 16
  n_timesteps: !!float 1e6
  policy: 'MlpPolicy'
  n_steps: 1024
  batch_size: 64
  gae_lambda: 0.98
  gamma: 0.999
  n_epochs: 4
  ent_coef: 0.01

CustomLunarLander-v2:
  n_envs: 16
  n_timesteps: !!float 1e6
  policy: 'MlpPolicy'
  n_steps: 1024
  batch_size: 64
  gae_lambda: 0.98
  gamma: 0.999
  n_epochs: 4
  ent_coef: 0.01

# Tuned
HalfCheetahBulletEnv-v0:
  env_wrapper: sb3_contrib.common.wrappers.TimeFeatureWrapper
  normalize: true
  n_envs: 16
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'
  batch_size: 128
  n_steps: 512
  gamma: 0.99
  gae_lambda: 0.9
  n_epochs: 20
  ent_coef: 0.0
  sde_sample_freq: 4
  max_grad_norm: 0.5
  vf_coef: 0.5
  learning_rate: !!float 3e-5
  use_sde: True
  clip_range: 0.4
  policy_kwargs: "dict(log_std_init=-2,
                       ortho_init=False,
                       activation_fn=nn.ReLU,
                       net_arch=[dict(pi=[256, 256], vf=[256, 256])]
                       )"

# Tuned
AntBulletEnv-v0:
  env_wrapper: sb3_contrib.common.wrappers.TimeFeatureWrapper
  normalize: true
  n_envs: 16
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'
  batch_size: 128
  n_steps: 512
  gamma: 0.99
  gae_lambda: 0.9
  n_epochs: 20
  ent_coef: 0.0
  sde_sample_freq: 4
  max_grad_norm: 0.5
  vf_coef: 0.5
  learning_rate: !!float 3e-5
  use_sde: True
  clip_range: 0.4
  policy_kwargs: "dict(log_std_init=-1,
                       ortho_init=False,
                       activation_fn=nn.ReLU,
                       net_arch=[dict(pi=[256, 256], vf=[256, 256])]
                       )"

# Tuned
Walker2DBulletEnv-v0:
  env_wrapper: sb3_contrib.common.wrappers.TimeFeatureWrapper
  normalize: true
  n_envs: 16
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'
  batch_size: 128
  n_steps: 512
  gamma: 0.99
  gae_lambda: 0.92
  n_epochs: 20
  ent_coef: 0.0
  sde_sample_freq: 4
  max_grad_norm: 0.5
  vf_coef: 0.5
  learning_rate: !!float 3e-5
  use_sde: True
  clip_range: lin_0.4
  policy_kwargs: "dict(log_std_init=-2,
                       ortho_init=False,
                       activation_fn=nn.ReLU,
                       net_arch=[dict(pi=[256, 256], vf=[256, 256])]
                       )"

# Tuned
HopperBulletEnv-v0:
  env_wrapper: sb3_contrib.common.wrappers.TimeFeatureWrapper
  normalize: true
  n_envs: 16
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'
  batch_size: 128
  n_steps: 512
  gamma: 0.99
  gae_lambda: 0.92
  n_epochs: 20
  ent_coef: 0.0
  sde_sample_freq: 4
  max_grad_norm: 0.5
  vf_coef: 0.5
  learning_rate: !!float 3e-5
  use_sde: True
  clip_range: lin_0.4
  policy_kwargs: "dict(log_std_init=-2,
                       ortho_init=False,
                       activation_fn=nn.ReLU,
                       net_arch=[dict(pi=[256, 256], vf=[256, 256])]
                       )"

# Tuned
ReacherBulletEnv-v0:
  env_wrapper: sb3_contrib.common.wrappers.TimeFeatureWrapper
  normalize: true
  n_envs: 8
  n_timesteps: !!float 1e6
  policy: 'MlpPolicy'
  batch_size: 64
  n_steps: 512
  gamma: 0.99
  gae_lambda: 0.9
  n_epochs: 20
  ent_coef: 0.0
  sde_sample_freq: 4
  max_grad_norm: 0.5
  vf_coef: 0.5
  learning_rate: !!float 3e-5
  use_sde: True
  clip_range: lin_0.4
  policy_kwargs: "dict(log_std_init=-2.7,
                       ortho_init=False,
                       activation_fn=nn.ReLU,
                       net_arch=[dict(pi=[256, 256], vf=[256, 256])]
                       )"

MinitaurBulletEnv-v0:
  normalize: true
  n_envs: 8
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'
  n_steps: 2048
  batch_size: 64
  gae_lambda: 0.95
  gamma: 0.99
  n_epochs: 10
  ent_coef: 0.0
  learning_rate: 2.5e-4
  clip_range: 0.2

MinitaurBulletDuckEnv-v0:
  normalize: true
  n_envs: 8
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'
  n_steps: 2048
  batch_size: 64
  gae_lambda: 0.95
  gamma: 0.99
  n_epochs: 10
  ent_coef: 0.0
  learning_rate: 2.5e-4
  clip_range: 0.2

# To be tuned
HumanoidBulletEnv-v0:
  normalize: true
  n_envs: 8
  n_timesteps: !!float 1e7
  policy: 'MlpPolicy'
  n_steps: 2048
  batch_size: 64
  gae_lambda: 0.95
  gamma: 0.99
  n_epochs: 10
  ent_coef: 0.0
  learning_rate: 2.5e-4
  clip_range: 0.2

InvertedDoublePendulumBulletEnv-v0:
  normalize: true
  n_envs: 8
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'
  n_steps: 2048
  batch_size: 64
  gae_lambda: 0.95
  gamma: 0.99
  n_epochs: 10
  ent_coef: 0.0
  learning_rate: 2.5e-4
  clip_range: 0.2

InvertedPendulumSwingupBulletEnv-v0:
  normalize: true
  n_envs: 8
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'
  n_steps: 2048
  batch_size: 64
  gae_lambda: 0.95
  gamma: 0.99
  n_epochs: 10
  ent_coef: 0.0
  learning_rate: 2.5e-4
  clip_range: 0.2

# Following https://github.com/lcswillems/rl-starter-files
MiniGrid-DoorKey-5x5-v0:
  env_wrapper: gym_minigrid.wrappers.FlatObsWrapper # requires --gym-packages gym_minigrid
  normalize: true
  n_envs: 8 # number of environment copies running in parallel
  n_timesteps: !!float 1e5
  policy: MlpPolicy
  n_steps: 128 # batch size is n_steps * n_env
  batch_size: 64 # Number of training minibatches per update
  gae_lambda: 0.95 #  Factor for trade-off of bias vs variance for Generalized Advantage Estimator
  gamma: 0.99
  n_epochs: 10 #  Number of epoch when optimizing the surrogate
  ent_coef: 0.0 # Entropy coefficient for the loss caculation
  learning_rate: 2.5e-4 # The learning rate, it can be a function
  clip_range: 0.2 # Clipping parameter, it can be a function

MiniGrid-FourRooms-v0:
  env_wrapper: gym_minigrid.wrappers.FlatObsWrapper # requires --gym-packages gym_minigrid
  normalize: true
  n_envs: 8
  n_timesteps: !!float 4e6
  policy: 'MlpPolicy'
  n_steps: 512
  batch_size: 64
  gae_lambda: 0.95
  gamma: 0.99
  n_epochs: 10
  ent_coef: 0.0
  learning_rate: 2.5e-4
  clip_range: 0.2

MiniGrid-FourRooms-v1:
  env_wrapper: gym_minigrid.wrappers.FlatObsWrapper # requires --gym-packages gym_minigrid
  normalize: true
  n_envs: 8
  n_timesteps: !!float 4e6
  policy: 'MlpPolicy'
  n_steps: 512
  batch_size: 64
  gae_lambda: 0.95
  gamma: 0.99
  n_epochs: 10
  ent_coef: 0.0
  learning_rate: 2.5e-4
  clip_range: 0.2

CarRacing-v0:
  env_wrapper:
    - gym.wrappers.resize_observation.ResizeObservation:
        shape: 64
    - gym.wrappers.gray_scale_observation.GrayScaleObservation:
        keep_dim: true
  frame_stack: 4
  n_envs: 8
  n_timesteps: !!float 1e6
  policy: 'CnnPolicy'
  batch_size: 128
  n_steps: 512
  gamma: 0.99
  gae_lambda: 0.9
  n_epochs: 20
  ent_coef: 0.0
  sde_sample_freq: 4
  max_grad_norm: 0.5
  vf_coef: 0.5
  learning_rate: !!float 3e-5
  use_sde: True
  clip_range: 0.4
  policy_kwargs: "dict(log_std_init=-2,
                       ortho_init=False,
                       )"

#CustomAntMuJoCoEnv-v0:
  #normalize: true
#  n_envs: 1
#  n_timesteps: !!float 2e6
#  policy: 'MlpPolicy'
#  n_steps: 256
#  batch_size: 32
#  gae_lambda: 0.95
#  gamma: 0.99
#  n_epochs: 10
#  ent_coef: 0.0
#  learning_rate: 2.5e-4
#  clip_range: 0.2

AntMuJoCoEnv-v0:
  env_wrapper: sb3_contrib.common.wrappers.TimeFeatureWrapper
  normalize: true
  n_envs: 16
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'
  batch_size: 128
  n_steps: 512
  gamma: 0.99
  gae_lambda: 0.9
  n_epochs: 20
  ent_coef: 0.0
  sde_sample_freq: 4
  max_grad_norm: 0.5
  vf_coef: 0.5
  learning_rate: !!float 3e-5
  use_sde: False
  clip_range: 0.4
  policy_kwargs: "dict(log_std_init=-1,
                       ortho_init=False,
                       activation_fn=nn.ReLU,
                       net_arch=[dict(pi=[256, 256], vf=[256, 256])]
                       )"

HumanoidMuJoCoEnv-v0:
  #normalize: true
  n_envs: 1
  n_timesteps: !!float 1e7
  policy: 'MlpPolicy'
  n_steps: 2048
  batch_size: 32
  gae_lambda: 0.95
  gamma: 0.99
  n_epochs: 10
  ent_coef: 0.0
  learning_rate: 2.5e-4
  clip_range: 0.2

HalfCheetahMuJoCoEnv-v0:
  normalize: true
  n_envs: 16
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'
  batch_size: 128
  n_steps: 512
  gamma: 0.99
  gae_lambda: 0.9
  n_epochs: 20
  ent_coef: 0.0
  #sde_sample_freq: 4
  max_grad_norm: 0.5
  vf_coef: 0.5
  learning_rate: !!float 3e-5
  #use_sde: True
  clip_range: 0.4
  policy_kwargs: "dict(log_std_init=-2,
                       ortho_init=False,
                       activation_fn=nn.ReLU,
                       net_arch=[dict(pi=[256, 256], vf=[256, 256])]
                       )"

HopperMuJoCoEnv-v0:
  normalize: true
  n_envs: 16
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'
  batch_size: 128
  n_steps: 512
  gamma: 0.99
  gae_lambda: 0.92
  n_epochs: 20
  ent_coef: 0.0
  #sde_sample_freq: 4
  max_grad_norm: 0.5
  vf_coef: 0.5
  learning_rate: !!float 3e-5
  #use_sde: True
  clip_range: lin_0.4
  policy_kwargs: "dict(log_std_init=-2,
                       ortho_init=False,
                       activation_fn=nn.ReLU,
                       net_arch=[dict(pi=[256, 256], vf=[256, 256])]
                       )"

Walker2DMuJoCoEnv-v0:
  normalize: true
  n_envs: 16
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'
  batch_size: 128
  n_steps: 512
  gamma: 0.99
  gae_lambda: 0.92
  n_epochs: 20
  ent_coef: 0.0
  #sde_sample_freq: 4
  max_grad_norm: 0.5
  vf_coef: 0.5
  learning_rate: !!float 3e-5
  #use_sde: False
  clip_range: lin_0.4
  #clip_range_vf: -1


CustomAntMuJoCoEnv-v0:
  env_wrapper: sb3_contrib.common.wrappers.TimeFeatureWrapper
  normalize: true
  n_envs: 16
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'
  batch_size: 128
  n_steps: 512
  gamma: 0.99
  gae_lambda: 0.9
  n_epochs: 20
  ent_coef: 0.0
  sde_sample_freq: 4
  max_grad_norm: 0.5
  vf_coef: 0.5
  learning_rate: !!float 3e-5
  use_sde: False
  clip_range: 0.4
  policy_kwargs: "dict(log_std_init=-1,
                       ortho_init=False,
                       activation_fn=nn.ReLU,
                       net_arch=[dict(pi=[256, 256], vf=[256, 256])]
                       )"

CustomHumanoidMuJoCoEnv-v0:
  #normalize: true
  n_envs: 1
  n_timesteps: !!float 1e7
  policy: 'MlpPolicy'
  n_steps: 2048
  batch_size: 32
  gae_lambda: 0.95
  gamma: 0.99
  n_epochs: 10
  ent_coef: 0.0
  learning_rate: 2.5e-4
  clip_range: 0.2

CustomHalfCheetahMuJoCoEnv-v0:
  normalize: true
  n_envs: 16
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'
  batch_size: 128
  n_steps: 512
  gamma: 0.99
  gae_lambda: 0.9
  n_epochs: 20
  ent_coef: 0.0
  #sde_sample_freq: 4
  max_grad_norm: 0.5
  vf_coef: 0.5
  learning_rate: !!float 3e-5
  #use_sde: True
  clip_range: 0.4
  policy_kwargs: "dict(log_std_init=-2,
                       ortho_init=False,
                       activation_fn=nn.ReLU,
                       net_arch=[dict(pi=[256, 256], vf=[256, 256])]
                       )"

CustomHopperMuJoCoEnv-v0:
  normalize: true
  n_envs: 16
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'
  batch_size: 128
  n_steps: 512
  gamma: 0.99
  gae_lambda: 0.92
  n_epochs: 20
  ent_coef: 0.0
  #sde_sample_freq: 4
  max_grad_norm: 0.5
  vf_coef: 0.5
  learning_rate: !!float 3e-5
  #use_sde: True
  clip_range: lin_0.4
  policy_kwargs: "dict(log_std_init=-2,
                       ortho_init=False,
                       activation_fn=nn.ReLU,
                       net_arch=[dict(pi=[256, 256], vf=[256, 256])]
                       )"

CustomWalker2DMuJoCoEnv-v0:
  normalize: true
  n_envs: 16
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'
  batch_size: 128
  n_steps: 512
  gamma: 0.99
  gae_lambda: 0.92
  n_epochs: 20
  ent_coef: 0.0
  #sde_sample_freq: 4
  max_grad_norm: 0.5
  vf_coef: 0.5
  learning_rate: !!float 3e-5
  #use_sde: False
  clip_range: lin_0.4
  #clip_range_vf: -1

# Tuned
CustomAntPyBulletEnv-v0:
  env_wrapper: sb3_contrib.common.wrappers.TimeFeatureWrapper
  normalize: true
  n_envs: 16
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'
  batch_size: 128
  n_steps: 512
  gamma: 0.99
  gae_lambda: 0.9
  n_epochs: 20
  ent_coef: 0.0
  sde_sample_freq: 4
  max_grad_norm: 0.5
  vf_coef: 0.5
  learning_rate: !!float 3e-5
  use_sde: True
  clip_range: 0.4
  policy_kwargs: "dict(log_std_init=-1,
                       ortho_init=False,
                       activation_fn=nn.ReLU,
                       net_arch=[dict(pi=[256, 256], vf=[256, 256])]
                       )"

# Tuned
CustomWalker2DPyBulletEnv-v0:
  env_wrapper: sb3_contrib.common.wrappers.TimeFeatureWrapper
  normalize: true
  n_envs: 16
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'
  batch_size: 128
  n_steps: 512
  gamma: 0.99
  gae_lambda: 0.92
  n_epochs: 20
  ent_coef: 0.0
  sde_sample_freq: 4
  max_grad_norm: 0.5
  vf_coef: 0.5
  learning_rate: !!float 3e-5
  use_sde: True
  clip_range: lin_0.4
  policy_kwargs: "dict(log_std_init=-2,
                       ortho_init=False,
                       activation_fn=nn.ReLU,
                       net_arch=[dict(pi=[256, 256], vf=[256, 256])]
                       )"

# Tuned
CustomHopperPyBulletEnv-v0:
  env_wrapper: sb3_contrib.common.wrappers.TimeFeatureWrapper
  normalize: true
  n_envs: 16
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'
  batch_size: 128
  n_steps: 512
  gamma: 0.99
  gae_lambda: 0.92
  n_epochs: 20
  ent_coef: 0.0
  sde_sample_freq: 4
  max_grad_norm: 0.5
  vf_coef: 0.5
  learning_rate: !!float 3e-5
  use_sde: True
  clip_range: lin_0.4
  policy_kwargs: "dict(log_std_init=-2,
                       ortho_init=False,
                       activation_fn=nn.ReLU,
                       net_arch=[dict(pi=[256, 256], vf=[256, 256])]
                       )"

# Tuned
ReacherPyBulletEnv-v0:
  env_wrapper: sb3_contrib.common.wrappers.TimeFeatureWrapper
  normalize: true
  n_envs: 8
  n_timesteps: !!float 1e6
  policy: 'MlpPolicy'
  batch_size: 64
  n_steps: 512
  gamma: 0.99
  gae_lambda: 0.9
  n_epochs: 20
  ent_coef: 0.0
  sde_sample_freq: 4
  max_grad_norm: 0.5
  vf_coef: 0.5
  learning_rate: !!float 3e-5
  use_sde: True
  clip_range: lin_0.4
  policy_kwargs: "dict(log_std_init=-2.7,
                       ortho_init=False,
                       activation_fn=nn.ReLU,
                       net_arch=[dict(pi=[256, 256], vf=[256, 256])]
                       )"

CustomReacherPyBulletEnv-v0:
  env_wrapper: sb3_contrib.common.wrappers.TimeFeatureWrapper
  normalize: true
  n_envs: 8
  n_timesteps: !!float 1e6
  policy: 'MlpPolicy'
  batch_size: 64
  n_steps: 512
  gamma: 0.99
  gae_lambda: 0.9
  n_epochs: 20
  ent_coef: 0.0
  sde_sample_freq: 4
  max_grad_norm: 0.5
  vf_coef: 0.5
  learning_rate: !!float 3e-5
  use_sde: True
  clip_range: lin_0.4
  policy_kwargs: "dict(log_std_init=-2.7,
                       ortho_init=False,
                       activation_fn=nn.ReLU,
                       net_arch=[dict(pi=[256, 256], vf=[256, 256])]
                       )"

# Tuned
CustomHalfCheetahPyBulletEnv-v0:
  env_wrapper: sb3_contrib.common.wrappers.TimeFeatureWrapper
  normalize: true
  n_envs: 16
  n_timesteps: !!float 2e6
  policy: 'MlpPolicy'
  batch_size: 128
  n_steps: 512
  gamma: 0.99
  gae_lambda: 0.9
  n_epochs: 20
  ent_coef: 0.0
  sde_sample_freq: 4
  max_grad_norm: 0.5
  vf_coef: 0.5
  learning_rate: !!float 3e-5
  use_sde: True
  clip_range: 0.4
  policy_kwargs: "dict(log_std_init=-2,
                       ortho_init=False,
                       activation_fn=nn.ReLU,
                       net_arch=[dict(pi=[256, 256], vf=[256, 256])]
                       )"

CustomHumanoidPyBulletEnv-v0:
  normalize: true
  n_envs: 8
  n_timesteps: !!float 1e7
  policy: 'MlpPolicy'
  n_steps: 2048
  batch_size: 64
  gae_lambda: 0.95
  gamma: 0.99
  n_epochs: 10
  ent_coef: 0.0
  learning_rate: 2.5e-4
  clip_range: 0.2
