# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42

# Total timestep budget. The original note derives it as
# "epoch * n_steps * nenvs" = 500 * 512 * 8 * 8 = 16,384,000.
# NOTE(review): n_steps below is 64, so it is unclear which factor is which
# in that product - confirm against the launch configuration.
n_timesteps: 16384000
policy: MlpPolicy
n_steps: 64
# Mini-batch size. The original note gives the formula
# num_envs * n_steps / n_minibatches with the example 2048 * 512 / 2048 (= 512).
# NOTE(review): that example does not produce the 192 configured here -
# confirm which value is intended.
batch_size: 192
gae_lambda: 0.95
gamma: 0.99
n_epochs: 8
ent_coef: 0.0
vf_coef: 0.0001
# Spelled with a mantissa dot and a signed exponent so that YAML 1.1 loaders
# also resolve it as a float without needing an explicit type tag.
learning_rate: 3.0e-4
clip_range: 0.2
# Python-style expression stored as one string. The folded block scalar joins
# the equally indented lines below with single spaces and drops the trailing
# newline. Presumably evaluated by the training script - verify against the
# loader before changing the expression.
policy_kwargs: >-
  dict(
  activation_fn=nn.ELU,
  net_arch=dict(pi=[256, 128, 64], vf=[256, 128, 64])
  )
target_kl: 0.01
max_grad_norm: 1.0

# # Uses VecNormalize class to normalize obs
# normalize_input: True
# # Uses VecNormalize class to normalize rew
# normalize_value: True
# clip_obs: 5
