# Training algorithm selector. This file also carries `ppo_config`,
# `eapo_config` and `trpo_config` sections; presumably the loader reads the
# section(s) relevant to this value — confirm against the consuming script.
algo: PPO

# PPO hyperparameters. The key names match the keyword arguments of the
# Stable-Baselines3 PPO constructor, so they are presumably forwarded to it
# unchanged — confirm against the training script.
ppo_config:
  # Optimisation schedule.
  learning_rate: 5.0e-4
  n_steps: 128
  # NOTE(review): if n_steps is counted per environment, one rollout holds
  # n_steps * n_envs = 128 * 64 = 8192 transitions, i.e. 4 minibatches of
  # 2048 per epoch — confirm.
  batch_size: 2048
  n_epochs: 4

  # Return / advantage estimation.
  gamma: 0.99
  gae_lambda: 0.95
  normalize_advantage: true

  # Loss shaping and gradient clipping.
  clip_range: 0.2
  ent_coef: 0.01
  vf_coef: 0.25
  max_grad_norm: 3.5

  # sde_sample_freq presumably has no effect while use_sde is false — confirm.
  use_sde: false
  sde_sample_freq: 0

  policy_kwargs:
    ortho_init: true
    # Hidden-layer widths per head; presumably `pi` is the policy network and
    # `vf` the value network — confirm.
    net_arch:
      pi:
        - 64
        - 64
      vf:
        - 128
        - 128

# Settings for the EAPO variant. The meaning of each key is defined by the
# consuming code, which is not visible from this file.
# NOTE(review): `algo` is set to PPO in this file, so this section is
# presumably unused for this run — confirm.
eapo_config:
  tau: 0.02
  c2: 0.0
  augmented_reward: true
  # Both are 0.0 here; the names suggest entropy-specific counterparts of
  # gamma / gae_lambda — TODO confirm.
  e_gamma: 0.0
  e_lambda: 0.0
  tau_on_entropy: false
  use_entropy_advantage: false

# No TRPO settings are specified. The explicit `{}` keeps the value an empty
# mapping; a bare `trpo_config:` would load as null instead.
trpo_config: {}

# Environment setup.
# n_envs is presumably the number of environment copies run in parallel —
# confirm against the training script.
n_envs: 64
procgen: false
env: Ant-v4
handle_timeout: true
# NOTE(review): Ant-v4 is a continuous-action MuJoCo task; these two keys
# presumably split each action dimension into 7 discrete values — confirm.
action_discretization: true
discretization_num_atomics: 7
norm_obs: true

# Reward normalisation toggle (off here).
# NOTE(review): pop_art_beta looks like the update rate of a PopArt-style
# normaliser; whether it is used while norm_reward is false is not visible
# here — confirm.
norm_reward: false
pop_art_beta: 0.03

# Training budget and evaluation cadence.
# NOTE(review): the unit of eval_freq (total timesteps vs. per-environment
# steps) is not visible from this file — confirm before changing n_envs.
total_timesteps: 10000000
eval_freq: 2000

# Logging verbosity levels, presumably for training and for evaluation
# respectively — confirm which values the consumer accepts.
verbose: 1
eval_verbose: 1

