# PPO hyperparameters. `_target_` is the Hydra-style import path of the class
# that receives the remaining keys (presumably as constructor arguments).
# NOTE(review): the per-group notes below assume conventional PPO naming;
# confirm against jaxltl.rl.ppo.PPO.
_target_: jaxltl.rl.ppo.PPO

# Rollout and update schedule.
# total_timesteps is kept as a plain integer: exponent notation without a
# decimal point (e.g. `5e5`) is typed differently across YAML parsers (string
# under YAML 1.1 / PyYAML, float under YAML 1.2-style resolvers).
total_timesteps: 500000
num_envs: 4
num_steps: 128
num_minibatches: 4
update_epochs: 4

# Return / advantage estimation (presumably discount factor and GAE lambda).
gamma: 0.99
gae_lambda: 0.95

# Loss terms (presumably clip range, entropy weight, value-loss weight).
clip_eps: 0.2
ent_coef: 0.01
vf_coef: 0.5

# Optimizer settings.
lr: 2.5e-4
max_grad_norm: 0.5
anneal_lr: true
