# Environment selection: the environment id and how many copies of it to run.
env_id: "Humanoid-v5"
# NOTE(review): this is set to a single environment, although the note that
# accompanied this value recommended 8 parallel environments for much better
# efficiency — confirm which value is intended before relying on it.
num_envs: 1  # Single environment

# Wrappers to apply to the environment, given as a sequence. Each entry names
# a class through `_target_`; the remaining keys are that entry's settings
# (presumably passed as constructor arguments by the config loader — TODO
# confirm against the consumer).
wrappers:
  - _target_: benchrl.environments.wrappers.reward_wrappers.PositionDelayWrapper
    position_delay: 2  # Delay reward until agent reaches x=2
    ctrl_w: 0.001  # Control cost weight

