# Identifier of the environment to create.
env_id: "Reacher-v5"
# Number of parallel environments. Currently a single environment; an earlier
# note here suggested 8 parallel environments for better efficiency.
num_envs: 1

# Environment wrappers, given as a list of `_target_` entries.
# Kept as an explicit empty list: a bare `wrappers:` whose entries are all
# commented out parses as null, not as a list.
# To enable the example below, uncomment it and remove the `[]`.
wrappers: []
  # - _target_: benchrl.environments.wrappers.reward_wrappers.PositionDelayWrapper
  #   position_delay: 2  # Delay reward until agent reaches x=2
  #   ctrl_w: 0.001  # Control cost weight

