episode: 0 training return: tensor(-497.4479, device='cuda:0', grad_fn=<AddBackward0>)
episode: 1 training return: tensor(-499.8671, device='cuda:0', grad_fn=<AddBackward0>)
episode: 2 training return: tensor(-499.9596, device='cuda:0', grad_fn=<AddBackward0>)
episode: 3 training return: tensor(-499.9542, device='cuda:0', grad_fn=<AddBackward0>)
