episode: 0 training return: tensor(-499.9299, device='cuda:0', grad_fn=<AddBackward0>)
episode: 1 training return: tensor(-499.9240, device='cuda:0', grad_fn=<AddBackward0>)
episode: 2 training return: tensor(-496.8800, device='cuda:0', grad_fn=<AddBackward0>)
episode: 3 training return: tensor(-494.8148, device='cuda:0', grad_fn=<AddBackward0>)
