episode: 0 training return: tensor(-499.9566, device='cuda:0', grad_fn=<AddBackward0>)
episode: 1 training return: tensor(-499.9682, device='cuda:0', grad_fn=<AddBackward0>)
episode: 2 training return: tensor(-499.9724, device='cuda:0', grad_fn=<AddBackward0>)
