episode: 0 training return: tensor(-499.9252, device='cuda:0', grad_fn=<AddBackward0>)
episode: 1 training return: tensor(-499.9671, device='cuda:0', grad_fn=<AddBackward0>)
episode: 2 training return: tensor(-493.1829, device='cuda:0', grad_fn=<AddBackward0>)
episode: 3 training return: tensor(-499.9397, device='cuda:0', grad_fn=<AddBackward0>)
episode: 4 training return: tensor(-493.7708, device='cuda:0', grad_fn=<AddBackward0>)
episode: 5 training return: tensor(-497.1656, device='cuda:0', grad_fn=<AddBackward0>)
episode: 6 training return: tensor(-407.5093, device='cuda:0', grad_fn=<AddBackward0>)
episode: 7 training return: tensor(-499.9470, device='cuda:0', grad_fn=<AddBackward0>)
episode: 8 training return: tensor(-499.9637, device='cuda:0', grad_fn=<AddBackward0>)
episode: 9 training return: tensor(-497.4523, device='cuda:0', grad_fn=<AddBackward0>)
episode: 10 training return: tensor(-499.9403, device='cuda:0', grad_fn=<AddBackward0>)
episode: 11 training return: tensor(-413.8886, device='cuda:0', grad_fn=<AddBackward0>)
episode: 12 training return: tensor(-499.9689, device='cuda:0', grad_fn=<AddBackward0>)
episode: 13 training return: tensor(-499.1290, device='cuda:0', grad_fn=<AddBackward0>)
episode: 14 training return: tensor(-499.9609, device='cuda:0', grad_fn=<AddBackward0>)
episode: 15 training return: tensor(-491.8189, device='cuda:0', grad_fn=<AddBackward0>)
episode: 16 training return: tensor(-499.9597, device='cuda:0', grad_fn=<AddBackward0>)
episode: 17 training return: tensor(-483.7948, device='cuda:0', grad_fn=<AddBackward0>)
episode: 18 training return: tensor(-447.0996, device='cuda:0', grad_fn=<AddBackward0>)
episode: 19 training return: tensor(-499.9433, device='cuda:0', grad_fn=<AddBackward0>)
episode: 20 training return: tensor(-452.9055, device='cuda:0', grad_fn=<AddBackward0>)
episode: 21 training return: tensor(-499.9527, device='cuda:0', grad_fn=<AddBackward0>)
episode: 22 training return: tensor(-499.9650, device='cuda:0', grad_fn=<AddBackward0>)
episode: 23 training return: tensor(-499.9694, device='cuda:0', grad_fn=<AddBackward0>)
