episode: 0 training return: tensor(-478.7878, device='cuda:0', grad_fn=<AddBackward0>)
episode: 1 training return: tensor(-499.9622, device='cuda:0', grad_fn=<AddBackward0>)
episode: 2 training return: tensor(-469.1837, device='cuda:0', grad_fn=<AddBackward0>)
episode: 3 training return: tensor(-430.1452, device='cuda:0', grad_fn=<AddBackward0>)
episode: 4 training return: tensor(-499.9464, device='cuda:0', grad_fn=<AddBackward0>)
episode: 5 training return: tensor(-499.9686, device='cuda:0', grad_fn=<AddBackward0>)
episode: 6 training return: tensor(-499.9490, device='cuda:0', grad_fn=<AddBackward0>)
episode: 7 training return: tensor(-499.9346, device='cuda:0', grad_fn=<AddBackward0>)
episode: 8 training return: tensor(-499.9233, device='cuda:0', grad_fn=<AddBackward0>)
episode: 9 training return: tensor(-499.9496, device='cuda:0', grad_fn=<AddBackward0>)
episode: 10 training return: tensor(-499.9709, device='cuda:0', grad_fn=<AddBackward0>)
episode: 11 training return: tensor(-499.9702, device='cuda:0', grad_fn=<AddBackward0>)
episode: 12 training return: tensor(-499.9422, device='cuda:0', grad_fn=<AddBackward0>)
episode: 13 training return: tensor(-499.9346, device='cuda:0', grad_fn=<AddBackward0>)
episode: 14 training return: tensor(-499.9658, device='cuda:0', grad_fn=<AddBackward0>)
episode: 15 training return: tensor(-499.1346, device='cuda:0', grad_fn=<AddBackward0>)
episode: 16 training return: tensor(-499.9464, device='cuda:0', grad_fn=<AddBackward0>)
episode: 17 training return: tensor(-499.9397, device='cuda:0', grad_fn=<AddBackward0>)
episode: 18 training return: tensor(-499.9470, device='cuda:0', grad_fn=<AddBackward0>)
episode: 19 training return: tensor(-499.9692, device='cuda:0', grad_fn=<AddBackward0>)
episode: 20 training return: tensor(-499.9474, device='cuda:0', grad_fn=<AddBackward0>)
