episode: 0 training return: tensor(-499.9263, device='cuda:0', grad_fn=<AddBackward0>)
episode: 1 training return: tensor(-490.9431, device='cuda:0', grad_fn=<AddBackward0>)
episode: 2 training return: tensor(-495.7340, device='cuda:0', grad_fn=<AddBackward0>)
episode: 3 training return: tensor(-499.8394, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 1 test_true_pfm: -5.921368847060308
episode: 4 training return: tensor(-498.4870, device='cuda:0', grad_fn=<AddBackward0>)
episode: 5 training return: tensor(-499.9489, device='cuda:0', grad_fn=<AddBackward0>)
episode: 6 training return: tensor(-499.9613, device='cuda:0', grad_fn=<AddBackward0>)
