episode: 0 training return: tensor(46.9314, device='cuda:0', grad_fn=<AddBackward0>)
episode: 1 training return: tensor(3.0061e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 2 training return: tensor(1.1525e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 3 training return: tensor(6.5418e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 1 test_pfm: -602
episode: 4 training return: tensor(5.3189e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 5 training return: tensor(2.1185e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 6 training return: tensor(4.5409e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 7 training return: tensor(2.3725e-09, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 2 test_pfm: -516
episode: 8 training return: tensor(1.0236e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 9 training return: tensor(3.4841e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 10 training return: tensor(7.9466e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 11 training return: tensor(9.0889e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 3 test_pfm: -531
episode: 12 training return: tensor(1.5215e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 13 training return: tensor(4.0842e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 14 training return: tensor(4.6838e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 15 training return: tensor(3.5210e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 4 test_pfm: -457
episode: 16 training return: tensor(1.1938e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 17 training return: tensor(9.1616e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 18 training return: tensor(1.3600e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 19 training return: tensor(9.1915e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 5 test_pfm: -551
episode: 20 training return: tensor(1.3454e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 21 training return: tensor(4.4475e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 22 training return: tensor(1.3783e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 23 training return: tensor(1.9838e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 6 test_pfm: -287
episode: 24 training return: tensor(1.3405e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 25 training return: tensor(4.0530e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 26 training return: tensor(3.1091e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 27 training return: tensor(1.9405e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 7 test_pfm: -364
episode: 28 training return: tensor(0.0015, device='cuda:0', grad_fn=<AddBackward0>)
episode: 29 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
episode: 30 training return: tensor(5.4814e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 31 training return: tensor(4.2080e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 8 test_pfm: -292
episode: 32 training return: tensor(9.0099e-06, device='cuda:0', grad_fn=<AddBackward0>)
