episode: 0 training return: tensor(24.6245, device='cuda:0', grad_fn=<AddBackward0>)
episode: 1 training return: tensor(2.0753e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 2 training return: tensor(9.1762e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 3 training return: tensor(5.5195e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 1 test_pfm: -327
episode: 4 training return: tensor(8.3630e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 5 training return: tensor(1.0981e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 6 training return: tensor(4.8743e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 7 training return: tensor(0.0073, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 2 test_pfm: 264
episode: 8 training return: tensor(1.4023e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 9 training return: tensor(2.0093e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 10 training return: tensor(2.6121e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 11 training return: tensor(1.3301e-12, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 3 test_pfm: -555
episode: 12 training return: tensor(7.6574e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 13 training return: tensor(1.3455e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 14 training return: tensor(1.4892e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 15 training return: tensor(5.5464e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 4 test_pfm: -511
episode: 16 training return: tensor(7.3458e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 17 training return: tensor(5.4110e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 18 training return: tensor(4.2849e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 19 training return: tensor(3.0618e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 5 test_pfm: -384
episode: 20 training return: tensor(6.3807e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 21 training return: tensor(1.4525e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 22 training return: tensor(2.1047e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 23 training return: tensor(1.6496e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 6 test_pfm: -397
episode: 24 training return: tensor(4.8390e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 25 training return: tensor(1.5441e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 26 training return: tensor(3.6714e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 27 training return: tensor(5.0270e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 7 test_pfm: -450
episode: 28 training return: tensor(2.3615e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 29 training return: tensor(1.7946e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 30 training return: tensor(1.0137e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 31 training return: tensor(5.9297e-12, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 8 test_pfm: -445
episode: 32 training return: tensor(3.7430e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 33 training return: tensor(1.7451e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 34 training return: tensor(3.5111e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 35 training return: tensor(2.1846e-12, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 9 test_pfm: -549
episode: 36 training return: tensor(4.3857e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 37 training return: tensor(1.4280e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 38 training return: tensor(4.5943e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 39 training return: tensor(7.0615e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 10 test_pfm: -469
