initial performance: -2
episode: 0 training return: tensor(1.4720e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 1 training return: tensor(3.9230e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 2 training return: tensor(5.0510e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 3 training return: tensor(7.0743e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 1 test_true_pfm: -732 test_simulate_pfm 0
episode: 4 training return: tensor(1.5126e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 5 training return: tensor(2.4487e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 6 training return: tensor(1.2338e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 7 training return: tensor(3.0904e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 2 test_true_pfm: -809 test_simulate_pfm 0
episode: 8 training return: tensor(1.3716e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 9 training return: tensor(3.9147e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 10 training return: tensor(9.0888e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 11 training return: tensor(1.0724e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 3 test_true_pfm: -529 test_simulate_pfm 0
episode: 12 training return: tensor(0.0013, device='cuda:0', grad_fn=<AddBackward0>)
episode: 13 training return: tensor(5.6484e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 14 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
episode: 15 training return: tensor(1.6161e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 4 test_true_pfm: -497 test_simulate_pfm 0
episode: 16 training return: tensor(7.4353e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 17 training return: tensor(6.8188e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 18 training return: tensor(5.7572e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 19 training return: tensor(2.8784e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 5 test_true_pfm: -569 test_simulate_pfm 0
episode: 20 training return: tensor(1.8471e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 21 training return: tensor(1.6245e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 22 training return: tensor(5.4555e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 23 training return: tensor(5.8773e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 6 test_true_pfm: -421 test_simulate_pfm 0
episode: 24 training return: tensor(8.7338e-21, device='cuda:0', grad_fn=<AddBackward0>)
episode: 25 training return: tensor(4.4928e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 26 training return: tensor(9.7185e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 27 training return: tensor(5.6574e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 7 test_true_pfm: -606 test_simulate_pfm 0
episode: 28 training return: tensor(1.7839e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 29 training return: tensor(1.2734e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 30 training return: tensor(4.7244e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 31 training return: tensor(1.7529e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 8 test_true_pfm: -955 test_simulate_pfm 0
episode: 32 training return: tensor(8.6871e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 33 training return: tensor(1.1746e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 34 training return: tensor(1.2691e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 35 training return: tensor(1.3366e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 9 test_true_pfm: -648 test_simulate_pfm 0
episode: 36 training return: tensor(9.9527e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 37 training return: tensor(2.6385e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 38 training return: tensor(7.4179e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 39 training return: tensor(1.5524e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 10 test_true_pfm: -620 test_simulate_pfm 0
episode: 40 training return: tensor(1.5176e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 41 training return: tensor(2.9218e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 42 training return: tensor(1.3218e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 43 training return: tensor(3.5139e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 11 test_true_pfm: -658 test_simulate_pfm 0
episode: 44 training return: tensor(3.2301e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 45 training return: tensor(4.6122e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 46 training return: tensor(3.3845e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 47 training return: tensor(1.9146e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 12 test_true_pfm: -634 test_simulate_pfm 0
episode: 48 training return: tensor(1.6177e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 49 training return: tensor(1.0181e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 50 training return: tensor(1.8150e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 51 training return: tensor(5.6168e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 13 test_true_pfm: -493 test_simulate_pfm 0
episode: 52 training return: tensor(1.0541e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 53 training return: tensor(1.4285e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 54 training return: tensor(4.1846e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 55 training return: tensor(9.7253e-09, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 14 test_true_pfm: -350 test_simulate_pfm 0
