initial performance: -5 initial simulated performance: 0
episode: 0 training return: tensor(5.0899e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 1 training return: tensor(8.9127e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 2 training return: tensor(8.0162e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 3 training return: tensor(1.8462e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 1 test_true_pfm: -490 test_simulate_pfm tensor(1.3795e-10, device='cuda:0', grad_fn=<DivBackward0>)
episode: 4 training return: tensor(4.9975e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 5 training return: tensor(2.4737e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 6 training return: tensor(5.4591e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 7 training return: tensor(5.9999e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 2 test_true_pfm: -559 test_simulate_pfm tensor(6.1743e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 8 training return: tensor(3.1891e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 9 training return: tensor(2.5887e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 10 training return: tensor(1.0090e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 11 training return: tensor(1.1936e-09, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 3 test_true_pfm: -785 test_simulate_pfm tensor(6.5552e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 12 training return: tensor(2.2425e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 13 training return: tensor(8.5593e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 14 training return: tensor(1.0192e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 15 training return: tensor(6.1712e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 4 test_true_pfm: -528 test_simulate_pfm tensor(1.2112e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 16 training return: tensor(1.1203e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 17 training return: tensor(1.4393e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 18 training return: tensor(1.4296e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 19 training return: tensor(6.8506e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 5 test_true_pfm: -622 test_simulate_pfm tensor(1.7963e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 20 training return: tensor(4.5771e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 21 training return: tensor(2.7680e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 22 training return: tensor(4.6259e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 23 training return: tensor(7.3930e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 6 test_true_pfm: -423 test_simulate_pfm tensor(6.4658e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 24 training return: tensor(1.3651e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 25 training return: tensor(2.4606e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 26 training return: tensor(1.1417e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 27 training return: tensor(8.0472e-16, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 7 test_true_pfm: -642 test_simulate_pfm tensor(1.0367e-12, device='cuda:0', grad_fn=<DivBackward0>)
episode: 28 training return: tensor(7.9749e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 29 training return: tensor(1.5583e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 30 training return: tensor(1.5630e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 31 training return: tensor(1.4184e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 8 test_true_pfm: -2252 test_simulate_pfm tensor(1.3565e-13, device='cuda:0', grad_fn=<DivBackward0>)
episode: 32 training return: tensor(2.8017e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 33 training return: tensor(6.9087e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 34 training return: tensor(4.3443e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 35 training return: tensor(9.7524e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 9 test_true_pfm: -578 test_simulate_pfm tensor(2.4446e-11, device='cuda:0', grad_fn=<DivBackward0>)
episode: 36 training return: tensor(2.3518e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 37 training return: tensor(2.6350e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 38 training return: tensor(6.7009e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 39 training return: tensor(1.8531e-12, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 10 test_true_pfm: -423 test_simulate_pfm tensor(5.2130e-13, device='cuda:0', grad_fn=<DivBackward0>)
episode: 40 training return: tensor(5.0458e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 41 training return: tensor(8.9570e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 42 training return: tensor(4.9183e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 43 training return: tensor(1.1550e-16, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 11 test_true_pfm: -454 test_simulate_pfm tensor(3.3809e-16, device='cuda:0', grad_fn=<DivBackward0>)
episode: 44 training return: tensor(2.9591e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 45 training return: tensor(1.4231e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 46 training return: tensor(1.1933e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 47 training return: tensor(7.7222e-16, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 12 test_true_pfm: -205 test_simulate_pfm tensor(7.9564e-14, device='cuda:0', grad_fn=<DivBackward0>)
episode: 48 training return: tensor(4.7062e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 49 training return: tensor(9.5003e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 50 training return: tensor(6.8159e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 51 training return: tensor(3.1481e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 13 test_true_pfm: -394 test_simulate_pfm tensor(8.7850e-14, device='cuda:0', grad_fn=<DivBackward0>)
episode: 52 training return: tensor(5.1688e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 53 training return: tensor(1.2083e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 54 training return: tensor(3.3734e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 55 training return: tensor(2.6910e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 14 test_true_pfm: -432 test_simulate_pfm tensor(5.9071e-12, device='cuda:0', grad_fn=<DivBackward0>)
episode: 56 training return: tensor(1.9373e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 57 training return: tensor(1.8157e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 58 training return: tensor(1.8019e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 59 training return: tensor(3.2016e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 15 test_true_pfm: -230 test_simulate_pfm tensor(1.6451e-12, device='cuda:0', grad_fn=<DivBackward0>)
episode: 60 training return: tensor(4.3639e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 61 training return: tensor(7.7005e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 62 training return: tensor(1.5084e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 63 training return: tensor(4.3728e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 16 test_true_pfm: -443 test_simulate_pfm tensor(6.9079e-11, device='cuda:0', grad_fn=<DivBackward0>)
episode: 64 training return: tensor(2.1843e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 65 training return: tensor(2.5725e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 66 training return: tensor(1.1070e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 67 training return: tensor(6.0594e-12, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 17 test_true_pfm: -587 test_simulate_pfm tensor(1.9429e-14, device='cuda:0', grad_fn=<DivBackward0>)
episode: 68 training return: tensor(5.9096e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 69 training return: tensor(1.2165e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 70 training return: tensor(9.8094e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 71 training return: tensor(4.6093e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 18 test_true_pfm: -410 test_simulate_pfm tensor(3.0271e-13, device='cuda:0', grad_fn=<DivBackward0>)
episode: 72 training return: tensor(2.5428e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 73 training return: tensor(4.3988e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 74 training return: tensor(3.4087e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 75 training return: tensor(2.8292e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 19 test_true_pfm: -530 test_simulate_pfm tensor(3.7509e-14, device='cuda:0', grad_fn=<DivBackward0>)
episode: 76 training return: tensor(2.5812e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 77 training return: tensor(1.0266e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 78 training return: tensor(2.8607e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 79 training return: tensor(1.8078e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 20 test_true_pfm: -167 test_simulate_pfm tensor(1.1980e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 80 training return: tensor(4.7380e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 81 training return: tensor(1.2193e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 82 training return: tensor(1.4362e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 83 training return: tensor(2.9778e-17, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 21 test_true_pfm: -494 test_simulate_pfm tensor(5.1109e-11, device='cuda:0', grad_fn=<DivBackward0>)
episode: 84 training return: tensor(4.8824e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 85 training return: tensor(4.1666e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 86 training return: tensor(2.6604e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 87 training return: tensor(4.9647e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 22 test_true_pfm: -518 test_simulate_pfm tensor(2.6737e-10, device='cuda:0', grad_fn=<DivBackward0>)
episode: 88 training return: tensor(5.2685e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 89 training return: tensor(1.3825e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 90 training return: tensor(1.1018e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 91 training return: tensor(1.5307e-12, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 23 test_true_pfm: -467 test_simulate_pfm tensor(5.3204e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 92 training return: tensor(3.8262e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 93 training return: tensor(1.0559e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 94 training return: tensor(2.8066e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 95 training return: tensor(8.1956e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 24 test_true_pfm: -542 test_simulate_pfm tensor(5.6010e-12, device='cuda:0', grad_fn=<DivBackward0>)
episode: 96 training return: tensor(8.0410e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 97 training return: tensor(4.6486e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 98 training return: tensor(4.6323e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 99 training return: tensor(1.4238e-17, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 25 test_true_pfm: -579 test_simulate_pfm tensor(1.8453e-15, device='cuda:0', grad_fn=<DivBackward0>)
episode: 100 training return: tensor(8.7446e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 101 training return: tensor(5.8235e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 102 training return: tensor(9.0196e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 103 training return: tensor(4.6771e-17, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 26 test_true_pfm: -410 test_simulate_pfm tensor(1.0987e-15, device='cuda:0', grad_fn=<DivBackward0>)
episode: 104 training return: tensor(1.0801e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 105 training return: tensor(4.8619e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 106 training return: tensor(3.0889e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 107 training return: tensor(2.6259e-17, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 27 test_true_pfm: -747 test_simulate_pfm tensor(7.8289e-18, device='cuda:0', grad_fn=<DivBackward0>)
episode: 108 training return: tensor(5.4166e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 109 training return: tensor(5.2325e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 110 training return: tensor(1.2799e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 111 training return: tensor(3.3596e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 28 test_true_pfm: -846 test_simulate_pfm tensor(9.2873e-18, device='cuda:0', grad_fn=<DivBackward0>)
episode: 112 training return: tensor(2.8211e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 113 training return: tensor(6.5691e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 114 training return: tensor(2.6352e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 115 training return: tensor(1.6100e-16, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 29 test_true_pfm: -667 test_simulate_pfm tensor(3.9461e-13, device='cuda:0', grad_fn=<DivBackward0>)
episode: 116 training return: tensor(3.3806e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 117 training return: tensor(5.1540e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 118 training return: tensor(3.2187e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 119 training return: tensor(2.2614e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 30 test_true_pfm: -695 test_simulate_pfm tensor(8.5299e-18, device='cuda:0', grad_fn=<DivBackward0>)
episode: 120 training return: tensor(4.0086e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 121 training return: tensor(1.3315e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 122 training return: tensor(3.1468e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 123 training return: tensor(2.7255e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 31 test_true_pfm: -710 test_simulate_pfm tensor(5.5136e-10, device='cuda:0', grad_fn=<DivBackward0>)
episode: 124 training return: tensor(1.4906e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 125 training return: tensor(1.9089e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 126 training return: tensor(9.2812e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 127 training return: tensor(3.2595e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 32 test_true_pfm: -933 test_simulate_pfm tensor(1.3147e-11, device='cuda:0', grad_fn=<DivBackward0>)
episode: 128 training return: tensor(1.0299e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 129 training return: tensor(1.1433e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 130 training return: tensor(3.6294e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 131 training return: tensor(2.3489e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 33 test_true_pfm: -849 test_simulate_pfm tensor(4.3843e-12, device='cuda:0', grad_fn=<DivBackward0>)
episode: 132 training return: tensor(1.1852e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 133 training return: tensor(6.0138e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 134 training return: tensor(1.0421e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 135 training return: tensor(2.1955e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 34 test_true_pfm: -619 test_simulate_pfm tensor(1.9267e-13, device='cuda:0', grad_fn=<DivBackward0>)
episode: 136 training return: tensor(7.0749e-21, device='cuda:0', grad_fn=<AddBackward0>)
episode: 137 training return: tensor(3.4280e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 138 training return: tensor(4.9440e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 139 training return: tensor(2.9489e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 35 test_true_pfm: -612 test_simulate_pfm tensor(2.7852e-11, device='cuda:0', grad_fn=<DivBackward0>)
episode: 140 training return: tensor(6.5770e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 141 training return: tensor(5.3472e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 142 training return: tensor(1.0297e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 143 training return: tensor(1.1946e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 36 test_true_pfm: -675 test_simulate_pfm tensor(5.5792e-14, device='cuda:0', grad_fn=<DivBackward0>)
episode: 144 training return: tensor(4.7449e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 145 training return: tensor(5.2427e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 146 training return: tensor(6.4108e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 147 training return: tensor(1.3206e-12, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 37 test_true_pfm: -427 test_simulate_pfm tensor(2.2159e-10, device='cuda:0', grad_fn=<DivBackward0>)
episode: 148 training return: tensor(1.0263e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 149 training return: tensor(2.6527e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 150 training return: tensor(1.1574e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 151 training return: tensor(4.4313e-09, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 38 test_true_pfm: -751 test_simulate_pfm tensor(1.5865e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 152 training return: tensor(8.3766e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 153 training return: tensor(3.3766e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 154 training return: tensor(8.1162e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 155 training return: tensor(2.4981e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 39 test_true_pfm: -688 test_simulate_pfm tensor(4.0692e-14, device='cuda:0', grad_fn=<DivBackward0>)
episode: 156 training return: tensor(1.2957e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 157 training return: tensor(8.6281e-20, device='cuda:0', grad_fn=<AddBackward0>)
episode: 158 training return: tensor(1.5047e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 159 training return: tensor(7.4929e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 40 test_true_pfm: -683 test_simulate_pfm tensor(6.0791e-14, device='cuda:0', grad_fn=<DivBackward0>)
episode: 160 training return: tensor(4.3639e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 161 training return: tensor(7.4781e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 162 training return: tensor(1.5344e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 163 training return: tensor(5.6542e-17, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 41 test_true_pfm: -664 test_simulate_pfm tensor(2.5800e-14, device='cuda:0', grad_fn=<DivBackward0>)
episode: 164 training return: tensor(1.0935e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 165 training return: tensor(9.3505e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 166 training return: tensor(1.0206e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 167 training return: tensor(2.1850e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 42 test_true_pfm: -799 test_simulate_pfm tensor(2.8211e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 168 training return: tensor(2.6491e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 169 training return: tensor(4.1926e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 170 training return: tensor(1.0859e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 171 training return: tensor(9.2497e-17, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 43 test_true_pfm: -692 test_simulate_pfm tensor(1.5504e-14, device='cuda:0', grad_fn=<DivBackward0>)
episode: 172 training return: tensor(2.1842e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 173 training return: tensor(5.1469e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 174 training return: tensor(1.2391e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 175 training return: tensor(8.8510e-17, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 44 test_true_pfm: -643 test_simulate_pfm tensor(1.1145e-12, device='cuda:0', grad_fn=<DivBackward0>)
episode: 176 training return: tensor(6.6159e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 177 training return: tensor(5.6083e-20, device='cuda:0', grad_fn=<AddBackward0>)
episode: 178 training return: tensor(9.1988e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 179 training return: tensor(3.9181e-16, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 45 test_true_pfm: -348 test_simulate_pfm tensor(7.0693e-13, device='cuda:0', grad_fn=<DivBackward0>)
episode: 180 training return: tensor(9.0862e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 181 training return: tensor(1.2778e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 182 training return: tensor(8.8515e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 183 training return: tensor(8.3069e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 46 test_true_pfm: -675 test_simulate_pfm tensor(4.5431e-12, device='cuda:0', grad_fn=<DivBackward0>)
episode: 184 training return: tensor(5.1818e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 185 training return: tensor(1.8239e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 186 training return: tensor(8.5970e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 187 training return: tensor(1.9925e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 47 test_true_pfm: -822 test_simulate_pfm tensor(5.2168e-12, device='cuda:0', grad_fn=<DivBackward0>)
episode: 188 training return: tensor(1.2973e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 189 training return: tensor(3.7229e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 190 training return: tensor(4.5844e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 191 training return: tensor(1.9337e-12, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 48 test_true_pfm: -687 test_simulate_pfm tensor(2.2867e-11, device='cuda:0', grad_fn=<DivBackward0>)
episode: 192 training return: tensor(7.9573e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 193 training return: tensor(4.2045e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 194 training return: tensor(6.3606e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 195 training return: tensor(3.3311e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 49 test_true_pfm: -1055 test_simulate_pfm tensor(1.0160e-13, device='cuda:0', grad_fn=<DivBackward0>)
episode: 196 training return: tensor(8.0562e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 197 training return: tensor(3.9382e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 198 training return: tensor(3.0801e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 199 training return: tensor(6.9354e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 50 test_true_pfm: -695 test_simulate_pfm tensor(7.4290e-14, device='cuda:0', grad_fn=<DivBackward0>)
episode: 200 training return: tensor(1.1493e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 201 training return: tensor(1.6978e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 202 training return: tensor(3.1307e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 203 training return: tensor(3.4813e-12, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 51 test_true_pfm: -779 test_simulate_pfm tensor(1.2306e-10, device='cuda:0', grad_fn=<DivBackward0>)
episode: 204 training return: tensor(1.6570e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 205 training return: tensor(3.6667e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 206 training return: tensor(6.3406e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 207 training return: tensor(3.7207e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 52 test_true_pfm: -780 test_simulate_pfm tensor(1.8254e-10, device='cuda:0', grad_fn=<DivBackward0>)
episode: 208 training return: tensor(3.9536e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 209 training return: tensor(4.4730e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 210 training return: tensor(7.0584e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 211 training return: tensor(5.0716e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 53 test_true_pfm: -849 test_simulate_pfm tensor(3.0963e-14, device='cuda:0', grad_fn=<DivBackward0>)
episode: 212 training return: tensor(2.1850e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 213 training return: tensor(1.0469e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 214 training return: tensor(2.3712e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 215 training return: tensor(1.0230e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 54 test_true_pfm: -736 test_simulate_pfm tensor(1.5789e-12, device='cuda:0', grad_fn=<DivBackward0>)
episode: 216 training return: tensor(1.0815e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 217 training return: tensor(1.0161e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 218 training return: tensor(1.5169e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 219 training return: tensor(9.0270e-12, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 55 test_true_pfm: -662 test_simulate_pfm tensor(2.5615e-14, device='cuda:0', grad_fn=<DivBackward0>)
episode: 220 training return: tensor(2.4373e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 221 training return: tensor(2.1445e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 222 training return: tensor(3.4503e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 223 training return: tensor(3.3533e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 56 test_true_pfm: -755 test_simulate_pfm tensor(2.3904e-12, device='cuda:0', grad_fn=<DivBackward0>)
episode: 224 training return: tensor(2.2506e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 225 training return: tensor(1.5260e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 226 training return: tensor(1.7399e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 227 training return: tensor(8.0502e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 57 test_true_pfm: -798 test_simulate_pfm tensor(1.7383e-10, device='cuda:0', grad_fn=<DivBackward0>)
episode: 228 training return: tensor(1.1803e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 229 training return: tensor(2.1989e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 230 training return: tensor(6.1867e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 231 training return: tensor(1.6541e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 58 test_true_pfm: -697 test_simulate_pfm tensor(4.9746e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 232 training return: tensor(8.0068e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 233 training return: tensor(3.3174e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 234 training return: tensor(2.6570e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 235 training return: tensor(3.6312e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 59 test_true_pfm: -831 test_simulate_pfm tensor(1.6929e-12, device='cuda:0', grad_fn=<DivBackward0>)
episode: 236 training return: tensor(1.8344e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 237 training return: tensor(1.2454e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 238 training return: tensor(5.4098e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 239 training return: tensor(3.5187e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 60 test_true_pfm: -595 test_simulate_pfm tensor(2.2780e-13, device='cuda:0', grad_fn=<DivBackward0>)
episode: 240 training return: tensor(3.1592e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 241 training return: tensor(1.6978e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 242 training return: tensor(2.4730e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 243 training return: tensor(3.3648e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 61 test_true_pfm: -296 test_simulate_pfm tensor(6.1150e-15, device='cuda:0', grad_fn=<DivBackward0>)
episode: 244 training return: tensor(8.6875e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 245 training return: tensor(4.1538e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 246 training return: tensor(1.3405e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 247 training return: tensor(6.9108e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 62 test_true_pfm: -700 test_simulate_pfm tensor(1.8827e-15, device='cuda:0', grad_fn=<DivBackward0>)
episode: 248 training return: tensor(5.3978e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 249 training return: tensor(1.3112e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 250 training return: tensor(9.0169e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 251 training return: tensor(2.6837e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 63 test_true_pfm: -691 test_simulate_pfm tensor(3.7827e-16, device='cuda:0', grad_fn=<DivBackward0>)
episode: 252 training return: tensor(1.5898e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 253 training return: tensor(1.8595e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 254 training return: tensor(3.7090e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 255 training return: tensor(1.4217e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 64 test_true_pfm: -633 test_simulate_pfm tensor(1.0771e-15, device='cuda:0', grad_fn=<DivBackward0>)
episode: 256 training return: tensor(1.6628e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 257 training return: tensor(4.2227e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 258 training return: tensor(5.0550e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 259 training return: tensor(3.0235e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 65 test_true_pfm: -654 test_simulate_pfm tensor(1.4958e-15, device='cuda:0', grad_fn=<DivBackward0>)
episode: 260 training return: tensor(4.0943e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 261 training return: tensor(4.7260e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 262 training return: tensor(2.2629e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 263 training return: tensor(2.0583e-16, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 66 test_true_pfm: -935 test_simulate_pfm tensor(1.1448e-13, device='cuda:0', grad_fn=<DivBackward0>)
episode: 264 training return: tensor(3.8713e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 265 training return: tensor(2.8615e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 266 training return: tensor(6.8332e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 267 training return: tensor(7.3397e-16, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 67 test_true_pfm: -1004 test_simulate_pfm tensor(7.8066e-15, device='cuda:0', grad_fn=<DivBackward0>)
episode: 268 training return: tensor(4.0353e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 269 training return: tensor(8.6378e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 270 training return: tensor(2.5411e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 271 training return: tensor(1.0852e-17, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 68 test_true_pfm: -1372 test_simulate_pfm tensor(2.4250e-17, device='cuda:0', grad_fn=<DivBackward0>)
episode: 272 training return: tensor(2.0998e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 273 training return: tensor(2.0915e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 274 training return: tensor(2.9261e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 275 training return: tensor(5.6057e-18, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 69 test_true_pfm: -1237 test_simulate_pfm tensor(1.6332e-17, device='cuda:0', grad_fn=<DivBackward0>)
episode: 276 training return: tensor(5.9646e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 277 training return: tensor(3.9068e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 278 training return: tensor(7.9599e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 279 training return: tensor(1.3851e-17, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 70 test_true_pfm: -960 test_simulate_pfm tensor(6.9123e-16, device='cuda:0', grad_fn=<DivBackward0>)
episode: 280 training return: tensor(1.7239e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 281 training return: tensor(1.2435e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 282 training return: tensor(2.8780e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 283 training return: tensor(1.2613e-17, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 71 test_true_pfm: -1272 test_simulate_pfm tensor(2.0775e-18, device='cuda:0', grad_fn=<DivBackward0>)
episode: 284 training return: tensor(8.9540e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 285 training return: tensor(1.5101e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 286 training return: tensor(3.1983e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 287 training return: tensor(2.7661e-17, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 72 test_true_pfm: -666 test_simulate_pfm tensor(1.0307e-17, device='cuda:0', grad_fn=<DivBackward0>)
episode: 288 training return: tensor(4.5903e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 289 training return: tensor(1.3024e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 290 training return: tensor(1.2610e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 291 training return: tensor(6.6238e-18, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 73 test_true_pfm: -1663 test_simulate_pfm tensor(1.4694e-17, device='cuda:0', grad_fn=<DivBackward0>)
episode: 292 training return: tensor(4.4735e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 293 training return: tensor(5.6630e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 294 training return: tensor(2.5421e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 295 training return: tensor(7.9798e-17, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 74 test_true_pfm: -1281 test_simulate_pfm tensor(3.7113e-14, device='cuda:0', grad_fn=<DivBackward0>)
episode: 296 training return: tensor(3.6072e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 297 training return: tensor(7.0924e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 298 training return: tensor(5.2931e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 299 training return: tensor(2.0411e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 75 test_true_pfm: -1262 test_simulate_pfm tensor(1.1409e-12, device='cuda:0', grad_fn=<DivBackward0>)
episode: 300 training return: tensor(5.5518e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 301 training return: tensor(8.4948e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 302 training return: tensor(4.2391e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 303 training return: tensor(1.1589e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 76 test_true_pfm: -1085 test_simulate_pfm tensor(6.5317e-13, device='cuda:0', grad_fn=<DivBackward0>)
episode: 304 training return: tensor(9.5471e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 305 training return: tensor(3.5989e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 306 training return: tensor(4.7563e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 307 training return: tensor(2.0966e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 77 test_true_pfm: -1271 test_simulate_pfm tensor(3.7161e-14, device='cuda:0', grad_fn=<DivBackward0>)
episode: 308 training return: tensor(4.3997e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 309 training return: tensor(2.0010e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 310 training return: tensor(2.2908e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 311 training return: tensor(9.5164e-16, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 78 test_true_pfm: -1214 test_simulate_pfm tensor(9.1865e-17, device='cuda:0', grad_fn=<DivBackward0>)
episode: 312 training return: tensor(4.7122e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 313 training return: tensor(9.3180e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 314 training return: tensor(1.6091e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 315 training return: tensor(5.1788e-18, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 79 test_true_pfm: -1206 test_simulate_pfm tensor(1.0050e-14, device='cuda:0', grad_fn=<DivBackward0>)
episode: 316 training return: tensor(1.1825e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 317 training return: tensor(1.9276e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 318 training return: tensor(3.2148e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 319 training return: tensor(5.2269e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 80 test_true_pfm: -1221 test_simulate_pfm tensor(2.2795e-15, device='cuda:0', grad_fn=<DivBackward0>)
episode: 320 training return: tensor(2.9741e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 321 training return: tensor(8.8536e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 322 training return: tensor(2.3564e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 323 training return: tensor(2.1819e-17, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 81 test_true_pfm: -1184 test_simulate_pfm tensor(5.0266e-17, device='cuda:0', grad_fn=<DivBackward0>)
episode: 324 training return: tensor(2.8043e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 325 training return: tensor(2.5101e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 326 training return: tensor(1.0113e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 327 training return: tensor(8.6184e-18, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 82 test_true_pfm: -1109 test_simulate_pfm tensor(1.5534e-17, device='cuda:0', grad_fn=<DivBackward0>)
episode: 328 training return: tensor(8.7943e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 329 training return: tensor(7.5776e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 330 training return: tensor(9.3206e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 331 training return: tensor(1.1912e-16, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 83 test_true_pfm: -1214 test_simulate_pfm tensor(4.0388e-17, device='cuda:0', grad_fn=<DivBackward0>)
episode: 332 training return: tensor(2.0378e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 333 training return: tensor(1.5535e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 334 training return: tensor(2.8539e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 335 training return: tensor(1.8589e-16, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 84 test_true_pfm: -1202 test_simulate_pfm tensor(3.6784e-18, device='cuda:0', grad_fn=<DivBackward0>)
episode: 336 training return: tensor(3.1831e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 337 training return: tensor(4.0583e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 338 training return: tensor(3.6649e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 339 training return: tensor(5.2451e-18, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 85 test_true_pfm: -1188 test_simulate_pfm tensor(1.3645e-14, device='cuda:0', grad_fn=<DivBackward0>)
episode: 340 training return: tensor(8.1982e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 341 training return: tensor(1.0553e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 342 training return: tensor(2.9904e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 343 training return: tensor(2.7690e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 86 test_true_pfm: -1111 test_simulate_pfm tensor(2.1547e-12, device='cuda:0', grad_fn=<DivBackward0>)
episode: 344 training return: tensor(1.4474e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 345 training return: tensor(4.1630e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 346 training return: tensor(3.0554e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 347 training return: tensor(2.0787e-16, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 87 test_true_pfm: -1165 test_simulate_pfm tensor(5.7166e-16, device='cuda:0', grad_fn=<DivBackward0>)
episode: 348 training return: tensor(1.0704e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 349 training return: tensor(1.0303e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 350 training return: tensor(1.0076e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 351 training return: tensor(1.4671e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 88 test_true_pfm: -1160 test_simulate_pfm tensor(1.1137e-16, device='cuda:0', grad_fn=<DivBackward0>)
episode: 352 training return: tensor(1.5807e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 353 training return: tensor(2.7727e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 354 training return: tensor(3.2492e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 355 training return: tensor(1.3113e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 89 test_true_pfm: -1153 test_simulate_pfm tensor(1.1512e-11, device='cuda:0', grad_fn=<DivBackward0>)
episode: 356 training return: tensor(1.8167e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 357 training return: tensor(9.3537e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 358 training return: tensor(4.5948e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 359 training return: tensor(5.3512e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 90 test_true_pfm: -1177 test_simulate_pfm tensor(1.9299e-16, device='cuda:0', grad_fn=<DivBackward0>)
episode: 360 training return: tensor(8.6916e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 361 training return: tensor(1.6850e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 362 training return: tensor(6.7424e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 363 training return: tensor(6.5762e-17, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 91 test_true_pfm: -1246 test_simulate_pfm tensor(7.0815e-12, device='cuda:0', grad_fn=<DivBackward0>)
episode: 364 training return: tensor(1.7194e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 365 training return: tensor(7.9088e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 366 training return: tensor(1.4382e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 367 training return: tensor(3.3687e-18, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 92 test_true_pfm: -1368 test_simulate_pfm tensor(3.9568e-18, device='cuda:0', grad_fn=<DivBackward0>)
episode: 368 training return: tensor(7.8686e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 369 training return: tensor(8.5897e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 370 training return: tensor(2.7730e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 371 training return: tensor(2.5164e-18, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 93 test_true_pfm: -1318 test_simulate_pfm tensor(5.0375e-17, device='cuda:0', grad_fn=<DivBackward0>)
episode: 372 training return: tensor(5.6644e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 373 training return: tensor(1.8398e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 374 training return: tensor(1.5768e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 375 training return: tensor(8.0426e-17, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 94 test_true_pfm: -1298 test_simulate_pfm tensor(2.7396e-17, device='cuda:0', grad_fn=<DivBackward0>)
episode: 376 training return: tensor(1.0936e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 377 training return: tensor(6.9673e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 378 training return: tensor(1.2359e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 379 training return: tensor(1.8494e-18, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 95 test_true_pfm: -1239 test_simulate_pfm tensor(2.1002e-17, device='cuda:0', grad_fn=<DivBackward0>)
episode: 380 training return: tensor(8.6339e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 381 training return: tensor(4.6520e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 382 training return: tensor(6.0651e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 383 training return: tensor(5.7476e-17, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 96 test_true_pfm: -1096 test_simulate_pfm tensor(3.4448e-18, device='cuda:0', grad_fn=<DivBackward0>)
episode: 384 training return: tensor(2.3019e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 385 training return: tensor(3.5986e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 386 training return: tensor(2.8758e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 387 training return: tensor(5.1428e-19, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 97 test_true_pfm: -1320 test_simulate_pfm tensor(2.9444e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 388 training return: tensor(6.0617e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 389 training return: tensor(3.0281e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 390 training return: tensor(8.4883e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 391 training return: tensor(2.9572e-20, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 98 test_true_pfm: -1316 test_simulate_pfm tensor(4.8299e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 392 training return: tensor(5.4597e-21, device='cuda:0', grad_fn=<AddBackward0>)
episode: 393 training return: tensor(1.1911e-20, device='cuda:0', grad_fn=<AddBackward0>)
episode: 394 training return: tensor(4.7972e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 395 training return: tensor(3.8794e-19, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 99 test_true_pfm: -1309 test_simulate_pfm tensor(1.7595e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 396 training return: tensor(4.8664e-21, device='cuda:0', grad_fn=<AddBackward0>)
episode: 397 training return: tensor(1.5742e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 398 training return: tensor(1.0874e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 399 training return: tensor(3.4496e-21, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 100 test_true_pfm: -1322 test_simulate_pfm tensor(2.4208e-19, device='cuda:0', grad_fn=<DivBackward0>)
episode: 400 training return: tensor(2.9698e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 401 training return: tensor(1.4714e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 402 training return: tensor(2.5781e-20, device='cuda:0', grad_fn=<AddBackward0>)
episode: 403 training return: tensor(3.6318e-20, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 101 test_true_pfm: -1329 test_simulate_pfm tensor(1.2308e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 404 training return: tensor(5.2447e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 405 training return: tensor(1.0748e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 406 training return: tensor(1.2285e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 407 training return: tensor(1.6772e-18, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 102 test_true_pfm: -1338 test_simulate_pfm tensor(3.7591e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 408 training return: tensor(2.2815e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 409 training return: tensor(2.2795e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 410 training return: tensor(6.6296e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 411 training return: tensor(2.0906e-20, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 103 test_true_pfm: -1337 test_simulate_pfm tensor(1.5458e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 412 training return: tensor(1.0426e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 413 training return: tensor(4.5554e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 414 training return: tensor(1.9611e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 415 training return: tensor(3.5102e-19, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 104 test_true_pfm: -1324 test_simulate_pfm tensor(2.2345e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 416 training return: tensor(2.8250e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 417 training return: tensor(6.5330e-21, device='cuda:0', grad_fn=<AddBackward0>)
episode: 418 training return: tensor(5.6944e-20, device='cuda:0', grad_fn=<AddBackward0>)
episode: 419 training return: tensor(1.7536e-18, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 105 test_true_pfm: -625 test_simulate_pfm tensor(2.4922e-22, device='cuda:0', grad_fn=<DivBackward0>)
episode: 420 training return: tensor(5.4851e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 421 training return: tensor(2.1791e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 422 training return: tensor(2.9298e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 423 training return: tensor(7.1788e-18, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 106 test_true_pfm: -620 test_simulate_pfm tensor(1.5828e-22, device='cuda:0', grad_fn=<DivBackward0>)
episode: 424 training return: tensor(1.5937e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 425 training return: tensor(7.4157e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 426 training return: tensor(2.9647e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 427 training return: tensor(8.2808e-19, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 107 test_true_pfm: -604 test_simulate_pfm tensor(5.4905e-17, device='cuda:0', grad_fn=<DivBackward0>)
episode: 428 training return: tensor(7.9776e-20, device='cuda:0', grad_fn=<AddBackward0>)
episode: 429 training return: tensor(3.5814e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 430 training return: tensor(5.8800e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 431 training return: tensor(3.5720e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 108 test_true_pfm: -602 test_simulate_pfm tensor(4.0193e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 432 training return: tensor(4.1169e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 433 training return: tensor(1.5275e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 434 training return: tensor(7.6492e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 435 training return: tensor(3.9771e-17, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 109 test_true_pfm: -599 test_simulate_pfm tensor(1.4084e-16, device='cuda:0', grad_fn=<DivBackward0>)
episode: 436 training return: tensor(6.2329e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 437 training return: tensor(9.3437e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 438 training return: tensor(1.0179e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 439 training return: tensor(8.5724e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 110 test_true_pfm: -593 test_simulate_pfm tensor(3.5740e-17, device='cuda:0', grad_fn=<DivBackward0>)
episode: 440 training return: tensor(2.0536e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 441 training return: tensor(1.1850e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 442 training return: tensor(5.4878e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 443 training return: tensor(8.5766e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 111 test_true_pfm: -1166 test_simulate_pfm tensor(3.9744e-15, device='cuda:0', grad_fn=<DivBackward0>)
episode: 444 training return: tensor(1.5451e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 445 training return: tensor(5.0639e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 446 training return: tensor(2.5242e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 447 training return: tensor(1.1312e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 112 test_true_pfm: -1028 test_simulate_pfm tensor(1.7731e-12, device='cuda:0', grad_fn=<DivBackward0>)
episode: 448 training return: tensor(7.2473e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 449 training return: tensor(1.8417e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 450 training return: tensor(5.3841e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 451 training return: tensor(9.8743e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 113 test_true_pfm: -1115 test_simulate_pfm tensor(1.2152e-15, device='cuda:0', grad_fn=<DivBackward0>)
episode: 452 training return: tensor(1.0367e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 453 training return: tensor(1.3455e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 454 training return: tensor(3.9850e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 455 training return: tensor(2.6712e-16, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 114 test_true_pfm: -1231 test_simulate_pfm tensor(4.2660e-16, device='cuda:0', grad_fn=<DivBackward0>)
episode: 456 training return: tensor(1.3503e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 457 training return: tensor(5.1476e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 458 training return: tensor(1.1574e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 459 training return: tensor(1.7906e-16, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 115 test_true_pfm: -1232 test_simulate_pfm tensor(5.6635e-12, device='cuda:0', grad_fn=<DivBackward0>)
episode: 460 training return: tensor(4.1607e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 461 training return: tensor(2.1645e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 462 training return: tensor(2.1025e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 463 training return: tensor(1.6041e-17, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 116 test_true_pfm: -1249 test_simulate_pfm tensor(1.1775e-16, device='cuda:0', grad_fn=<DivBackward0>)
episode: 464 training return: tensor(2.0602e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 465 training return: tensor(6.5069e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 466 training return: tensor(4.7952e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 467 training return: tensor(4.2259e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 117 test_true_pfm: -1129 test_simulate_pfm tensor(8.2245e-14, device='cuda:0', grad_fn=<DivBackward0>)
episode: 468 training return: tensor(2.5455e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 469 training return: tensor(8.6707e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 470 training return: tensor(2.7530e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 471 training return: tensor(1.5834e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 118 test_true_pfm: -592 test_simulate_pfm tensor(4.4195e-22, device='cuda:0', grad_fn=<DivBackward0>)
episode: 472 training return: tensor(1.3660e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 473 training return: tensor(4.7731e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 474 training return: tensor(2.1553e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 475 training return: tensor(3.8027e-17, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 119 test_true_pfm: -1255 test_simulate_pfm tensor(2.9657e-19, device='cuda:0', grad_fn=<DivBackward0>)
episode: 476 training return: tensor(4.0430e-20, device='cuda:0', grad_fn=<AddBackward0>)
episode: 477 training return: tensor(4.4488e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 478 training return: tensor(8.8506e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 479 training return: tensor(4.7906e-21, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 120 test_true_pfm: -1253 test_simulate_pfm tensor(1.0211e-17, device='cuda:0', grad_fn=<DivBackward0>)
episode: 480 training return: tensor(2.0131e-20, device='cuda:0', grad_fn=<AddBackward0>)
episode: 481 training return: tensor(1.7341e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 482 training return: tensor(2.5296e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 483 training return: tensor(3.3423e-20, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 121 test_true_pfm: -652 test_simulate_pfm tensor(6.2149e-22, device='cuda:0', grad_fn=<DivBackward0>)
episode: 484 training return: tensor(1.4874e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 485 training return: tensor(1.8866e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 486 training return: tensor(1.3945e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 487 training return: tensor(7.0691e-21, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 122 test_true_pfm: -1212 test_simulate_pfm tensor(1.3398e-18, device='cuda:0', grad_fn=<DivBackward0>)
episode: 488 training return: tensor(5.2608e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 489 training return: tensor(6.4860e-20, device='cuda:0', grad_fn=<AddBackward0>)
episode: 490 training return: tensor(8.4048e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 491 training return: tensor(1.8041e-19, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 123 test_true_pfm: -1282 test_simulate_pfm tensor(8.7762e-18, device='cuda:0', grad_fn=<DivBackward0>)
episode: 492 training return: tensor(4.1880e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 493 training return: tensor(2.1447e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 494 training return: tensor(9.7784e-21, device='cuda:0', grad_fn=<AddBackward0>)
episode: 495 training return: tensor(8.5811e-19, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 124 test_true_pfm: -1287 test_simulate_pfm tensor(1.3711e-17, device='cuda:0', grad_fn=<DivBackward0>)
episode: 496 training return: tensor(2.3955e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 497 training return: tensor(1.0551e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 498 training return: tensor(1.5753e-20, device='cuda:0', grad_fn=<AddBackward0>)
episode: 499 training return: tensor(1.1070e-18, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 125 test_true_pfm: -1272 test_simulate_pfm tensor(5.7980e-16, device='cuda:0', grad_fn=<DivBackward0>)
episode: 500 training return: tensor(2.4559e-20, device='cuda:0', grad_fn=<AddBackward0>)
episode: 501 training return: tensor(6.9053e-20, device='cuda:0', grad_fn=<AddBackward0>)
episode: 502 training return: tensor(2.9989e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 503 training return: tensor(1.2938e-16, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 126 test_true_pfm: -1272 test_simulate_pfm tensor(7.7496e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 504 training return: tensor(1.7937e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 505 training return: tensor(6.0236e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 506 training return: tensor(6.8309e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 507 training return: tensor(5.8928e-19, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 127 test_true_pfm: -1246 test_simulate_pfm tensor(4.7324e-19, device='cuda:0', grad_fn=<DivBackward0>)
episode: 508 training return: tensor(4.6345e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 509 training return: tensor(3.7287e-20, device='cuda:0', grad_fn=<AddBackward0>)
episode: 510 training return: tensor(8.3650e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 511 training return: tensor(1.0023e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 128 test_true_pfm: -1299 test_simulate_pfm tensor(1.3540e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 512 training return: tensor(2.1731e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 513 training return: tensor(4.6505e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 514 training return: tensor(7.6868e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 515 training return: tensor(1.2032e-17, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 129 test_true_pfm: -1154 test_simulate_pfm tensor(2.8763e-16, device='cuda:0', grad_fn=<DivBackward0>)
episode: 516 training return: tensor(5.7825e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 517 training return: tensor(1.1748e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 518 training return: tensor(2.5532e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 519 training return: tensor(7.7723e-21, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 130 test_true_pfm: -1279 test_simulate_pfm tensor(1.1686e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 520 training return: tensor(1.0291e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 521 training return: tensor(4.5171e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 522 training return: tensor(6.8867e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 523 training return: tensor(7.8970e-19, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 131 test_true_pfm: -1227 test_simulate_pfm tensor(1.8521e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 524 training return: tensor(5.1236e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 525 training return: tensor(1.3922e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 526 training return: tensor(1.9412e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 527 training return: tensor(2.6536e-20, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 132 test_true_pfm: -1375 test_simulate_pfm tensor(1.6112e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 528 training return: tensor(1.7294e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 529 training return: tensor(2.3159e-21, device='cuda:0', grad_fn=<AddBackward0>)
episode: 530 training return: tensor(3.6527e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 531 training return: tensor(1.0822e-20, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 133 test_true_pfm: -1400 test_simulate_pfm tensor(1.7783e-17, device='cuda:0', grad_fn=<DivBackward0>)
episode: 532 training return: tensor(1.3517e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 533 training return: tensor(6.6824e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 534 training return: tensor(3.7321e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 535 training return: tensor(8.6087e-21, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 134 test_true_pfm: -1381 test_simulate_pfm tensor(1.3179e-18, device='cuda:0', grad_fn=<DivBackward0>)
episode: 536 training return: tensor(1.7068e-20, device='cuda:0', grad_fn=<AddBackward0>)
episode: 537 training return: tensor(6.8806e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 538 training return: tensor(3.2097e-20, device='cuda:0', grad_fn=<AddBackward0>)
episode: 539 training return: tensor(9.5148e-20, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 135 test_true_pfm: -1391 test_simulate_pfm tensor(3.9417e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 540 training return: tensor(4.4710e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 541 training return: tensor(3.0791e-21, device='cuda:0', grad_fn=<AddBackward0>)
episode: 542 training return: tensor(7.8332e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 543 training return: tensor(1.8206e-19, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 136 test_true_pfm: -1397 test_simulate_pfm tensor(5.5057e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 544 training return: tensor(1.6135e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 545 training return: tensor(1.1844e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 546 training return: tensor(1.2522e-21, device='cuda:0', grad_fn=<AddBackward0>)
episode: 547 training return: tensor(8.4977e-19, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 137 test_true_pfm: -1372 test_simulate_pfm tensor(1.9193e-18, device='cuda:0', grad_fn=<DivBackward0>)
episode: 548 training return: tensor(4.7909e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 549 training return: tensor(1.3679e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 550 training return: tensor(1.1589e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 551 training return: tensor(5.5022e-18, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 138 test_true_pfm: -1346 test_simulate_pfm tensor(6.4256e-19, device='cuda:0', grad_fn=<DivBackward0>)
episode: 552 training return: tensor(5.0749e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 553 training return: tensor(3.3634e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 554 training return: tensor(4.9771e-20, device='cuda:0', grad_fn=<AddBackward0>)
episode: 555 training return: tensor(2.4909e-19, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 139 test_true_pfm: -1373 test_simulate_pfm tensor(5.3697e-15, device='cuda:0', grad_fn=<DivBackward0>)
episode: 556 training return: tensor(2.6645e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 557 training return: tensor(3.6255e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 558 training return: tensor(8.7506e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 559 training return: tensor(2.8613e-19, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 140 test_true_pfm: -1254 test_simulate_pfm tensor(4.5859e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 560 training return: tensor(9.0007e-21, device='cuda:0', grad_fn=<AddBackward0>)
episode: 561 training return: tensor(8.7998e-22, device='cuda:0', grad_fn=<AddBackward0>)
episode: 562 training return: tensor(5.3410e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 563 training return: tensor(8.5026e-20, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 141 test_true_pfm: -1227 test_simulate_pfm tensor(4.6798e-22, device='cuda:0', grad_fn=<DivBackward0>)
episode: 564 training return: tensor(6.5066e-21, device='cuda:0', grad_fn=<AddBackward0>)
episode: 565 training return: tensor(2.0142e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 566 training return: tensor(2.4022e-20, device='cuda:0', grad_fn=<AddBackward0>)
episode: 567 training return: tensor(3.1227e-17, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 142 test_true_pfm: -1367 test_simulate_pfm tensor(7.0072e-17, device='cuda:0', grad_fn=<DivBackward0>)
episode: 568 training return: tensor(1.7085e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 569 training return: tensor(6.7999e-21, device='cuda:0', grad_fn=<AddBackward0>)
episode: 570 training return: tensor(2.6288e-20, device='cuda:0', grad_fn=<AddBackward0>)
episode: 571 training return: tensor(1.5542e-21, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 143 test_true_pfm: -1408 test_simulate_pfm tensor(1.6568e-20, device='cuda:0', grad_fn=<DivBackward0>)
episode: 572 training return: tensor(1.2147e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 573 training return: tensor(1.4443e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 574 training return: tensor(6.8048e-21, device='cuda:0', grad_fn=<AddBackward0>)
episode: 575 training return: tensor(1.2483e-21, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 144 test_true_pfm: -1409 test_simulate_pfm tensor(3.8795e-19, device='cuda:0', grad_fn=<DivBackward0>)
episode: 576 training return: tensor(1.3613e-20, device='cuda:0', grad_fn=<AddBackward0>)
episode: 577 training return: tensor(2.1716e-20, device='cuda:0', grad_fn=<AddBackward0>)
episode: 578 training return: tensor(8.5994e-21, device='cuda:0', grad_fn=<AddBackward0>)
episode: 579 training return: tensor(2.4424e-20, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 145 test_true_pfm: -1396 test_simulate_pfm tensor(2.5568e-16, device='cuda:0', grad_fn=<DivBackward0>)
episode: 580 training return: tensor(8.7128e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 581 training return: tensor(6.2956e-23, device='cuda:0', grad_fn=<AddBackward0>)
episode: 582 training return: tensor(7.5700e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 583 training return: tensor(1.8309e-20, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 146 test_true_pfm: -1420 test_simulate_pfm tensor(5.3046e-19, device='cuda:0', grad_fn=<DivBackward0>)
episode: 584 training return: tensor(3.3712e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 585 training return: tensor(3.2412e-21, device='cuda:0', grad_fn=<AddBackward0>)
episode: 586 training return: tensor(2.0496e-18, device='cuda:0', grad_fn=<AddBackward0>)
episode: 587 training return: tensor(2.8581e-20, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 147 test_true_pfm: -1403 test_simulate_pfm tensor(4.8848e-17, device='cuda:0', grad_fn=<DivBackward0>)
episode: 588 training return: tensor(4.1420e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 589 training return: tensor(1.7199e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 590 training return: tensor(1.9419e-22, device='cuda:0', grad_fn=<AddBackward0>)
episode: 591 training return: tensor(1.2011e-17, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 148 test_true_pfm: -1419 test_simulate_pfm tensor(4.5372e-21, device='cuda:0', grad_fn=<DivBackward0>)
episode: 592 training return: tensor(1.6943e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 593 training return: tensor(6.7395e-21, device='cuda:0', grad_fn=<AddBackward0>)
episode: 594 training return: tensor(4.6647e-21, device='cuda:0', grad_fn=<AddBackward0>)
episode: 595 training return: tensor(8.3152e-20, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 149 test_true_pfm: -1415 test_simulate_pfm tensor(2.0759e-16, device='cuda:0', grad_fn=<DivBackward0>)
episode: 596 training return: tensor(1.2406e-20, device='cuda:0', grad_fn=<AddBackward0>)
episode: 597 training return: tensor(4.9733e-20, device='cuda:0', grad_fn=<AddBackward0>)
episode: 598 training return: tensor(4.0539e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 599 training return: tensor(5.9608e-16, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 150 test_true_pfm: -779 test_simulate_pfm tensor(1.0697e-20, device='cuda:0', grad_fn=<DivBackward0>)
