initial performance: 8218
episode: 0 training return: tensor(1.3390, device='cuda:0', grad_fn=<AddBackward0>)
episode: 1 training return: tensor(1.8935, device='cuda:0', grad_fn=<AddBackward0>)
episode: 2 training return: tensor(1.0118e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 3 training return: tensor(1.5941e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 1 test_true_pfm: -25 test_simulate_pfm tensor(1.0950e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 4 training return: tensor(1.1607e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 5 training return: tensor(7.2127e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 6 training return: tensor(4.2047e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 7 training return: tensor(9.1580e-17, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 2 test_true_pfm: -386 test_simulate_pfm tensor(1.5344e-11, device='cuda:0', grad_fn=<DivBackward0>)
episode: 8 training return: tensor(3.1491e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 9 training return: tensor(4.0293e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 10 training return: tensor(8.8912e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 11 training return: tensor(1.3366e-17, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 3 test_true_pfm: -509 test_simulate_pfm tensor(7.2743e-11, device='cuda:0', grad_fn=<DivBackward0>)
episode: 12 training return: tensor(8.0100e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 13 training return: tensor(3.6138e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 14 training return: tensor(1.2827e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 15 training return: tensor(1.3991e-12, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 4 test_true_pfm: -583 test_simulate_pfm tensor(1.9377e-11, device='cuda:0', grad_fn=<DivBackward0>)
episode: 16 training return: tensor(6.8756e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 17 training return: tensor(1.5630e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 18 training return: tensor(1.7459e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 19 training return: tensor(2.3312e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 5 test_true_pfm: -369 test_simulate_pfm tensor(1.8362e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 20 training return: tensor(7.8661e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 21 training return: tensor(1.0826e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 22 training return: tensor(6.9721e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 23 training return: tensor(1.1480e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 6 test_true_pfm: -548 test_simulate_pfm tensor(3.4672e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 24 training return: tensor(1.4955e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 25 training return: tensor(5.1865e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 26 training return: tensor(1.4913e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 27 training return: tensor(4.5517e-18, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 7 test_true_pfm: -429 test_simulate_pfm tensor(1.4164e-11, device='cuda:0', grad_fn=<DivBackward0>)
episode: 28 training return: tensor(1.4533e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 29 training return: tensor(4.4513e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 30 training return: tensor(1.1516e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 31 training return: tensor(5.0387e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 8 test_true_pfm: -267 test_simulate_pfm tensor(6.6479e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 32 training return: tensor(1.1125e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 33 training return: tensor(3.8567e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 34 training return: tensor(7.9218e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 35 training return: tensor(1.3131e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 9 test_true_pfm: 1669 test_simulate_pfm tensor(1.8313e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 36 training return: tensor(1.0520e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 37 training return: tensor(1.8836e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 38 training return: tensor(6.2540e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 39 training return: tensor(1.1191e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 10 test_true_pfm: 1492 test_simulate_pfm tensor(0.0002, device='cuda:0', grad_fn=<DivBackward0>)
episode: 40 training return: tensor(4.5303e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 41 training return: tensor(1.0699e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 42 training return: tensor(6.0015e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 43 training return: tensor(3.6474e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 11 test_true_pfm: 363 test_simulate_pfm tensor(1.2995e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 44 training return: tensor(2.2615e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 45 training return: tensor(5.5989e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 46 training return: tensor(1.0007e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 47 training return: tensor(3.5992e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 12 test_true_pfm: -18 test_simulate_pfm tensor(1.7304e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 48 training return: tensor(1.8817e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 49 training return: tensor(1.9327e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 50 training return: tensor(7.2692e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 51 training return: tensor(4.1628e-12, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 13 test_true_pfm: 107 test_simulate_pfm tensor(9.2456e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 52 training return: tensor(4.5085e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 53 training return: tensor(1.1860e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 54 training return: tensor(9.7449e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 55 training return: tensor(2.4602e-12, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 14 test_true_pfm: -394 test_simulate_pfm tensor(3.0320e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 56 training return: tensor(2.4543e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 57 training return: tensor(4.4732e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 58 training return: tensor(9.3057e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 59 training return: tensor(2.1900e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 15 test_true_pfm: 13 test_simulate_pfm tensor(1.0309e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 60 training return: tensor(1.0758e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 61 training return: tensor(2.9537e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 62 training return: tensor(9.4884e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 63 training return: tensor(2.5133e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 16 test_true_pfm: -247 test_simulate_pfm tensor(2.5807e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 64 training return: tensor(1.8796e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 65 training return: tensor(7.3721e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 66 training return: tensor(6.4165e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 67 training return: tensor(2.5057e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 17 test_true_pfm: 1045 test_simulate_pfm tensor(6.3863e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 68 training return: tensor(5.1846e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 69 training return: tensor(8.5354e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 70 training return: tensor(1.5952e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 71 training return: tensor(9.4872e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 18 test_true_pfm: 1186 test_simulate_pfm tensor(4.4121e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 72 training return: tensor(7.5835e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 73 training return: tensor(3.4873e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 74 training return: tensor(1.4613e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 75 training return: tensor(1.0854e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 19 test_true_pfm: 1053 test_simulate_pfm tensor(1.6992e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 76 training return: tensor(2.0804e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 77 training return: tensor(9.1518e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 78 training return: tensor(1.2892e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 79 training return: tensor(1.1232e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 20 test_true_pfm: 413 test_simulate_pfm tensor(1.3667e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 80 training return: tensor(1.9075e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 81 training return: tensor(4.6002e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 82 training return: tensor(1.1182e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 83 training return: tensor(4.0825e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 21 test_true_pfm: 701 test_simulate_pfm tensor(1.0073e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 84 training return: tensor(1.4666e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 85 training return: tensor(5.0548e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 86 training return: tensor(3.4704e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 87 training return: tensor(3.2148e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 22 test_true_pfm: 915 test_simulate_pfm tensor(8.0138e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 88 training return: tensor(7.1999e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 89 training return: tensor(3.7565e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 90 training return: tensor(3.2712e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 91 training return: tensor(7.4087e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 23 test_true_pfm: 846 test_simulate_pfm tensor(5.8374e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 92 training return: tensor(2.0558e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 93 training return: tensor(4.2083e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 94 training return: tensor(7.8966e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 95 training return: tensor(3.8792e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 24 test_true_pfm: 513 test_simulate_pfm tensor(4.0051e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 96 training return: tensor(1.1322e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 97 training return: tensor(1.3826e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 98 training return: tensor(7.3272e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 99 training return: tensor(6.4211e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 25 test_true_pfm: 973 test_simulate_pfm tensor(6.2333e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 100 training return: tensor(3.1785e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 101 training return: tensor(4.4333e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 102 training return: tensor(3.2766e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 103 training return: tensor(1.0885e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 26 test_true_pfm: -414 test_simulate_pfm tensor(5.5845e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 104 training return: tensor(1.1887e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 105 training return: tensor(3.3281e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 106 training return: tensor(1.5951e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 107 training return: tensor(2.0889e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 27 test_true_pfm: -445 test_simulate_pfm tensor(3.5631e-10, device='cuda:0', grad_fn=<DivBackward0>)
episode: 108 training return: tensor(3.3564e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 109 training return: tensor(1.7779e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 110 training return: tensor(9.6835e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 111 training return: tensor(3.6436e-09, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 28 test_true_pfm: -376 test_simulate_pfm tensor(6.5592e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 112 training return: tensor(4.7629e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 113 training return: tensor(1.6904e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 114 training return: tensor(1.2842e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 115 training return: tensor(1.9892e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 29 test_true_pfm: -816 test_simulate_pfm tensor(1.8110e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 116 training return: tensor(1.4228e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 117 training return: tensor(9.0403e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 118 training return: tensor(7.7309e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 119 training return: tensor(5.6450e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 30 test_true_pfm: -597 test_simulate_pfm tensor(1.5374e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 120 training return: tensor(6.0405e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 121 training return: tensor(3.8348e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 122 training return: tensor(4.7950e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 123 training return: tensor(6.3800e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 31 test_true_pfm: -512 test_simulate_pfm tensor(1.0021e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 124 training return: tensor(7.6024e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 125 training return: tensor(4.9657e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 126 training return: tensor(1.9914e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 127 training return: tensor(6.8874e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 32 test_true_pfm: -607 test_simulate_pfm tensor(4.2410e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 128 training return: tensor(4.2061e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 129 training return: tensor(3.8317e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 130 training return: tensor(1.2166e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 131 training return: tensor(3.9117e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 33 test_true_pfm: -453 test_simulate_pfm tensor(3.4919e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 132 training return: tensor(5.2673e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 133 training return: tensor(8.7121e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 134 training return: tensor(1.1211e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 135 training return: tensor(2.9011e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 34 test_true_pfm: -64 test_simulate_pfm tensor(2.0980e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 136 training return: tensor(1.7607e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 137 training return: tensor(4.4393e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 138 training return: tensor(1.1471e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 139 training return: tensor(6.7150e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 35 test_true_pfm: 67 test_simulate_pfm tensor(2.1858e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 140 training return: tensor(6.0038e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 141 training return: tensor(5.2794e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 142 training return: tensor(5.8972e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 143 training return: tensor(2.6162e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 36 test_true_pfm: 227 test_simulate_pfm tensor(0.0001, device='cuda:0', grad_fn=<DivBackward0>)
episode: 144 training return: tensor(2.5253e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 145 training return: tensor(9.2372e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 146 training return: tensor(9.5857e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 147 training return: tensor(4.5333e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 37 test_true_pfm: 208 test_simulate_pfm tensor(2.7974e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 148 training return: tensor(9.6986e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 149 training return: tensor(8.6397e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 150 training return: tensor(2.7928e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 151 training return: tensor(1.3712e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 38 test_true_pfm: 173 test_simulate_pfm tensor(5.6436e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 152 training return: tensor(5.9815e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 153 training return: tensor(2.5852e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 154 training return: tensor(5.3172e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 155 training return: tensor(6.2532e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 39 test_true_pfm: -238 test_simulate_pfm tensor(1.8683e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 156 training return: tensor(9.6308e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 157 training return: tensor(4.2453e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 158 training return: tensor(1.6614e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 159 training return: tensor(2.1163e-09, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 40 test_true_pfm: -376 test_simulate_pfm tensor(7.3976e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 160 training return: tensor(5.3969e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 161 training return: tensor(7.9727e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 162 training return: tensor(1.6107e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 163 training return: tensor(3.8773e-09, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 41 test_true_pfm: -331 test_simulate_pfm tensor(1.3668e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 164 training return: tensor(3.9001e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 165 training return: tensor(3.1432e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 166 training return: tensor(1.7805e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 167 training return: tensor(9.9541e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 42 test_true_pfm: -336 test_simulate_pfm tensor(2.0868e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 168 training return: tensor(1.8260e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 169 training return: tensor(2.1510e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 170 training return: tensor(4.8187e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 171 training return: tensor(2.9302e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 43 test_true_pfm: -337 test_simulate_pfm tensor(7.0330e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 172 training return: tensor(9.8381e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 173 training return: tensor(2.6330e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 174 training return: tensor(1.7827e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 175 training return: tensor(2.8435e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 44 test_true_pfm: -601 test_simulate_pfm tensor(1.2866e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 176 training return: tensor(7.6505e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 177 training return: tensor(1.0398e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 178 training return: tensor(1.2268e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 179 training return: tensor(6.0481e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 45 test_true_pfm: -220 test_simulate_pfm tensor(1.7180e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 180 training return: tensor(4.5776e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 181 training return: tensor(6.3182e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 182 training return: tensor(1.8088e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 183 training return: tensor(3.0313e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 46 test_true_pfm: -784 test_simulate_pfm tensor(5.6873e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 184 training return: tensor(7.8876e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 185 training return: tensor(1.6974e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 186 training return: tensor(9.8332e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 187 training return: tensor(6.2458e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 47 test_true_pfm: -687 test_simulate_pfm tensor(5.2112e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 188 training return: tensor(3.1898e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 189 training return: tensor(3.9581e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 190 training return: tensor(2.0352e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 191 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 48 test_true_pfm: -421 test_simulate_pfm tensor(2.0616e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 192 training return: tensor(1.0122e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 193 training return: tensor(1.0491e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 194 training return: tensor(0.0001, device='cuda:0', grad_fn=<AddBackward0>)
episode: 195 training return: tensor(2.7932e-09, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 49 test_true_pfm: 476 test_simulate_pfm tensor(0.0004, device='cuda:0', grad_fn=<DivBackward0>)
episode: 196 training return: tensor(1.6801e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 197 training return: tensor(0.0005, device='cuda:0', grad_fn=<AddBackward0>)
episode: 198 training return: tensor(2.5583e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 199 training return: tensor(3.8047e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 50 test_true_pfm: -299 test_simulate_pfm tensor(1.4376e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 200 training return: tensor(1.3038e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 201 training return: tensor(9.2878e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 202 training return: tensor(1.0266e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 203 training return: tensor(0.0003, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 51 test_true_pfm: 113 test_simulate_pfm tensor(0.0005, device='cuda:0', grad_fn=<DivBackward0>)
episode: 204 training return: tensor(9.7262e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 205 training return: tensor(6.5363e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 206 training return: tensor(4.1940e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 207 training return: tensor(4.1016e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 52 test_true_pfm: 878 test_simulate_pfm tensor(1.3488e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 208 training return: tensor(4.1739e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 209 training return: tensor(5.6028e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 210 training return: tensor(2.0380e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 211 training return: tensor(4.2395e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 53 test_true_pfm: -187 test_simulate_pfm tensor(2.0068e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 212 training return: tensor(4.2977e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 213 training return: tensor(0.0003, device='cuda:0', grad_fn=<AddBackward0>)
episode: 214 training return: tensor(0.0010, device='cuda:0', grad_fn=<AddBackward0>)
episode: 215 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 54 test_true_pfm: 888 test_simulate_pfm tensor(0.0004, device='cuda:0', grad_fn=<DivBackward0>)
episode: 216 training return: tensor(0.0011, device='cuda:0', grad_fn=<AddBackward0>)
episode: 217 training return: tensor(3.0802e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 218 training return: tensor(1.0693e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 219 training return: tensor(4.1032e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 55 test_true_pfm: 53 test_simulate_pfm tensor(8.0005e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 220 training return: tensor(0.0034, device='cuda:0', grad_fn=<AddBackward0>)
episode: 221 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
episode: 222 training return: tensor(0.0052, device='cuda:0', grad_fn=<AddBackward0>)
episode: 223 training return: tensor(5.9174e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 56 test_true_pfm: 484 test_simulate_pfm tensor(1.8038e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 224 training return: tensor(0.0011, device='cuda:0', grad_fn=<AddBackward0>)
episode: 225 training return: tensor(4.5285e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 226 training return: tensor(0.0009, device='cuda:0', grad_fn=<AddBackward0>)
episode: 227 training return: tensor(2.7176e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 57 test_true_pfm: 470 test_simulate_pfm tensor(9.4355e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 228 training return: tensor(2.4266e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 229 training return: tensor(8.8998e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 230 training return: tensor(0.0001, device='cuda:0', grad_fn=<AddBackward0>)
episode: 231 training return: tensor(8.4722e-09, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 58 test_true_pfm: 1608 test_simulate_pfm tensor(0.0018, device='cuda:0', grad_fn=<DivBackward0>)
episode: 232 training return: tensor(4.2691e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 233 training return: tensor(0.0006, device='cuda:0', grad_fn=<AddBackward0>)
episode: 234 training return: tensor(9.9785e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 235 training return: tensor(2.6645e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 59 test_true_pfm: 274 test_simulate_pfm tensor(1.5101e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 236 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
episode: 237 training return: tensor(1.3242e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 238 training return: tensor(2.6879e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 239 training return: tensor(2.0354e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 60 test_true_pfm: 1106 test_simulate_pfm tensor(0.0009, device='cuda:0', grad_fn=<DivBackward0>)
episode: 240 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
episode: 241 training return: tensor(0.0003, device='cuda:0', grad_fn=<AddBackward0>)
episode: 242 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
episode: 243 training return: tensor(7.8224e-09, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 61 test_true_pfm: 454 test_simulate_pfm tensor(7.8516e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 244 training return: tensor(0.0001, device='cuda:0', grad_fn=<AddBackward0>)
episode: 245 training return: tensor(3.6202e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 246 training return: tensor(8.4501e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 247 training return: tensor(2.8330e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 62 test_true_pfm: 572 test_simulate_pfm tensor(9.6144e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 248 training return: tensor(1.9463e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 249 training return: tensor(4.6887e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 250 training return: tensor(3.9906e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 251 training return: tensor(8.8826e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 63 test_true_pfm: -335 test_simulate_pfm tensor(2.7339e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 252 training return: tensor(2.5153e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 253 training return: tensor(9.6051e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 254 training return: tensor(2.1748e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 255 training return: tensor(2.8761e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 64 test_true_pfm: -81 test_simulate_pfm tensor(2.1583e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 256 training return: tensor(3.7266e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 257 training return: tensor(2.9706e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 258 training return: tensor(5.8580e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 259 training return: tensor(3.5307e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 65 test_true_pfm: 486 test_simulate_pfm tensor(3.8924e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 260 training return: tensor(1.5203e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 261 training return: tensor(4.5486e-15, device='cuda:0', grad_fn=<AddBackward0>)
episode: 262 training return: tensor(8.4030e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 263 training return: tensor(1.3109e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 66 test_true_pfm: -9 test_simulate_pfm tensor(6.2390e-10, device='cuda:0', grad_fn=<DivBackward0>)
episode: 264 training return: tensor(8.9625e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 265 training return: tensor(7.5732e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 266 training return: tensor(1.7325e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 267 training return: tensor(4.4918e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 67 test_true_pfm: 273 test_simulate_pfm tensor(1.8201e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 268 training return: tensor(4.5511e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 269 training return: tensor(7.4236e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 270 training return: tensor(9.6358e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 271 training return: tensor(3.6397e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 68 test_true_pfm: -211 test_simulate_pfm tensor(4.4276e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 272 training return: tensor(2.7172e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 273 training return: tensor(2.8163e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 274 training return: tensor(1.6099e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 275 training return: tensor(4.3441e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 69 test_true_pfm: -530 test_simulate_pfm tensor(1.1611e-11, device='cuda:0', grad_fn=<DivBackward0>)
episode: 276 training return: tensor(2.3955e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 277 training return: tensor(4.2684e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 278 training return: tensor(1.6850e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 279 training return: tensor(3.0259e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 70 test_true_pfm: 162 test_simulate_pfm tensor(9.9149e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 280 training return: tensor(9.1392e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 281 training return: tensor(3.2369e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 282 training return: tensor(5.5185e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 283 training return: tensor(3.2360e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 71 test_true_pfm: -213 test_simulate_pfm tensor(8.4784e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 284 training return: tensor(4.6649e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 285 training return: tensor(5.6824e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 286 training return: tensor(3.6236e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 287 training return: tensor(2.2838e-09, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 72 test_true_pfm: -316 test_simulate_pfm tensor(1.0291e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 288 training return: tensor(2.3217e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 289 training return: tensor(6.0073e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 290 training return: tensor(2.3127e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 291 training return: tensor(7.3691e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 73 test_true_pfm: -249 test_simulate_pfm tensor(1.0269e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 292 training return: tensor(9.6162e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 293 training return: tensor(7.1703e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 294 training return: tensor(1.1962e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 295 training return: tensor(1.4360e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 74 test_true_pfm: 156 test_simulate_pfm tensor(5.0984e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 296 training return: tensor(1.1919e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 297 training return: tensor(3.6870e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 298 training return: tensor(7.3490e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 299 training return: tensor(5.8036e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 75 test_true_pfm: -195 test_simulate_pfm tensor(7.0173e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 300 training return: tensor(1.8895e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 301 training return: tensor(1.4618e-19, device='cuda:0', grad_fn=<AddBackward0>)
episode: 302 training return: tensor(3.5650e-17, device='cuda:0', grad_fn=<AddBackward0>)
episode: 303 training return: tensor(1.2908e-16, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 76 test_true_pfm: -498 test_simulate_pfm tensor(3.6526e-10, device='cuda:0', grad_fn=<DivBackward0>)
episode: 304 training return: tensor(8.0252e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 305 training return: tensor(4.1574e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 306 training return: tensor(6.8988e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 307 training return: tensor(5.2839e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 77 test_true_pfm: -389 test_simulate_pfm tensor(2.8444e-10, device='cuda:0', grad_fn=<DivBackward0>)
episode: 308 training return: tensor(7.4172e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 309 training return: tensor(3.8572e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 310 training return: tensor(6.8820e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 311 training return: tensor(3.9173e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 78 test_true_pfm: -178 test_simulate_pfm tensor(8.5173e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 312 training return: tensor(2.2157e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 313 training return: tensor(3.7101e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 314 training return: tensor(1.4303e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 315 training return: tensor(7.2854e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 79 test_true_pfm: 77 test_simulate_pfm tensor(2.7570e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 316 training return: tensor(5.7905e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 317 training return: tensor(4.5987e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 318 training return: tensor(2.4775e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 319 training return: tensor(6.0324e-09, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 80 test_true_pfm: -250 test_simulate_pfm tensor(5.4157e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 320 training return: tensor(3.7118e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 321 training return: tensor(7.8171e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 322 training return: tensor(9.7505e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 323 training return: tensor(2.9962e-09, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 81 test_true_pfm: -510 test_simulate_pfm tensor(3.8728e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 324 training return: tensor(1.3375e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 325 training return: tensor(7.7154e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 326 training return: tensor(5.6510e-16, device='cuda:0', grad_fn=<AddBackward0>)
episode: 327 training return: tensor(9.3963e-15, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 82 test_true_pfm: -53 test_simulate_pfm tensor(7.5463e-10, device='cuda:0', grad_fn=<DivBackward0>)
episode: 328 training return: tensor(4.1534e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 329 training return: tensor(2.3793e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 330 training return: tensor(3.8794e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 331 training return: tensor(4.0626e-09, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 83 test_true_pfm: -385 test_simulate_pfm tensor(1.1205e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 332 training return: tensor(8.0784e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 333 training return: tensor(1.6203e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 334 training return: tensor(3.4814e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 335 training return: tensor(1.2505e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 84 test_true_pfm: -415 test_simulate_pfm tensor(1.0317e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 336 training return: tensor(1.6412e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 337 training return: tensor(3.5163e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 338 training return: tensor(3.3147e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 339 training return: tensor(2.1194e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 85 test_true_pfm: -270 test_simulate_pfm tensor(5.4340e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 340 training return: tensor(1.3787e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 341 training return: tensor(3.6376e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 342 training return: tensor(1.8186e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 343 training return: tensor(4.1690e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 86 test_true_pfm: 831 test_simulate_pfm tensor(4.1263e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 344 training return: tensor(6.0422e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 345 training return: tensor(8.6917e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 346 training return: tensor(1.3845e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 347 training return: tensor(2.9282e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 87 test_true_pfm: 1319 test_simulate_pfm tensor(1.3335e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 348 training return: tensor(2.3255e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 349 training return: tensor(9.0883e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 350 training return: tensor(2.3177e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 351 training return: tensor(1.0440e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 88 test_true_pfm: -69 test_simulate_pfm tensor(1.5011e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 352 training return: tensor(4.3255e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 353 training return: tensor(8.8322e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 354 training return: tensor(1.2172e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 355 training return: tensor(1.6727e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 89 test_true_pfm: 90 test_simulate_pfm tensor(1.4254e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 356 training return: tensor(8.6284e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 357 training return: tensor(3.5576e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 358 training return: tensor(1.2226e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 359 training return: tensor(1.5785e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 90 test_true_pfm: -237 test_simulate_pfm tensor(5.1950e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 360 training return: tensor(1.9280e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 361 training return: tensor(2.5805e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 362 training return: tensor(3.1042e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 363 training return: tensor(6.9113e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 91 test_true_pfm: -383 test_simulate_pfm tensor(1.8995e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 364 training return: tensor(4.6510e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 365 training return: tensor(1.2421e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 366 training return: tensor(6.7032e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 367 training return: tensor(2.6712e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 92 test_true_pfm: -303 test_simulate_pfm tensor(4.8697e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 368 training return: tensor(4.0500e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 369 training return: tensor(3.8933e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 370 training return: tensor(1.4750e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 371 training return: tensor(4.1854e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 93 test_true_pfm: -158 test_simulate_pfm tensor(4.8793e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 372 training return: tensor(3.5155e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 373 training return: tensor(2.5602e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 374 training return: tensor(9.9419e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 375 training return: tensor(1.1709e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 94 test_true_pfm: -437 test_simulate_pfm tensor(1.5302e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 376 training return: tensor(2.1380e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 377 training return: tensor(1.7674e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 378 training return: tensor(2.0716e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 379 training return: tensor(3.7776e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 95 test_true_pfm: 191 test_simulate_pfm tensor(1.6903e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 380 training return: tensor(3.6902e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 381 training return: tensor(2.3877e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 382 training return: tensor(5.3969e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 383 training return: tensor(2.6945e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 96 test_true_pfm: -212 test_simulate_pfm tensor(0.0001, device='cuda:0', grad_fn=<DivBackward0>)
episode: 384 training return: tensor(4.6394e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 385 training return: tensor(2.4011e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 386 training return: tensor(1.3367e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 387 training return: tensor(7.3997e-09, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 97 test_true_pfm: 357 test_simulate_pfm tensor(9.4076e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 388 training return: tensor(4.2148e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 389 training return: tensor(3.7750e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 390 training return: tensor(2.1438e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 391 training return: tensor(4.6604e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 98 test_true_pfm: 479 test_simulate_pfm tensor(7.6577e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 392 training return: tensor(4.0742e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 393 training return: tensor(1.1266e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 394 training return: tensor(4.1585e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 395 training return: tensor(8.3061e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 99 test_true_pfm: 158 test_simulate_pfm tensor(2.3150e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 396 training return: tensor(2.1132e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 397 training return: tensor(8.1316e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 398 training return: tensor(3.7280e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 399 training return: tensor(6.6578e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 100 test_true_pfm: 733 test_simulate_pfm tensor(1.4745e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 400 training return: tensor(1.4396e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 401 training return: tensor(3.7500e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 402 training return: tensor(1.2824e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 403 training return: tensor(4.4681e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 101 test_true_pfm: -136 test_simulate_pfm tensor(2.2261e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 404 training return: tensor(6.7019e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 405 training return: tensor(5.7407e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 406 training return: tensor(1.2637e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 407 training return: tensor(1.7837e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 102 test_true_pfm: -196 test_simulate_pfm tensor(3.0939e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 408 training return: tensor(3.2734e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 409 training return: tensor(3.3997e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 410 training return: tensor(2.4199e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 411 training return: tensor(1.1075e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 103 test_true_pfm: -339 test_simulate_pfm tensor(1.7671e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 412 training return: tensor(1.1482e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 413 training return: tensor(4.5983e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 414 training return: tensor(4.3777e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 415 training return: tensor(1.3617e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 104 test_true_pfm: 13 test_simulate_pfm tensor(4.9849e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 416 training return: tensor(2.4460e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 417 training return: tensor(4.5665e-14, device='cuda:0', grad_fn=<AddBackward0>)
episode: 418 training return: tensor(8.5387e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 419 training return: tensor(8.7129e-09, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 105 test_true_pfm: -3 test_simulate_pfm tensor(2.7761e-09, device='cuda:0', grad_fn=<DivBackward0>)
episode: 420 training return: tensor(1.3297e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 421 training return: tensor(1.5523e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 422 training return: tensor(6.5634e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 423 training return: tensor(1.3795e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 106 test_true_pfm: -173 test_simulate_pfm tensor(1.2844e-10, device='cuda:0', grad_fn=<DivBackward0>)
episode: 424 training return: tensor(6.5461e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 425 training return: tensor(3.1998e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 426 training return: tensor(1.6772e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 427 training return: tensor(3.8572e-09, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 107 test_true_pfm: 487 test_simulate_pfm tensor(1.4693e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 428 training return: tensor(6.3171e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 429 training return: tensor(5.0884e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 430 training return: tensor(4.5522e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 431 training return: tensor(2.2832e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 108 test_true_pfm: 660 test_simulate_pfm tensor(6.5429e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 432 training return: tensor(2.2289e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 433 training return: tensor(2.3725e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 434 training return: tensor(4.2011e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 435 training return: tensor(1.2696e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 109 test_true_pfm: 596 test_simulate_pfm tensor(6.6108e-08, device='cuda:0', grad_fn=<DivBackward0>)
episode: 436 training return: tensor(1.1210e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 437 training return: tensor(2.7409e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 438 training return: tensor(9.2174e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 439 training return: tensor(8.9044e-11, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 110 test_true_pfm: 487 test_simulate_pfm tensor(2.1362e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 440 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
episode: 441 training return: tensor(3.3515e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 442 training return: tensor(2.2297e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 443 training return: tensor(4.3028e-10, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 111 test_true_pfm: 1344 test_simulate_pfm tensor(3.4419e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 444 training return: tensor(4.5425e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 445 training return: tensor(7.6970e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 446 training return: tensor(3.1419e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 447 training return: tensor(6.7599e-09, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 112 test_true_pfm: 652 test_simulate_pfm tensor(2.6725e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 448 training return: tensor(3.5985e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 449 training return: tensor(2.8678e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 450 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
episode: 451 training return: tensor(1.6457e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 113 test_true_pfm: 110 test_simulate_pfm tensor(3.3512e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 452 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
episode: 453 training return: tensor(5.5794e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 454 training return: tensor(1.1451e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 455 training return: tensor(1.8604e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 114 test_true_pfm: 231 test_simulate_pfm tensor(8.0281e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 456 training return: tensor(1.6010e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 457 training return: tensor(1.3370e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 458 training return: tensor(1.4167e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 459 training return: tensor(5.4100e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 115 test_true_pfm: 1051 test_simulate_pfm tensor(1.3330e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 460 training return: tensor(2.6690e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 461 training return: tensor(4.0396e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 462 training return: tensor(2.4841e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 463 training return: tensor(1.5573e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 116 test_true_pfm: 1333 test_simulate_pfm tensor(1.7254e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 464 training return: tensor(2.5127e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 465 training return: tensor(1.6578e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 466 training return: tensor(8.2797e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 467 training return: tensor(1.2563e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 117 test_true_pfm: 40 test_simulate_pfm tensor(3.9014e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 468 training return: tensor(3.6430e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 469 training return: tensor(4.1928e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 470 training return: tensor(1.0425e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 471 training return: tensor(3.3477e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 118 test_true_pfm: 1500 test_simulate_pfm tensor(1.3905e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 472 training return: tensor(5.3758e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 473 training return: tensor(3.8790e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 474 training return: tensor(2.4907e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 475 training return: tensor(2.4487e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 119 test_true_pfm: 787 test_simulate_pfm tensor(0.0002, device='cuda:0', grad_fn=<DivBackward0>)
episode: 476 training return: tensor(2.3805e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 477 training return: tensor(1.7393e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 478 training return: tensor(2.8289e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 479 training return: tensor(1.9097e-12, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 120 test_true_pfm: 836 test_simulate_pfm tensor(1.7927e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 480 training return: tensor(2.1337e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 481 training return: tensor(2.8507e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 482 training return: tensor(1.2793e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 483 training return: tensor(7.9620e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 121 test_true_pfm: 993 test_simulate_pfm tensor(7.0248e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 484 training return: tensor(5.6693e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 485 training return: tensor(5.0334e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 486 training return: tensor(1.4343e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 487 training return: tensor(2.5561e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 122 test_true_pfm: 862 test_simulate_pfm tensor(1.2406e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 488 training return: tensor(3.9963e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 489 training return: tensor(4.7906e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 490 training return: tensor(3.4567e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 491 training return: tensor(3.3895e-09, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 123 test_true_pfm: 254 test_simulate_pfm tensor(2.6767e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 492 training return: tensor(1.5135e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 493 training return: tensor(1.3984e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 494 training return: tensor(0.0001, device='cuda:0', grad_fn=<AddBackward0>)
episode: 495 training return: tensor(2.9640e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 124 test_true_pfm: 836 test_simulate_pfm tensor(2.1363e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 496 training return: tensor(8.8363e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 497 training return: tensor(2.1294e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 498 training return: tensor(2.9331e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 499 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 125 test_true_pfm: 1044 test_simulate_pfm tensor(0.0003, device='cuda:0', grad_fn=<DivBackward0>)
episode: 500 training return: tensor(1.7738e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 501 training return: tensor(1.6037e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 502 training return: tensor(1.0082e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 503 training return: tensor(1.2832e-18, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 126 test_true_pfm: 178 test_simulate_pfm tensor(1.0997e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 504 training return: tensor(0.0004, device='cuda:0', grad_fn=<AddBackward0>)
episode: 505 training return: tensor(5.9811e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 506 training return: tensor(4.8103e-10, device='cuda:0', grad_fn=<AddBackward0>)
episode: 507 training return: tensor(5.0617e-08, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 127 test_true_pfm: 590 test_simulate_pfm tensor(2.1365e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 508 training return: tensor(6.8913e-11, device='cuda:0', grad_fn=<AddBackward0>)
episode: 509 training return: tensor(6.1843e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 510 training return: tensor(1.7111e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 511 training return: tensor(2.1406e-13, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 128 test_true_pfm: -166 test_simulate_pfm tensor(1.0214e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 512 training return: tensor(1.3078e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 513 training return: tensor(7.0362e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 514 training return: tensor(2.0921e-09, device='cuda:0', grad_fn=<AddBackward0>)
episode: 515 training return: tensor(7.7136e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 129 test_true_pfm: -302 test_simulate_pfm tensor(3.6314e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 516 training return: tensor(6.0418e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 517 training return: tensor(6.0792e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 518 training return: tensor(2.9759e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 519 training return: tensor(8.3454e-14, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 130 test_true_pfm: 148 test_simulate_pfm tensor(3.2981e-07, device='cuda:0', grad_fn=<DivBackward0>)
episode: 520 training return: tensor(1.2663e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 521 training return: tensor(4.6966e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 522 training return: tensor(1.2814e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 523 training return: tensor(3.6660e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 131 test_true_pfm: 664 test_simulate_pfm tensor(7.4611e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 524 training return: tensor(5.0566e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 525 training return: tensor(5.5590e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 526 training return: tensor(4.4569e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 527 training return: tensor(6.6432e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 132 test_true_pfm: 1008 test_simulate_pfm tensor(3.6661e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 528 training return: tensor(3.1969e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 529 training return: tensor(6.3235e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 530 training return: tensor(1.6200e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 531 training return: tensor(5.1987e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 133 test_true_pfm: 952 test_simulate_pfm tensor(9.0374e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 532 training return: tensor(8.4294e-13, device='cuda:0', grad_fn=<AddBackward0>)
episode: 533 training return: tensor(7.1250e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 534 training return: tensor(3.8791e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 535 training return: tensor(0.0001, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 134 test_true_pfm: 1661 test_simulate_pfm tensor(0.0002, device='cuda:0', grad_fn=<DivBackward0>)
episode: 536 training return: tensor(0.0001, device='cuda:0', grad_fn=<AddBackward0>)
episode: 537 training return: tensor(4.1489e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 538 training return: tensor(3.4368e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 539 training return: tensor(1.8853e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 135 test_true_pfm: 1177 test_simulate_pfm tensor(2.1094e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 540 training return: tensor(1.1306e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 541 training return: tensor(3.6646e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 542 training return: tensor(1.4276e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 543 training return: tensor(5.3731e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 136 test_true_pfm: 1489 test_simulate_pfm tensor(7.8538e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 544 training return: tensor(4.3293e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 545 training return: tensor(3.6055e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 546 training return: tensor(6.5770e-07, device='cuda:0', grad_fn=<AddBackward0>)
episode: 547 training return: tensor(1.2559e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 137 test_true_pfm: 1143 test_simulate_pfm tensor(1.1953e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 548 training return: tensor(4.9553e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 549 training return: tensor(5.3864e-08, device='cuda:0', grad_fn=<AddBackward0>)
episode: 550 training return: tensor(1.8277e-12, device='cuda:0', grad_fn=<AddBackward0>)
episode: 551 training return: tensor(8.3519e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 138 test_true_pfm: 979 test_simulate_pfm tensor(1.7120e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 552 training return: tensor(6.2177e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 553 training return: tensor(5.2753e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 554 training return: tensor(2.4506e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 555 training return: tensor(7.2717e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 139 test_true_pfm: 1237 test_simulate_pfm tensor(5.1295e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 556 training return: tensor(7.4707e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 557 training return: tensor(3.7661e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 558 training return: tensor(2.9168e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 559 training return: tensor(3.2522e-06, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 140 test_true_pfm: 1995 test_simulate_pfm tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)
episode: 560 training return: tensor(0.0010, device='cuda:0', grad_fn=<AddBackward0>)
episode: 561 training return: tensor(1.0944e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 562 training return: tensor(6.1431e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 563 training return: tensor(0.0004, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 141 test_true_pfm: 2139 test_simulate_pfm tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)
episode: 564 training return: tensor(0.0026, device='cuda:0', grad_fn=<AddBackward0>)
episode: 565 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
episode: 566 training return: tensor(1.7971e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 567 training return: tensor(7.3097e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 142 test_true_pfm: 2608 test_simulate_pfm tensor(2.5152e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 568 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
episode: 569 training return: tensor(0.0046, device='cuda:0', grad_fn=<AddBackward0>)
episode: 570 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
episode: 571 training return: tensor(0.0015, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 143 test_true_pfm: 3376 test_simulate_pfm tensor(0.0111, device='cuda:0', grad_fn=<DivBackward0>)
episode: 572 training return: tensor(8.0273e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 573 training return: tensor(0.0056, device='cuda:0', grad_fn=<AddBackward0>)
episode: 574 training return: tensor(0.0007, device='cuda:0', grad_fn=<AddBackward0>)
episode: 575 training return: tensor(0.0465, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 144 test_true_pfm: 195 test_simulate_pfm tensor(9.5542e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 576 training return: tensor(7.8871e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 577 training return: tensor(0.0008, device='cuda:0', grad_fn=<AddBackward0>)
episode: 578 training return: tensor(4.7122e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 579 training return: tensor(0.0007, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 145 test_true_pfm: 2478 test_simulate_pfm tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)
episode: 580 training return: tensor(0.0008, device='cuda:0', grad_fn=<AddBackward0>)
episode: 581 training return: tensor(6.2682e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 582 training return: tensor(0.0002, device='cuda:0', grad_fn=<AddBackward0>)
episode: 583 training return: tensor(0.0044, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 146 test_true_pfm: 3142 test_simulate_pfm tensor(0.0062, device='cuda:0', grad_fn=<DivBackward0>)
episode: 584 training return: tensor(0.0035, device='cuda:0', grad_fn=<AddBackward0>)
episode: 585 training return: tensor(0.0011, device='cuda:0', grad_fn=<AddBackward0>)
episode: 586 training return: tensor(3.3949e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 587 training return: tensor(5.9482e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 147 test_true_pfm: 1701 test_simulate_pfm tensor(7.6105e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 588 training return: tensor(5.0875e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 589 training return: tensor(1.4082e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 590 training return: tensor(2.6010e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 591 training return: tensor(4.2748e-05, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 148 test_true_pfm: 1533 test_simulate_pfm tensor(2.2703e-05, device='cuda:0', grad_fn=<DivBackward0>)
episode: 592 training return: tensor(0.0003, device='cuda:0', grad_fn=<AddBackward0>)
episode: 593 training return: tensor(7.4871e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 594 training return: tensor(5.0713e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 595 training return: tensor(3.1524e-07, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 149 test_true_pfm: 1114 test_simulate_pfm tensor(8.0976e-06, device='cuda:0', grad_fn=<DivBackward0>)
episode: 596 training return: tensor(6.0239e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 597 training return: tensor(3.2329e-05, device='cuda:0', grad_fn=<AddBackward0>)
episode: 598 training return: tensor(1.7164e-06, device='cuda:0', grad_fn=<AddBackward0>)
episode: 599 training return: tensor(3.7309e-09, device='cuda:0', grad_fn=<AddBackward0>)
epoch: 150 test_true_pfm: 1051 test_simulate_pfm tensor(0.0002, device='cuda:0', grad_fn=<DivBackward0>)
