5149.385581313548
episode: 0 training return: tensor(-488.5414, device='cuda:0')
episode: 1 training return: tensor(-494.9063, device='cuda:0')
episode: 2 training return: tensor(-555.3195, device='cuda:0')
epoch: 1 test_true_pfm: 3558.669723368776
episode: 3 training return: tensor(-488.5407, device='cuda:0')
episode: 4 training return: tensor(-488.5027, device='cuda:0')
episode: 5 training return: tensor(-466.3349, device='cuda:0')
epoch: 2 test_true_pfm: 2264.9643421668434
episode: 6 training return: tensor(-301.8961, device='cuda:0')
episode: 7 training return: tensor(-227.6605, device='cuda:0')
episode: 8 training return: tensor(-331.0784, device='cuda:0')
epoch: 3 test_true_pfm: 1699.1841351527112
episode: 9 training return: tensor(-486.0814, device='cuda:0')
episode: 10 training return: tensor(-462.9983, device='cuda:0')
episode: 11 training return: tensor(-285.1723, device='cuda:0')
epoch: 4 test_true_pfm: 2016.1372794374813
episode: 12 training return: tensor(-485.8772, device='cuda:0')
episode: 13 training return: tensor(-390.3649, device='cuda:0')
episode: 14 training return: tensor(-485.9764, device='cuda:0')
epoch: 5 test_true_pfm: 3007.997665785437
episode: 15 training return: tensor(-384.2233, device='cuda:0')
episode: 16 training return: tensor(-487.6921, device='cuda:0')
episode: 17 training return: tensor(-555.4907, device='cuda:0')
epoch: 6 test_true_pfm: 2866.188125040495
episode: 18 training return: tensor(-306.3421, device='cuda:0')
episode: 19 training return: tensor(-505.7606, device='cuda:0')
episode: 20 training return: tensor(-459.3578, device='cuda:0')
epoch: 7 test_true_pfm: 3565.4044710407325
episode: 21 training return: tensor(-390.7336, device='cuda:0')
episode: 22 training return: tensor(-488.4857, device='cuda:0')
episode: 23 training return: tensor(-489.3240, device='cuda:0')
epoch: 8 test_true_pfm: 1943.2218249130158
episode: 24 training return: tensor(-488.5670, device='cuda:0')
episode: 25 training return: tensor(-229.3424, device='cuda:0')
episode: 26 training return: tensor(-457.9607, device='cuda:0')
epoch: 9 test_true_pfm: 4189.6231743428625
episode: 27 training return: tensor(-408.8380, device='cuda:0')
episode: 28 training return: tensor(-487.5769, device='cuda:0')
episode: 29 training return: tensor(-313.4596, device='cuda:0')
epoch: 10 test_true_pfm: 1261.9209423016978
episode: 30 training return: tensor(-489.1057, device='cuda:0')
episode: 31 training return: tensor(-422.2057, device='cuda:0')
episode: 32 training return: tensor(-344.3555, device='cuda:0')
epoch: 11 test_true_pfm: 1299.2598615030981
episode: 33 training return: tensor(-484.7008, device='cuda:0')
episode: 34 training return: tensor(-446.2398, device='cuda:0')
episode: 35 training return: tensor(-348.9466, device='cuda:0')
epoch: 12 test_true_pfm: 189.4751289758851
episode: 36 training return: tensor(-486.6849, device='cuda:0')
episode: 37 training return: tensor(-482.2924, device='cuda:0')
episode: 38 training return: tensor(-487.5415, device='cuda:0')
epoch: 13 test_true_pfm: 4608.7090441249375
episode: 39 training return: tensor(-574.7181, device='cuda:0')
episode: 40 training return: tensor(-431.5561, device='cuda:0')
episode: 41 training return: tensor(-488.0937, device='cuda:0')
epoch: 14 test_true_pfm: 5917.746798837829
episode: 42 training return: tensor(-182.8712, device='cuda:0')
episode: 43 training return: tensor(-481.5467, device='cuda:0')
episode: 44 training return: tensor(-500.0236, device='cuda:0')
epoch: 15 test_true_pfm: 1257.496891755961
episode: 45 training return: tensor(-177.6364, device='cuda:0')
episode: 46 training return: tensor(-337.7433, device='cuda:0')
episode: 47 training return: tensor(-282.6141, device='cuda:0')
epoch: 16 test_true_pfm: 2712.4097719391066
episode: 48 training return: tensor(-482.2020, device='cuda:0')
episode: 49 training return: tensor(-489.5171, device='cuda:0')
episode: 50 training return: tensor(-480.9290, device='cuda:0')
epoch: 17 test_true_pfm: 6161.967010038471
episode: 51 training return: tensor(-398.3742, device='cuda:0')
episode: 52 training return: tensor(-488.6516, device='cuda:0')
episode: 53 training return: tensor(-359.1220, device='cuda:0')
epoch: 18 test_true_pfm: 3480.4111880943115
episode: 54 training return: tensor(-397.3914, device='cuda:0')
episode: 55 training return: tensor(-385.1931, device='cuda:0')
episode: 56 training return: tensor(-488.9272, device='cuda:0')
epoch: 19 test_true_pfm: 6674.369460877198
episode: 57 training return: tensor(-515.5186, device='cuda:0')
episode: 58 training return: tensor(-141.0794, device='cuda:0')
episode: 59 training return: tensor(-317.0924, device='cuda:0')
epoch: 20 test_true_pfm: 4740.6008817813645
episode: 60 training return: tensor(-521.6378, device='cuda:0')
episode: 61 training return: tensor(-461.6757, device='cuda:0')
episode: 62 training return: tensor(-487.4340, device='cuda:0')
epoch: 21 test_true_pfm: 8595.641164122222
episode: 63 training return: tensor(-480.3116, device='cuda:0')
episode: 64 training return: tensor(-486.4156, device='cuda:0')
episode: 65 training return: tensor(-449.2222, device='cuda:0')
epoch: 22 test_true_pfm: 9366.5039253009
episode: 66 training return: tensor(-487.9405, device='cuda:0')
episode: 67 training return: tensor(-487.0208, device='cuda:0')
episode: 68 training return: tensor(-461.3814, device='cuda:0')
epoch: 23 test_true_pfm: 5214.45295906391
episode: 69 training return: tensor(-534.0448, device='cuda:0')
episode: 70 training return: tensor(-399.1070, device='cuda:0')
episode: 71 training return: tensor(-563.7739, device='cuda:0')
epoch: 24 test_true_pfm: 4037.4493062082106
episode: 72 training return: tensor(-494.9365, device='cuda:0')
episode: 73 training return: tensor(-489.0895, device='cuda:0')
episode: 74 training return: tensor(-488.8037, device='cuda:0')
epoch: 25 test_true_pfm: 3320.8966351712356
episode: 75 training return: tensor(-487.9817, device='cuda:0')
episode: 76 training return: tensor(-443.4435, device='cuda:0')
episode: 77 training return: tensor(-489.5573, device='cuda:0')
epoch: 26 test_true_pfm: 4647.199328138316
episode: 78 training return: tensor(-489.6307, device='cuda:0')
episode: 79 training return: tensor(-489.5462, device='cuda:0')
episode: 80 training return: tensor(-438.8259, device='cuda:0')
epoch: 27 test_true_pfm: 7486.456836408924
episode: 81 training return: tensor(-389.8954, device='cuda:0')
episode: 82 training return: tensor(-444.1728, device='cuda:0')
episode: 83 training return: tensor(-485.4683, device='cuda:0')
epoch: 28 test_true_pfm: 3670.7970095651904
episode: 84 training return: tensor(-451.0356, device='cuda:0')
episode: 85 training return: tensor(-553.5317, device='cuda:0')
episode: 86 training return: tensor(-489.4132, device='cuda:0')
epoch: 29 test_true_pfm: 5284.005206436816
episode: 87 training return: tensor(-427.8245, device='cuda:0')
episode: 88 training return: tensor(-485.8098, device='cuda:0')
episode: 89 training return: tensor(-285.9817, device='cuda:0')
epoch: 30 test_true_pfm: 5982.192733116727
episode: 90 training return: tensor(-488.4721, device='cuda:0')
episode: 91 training return: tensor(-313.3650, device='cuda:0')
episode: 92 training return: tensor(-391.1721, device='cuda:0')
epoch: 31 test_true_pfm: 7917.046919506149
episode: 93 training return: tensor(-488.7884, device='cuda:0')
episode: 94 training return: tensor(-480.4562, device='cuda:0')
episode: 95 training return: tensor(-486.3734, device='cuda:0')
epoch: 32 test_true_pfm: 1619.4353938117456
episode: 96 training return: tensor(-487.7780, device='cuda:0')
episode: 97 training return: tensor(-460.7376, device='cuda:0')
episode: 98 training return: tensor(-487.9996, device='cuda:0')
epoch: 33 test_true_pfm: 1504.2562691378632
episode: 99 training return: tensor(-490.0569, device='cuda:0')
episode: 100 training return: tensor(-358.0390, device='cuda:0')
episode: 101 training return: tensor(-488.7175, device='cuda:0')
epoch: 34 test_true_pfm: 2997.3193710348064
episode: 102 training return: tensor(-229.4273, device='cuda:0')
episode: 103 training return: tensor(-325.1823, device='cuda:0')
episode: 104 training return: tensor(-257.3033, device='cuda:0')
epoch: 35 test_true_pfm: 9395.29813424253
episode: 105 training return: tensor(-438.4444, device='cuda:0')
episode: 106 training return: tensor(-352.5882, device='cuda:0')
episode: 107 training return: tensor(-488.5769, device='cuda:0')
epoch: 36 test_true_pfm: 3812.541010478713
episode: 108 training return: tensor(-385.2755, device='cuda:0')
episode: 109 training return: tensor(-417.8046, device='cuda:0')
episode: 110 training return: tensor(-485.9715, device='cuda:0')
epoch: 37 test_true_pfm: 3420.161787623014
episode: 111 training return: tensor(-444.5448, device='cuda:0')
episode: 112 training return: tensor(-372.5978, device='cuda:0')
episode: 113 training return: tensor(-481.1756, device='cuda:0')
epoch: 38 test_true_pfm: 6978.151231356052
episode: 114 training return: tensor(-488.7221, device='cuda:0')
episode: 115 training return: tensor(-485.0457, device='cuda:0')
episode: 116 training return: tensor(-461.6477, device='cuda:0')
epoch: 39 test_true_pfm: 3108.6652419187462
episode: 117 training return: tensor(-457.2890, device='cuda:0')
episode: 118 training return: tensor(-420.8243, device='cuda:0')
episode: 119 training return: tensor(-482.0091, device='cuda:0')
epoch: 40 test_true_pfm: 3536.084858894547
episode: 120 training return: tensor(-295.5123, device='cuda:0')
episode: 121 training return: tensor(-488.4709, device='cuda:0')
episode: 122 training return: tensor(-488.8864, device='cuda:0')
epoch: 41 test_true_pfm: 3699.52696514337
episode: 123 training return: tensor(-430.3511, device='cuda:0')
episode: 124 training return: tensor(-488.5634, device='cuda:0')
episode: 125 training return: tensor(-479.8321, device='cuda:0')
epoch: 42 test_true_pfm: 7497.111435801152
episode: 126 training return: tensor(-489.1895, device='cuda:0')
episode: 127 training return: tensor(-254.2121, device='cuda:0')
episode: 128 training return: tensor(-488.9637, device='cuda:0')
epoch: 43 test_true_pfm: 5001.43838729891
episode: 129 training return: tensor(-184.6810, device='cuda:0')
episode: 130 training return: tensor(-488.8426, device='cuda:0')
episode: 131 training return: tensor(-466.8808, device='cuda:0')
epoch: 44 test_true_pfm: 824.4620103703043
episode: 132 training return: tensor(-238.4705, device='cuda:0')
episode: 133 training return: tensor(-332.5903, device='cuda:0')
episode: 134 training return: tensor(-488.1835, device='cuda:0')
epoch: 45 test_true_pfm: 4219.864404476518
episode: 135 training return: tensor(-231.1266, device='cuda:0')
episode: 136 training return: tensor(-435.7655, device='cuda:0')
episode: 137 training return: tensor(-288.7086, device='cuda:0')
epoch: 46 test_true_pfm: 3500.597099697263
episode: 138 training return: tensor(-485.4774, device='cuda:0')
episode: 139 training return: tensor(-329.3623, device='cuda:0')
episode: 140 training return: tensor(-511.5930, device='cuda:0')
epoch: 47 test_true_pfm: 5022.716764620178
episode: 141 training return: tensor(-517.2170, device='cuda:0')
episode: 142 training return: tensor(-303.6813, device='cuda:0')
episode: 143 training return: tensor(-221.9721, device='cuda:0')
epoch: 48 test_true_pfm: 3099.7753914179234
episode: 144 training return: tensor(-528.3608, device='cuda:0')
episode: 145 training return: tensor(-318.9423, device='cuda:0')
episode: 146 training return: tensor(-423.4582, device='cuda:0')
epoch: 49 test_true_pfm: -143.81742913598387
episode: 147 training return: tensor(-436.7449, device='cuda:0')
episode: 148 training return: tensor(-490.1027, device='cuda:0')
episode: 149 training return: tensor(-543.3937, device='cuda:0')
epoch: 50 test_true_pfm: 2412.4230074768125
episode: 150 training return: tensor(-457.8601, device='cuda:0')
episode: 151 training return: tensor(-554.7516, device='cuda:0')
episode: 152 training return: tensor(-488.3116, device='cuda:0')
epoch: 51 test_true_pfm: -221.18404449639942
episode: 153 training return: tensor(-398.2368, device='cuda:0')
episode: 154 training return: tensor(-488.5909, device='cuda:0')
episode: 155 training return: tensor(-477.4816, device='cuda:0')
epoch: 52 test_true_pfm: 6627.361520507446
episode: 156 training return: tensor(-316.0201, device='cuda:0')
episode: 157 training return: tensor(-570.3354, device='cuda:0')
episode: 158 training return: tensor(-487.5084, device='cuda:0')
epoch: 53 test_true_pfm: 6091.343880012207
episode: 159 training return: tensor(-488.3196, device='cuda:0')
episode: 160 training return: tensor(-484.4362, device='cuda:0')
episode: 161 training return: tensor(-488.9080, device='cuda:0')
epoch: 54 test_true_pfm: 998.4935935244438
episode: 162 training return: tensor(-511.6547, device='cuda:0')
episode: 163 training return: tensor(-489.1434, device='cuda:0')
episode: 164 training return: tensor(-459.6519, device='cuda:0')
epoch: 55 test_true_pfm: 6993.750883614055
episode: 165 training return: tensor(-443.4671, device='cuda:0')
episode: 166 training return: tensor(-487.7003, device='cuda:0')
episode: 167 training return: tensor(-202.1258, device='cuda:0')
epoch: 56 test_true_pfm: 1858.2955123802833
episode: 168 training return: tensor(-513.7943, device='cuda:0')
episode: 169 training return: tensor(-397.9597, device='cuda:0')
episode: 170 training return: tensor(-427.8235, device='cuda:0')
epoch: 57 test_true_pfm: 3088.1329126511705
episode: 171 training return: tensor(-484.7746, device='cuda:0')
episode: 172 training return: tensor(-468.1075, device='cuda:0')
episode: 173 training return: tensor(-487.4950, device='cuda:0')
epoch: 58 test_true_pfm: 2452.7420722663323
episode: 174 training return: tensor(-486.1785, device='cuda:0')
episode: 175 training return: tensor(-489.5946, device='cuda:0')
episode: 176 training return: tensor(-220.8172, device='cuda:0')
epoch: 59 test_true_pfm: 2904.349808631177
episode: 177 training return: tensor(-276.7501, device='cuda:0')
episode: 178 training return: tensor(-488.2110, device='cuda:0')
episode: 179 training return: tensor(-488.4336, device='cuda:0')
epoch: 60 test_true_pfm: 7239.068635005978
episode: 180 training return: tensor(-551.0964, device='cuda:0')
episode: 181 training return: tensor(-489.1521, device='cuda:0')
episode: 182 training return: tensor(-510.7461, device='cuda:0')
epoch: 61 test_true_pfm: 2926.8467959175628
episode: 183 training return: tensor(-312.8133, device='cuda:0')
episode: 184 training return: tensor(-549.8033, device='cuda:0')
episode: 185 training return: tensor(-364.7040, device='cuda:0')
epoch: 62 test_true_pfm: 2724.4866755560583
episode: 186 training return: tensor(-487.0567, device='cuda:0')
episode: 187 training return: tensor(-375.3258, device='cuda:0')
episode: 188 training return: tensor(-484.2885, device='cuda:0')
epoch: 63 test_true_pfm: 4826.996151407217
episode: 189 training return: tensor(-449.0027, device='cuda:0')
episode: 190 training return: tensor(-371.4319, device='cuda:0')
episode: 191 training return: tensor(-566.7684, device='cuda:0')
epoch: 64 test_true_pfm: 3526.683380230263
episode: 192 training return: tensor(-484.7161, device='cuda:0')
episode: 193 training return: tensor(-474.2023, device='cuda:0')
episode: 194 training return: tensor(-488.7923, device='cuda:0')
epoch: 65 test_true_pfm: 7448.421572032686
episode: 195 training return: tensor(-464.6218, device='cuda:0')
episode: 196 training return: tensor(-301.2426, device='cuda:0')
episode: 197 training return: tensor(-488.9431, device='cuda:0')
epoch: 66 test_true_pfm: 3951.455792663446
episode: 198 training return: tensor(-484.1453, device='cuda:0')
episode: 199 training return: tensor(-490.1270, device='cuda:0')
episode: 200 training return: tensor(-381.5755, device='cuda:0')
epoch: 67 test_true_pfm: 2010.1133710721017
episode: 201 training return: tensor(-465.5811, device='cuda:0')
episode: 202 training return: tensor(-363.3737, device='cuda:0')
episode: 203 training return: tensor(-324.8146, device='cuda:0')
epoch: 68 test_true_pfm: 6465.540340348688
episode: 204 training return: tensor(-488.4126, device='cuda:0')
episode: 205 training return: tensor(-486.3791, device='cuda:0')
episode: 206 training return: tensor(-488.0834, device='cuda:0')
epoch: 69 test_true_pfm: 3818.4127411318664
episode: 207 training return: tensor(-488.0530, device='cuda:0')
episode: 208 training return: tensor(-488.6476, device='cuda:0')
episode: 209 training return: tensor(-258.3489, device='cuda:0')
epoch: 70 test_true_pfm: -182.29641513677848
episode: 210 training return: tensor(-455.5714, device='cuda:0')
episode: 211 training return: tensor(-488.7963, device='cuda:0')
episode: 212 training return: tensor(-488.9738, device='cuda:0')
epoch: 71 test_true_pfm: 3135.880183126419
episode: 213 training return: tensor(-440.0999, device='cuda:0')
episode: 214 training return: tensor(-487.6861, device='cuda:0')
episode: 215 training return: tensor(-488.1486, device='cuda:0')
epoch: 72 test_true_pfm: 3150.168285367843
episode: 216 training return: tensor(-458.5112, device='cuda:0')
episode: 217 training return: tensor(-487.9933, device='cuda:0')
episode: 218 training return: tensor(-447.9440, device='cuda:0')
epoch: 73 test_true_pfm: 1904.7057863099046
episode: 219 training return: tensor(-536.4939, device='cuda:0')
episode: 220 training return: tensor(-509.8827, device='cuda:0')
episode: 221 training return: tensor(-332.5422, device='cuda:0')
epoch: 74 test_true_pfm: 1039.0781985729366
episode: 222 training return: tensor(-488.8478, device='cuda:0')
episode: 223 training return: tensor(-434.4521, device='cuda:0')
episode: 224 training return: tensor(-565.6703, device='cuda:0')
epoch: 75 test_true_pfm: 301.3848904011037
episode: 225 training return: tensor(-488.6877, device='cuda:0')
episode: 226 training return: tensor(-360.2588, device='cuda:0')
episode: 227 training return: tensor(-355.0793, device='cuda:0')
epoch: 76 test_true_pfm: 2965.0290341360815
episode: 228 training return: tensor(-488.7805, device='cuda:0')
episode: 229 training return: tensor(-480.5291, device='cuda:0')
episode: 230 training return: tensor(-200.2532, device='cuda:0')
epoch: 77 test_true_pfm: 502.42829970303296
episode: 231 training return: tensor(-488.1922, device='cuda:0')
episode: 232 training return: tensor(-488.3534, device='cuda:0')
episode: 233 training return: tensor(-488.6854, device='cuda:0')
epoch: 78 test_true_pfm: 3218.998012398691
episode: 234 training return: tensor(-487.8436, device='cuda:0')
episode: 235 training return: tensor(-488.8751, device='cuda:0')
episode: 236 training return: tensor(-458.4365, device='cuda:0')
epoch: 79 test_true_pfm: 2187.7181754404887
episode: 237 training return: tensor(-519.8389, device='cuda:0')
episode: 238 training return: tensor(-510.7943, device='cuda:0')
episode: 239 training return: tensor(-492.7781, device='cuda:0')
epoch: 80 test_true_pfm: 41.957456441191745
episode: 240 training return: tensor(-241.2537, device='cuda:0')
episode: 241 training return: tensor(-488.2281, device='cuda:0')
episode: 242 training return: tensor(-485.4384, device='cuda:0')
epoch: 81 test_true_pfm: 3175.627422163119
episode: 243 training return: tensor(-487.3717, device='cuda:0')
episode: 244 training return: tensor(-543.3203, device='cuda:0')
episode: 245 training return: tensor(-486.0150, device='cuda:0')
epoch: 82 test_true_pfm: 8544.391727231567
episode: 246 training return: tensor(-514.2706, device='cuda:0')
episode: 247 training return: tensor(-489.1178, device='cuda:0')
episode: 248 training return: tensor(-488.1924, device='cuda:0')
epoch: 83 test_true_pfm: 4863.167775107689
episode: 249 training return: tensor(-215.7971, device='cuda:0')
episode: 250 training return: tensor(-479.5112, device='cuda:0')
episode: 251 training return: tensor(-366.2240, device='cuda:0')
epoch: 84 test_true_pfm: 6398.470797159432
episode: 252 training return: tensor(-474.1256, device='cuda:0')
episode: 253 training return: tensor(-545.0051, device='cuda:0')
episode: 254 training return: tensor(-488.7517, device='cuda:0')
epoch: 85 test_true_pfm: 5741.123745318407
episode: 255 training return: tensor(-270.8997, device='cuda:0')
episode: 256 training return: tensor(-457.1116, device='cuda:0')
episode: 257 training return: tensor(-270.0338, device='cuda:0')
epoch: 86 test_true_pfm: 3928.6040151160555
episode: 258 training return: tensor(-185.5619, device='cuda:0')
episode: 259 training return: tensor(-217.9812, device='cuda:0')
episode: 260 training return: tensor(-476.9204, device='cuda:0')
epoch: 87 test_true_pfm: 3232.5071100594414
episode: 261 training return: tensor(-432.3957, device='cuda:0')
episode: 262 training return: tensor(-490.0614, device='cuda:0')
episode: 263 training return: tensor(-290.9776, device='cuda:0')
epoch: 88 test_true_pfm: 6836.488935490131
episode: 264 training return: tensor(-488.5226, device='cuda:0')
episode: 265 training return: tensor(-488.0094, device='cuda:0')
episode: 266 training return: tensor(-388.7137, device='cuda:0')
epoch: 89 test_true_pfm: 3201.792496442266
episode: 267 training return: tensor(-223.2096, device='cuda:0')
episode: 268 training return: tensor(-529.0461, device='cuda:0')
episode: 269 training return: tensor(-487.5408, device='cuda:0')
epoch: 90 test_true_pfm: 4310.895987892487
episode: 270 training return: tensor(-489.0755, device='cuda:0')
episode: 271 training return: tensor(-489.2174, device='cuda:0')
episode: 272 training return: tensor(-486.2797, device='cuda:0')
epoch: 91 test_true_pfm: 3759.243484861255
episode: 273 training return: tensor(-487.9151, device='cuda:0')
episode: 274 training return: tensor(-244.1143, device='cuda:0')
episode: 275 training return: tensor(-474.5533, device='cuda:0')
epoch: 92 test_true_pfm: -274.1136632767944
episode: 276 training return: tensor(-376.6252, device='cuda:0')
episode: 277 training return: tensor(-377.0894, device='cuda:0')
episode: 278 training return: tensor(-484.8870, device='cuda:0')
epoch: 93 test_true_pfm: 5413.395204735169
episode: 279 training return: tensor(-130.8514, device='cuda:0')
episode: 280 training return: tensor(-488.7402, device='cuda:0')
episode: 281 training return: tensor(-265.7657, device='cuda:0')
epoch: 94 test_true_pfm: 3100.517914123766
episode: 282 training return: tensor(-544.3081, device='cuda:0')
episode: 283 training return: tensor(-194.6314, device='cuda:0')
episode: 284 training return: tensor(-485.8730, device='cuda:0')
epoch: 95 test_true_pfm: 5938.912433398345
episode: 285 training return: tensor(-427.0687, device='cuda:0')
episode: 286 training return: tensor(-387.5126, device='cuda:0')
episode: 287 training return: tensor(-343.2092, device='cuda:0')
epoch: 96 test_true_pfm: 4386.735911760904
episode: 288 training return: tensor(-528.4888, device='cuda:0')
episode: 289 training return: tensor(-484.0883, device='cuda:0')
episode: 290 training return: tensor(-485.5795, device='cuda:0')
epoch: 97 test_true_pfm: 3930.7816424679154
episode: 291 training return: tensor(-222.7235, device='cuda:0')
episode: 292 training return: tensor(-488.2377, device='cuda:0')
episode: 293 training return: tensor(-488.6693, device='cuda:0')
epoch: 98 test_true_pfm: 2053.1978147726336
episode: 294 training return: tensor(-488.3037, device='cuda:0')
episode: 295 training return: tensor(-487.1365, device='cuda:0')
episode: 296 training return: tensor(-489.0965, device='cuda:0')
epoch: 99 test_true_pfm: 3950.919585297616
episode: 297 training return: tensor(-484.4857, device='cuda:0')
episode: 298 training return: tensor(-486.1125, device='cuda:0')
episode: 299 training return: tensor(-488.1173, device='cuda:0')
epoch: 100 test_true_pfm: 2726.6368434554547
