['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'brac', '--traj', 'medium', '--seed', '3', '--data', '100000']
4967.860864865563
episode: 0 training return: tensor(-3946.6333, device='cuda:0')
episode: 1 training return: tensor(-3052.0361, device='cuda:0')
episode: 2 training return: tensor(-3374.4771, device='cuda:0')
episode: 3 training return: tensor(-3034.0850, device='cuda:0')
epoch: 1 test_true_pfm: -177.62781513834605
episode: 4 training return: tensor(-5260.8491, device='cuda:0')
episode: 5 training return: tensor(-4849.4888, device='cuda:0')
episode: 6 training return: tensor(-4785.5479, device='cuda:0')
episode: 7 training return: tensor(-5027.4507, device='cuda:0')
epoch: 2 test_true_pfm: -126.46622180018822
episode: 8 training return: tensor(-2578.2698, device='cuda:0')
episode: 9 training return: tensor(-3876.8174, device='cuda:0')
episode: 10 training return: tensor(-1970.9697, device='cuda:0')
episode: 11 training return: tensor(-1848.3441, device='cuda:0')
epoch: 3 test_true_pfm: -40.71054041023593
episode: 12 training return: tensor(-1725.1531, device='cuda:0')
episode: 13 training return: tensor(-1905.8058, device='cuda:0')
episode: 14 training return: tensor(-1734.1334, device='cuda:0')
episode: 15 training return: tensor(-1719.6124, device='cuda:0')
epoch: 4 test_true_pfm: -31.809991719762845
episode: 16 training return: tensor(-1735.3208, device='cuda:0')
episode: 17 training return: tensor(-1737.5569, device='cuda:0')
episode: 18 training return: tensor(-1721.5294, device='cuda:0')
episode: 19 training return: tensor(-1740.4829, device='cuda:0')
epoch: 5 test_true_pfm: -131.2382679239965
episode: 20 training return: tensor(-1694.3501, device='cuda:0')
episode: 21 training return: tensor(-1719.9036, device='cuda:0')
episode: 22 training return: tensor(-1626.7719, device='cuda:0')
episode: 23 training return: tensor(-1731.9712, device='cuda:0')
epoch: 6 test_true_pfm: -48.59019156640954
episode: 24 training return: tensor(-1538.4501, device='cuda:0')
episode: 25 training return: tensor(-1584.3719, device='cuda:0')
episode: 26 training return: tensor(-1745.7814, device='cuda:0')
episode: 27 training return: tensor(-1568.8033, device='cuda:0')
epoch: 7 test_true_pfm: -34.07533543317984
episode: 28 training return: tensor(-1542.4352, device='cuda:0')
episode: 29 training return: tensor(-1536.5931, device='cuda:0')
episode: 30 training return: tensor(-1565.0564, device='cuda:0')
episode: 31 training return: tensor(-1566.8140, device='cuda:0')
epoch: 8 test_true_pfm: -21.43165173554678
episode: 32 training return: tensor(-1571.8359, device='cuda:0')
episode: 33 training return: tensor(-1542.7279, device='cuda:0')
episode: 34 training return: tensor(-1550.7157, device='cuda:0')
episode: 35 training return: tensor(-1563.0624, device='cuda:0')
epoch: 9 test_true_pfm: -39.068859275688716
episode: 36 training return: tensor(-1753.2322, device='cuda:0')
episode: 37 training return: tensor(-1564.8378, device='cuda:0')
episode: 38 training return: tensor(-1547.3564, device='cuda:0')
episode: 39 training return: tensor(-1554.3146, device='cuda:0')
epoch: 10 test_true_pfm: -32.07529906039219
episode: 40 training return: tensor(-1560.7292, device='cuda:0')
episode: 41 training return: tensor(-1571.4061, device='cuda:0')
episode: 42 training return: tensor(-1573.1246, device='cuda:0')
episode: 43 training return: tensor(-1574.1984, device='cuda:0')
epoch: 11 test_true_pfm: -21.91321736696563
episode: 44 training return: tensor(-1584.5282, device='cuda:0')
episode: 45 training return: tensor(-1580.3179, device='cuda:0')
episode: 46 training return: tensor(-1564.5132, device='cuda:0')
episode: 47 training return: tensor(-1580.4241, device='cuda:0')
epoch: 12 test_true_pfm: -17.617550773502142
episode: 48 training return: tensor(-1563.8308, device='cuda:0')
episode: 49 training return: tensor(-1564.1246, device='cuda:0')
episode: 50 training return: tensor(-1552.8683, device='cuda:0')
episode: 51 training return: tensor(-1577.6534, device='cuda:0')
epoch: 13 test_true_pfm: -15.72137925966318
episode: 52 training return: tensor(-1579.2089, device='cuda:0')
episode: 53 training return: tensor(-1567.5991, device='cuda:0')
episode: 54 training return: tensor(-1587.7771, device='cuda:0')
episode: 55 training return: tensor(-1563.6526, device='cuda:0')
epoch: 14 test_true_pfm: -23.437633896404872
episode: 56 training return: tensor(-1561.1282, device='cuda:0')
episode: 57 training return: tensor(-1568.8770, device='cuda:0')
episode: 58 training return: tensor(-1579.0240, device='cuda:0')
episode: 59 training return: tensor(-1584.6482, device='cuda:0')
epoch: 15 test_true_pfm: -16.4343775410626
episode: 60 training return: tensor(-1582.9626, device='cuda:0')
episode: 61 training return: tensor(-1578.2469, device='cuda:0')
episode: 62 training return: tensor(-1593.6599, device='cuda:0')
episode: 63 training return: tensor(-1738.8391, device='cuda:0')
epoch: 16 test_true_pfm: -19.538943742609067
episode: 64 training return: tensor(-1592.1400, device='cuda:0')
episode: 65 training return: tensor(-1592.8978, device='cuda:0')
episode: 66 training return: tensor(-1563.9122, device='cuda:0')
episode: 67 training return: tensor(-1574.5073, device='cuda:0')
epoch: 17 test_true_pfm: -21.10795131841842
episode: 68 training return: tensor(-1616.8887, device='cuda:0')
episode: 69 training return: tensor(-1573.4304, device='cuda:0')
episode: 70 training return: tensor(-1568.1271, device='cuda:0')
episode: 71 training return: tensor(-1565.7753, device='cuda:0')
epoch: 18 test_true_pfm: -13.14938994222171
episode: 72 training return: tensor(-1575.1655, device='cuda:0')
episode: 73 training return: tensor(-1575.3888, device='cuda:0')
episode: 74 training return: tensor(-1571.9648, device='cuda:0')
episode: 75 training return: tensor(-1571.3512, device='cuda:0')
epoch: 19 test_true_pfm: -18.86138118734956
episode: 76 training return: tensor(-1569.8473, device='cuda:0')
episode: 77 training return: tensor(-1590.7422, device='cuda:0')
episode: 78 training return: tensor(-1733.3057, device='cuda:0')
episode: 79 training return: tensor(-1570.4655, device='cuda:0')
epoch: 20 test_true_pfm: -25.15724244131718
episode: 80 training return: tensor(-1674.5869, device='cuda:0')
episode: 81 training return: tensor(-1568.9999, device='cuda:0')
episode: 82 training return: tensor(-1548.4585, device='cuda:0')
episode: 83 training return: tensor(-1571.5043, device='cuda:0')
epoch: 21 test_true_pfm: -19.63662237308199
episode: 84 training return: tensor(-1587.8368, device='cuda:0')
episode: 85 training return: tensor(-1570.2516, device='cuda:0')
episode: 86 training return: tensor(-1569.1959, device='cuda:0')
episode: 87 training return: tensor(-1568.2748, device='cuda:0')
epoch: 22 test_true_pfm: -25.263769217008868
episode: 88 training return: tensor(-1578.5798, device='cuda:0')
episode: 89 training return: tensor(-1559.7161, device='cuda:0')
episode: 90 training return: tensor(-1579.1862, device='cuda:0')
episode: 91 training return: tensor(-1559.0245, device='cuda:0')
epoch: 23 test_true_pfm: -14.612269395576037
episode: 92 training return: tensor(-1571.8696, device='cuda:0')
episode: 93 training return: tensor(-1572.6940, device='cuda:0')
episode: 94 training return: tensor(-1583.9777, device='cuda:0')
episode: 95 training return: tensor(-1577.0419, device='cuda:0')
epoch: 24 test_true_pfm: -19.900534378048828
episode: 96 training return: tensor(-1573.4135, device='cuda:0')
episode: 97 training return: tensor(-1567.8328, device='cuda:0')
episode: 98 training return: tensor(-1575.8058, device='cuda:0')
episode: 99 training return: tensor(-1576.0227, device='cuda:0')
epoch: 25 test_true_pfm: -25.815527265379753
episode: 100 training return: tensor(-1559.3743, device='cuda:0')
episode: 101 training return: tensor(-1570.8823, device='cuda:0')
episode: 102 training return: tensor(-1582.6683, device='cuda:0')
episode: 103 training return: tensor(-1552.0308, device='cuda:0')
epoch: 26 test_true_pfm: -20.554153213702154
episode: 104 training return: tensor(-1559.6655, device='cuda:0')
episode: 105 training return: tensor(-1578.5515, device='cuda:0')
episode: 106 training return: tensor(-1574.3948, device='cuda:0')
episode: 107 training return: tensor(-1575.0709, device='cuda:0')
epoch: 27 test_true_pfm: -16.305851357186274
episode: 108 training return: tensor(-1561.4270, device='cuda:0')
episode: 109 training return: tensor(-1587.4569, device='cuda:0')
episode: 110 training return: tensor(-1585.8400, device='cuda:0')
episode: 111 training return: tensor(-1560.8794, device='cuda:0')
epoch: 28 test_true_pfm: -15.317934379742814
episode: 112 training return: tensor(-1580.6857, device='cuda:0')
episode: 113 training return: tensor(-1565.1871, device='cuda:0')
episode: 114 training return: tensor(-1567.0813, device='cuda:0')
episode: 115 training return: tensor(-1571.6112, device='cuda:0')
epoch: 29 test_true_pfm: -18.956663142456904
episode: 116 training return: tensor(-1566.9742, device='cuda:0')
episode: 117 training return: tensor(-1593.1853, device='cuda:0')
episode: 118 training return: tensor(-1567.6720, device='cuda:0')
episode: 119 training return: tensor(-1563.1779, device='cuda:0')
epoch: 30 test_true_pfm: -13.041720528771776
episode: 120 training return: tensor(-1570.6829, device='cuda:0')
episode: 121 training return: tensor(-1588.2764, device='cuda:0')
episode: 122 training return: tensor(-1562.4401, device='cuda:0')
episode: 123 training return: tensor(-1567.7911, device='cuda:0')
epoch: 31 test_true_pfm: -17.55762485508745
episode: 124 training return: tensor(-1576.1971, device='cuda:0')
episode: 125 training return: tensor(-1557.5952, device='cuda:0')
episode: 126 training return: tensor(-1568.5316, device='cuda:0')
episode: 127 training return: tensor(-1582.1639, device='cuda:0')
epoch: 32 test_true_pfm: -19.786318923837463
episode: 128 training return: tensor(-1578.4294, device='cuda:0')
episode: 129 training return: tensor(-1557.6338, device='cuda:0')
episode: 130 training return: tensor(-1576.9244, device='cuda:0')
episode: 131 training return: tensor(-1555.8003, device='cuda:0')
epoch: 33 test_true_pfm: -18.6862609057706
episode: 132 training return: tensor(-1572.6927, device='cuda:0')
episode: 133 training return: tensor(-1562.2617, device='cuda:0')
episode: 134 training return: tensor(-1575.1689, device='cuda:0')
episode: 135 training return: tensor(-1563.3553, device='cuda:0')
epoch: 34 test_true_pfm: -16.816423320808834
episode: 136 training return: tensor(-1559.7538, device='cuda:0')
episode: 137 training return: tensor(-1552.6538, device='cuda:0')
episode: 138 training return: tensor(-1569.9767, device='cuda:0')
episode: 139 training return: tensor(-1566.2079, device='cuda:0')
epoch: 35 test_true_pfm: -18.797784596698648
episode: 140 training return: tensor(-1561.8358, device='cuda:0')
episode: 141 training return: tensor(-1552.7445, device='cuda:0')
episode: 142 training return: tensor(-1535.1381, device='cuda:0')
episode: 143 training return: tensor(-1555.4758, device='cuda:0')
epoch: 36 test_true_pfm: -20.07091112114557
episode: 144 training return: tensor(-1567.2985, device='cuda:0')
episode: 145 training return: tensor(-1573.1492, device='cuda:0')
episode: 146 training return: tensor(-1558.9330, device='cuda:0')
episode: 147 training return: tensor(-1564.8192, device='cuda:0')
epoch: 37 test_true_pfm: -19.82270761319742
episode: 148 training return: tensor(-1562.0829, device='cuda:0')
episode: 149 training return: tensor(-1576.5096, device='cuda:0')
episode: 150 training return: tensor(-1573.8381, device='cuda:0')
episode: 151 training return: tensor(-1565.4457, device='cuda:0')
epoch: 38 test_true_pfm: -19.02276346401793
episode: 152 training return: tensor(-1564.6157, device='cuda:0')
episode: 153 training return: tensor(-1573.7377, device='cuda:0')
episode: 154 training return: tensor(-1557.1334, device='cuda:0')
episode: 155 training return: tensor(-1575.8325, device='cuda:0')
epoch: 39 test_true_pfm: -18.833774974836647
episode: 156 training return: tensor(-1555.1362, device='cuda:0')
episode: 157 training return: tensor(-1561.5421, device='cuda:0')
episode: 158 training return: tensor(-1557.5708, device='cuda:0')
episode: 159 training return: tensor(-1560.9678, device='cuda:0')
epoch: 40 test_true_pfm: -13.505260825184733
episode: 160 training return: tensor(-1556.7031, device='cuda:0')
episode: 161 training return: tensor(-1566.8567, device='cuda:0')
episode: 162 training return: tensor(-1541.4663, device='cuda:0')
episode: 163 training return: tensor(-1557.6061, device='cuda:0')
epoch: 41 test_true_pfm: -18.084027952269356
episode: 164 training return: tensor(-1564.0298, device='cuda:0')
episode: 165 training return: tensor(-1560.4543, device='cuda:0')
episode: 166 training return: tensor(-1563.6003, device='cuda:0')
episode: 167 training return: tensor(-1572.4569, device='cuda:0')
epoch: 42 test_true_pfm: -19.338098219959576
episode: 168 training return: tensor(-1569.3219, device='cuda:0')
episode: 169 training return: tensor(-1567.0525, device='cuda:0')
episode: 170 training return: tensor(-1567.0729, device='cuda:0')
episode: 171 training return: tensor(-1564.0562, device='cuda:0')
epoch: 43 test_true_pfm: -19.581313399625444
episode: 172 training return: tensor(-1554.2173, device='cuda:0')
episode: 173 training return: tensor(-1559.1326, device='cuda:0')
episode: 174 training return: tensor(-1565.3893, device='cuda:0')
episode: 175 training return: tensor(-1558.1329, device='cuda:0')
epoch: 44 test_true_pfm: -23.236198053581887
episode: 176 training return: tensor(-1543.6444, device='cuda:0')
episode: 177 training return: tensor(-1580.5305, device='cuda:0')
episode: 178 training return: tensor(-1573.1067, device='cuda:0')
episode: 179 training return: tensor(-1560.0625, device='cuda:0')
epoch: 45 test_true_pfm: -16.892024995854467
episode: 180 training return: tensor(-1570.5035, device='cuda:0')
episode: 181 training return: tensor(-1555.5667, device='cuda:0')
episode: 182 training return: tensor(-1591.3203, device='cuda:0')
episode: 183 training return: tensor(-1560.1531, device='cuda:0')
epoch: 46 test_true_pfm: -15.62717335412143
episode: 184 training return: tensor(-1563.9595, device='cuda:0')
episode: 185 training return: tensor(-1564.7109, device='cuda:0')
episode: 186 training return: tensor(-1552.2965, device='cuda:0')
episode: 187 training return: tensor(-1554.5564, device='cuda:0')
epoch: 47 test_true_pfm: -20.05226625085035
episode: 188 training return: tensor(-1552.7574, device='cuda:0')
episode: 189 training return: tensor(-1560.2812, device='cuda:0')
episode: 190 training return: tensor(-1556.2051, device='cuda:0')
episode: 191 training return: tensor(-1558.2310, device='cuda:0')
epoch: 48 test_true_pfm: -15.253150190543067
episode: 192 training return: tensor(-1565.1460, device='cuda:0')
episode: 193 training return: tensor(-1560.6895, device='cuda:0')
episode: 194 training return: tensor(-1574.9401, device='cuda:0')
episode: 195 training return: tensor(-1559.1283, device='cuda:0')
epoch: 49 test_true_pfm: -17.397227278078336
episode: 196 training return: tensor(-1561.7864, device='cuda:0')
episode: 197 training return: tensor(-1573.4253, device='cuda:0')
episode: 198 training return: tensor(-1561.8622, device='cuda:0')
episode: 199 training return: tensor(-1557.7651, device='cuda:0')
epoch: 50 test_true_pfm: -11.927684263322083
episode: 200 training return: tensor(-1548.7542, device='cuda:0')
episode: 201 training return: tensor(-1554.1644, device='cuda:0')
episode: 202 training return: tensor(-1561.6575, device='cuda:0')
episode: 203 training return: tensor(-1566.3765, device='cuda:0')
epoch: 51 test_true_pfm: -13.235470031035254
episode: 204 training return: tensor(-1565.8636, device='cuda:0')
episode: 205 training return: tensor(-1554.1674, device='cuda:0')
episode: 206 training return: tensor(-1557.0002, device='cuda:0')
episode: 207 training return: tensor(-1567.4994, device='cuda:0')
epoch: 52 test_true_pfm: -16.508323166147722
episode: 208 training return: tensor(-1549.7153, device='cuda:0')
episode: 209 training return: tensor(-1554.9735, device='cuda:0')
episode: 210 training return: tensor(-1561.4280, device='cuda:0')
episode: 211 training return: tensor(-1568.8130, device='cuda:0')
epoch: 53 test_true_pfm: -10.075010807756675
episode: 212 training return: tensor(-1563.4340, device='cuda:0')
episode: 213 training return: tensor(-1569.0463, device='cuda:0')
episode: 214 training return: tensor(-1554.4491, device='cuda:0')
episode: 215 training return: tensor(-1544.5442, device='cuda:0')
epoch: 54 test_true_pfm: -14.708004970726146
episode: 216 training return: tensor(-1541.5687, device='cuda:0')
episode: 217 training return: tensor(-1568.6227, device='cuda:0')
episode: 218 training return: tensor(-1547.9263, device='cuda:0')
episode: 219 training return: tensor(-1545.0742, device='cuda:0')
epoch: 55 test_true_pfm: -13.941503626280145
episode: 220 training return: tensor(-1553.5964, device='cuda:0')
episode: 221 training return: tensor(-1555.3579, device='cuda:0')
episode: 222 training return: tensor(-1564.8809, device='cuda:0')
episode: 223 training return: tensor(-1571.9122, device='cuda:0')
epoch: 56 test_true_pfm: -17.57109187026364
episode: 224 training return: tensor(-1584.5251, device='cuda:0')
episode: 225 training return: tensor(-1559.9290, device='cuda:0')
episode: 226 training return: tensor(-1569.8959, device='cuda:0')
episode: 227 training return: tensor(-1554.6906, device='cuda:0')
epoch: 57 test_true_pfm: -18.378831437498377
episode: 228 training return: tensor(-1571.6581, device='cuda:0')
episode: 229 training return: tensor(-1552.5735, device='cuda:0')
episode: 230 training return: tensor(-1565.4016, device='cuda:0')
episode: 231 training return: tensor(-1584.3029, device='cuda:0')
epoch: 58 test_true_pfm: -19.710785691763068
episode: 232 training return: tensor(-1557.1180, device='cuda:0')
episode: 233 training return: tensor(-1560.6256, device='cuda:0')
episode: 234 training return: tensor(-1553.1161, device='cuda:0')
episode: 235 training return: tensor(-1541.3696, device='cuda:0')
epoch: 59 test_true_pfm: -13.735836875905532
episode: 236 training return: tensor(-1552.7628, device='cuda:0')
episode: 237 training return: tensor(-1559.0081, device='cuda:0')
episode: 238 training return: tensor(-1559.2993, device='cuda:0')
episode: 239 training return: tensor(-1567.6328, device='cuda:0')
epoch: 60 test_true_pfm: -16.46026676986892
episode: 240 training return: tensor(-1554.1472, device='cuda:0')
episode: 241 training return: tensor(-1561.5902, device='cuda:0')
episode: 242 training return: tensor(-1564.9620, device='cuda:0')
episode: 243 training return: tensor(-1556.0540, device='cuda:0')
epoch: 61 test_true_pfm: -14.511634736256804
episode: 244 training return: tensor(-1545.2400, device='cuda:0')
episode: 245 training return: tensor(-1549.8149, device='cuda:0')
episode: 246 training return: tensor(-1558.9264, device='cuda:0')
episode: 247 training return: tensor(-1557.5179, device='cuda:0')
epoch: 62 test_true_pfm: -15.720934851616065
episode: 248 training return: tensor(-1562.0536, device='cuda:0')
episode: 249 training return: tensor(-1558.2249, device='cuda:0')
episode: 250 training return: tensor(-1554.1287, device='cuda:0')
episode: 251 training return: tensor(-1542.8170, device='cuda:0')
epoch: 63 test_true_pfm: -15.955535740325033
episode: 252 training return: tensor(-1550.2297, device='cuda:0')
episode: 253 training return: tensor(-1565.6331, device='cuda:0')
episode: 254 training return: tensor(-1546.2229, device='cuda:0')
episode: 255 training return: tensor(-1546.1348, device='cuda:0')
epoch: 64 test_true_pfm: -19.392693957703973
episode: 256 training return: tensor(-1551.7424, device='cuda:0')
episode: 257 training return: tensor(-1545.9603, device='cuda:0')
episode: 258 training return: tensor(-1560.2327, device='cuda:0')
episode: 259 training return: tensor(-1556.1844, device='cuda:0')
epoch: 65 test_true_pfm: -14.300724079060814
episode: 260 training return: tensor(-1551.7928, device='cuda:0')
episode: 261 training return: tensor(-1563.6072, device='cuda:0')
episode: 262 training return: tensor(-1556.9008, device='cuda:0')
episode: 263 training return: tensor(-1555.4403, device='cuda:0')
epoch: 66 test_true_pfm: -14.629032466255042
episode: 264 training return: tensor(-1532.9528, device='cuda:0')
episode: 265 training return: tensor(-1547.6998, device='cuda:0')
episode: 266 training return: tensor(-1549.6079, device='cuda:0')
episode: 267 training return: tensor(-1554.9149, device='cuda:0')
epoch: 67 test_true_pfm: -16.9305115871089
episode: 268 training return: tensor(-1546.4801, device='cuda:0')
episode: 269 training return: tensor(-1544.1256, device='cuda:0')
episode: 270 training return: tensor(-1555.3207, device='cuda:0')
episode: 271 training return: tensor(-1534.4850, device='cuda:0')
epoch: 68 test_true_pfm: -18.541403837138578
episode: 272 training return: tensor(-1552.8475, device='cuda:0')
episode: 273 training return: tensor(-1553.9493, device='cuda:0')
episode: 274 training return: tensor(-1566.3784, device='cuda:0')
episode: 275 training return: tensor(-1545.9465, device='cuda:0')
epoch: 69 test_true_pfm: -19.121028363695938
episode: 276 training return: tensor(-1553.6666, device='cuda:0')
episode: 277 training return: tensor(-1551.5305, device='cuda:0')
episode: 278 training return: tensor(-1540.5206, device='cuda:0')
episode: 279 training return: tensor(-1550.9712, device='cuda:0')
epoch: 70 test_true_pfm: -18.276850830294276
episode: 280 training return: tensor(-1542.4753, device='cuda:0')
episode: 281 training return: tensor(-1548.1754, device='cuda:0')
episode: 282 training return: tensor(-1559.1234, device='cuda:0')
episode: 283 training return: tensor(-1548.8043, device='cuda:0')
epoch: 71 test_true_pfm: -18.609564331315557
episode: 284 training return: tensor(-1552.0370, device='cuda:0')
episode: 285 training return: tensor(-1554.6462, device='cuda:0')
episode: 286 training return: tensor(-1563.3884, device='cuda:0')
episode: 287 training return: tensor(-1553.6421, device='cuda:0')
epoch: 72 test_true_pfm: -15.712752727514273
episode: 288 training return: tensor(-1538.2200, device='cuda:0')
episode: 289 training return: tensor(-1535.0756, device='cuda:0')
episode: 290 training return: tensor(-1548.6575, device='cuda:0')
episode: 291 training return: tensor(-1532.1061, device='cuda:0')
epoch: 73 test_true_pfm: -14.438513148108976
episode: 292 training return: tensor(-1550.8872, device='cuda:0')
episode: 293 training return: tensor(-1547.8984, device='cuda:0')
episode: 294 training return: tensor(-1545.4344, device='cuda:0')
episode: 295 training return: tensor(-1554.8195, device='cuda:0')
epoch: 74 test_true_pfm: -19.311212949812283
episode: 296 training return: tensor(-1569.3267, device='cuda:0')
episode: 297 training return: tensor(-1553.4097, device='cuda:0')
episode: 298 training return: tensor(-1550.5021, device='cuda:0')
episode: 299 training return: tensor(-1560.4011, device='cuda:0')
epoch: 75 test_true_pfm: -13.94033280752707
episode: 300 training return: tensor(-1540.0889, device='cuda:0')
episode: 301 training return: tensor(-1552.4460, device='cuda:0')
episode: 302 training return: tensor(-1546.4229, device='cuda:0')
episode: 303 training return: tensor(-1547.0043, device='cuda:0')
epoch: 76 test_true_pfm: -14.761802481371461
episode: 304 training return: tensor(-1544.8337, device='cuda:0')
episode: 305 training return: tensor(-1559.6770, device='cuda:0')
episode: 306 training return: tensor(-1544.8663, device='cuda:0')
episode: 307 training return: tensor(-1551.8809, device='cuda:0')
epoch: 77 test_true_pfm: -18.685760851193308
episode: 308 training return: tensor(-1546.4119, device='cuda:0')
episode: 309 training return: tensor(-1561.8717, device='cuda:0')
episode: 310 training return: tensor(-1534.1648, device='cuda:0')
episode: 311 training return: tensor(-1542.1295, device='cuda:0')
epoch: 78 test_true_pfm: -16.176581034158396
episode: 312 training return: tensor(-1537.1527, device='cuda:0')
episode: 313 training return: tensor(-1539.3490, device='cuda:0')
episode: 314 training return: tensor(-1538.5822, device='cuda:0')
episode: 315 training return: tensor(-1554.4540, device='cuda:0')
epoch: 79 test_true_pfm: -15.410811533576393
episode: 316 training return: tensor(-1540.9274, device='cuda:0')
episode: 317 training return: tensor(-1548.8630, device='cuda:0')
episode: 318 training return: tensor(-1556.6322, device='cuda:0')
episode: 319 training return: tensor(-1553.5605, device='cuda:0')
epoch: 80 test_true_pfm: -15.650816582517413
episode: 320 training return: tensor(-1668.9437, device='cuda:0')
episode: 321 training return: tensor(-1553.7858, device='cuda:0')
episode: 322 training return: tensor(-1553.5518, device='cuda:0')
episode: 323 training return: tensor(-1549.2571, device='cuda:0')
epoch: 81 test_true_pfm: -13.83279838662439
episode: 324 training return: tensor(-1534.9352, device='cuda:0')
episode: 325 training return: tensor(-1537.3993, device='cuda:0')
episode: 326 training return: tensor(-1529.0280, device='cuda:0')
episode: 327 training return: tensor(-1542.2009, device='cuda:0')
epoch: 82 test_true_pfm: -22.903785174995164
episode: 328 training return: tensor(-1537.4763, device='cuda:0')
episode: 329 training return: tensor(-1549.1069, device='cuda:0')
episode: 330 training return: tensor(-1560.8412, device='cuda:0')
episode: 331 training return: tensor(-1533.2799, device='cuda:0')
epoch: 83 test_true_pfm: -18.115767399656097
episode: 332 training return: tensor(-1547.9167, device='cuda:0')
episode: 333 training return: tensor(-1554.7568, device='cuda:0')
episode: 334 training return: tensor(-1534.3263, device='cuda:0')
episode: 335 training return: tensor(-1536.2864, device='cuda:0')
epoch: 84 test_true_pfm: -15.61021291388802
episode: 336 training return: tensor(-1547.0891, device='cuda:0')
episode: 337 training return: tensor(-1541.8901, device='cuda:0')
episode: 338 training return: tensor(-1726.9185, device='cuda:0')
episode: 339 training return: tensor(-1551.0029, device='cuda:0')
epoch: 85 test_true_pfm: -18.56088828997196
episode: 340 training return: tensor(-1543.9341, device='cuda:0')
episode: 341 training return: tensor(-1542.7139, device='cuda:0')
episode: 342 training return: tensor(-1537.8473, device='cuda:0')
episode: 343 training return: tensor(-1536.3293, device='cuda:0')
epoch: 86 test_true_pfm: -17.238794603813307
episode: 344 training return: tensor(-1550.5969, device='cuda:0')
episode: 345 training return: tensor(-1549.7161, device='cuda:0')
episode: 346 training return: tensor(-1533.6417, device='cuda:0')
episode: 347 training return: tensor(-1544.3381, device='cuda:0')
epoch: 87 test_true_pfm: -24.647478717263226
episode: 348 training return: tensor(-1543.0869, device='cuda:0')
episode: 349 training return: tensor(-1532.3136, device='cuda:0')
episode: 350 training return: tensor(-1539.1975, device='cuda:0')
episode: 351 training return: tensor(-1562.6154, device='cuda:0')
epoch: 88 test_true_pfm: -16.795626629592302
episode: 352 training return: tensor(-1558.2203, device='cuda:0')
episode: 353 training return: tensor(-1553.5374, device='cuda:0')
episode: 354 training return: tensor(-1550.5790, device='cuda:0')
episode: 355 training return: tensor(-1542.3694, device='cuda:0')
epoch: 89 test_true_pfm: -20.11412207804109
episode: 356 training return: tensor(-1526.5896, device='cuda:0')
episode: 357 training return: tensor(-1536.3901, device='cuda:0')
episode: 358 training return: tensor(-1551.5021, device='cuda:0')
episode: 359 training return: tensor(-1537.1849, device='cuda:0')
epoch: 90 test_true_pfm: -16.132430531644744
episode: 360 training return: tensor(-1561.5490, device='cuda:0')
episode: 361 training return: tensor(-1557.0859, device='cuda:0')
episode: 362 training return: tensor(-1541.8492, device='cuda:0')
episode: 363 training return: tensor(-1543.1932, device='cuda:0')
epoch: 91 test_true_pfm: -16.016176113080192
episode: 364 training return: tensor(-1533.7900, device='cuda:0')
episode: 365 training return: tensor(-1541.2430, device='cuda:0')
episode: 366 training return: tensor(-1520.4041, device='cuda:0')
episode: 367 training return: tensor(-1527.9823, device='cuda:0')
epoch: 92 test_true_pfm: -18.10221027893933
episode: 368 training return: tensor(-1555.4047, device='cuda:0')
episode: 369 training return: tensor(-1543.6567, device='cuda:0')
episode: 370 training return: tensor(-1531.6521, device='cuda:0')
episode: 371 training return: tensor(-1529.0122, device='cuda:0')
epoch: 93 test_true_pfm: -18.470575128051063
episode: 372 training return: tensor(-1528.5922, device='cuda:0')
episode: 373 training return: tensor(-1543.7117, device='cuda:0')
episode: 374 training return: tensor(-1546.6482, device='cuda:0')
episode: 375 training return: tensor(-1539.5812, device='cuda:0')
epoch: 94 test_true_pfm: -20.467181281865816
episode: 376 training return: tensor(-1543.8723, device='cuda:0')
episode: 377 training return: tensor(-1555.8745, device='cuda:0')
episode: 378 training return: tensor(-1541.5087, device='cuda:0')
episode: 379 training return: tensor(-1545.8582, device='cuda:0')
epoch: 95 test_true_pfm: -21.511459759799298
episode: 380 training return: tensor(-1530.2836, device='cuda:0')
episode: 381 training return: tensor(-1554.0114, device='cuda:0')
episode: 382 training return: tensor(-1539.1653, device='cuda:0')
episode: 383 training return: tensor(-1519.4641, device='cuda:0')
epoch: 96 test_true_pfm: -20.383786167237965
episode: 384 training return: tensor(-1535.4230, device='cuda:0')
episode: 385 training return: tensor(-1549.1967, device='cuda:0')
episode: 386 training return: tensor(-1532.7191, device='cuda:0')
episode: 387 training return: tensor(-1547.7389, device='cuda:0')
epoch: 97 test_true_pfm: -15.80797915563115
episode: 388 training return: tensor(-1532.7587, device='cuda:0')
episode: 389 training return: tensor(-1531.2345, device='cuda:0')
episode: 390 training return: tensor(-1545.2244, device='cuda:0')
episode: 391 training return: tensor(-1526.0164, device='cuda:0')
epoch: 98 test_true_pfm: -18.6058473614261
episode: 392 training return: tensor(-1535.3550, device='cuda:0')
episode: 393 training return: tensor(-1545.6007, device='cuda:0')
episode: 394 training return: tensor(-1529.5845, device='cuda:0')
episode: 395 training return: tensor(-1532.8551, device='cuda:0')
epoch: 99 test_true_pfm: -17.620529390060586
episode: 396 training return: tensor(-1552.2559, device='cuda:0')
episode: 397 training return: tensor(-1544.9688, device='cuda:0')
episode: 398 training return: tensor(-1540.0809, device='cuda:0')
episode: 399 training return: tensor(-1539.5822, device='cuda:0')
epoch: 100 test_true_pfm: -14.483045409318864
episode: 400 training return: tensor(-1539.2216, device='cuda:0')
episode: 401 training return: tensor(-1533.8705, device='cuda:0')
episode: 402 training return: tensor(-1534.7847, device='cuda:0')
episode: 403 training return: tensor(-1539.4960, device='cuda:0')
epoch: 101 test_true_pfm: -21.989641492253536
episode: 404 training return: tensor(-1548.2189, device='cuda:0')
episode: 405 training return: tensor(-1528.7031, device='cuda:0')
episode: 406 training return: tensor(-1553.9187, device='cuda:0')
episode: 407 training return: tensor(-1528.5592, device='cuda:0')
epoch: 102 test_true_pfm: -16.143093647908923
episode: 408 training return: tensor(-1526.6893, device='cuda:0')
episode: 409 training return: tensor(-1536.9232, device='cuda:0')
episode: 410 training return: tensor(-1538.0159, device='cuda:0')
episode: 411 training return: tensor(-1526.5413, device='cuda:0')
epoch: 103 test_true_pfm: -12.292486575094662
episode: 412 training return: tensor(-1530.1016, device='cuda:0')
episode: 413 training return: tensor(-1530.8627, device='cuda:0')
episode: 414 training return: tensor(-1577.3710, device='cuda:0')
episode: 415 training return: tensor(-1531.5004, device='cuda:0')
epoch: 104 test_true_pfm: -20.860574715031664
episode: 416 training return: tensor(-1543.3395, device='cuda:0')
episode: 417 training return: tensor(-1539.6960, device='cuda:0')
episode: 418 training return: tensor(-1537.4990, device='cuda:0')
episode: 419 training return: tensor(-1524.0681, device='cuda:0')
epoch: 105 test_true_pfm: -16.857439374833394
episode: 420 training return: tensor(-1532.5443, device='cuda:0')
episode: 421 training return: tensor(-1554.8864, device='cuda:0')
episode: 422 training return: tensor(-1738.6210, device='cuda:0')
episode: 423 training return: tensor(-1535.6865, device='cuda:0')
epoch: 106 test_true_pfm: -18.032949636566673
episode: 424 training return: tensor(-1534.9994, device='cuda:0')
episode: 425 training return: tensor(-1531.1598, device='cuda:0')
episode: 426 training return: tensor(-1554.2096, device='cuda:0')
episode: 427 training return: tensor(-1544.3828, device='cuda:0')
epoch: 107 test_true_pfm: -17.629404407193313
episode: 428 training return: tensor(-1552.5768, device='cuda:0')
episode: 429 training return: tensor(-1541.6732, device='cuda:0')
episode: 430 training return: tensor(-1550.9873, device='cuda:0')
episode: 431 training return: tensor(-1537.5516, device='cuda:0')
epoch: 108 test_true_pfm: -17.293979128149
episode: 432 training return: tensor(-1536.8020, device='cuda:0')
episode: 433 training return: tensor(-1539.5815, device='cuda:0')
episode: 434 training return: tensor(-1546.4734, device='cuda:0')
episode: 435 training return: tensor(-1543.6575, device='cuda:0')
epoch: 109 test_true_pfm: -23.308548235770502
episode: 436 training return: tensor(-1527.7733, device='cuda:0')
episode: 437 training return: tensor(-1536.7750, device='cuda:0')
episode: 438 training return: tensor(-1519.7192, device='cuda:0')
episode: 439 training return: tensor(-1542.2656, device='cuda:0')
epoch: 110 test_true_pfm: -19.095955376736608
episode: 440 training return: tensor(-1544.7671, device='cuda:0')
episode: 441 training return: tensor(-1533.9929, device='cuda:0')
episode: 442 training return: tensor(-1537.0823, device='cuda:0')
episode: 443 training return: tensor(-1532.0468, device='cuda:0')
epoch: 111 test_true_pfm: -15.743125983631217
episode: 444 training return: tensor(-1525.8114, device='cuda:0')
episode: 445 training return: tensor(-1548.0803, device='cuda:0')
episode: 446 training return: tensor(-1540.8535, device='cuda:0')
episode: 447 training return: tensor(-1544.4088, device='cuda:0')
epoch: 112 test_true_pfm: -21.077969592743386
episode: 448 training return: tensor(-1532.6301, device='cuda:0')
episode: 449 training return: tensor(-1539.2048, device='cuda:0')
episode: 450 training return: tensor(-1539.1965, device='cuda:0')
episode: 451 training return: tensor(-1532.7028, device='cuda:0')
epoch: 113 test_true_pfm: -18.116560527013107
episode: 452 training return: tensor(-1518.6868, device='cuda:0')
episode: 453 training return: tensor(-1535.1117, device='cuda:0')
episode: 454 training return: tensor(-1540.9642, device='cuda:0')
episode: 455 training return: tensor(-1535.9395, device='cuda:0')
epoch: 114 test_true_pfm: -15.34355374428201
episode: 456 training return: tensor(-1539.9192, device='cuda:0')
episode: 457 training return: tensor(-1554.5842, device='cuda:0')
episode: 458 training return: tensor(-1546.6697, device='cuda:0')
episode: 459 training return: tensor(-1535.7906, device='cuda:0')
epoch: 115 test_true_pfm: -24.7835591843256
episode: 460 training return: tensor(-1536.8754, device='cuda:0')
episode: 461 training return: tensor(-1543.6655, device='cuda:0')
episode: 462 training return: tensor(-1535.7715, device='cuda:0')
episode: 463 training return: tensor(-1544.2439, device='cuda:0')
epoch: 116 test_true_pfm: -18.531556501834583
episode: 464 training return: tensor(-1532.5358, device='cuda:0')
episode: 465 training return: tensor(-1540.9556, device='cuda:0')
episode: 466 training return: tensor(-1535.8676, device='cuda:0')
episode: 467 training return: tensor(-1527.1357, device='cuda:0')
epoch: 117 test_true_pfm: -22.10925527948211
episode: 468 training return: tensor(-1538.7211, device='cuda:0')
episode: 469 training return: tensor(-1532.7463, device='cuda:0')
episode: 470 training return: tensor(-1535.8848, device='cuda:0')
episode: 471 training return: tensor(-1523.6539, device='cuda:0')
epoch: 118 test_true_pfm: -24.30245041476421
episode: 472 training return: tensor(-1527.8010, device='cuda:0')
episode: 473 training return: tensor(-1534.4843, device='cuda:0')
episode: 474 training return: tensor(-1540.9313, device='cuda:0')
episode: 475 training return: tensor(-1536.6355, device='cuda:0')
epoch: 119 test_true_pfm: -14.777785645844943
episode: 476 training return: tensor(-1541.1621, device='cuda:0')
episode: 477 training return: tensor(-1531.3342, device='cuda:0')
episode: 478 training return: tensor(-1557.1661, device='cuda:0')
episode: 479 training return: tensor(-1523.4860, device='cuda:0')
epoch: 120 test_true_pfm: -18.638856364435597
episode: 480 training return: tensor(-1534.9874, device='cuda:0')
episode: 481 training return: tensor(-1539.8221, device='cuda:0')
episode: 482 training return: tensor(-1519.7545, device='cuda:0')
episode: 483 training return: tensor(-1520.8142, device='cuda:0')
epoch: 121 test_true_pfm: -22.337905508228374
episode: 484 training return: tensor(-1536.4917, device='cuda:0')
episode: 485 training return: tensor(-1538.0273, device='cuda:0')
episode: 486 training return: tensor(-1551.6887, device='cuda:0')
episode: 487 training return: tensor(-1533.3756, device='cuda:0')
epoch: 122 test_true_pfm: -19.883986821822976
episode: 488 training return: tensor(-1529.6022, device='cuda:0')
episode: 489 training return: tensor(-1542.4650, device='cuda:0')
episode: 490 training return: tensor(-1527.0066, device='cuda:0')
episode: 491 training return: tensor(-1550.1594, device='cuda:0')
epoch: 123 test_true_pfm: -21.576237007183448
episode: 492 training return: tensor(-1535.7045, device='cuda:0')
episode: 493 training return: tensor(-1537.6509, device='cuda:0')
episode: 494 training return: tensor(-1535.5330, device='cuda:0')
episode: 495 training return: tensor(-1533.7306, device='cuda:0')
epoch: 124 test_true_pfm: -19.18903213781826
episode: 496 training return: tensor(-1546.5856, device='cuda:0')
episode: 497 training return: tensor(-1522.9478, device='cuda:0')
episode: 498 training return: tensor(-1538.7190, device='cuda:0')
episode: 499 training return: tensor(-1544.9647, device='cuda:0')
epoch: 125 test_true_pfm: -17.889257175534727
episode: 500 training return: tensor(-1560.3999, device='cuda:0')
episode: 501 training return: tensor(-1531.1222, device='cuda:0')
episode: 502 training return: tensor(-1546.2853, device='cuda:0')
episode: 503 training return: tensor(-1536.7178, device='cuda:0')
epoch: 126 test_true_pfm: -15.775363710144674
episode: 504 training return: tensor(-1530.8003, device='cuda:0')
episode: 505 training return: tensor(-1535.0034, device='cuda:0')
episode: 506 training return: tensor(-1535.4414, device='cuda:0')
episode: 507 training return: tensor(-1535.8921, device='cuda:0')
epoch: 127 test_true_pfm: -21.6761427511485
episode: 508 training return: tensor(-1554.1082, device='cuda:0')
episode: 509 training return: tensor(-1531.4978, device='cuda:0')
episode: 510 training return: tensor(-1531.9991, device='cuda:0')
episode: 511 training return: tensor(-1543.9530, device='cuda:0')
epoch: 128 test_true_pfm: -20.824698784182555
episode: 512 training return: tensor(-1545.0240, device='cuda:0')
episode: 513 training return: tensor(-1540.0922, device='cuda:0')
episode: 514 training return: tensor(-1528.4139, device='cuda:0')
episode: 515 training return: tensor(-1536.0094, device='cuda:0')
epoch: 129 test_true_pfm: -25.645913396797663
episode: 516 training return: tensor(-1536.8104, device='cuda:0')
episode: 517 training return: tensor(-1537.3005, device='cuda:0')
episode: 518 training return: tensor(-1523.0880, device='cuda:0')
episode: 519 training return: tensor(-1556.5178, device='cuda:0')
epoch: 130 test_true_pfm: -19.88611921529991
episode: 520 training return: tensor(-1532.7665, device='cuda:0')
episode: 521 training return: tensor(-1534.8093, device='cuda:0')
episode: 522 training return: tensor(-1540.4835, device='cuda:0')
episode: 523 training return: tensor(-1528.7000, device='cuda:0')
epoch: 131 test_true_pfm: -20.753387777024347
episode: 524 training return: tensor(-1547.5444, device='cuda:0')
episode: 525 training return: tensor(-1525.6759, device='cuda:0')
episode: 526 training return: tensor(-1554.4318, device='cuda:0')
episode: 527 training return: tensor(-1529.9252, device='cuda:0')
epoch: 132 test_true_pfm: -21.86722975957211
episode: 528 training return: tensor(-1536.7808, device='cuda:0')
episode: 529 training return: tensor(-1539.7451, device='cuda:0')
episode: 530 training return: tensor(-1546.1882, device='cuda:0')
episode: 531 training return: tensor(-1551.4363, device='cuda:0')
epoch: 133 test_true_pfm: -20.908909353897332
episode: 532 training return: tensor(-1539.2550, device='cuda:0')
episode: 533 training return: tensor(-1538.3846, device='cuda:0')
episode: 534 training return: tensor(-1541.7300, device='cuda:0')
episode: 535 training return: tensor(-1536.2639, device='cuda:0')
epoch: 134 test_true_pfm: -18.890061045199555
episode: 536 training return: tensor(-1525.6937, device='cuda:0')
episode: 537 training return: tensor(-1522.1210, device='cuda:0')
episode: 538 training return: tensor(-1517.6539, device='cuda:0')
episode: 539 training return: tensor(-1532.7983, device='cuda:0')
epoch: 135 test_true_pfm: -18.74695702886488
episode: 540 training return: tensor(-1535.5543, device='cuda:0')
episode: 541 training return: tensor(-1539.9968, device='cuda:0')
episode: 542 training return: tensor(-1534.1207, device='cuda:0')
episode: 543 training return: tensor(-1535.8402, device='cuda:0')
epoch: 136 test_true_pfm: -22.569230450980694
episode: 544 training return: tensor(-1546.9796, device='cuda:0')
episode: 545 training return: tensor(-1539.1781, device='cuda:0')
episode: 546 training return: tensor(-1537.4784, device='cuda:0')
episode: 547 training return: tensor(-1535.1031, device='cuda:0')
epoch: 137 test_true_pfm: -22.377144881739497
episode: 548 training return: tensor(-1544.4072, device='cuda:0')
episode: 549 training return: tensor(-1537.6648, device='cuda:0')
episode: 550 training return: tensor(-1521.7910, device='cuda:0')
episode: 551 training return: tensor(-1526.5017, device='cuda:0')
epoch: 138 test_true_pfm: -23.79775335696196
episode: 552 training return: tensor(-1517.7914, device='cuda:0')
episode: 553 training return: tensor(-1539.9965, device='cuda:0')
episode: 554 training return: tensor(-1527.8876, device='cuda:0')
episode: 555 training return: tensor(-1534.7980, device='cuda:0')
epoch: 139 test_true_pfm: -24.32001987267896
episode: 556 training return: tensor(-1535.0006, device='cuda:0')
episode: 557 training return: tensor(-1537.2214, device='cuda:0')
episode: 558 training return: tensor(-1541.4259, device='cuda:0')
episode: 559 training return: tensor(-1537.6953, device='cuda:0')
epoch: 140 test_true_pfm: -23.41342258202518
episode: 560 training return: tensor(-1527.0396, device='cuda:0')
episode: 561 training return: tensor(-1544.1746, device='cuda:0')
episode: 562 training return: tensor(-1528.4402, device='cuda:0')
episode: 563 training return: tensor(-1536.5033, device='cuda:0')
epoch: 141 test_true_pfm: -19.457015470249512
episode: 564 training return: tensor(-1540.8248, device='cuda:0')
episode: 565 training return: tensor(-1543.5876, device='cuda:0')
episode: 566 training return: tensor(-1523.4863, device='cuda:0')
episode: 567 training return: tensor(-1536.9712, device='cuda:0')
epoch: 142 test_true_pfm: -17.737691379077003
episode: 568 training return: tensor(-1536.2222, device='cuda:0')
episode: 569 training return: tensor(-1542.4453, device='cuda:0')
episode: 570 training return: tensor(-1528.1121, device='cuda:0')
episode: 571 training return: tensor(-1547.7728, device='cuda:0')
epoch: 143 test_true_pfm: -21.199133563938897
episode: 572 training return: tensor(-1528.1896, device='cuda:0')
episode: 573 training return: tensor(-1540.8176, device='cuda:0')
episode: 574 training return: tensor(-1534.8887, device='cuda:0')
episode: 575 training return: tensor(-1538.9651, device='cuda:0')
epoch: 144 test_true_pfm: -19.742727298178124
episode: 576 training return: tensor(-1530.2073, device='cuda:0')
episode: 577 training return: tensor(-1535.8843, device='cuda:0')
episode: 578 training return: tensor(-1538.9894, device='cuda:0')
episode: 579 training return: tensor(-1534.6047, device='cuda:0')
epoch: 145 test_true_pfm: -20.47197881300575
episode: 580 training return: tensor(-1531.6531, device='cuda:0')
episode: 581 training return: tensor(-1537.6223, device='cuda:0')
episode: 582 training return: tensor(-1537.0140, device='cuda:0')
episode: 583 training return: tensor(-1517.1979, device='cuda:0')
epoch: 146 test_true_pfm: -14.988929859987616
episode: 584 training return: tensor(-1526.8704, device='cuda:0')
episode: 585 training return: tensor(-1532.6044, device='cuda:0')
episode: 586 training return: tensor(-1532.5052, device='cuda:0')
episode: 587 training return: tensor(-1529.1241, device='cuda:0')
epoch: 147 test_true_pfm: -23.07437372602892
episode: 588 training return: tensor(-1538.0952, device='cuda:0')
episode: 589 training return: tensor(-1545.9885, device='cuda:0')
episode: 590 training return: tensor(-1522.4342, device='cuda:0')
episode: 591 training return: tensor(-1526.8341, device='cuda:0')
epoch: 148 test_true_pfm: -21.818901674527993
episode: 592 training return: tensor(-1530.5835, device='cuda:0')
episode: 593 training return: tensor(-1544.8129, device='cuda:0')
episode: 594 training return: tensor(-1531.2954, device='cuda:0')
episode: 595 training return: tensor(-1530.0496, device='cuda:0')
epoch: 149 test_true_pfm: -24.663541405049752
episode: 596 training return: tensor(-1532.4840, device='cuda:0')
episode: 597 training return: tensor(-1545.2795, device='cuda:0')
episode: 598 training return: tensor(-1520.7770, device='cuda:0')
episode: 599 training return: tensor(-1530.8242, device='cuda:0')
epoch: 150 test_true_pfm: -23.352173281860686
