['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'brac', '--traj', 'medium', '--seed', '4', '--data', '100000']
5031.060609143847
episode: 0 training return: tensor(-4375.5386, device='cuda:0')
episode: 1 training return: tensor(-2661.0439, device='cuda:0')
episode: 2 training return: tensor(-4356.3091, device='cuda:0')
episode: 3 training return: tensor(-4787.1968, device='cuda:0')
epoch: 1 test_true_pfm: -721.0025975200266
episode: 4 training return: tensor(-4402.0806, device='cuda:0')
episode: 5 training return: tensor(-3536.6311, device='cuda:0')
episode: 6 training return: tensor(-4822.7520, device='cuda:0')
episode: 7 training return: tensor(-4355.7314, device='cuda:0')
epoch: 2 test_true_pfm: -222.21023645753203
episode: 8 training return: tensor(-4326.1265, device='cuda:0')
episode: 9 training return: tensor(-4587.8394, device='cuda:0')
episode: 10 training return: tensor(-1833.4780, device='cuda:0')
episode: 11 training return: tensor(-1790.8273, device='cuda:0')
epoch: 3 test_true_pfm: -27.477961326931535
episode: 12 training return: tensor(-1831.5494, device='cuda:0')
episode: 13 training return: tensor(-1739.4381, device='cuda:0')
episode: 14 training return: tensor(-1767.2252, device='cuda:0')
episode: 15 training return: tensor(-1704.2045, device='cuda:0')
epoch: 4 test_true_pfm: -182.30150767312435
episode: 16 training return: tensor(-1704.7789, device='cuda:0')
episode: 17 training return: tensor(-1688.4010, device='cuda:0')
episode: 18 training return: tensor(-1627.1772, device='cuda:0')
episode: 19 training return: tensor(-1645.5389, device='cuda:0')
epoch: 5 test_true_pfm: -293.1211697909821
episode: 20 training return: tensor(-1639.4521, device='cuda:0')
episode: 21 training return: tensor(-1591.7030, device='cuda:0')
episode: 22 training return: tensor(-1596.2876, device='cuda:0')
episode: 23 training return: tensor(-1596.3840, device='cuda:0')
epoch: 6 test_true_pfm: -126.78975377961815
episode: 24 training return: tensor(-1607.5170, device='cuda:0')
episode: 25 training return: tensor(-1592.1038, device='cuda:0')
episode: 26 training return: tensor(-1568.8425, device='cuda:0')
episode: 27 training return: tensor(-1578.6171, device='cuda:0')
epoch: 7 test_true_pfm: -109.82588509880902
episode: 28 training return: tensor(-1656.8503, device='cuda:0')
episode: 29 training return: tensor(-1692.2067, device='cuda:0')
episode: 30 training return: tensor(-1749.8169, device='cuda:0')
episode: 31 training return: tensor(-1759.8568, device='cuda:0')
epoch: 8 test_true_pfm: -141.77468728286343
episode: 32 training return: tensor(-1692.5083, device='cuda:0')
episode: 33 training return: tensor(-1767.4296, device='cuda:0')
episode: 34 training return: tensor(-1747.3038, device='cuda:0')
episode: 35 training return: tensor(-1792.3153, device='cuda:0')
epoch: 9 test_true_pfm: -108.0066527807971
episode: 36 training return: tensor(-1720.0098, device='cuda:0')
episode: 37 training return: tensor(-1558.6615, device='cuda:0')
episode: 38 training return: tensor(-1554.3535, device='cuda:0')
episode: 39 training return: tensor(-1501.8361, device='cuda:0')
epoch: 10 test_true_pfm: -106.26887239692404
episode: 40 training return: tensor(-1579.7061, device='cuda:0')
episode: 41 training return: tensor(-1700.6567, device='cuda:0')
episode: 42 training return: tensor(-1701.2012, device='cuda:0')
episode: 43 training return: tensor(-1724.7653, device='cuda:0')
epoch: 11 test_true_pfm: -91.77174917369287
episode: 44 training return: tensor(-1720.2654, device='cuda:0')
episode: 45 training return: tensor(-1732.2745, device='cuda:0')
episode: 46 training return: tensor(-1733.7507, device='cuda:0')
episode: 47 training return: tensor(-1742.3920, device='cuda:0')
epoch: 12 test_true_pfm: 5.591094453818563
episode: 48 training return: tensor(-1731.1865, device='cuda:0')
episode: 49 training return: tensor(-1735.8444, device='cuda:0')
episode: 50 training return: tensor(-1729.9296, device='cuda:0')
episode: 51 training return: tensor(-1730.4028, device='cuda:0')
epoch: 13 test_true_pfm: 0.16651592759738007
episode: 52 training return: tensor(-1709.9116, device='cuda:0')
episode: 53 training return: tensor(-1726.2133, device='cuda:0')
episode: 54 training return: tensor(-1748.5690, device='cuda:0')
episode: 55 training return: tensor(-1731.5292, device='cuda:0')
epoch: 14 test_true_pfm: 3.8497982236320016
episode: 56 training return: tensor(-1727.2400, device='cuda:0')
episode: 57 training return: tensor(-1722.2721, device='cuda:0')
episode: 58 training return: tensor(-1735.7882, device='cuda:0')
episode: 59 training return: tensor(-1714.1053, device='cuda:0')
epoch: 15 test_true_pfm: 5.199210204048201
episode: 60 training return: tensor(-1739.2777, device='cuda:0')
episode: 61 training return: tensor(-1715.8113, device='cuda:0')
episode: 62 training return: tensor(-1744.2278, device='cuda:0')
episode: 63 training return: tensor(-1717.0388, device='cuda:0')
epoch: 16 test_true_pfm: 3.233229099788886
episode: 64 training return: tensor(-1746.8927, device='cuda:0')
episode: 65 training return: tensor(-1733.5718, device='cuda:0')
episode: 66 training return: tensor(-1728.8422, device='cuda:0')
episode: 67 training return: tensor(-1723.9316, device='cuda:0')
epoch: 17 test_true_pfm: -3.2421495302874384
episode: 68 training return: tensor(-1746.1697, device='cuda:0')
episode: 69 training return: tensor(-1730.7407, device='cuda:0')
episode: 70 training return: tensor(-1709.8169, device='cuda:0')
episode: 71 training return: tensor(-1711.7954, device='cuda:0')
epoch: 18 test_true_pfm: 9.843143893052806
episode: 72 training return: tensor(-1715.0371, device='cuda:0')
episode: 73 training return: tensor(-1720.0583, device='cuda:0')
episode: 74 training return: tensor(-1714.5798, device='cuda:0')
episode: 75 training return: tensor(-1732.7802, device='cuda:0')
epoch: 19 test_true_pfm: 12.742477066379005
episode: 76 training return: tensor(-1719.1936, device='cuda:0')
episode: 77 training return: tensor(-1715.4348, device='cuda:0')
episode: 78 training return: tensor(-1725.1018, device='cuda:0')
episode: 79 training return: tensor(-1715.3323, device='cuda:0')
epoch: 20 test_true_pfm: 1.6868099699421455
episode: 80 training return: tensor(-1710.7035, device='cuda:0')
episode: 81 training return: tensor(-1715.1895, device='cuda:0')
episode: 82 training return: tensor(-1709.0138, device='cuda:0')
episode: 83 training return: tensor(-1709.8125, device='cuda:0')
epoch: 21 test_true_pfm: 1.2810698795576325
episode: 84 training return: tensor(-1714.8698, device='cuda:0')
episode: 85 training return: tensor(-1703.6846, device='cuda:0')
episode: 86 training return: tensor(-1712.2642, device='cuda:0')
episode: 87 training return: tensor(-1702.8306, device='cuda:0')
epoch: 22 test_true_pfm: -1.4589801344395337
episode: 88 training return: tensor(-1696.2877, device='cuda:0')
episode: 89 training return: tensor(-1703.0286, device='cuda:0')
episode: 90 training return: tensor(-1708.2072, device='cuda:0')
episode: 91 training return: tensor(-1710.2323, device='cuda:0')
epoch: 23 test_true_pfm: 7.751085939540904
episode: 92 training return: tensor(-1706.3553, device='cuda:0')
episode: 93 training return: tensor(-1706.1244, device='cuda:0')
episode: 94 training return: tensor(-1709.8383, device='cuda:0')
episode: 95 training return: tensor(-1707.3778, device='cuda:0')
epoch: 24 test_true_pfm: 5.115180448621064
episode: 96 training return: tensor(-1701.7968, device='cuda:0')
episode: 97 training return: tensor(-1705.3085, device='cuda:0')
episode: 98 training return: tensor(-1693.1075, device='cuda:0')
episode: 99 training return: tensor(-1711.4048, device='cuda:0')
epoch: 25 test_true_pfm: 0.23178232505303642
episode: 100 training return: tensor(-1697.8265, device='cuda:0')
episode: 101 training return: tensor(-1713.5244, device='cuda:0')
episode: 102 training return: tensor(-1702.3732, device='cuda:0')
episode: 103 training return: tensor(-1704.6497, device='cuda:0')
epoch: 26 test_true_pfm: -2.3323309132454755
episode: 104 training return: tensor(-1713.5934, device='cuda:0')
episode: 105 training return: tensor(-1690.9395, device='cuda:0')
episode: 106 training return: tensor(-1705.8535, device='cuda:0')
episode: 107 training return: tensor(-1699.9841, device='cuda:0')
epoch: 27 test_true_pfm: -1.5099767234087498
episode: 108 training return: tensor(-1709.8889, device='cuda:0')
episode: 109 training return: tensor(-1715.6755, device='cuda:0')
episode: 110 training return: tensor(-1697.9924, device='cuda:0')
episode: 111 training return: tensor(-1714.8079, device='cuda:0')
epoch: 28 test_true_pfm: 14.448545577438404
episode: 112 training return: tensor(-1706.6923, device='cuda:0')
episode: 113 training return: tensor(-1702.5248, device='cuda:0')
episode: 114 training return: tensor(-1707.7642, device='cuda:0')
episode: 115 training return: tensor(-1709.5723, device='cuda:0')
epoch: 29 test_true_pfm: -3.2890006264939147
episode: 116 training return: tensor(-1700.0884, device='cuda:0')
episode: 117 training return: tensor(-1704.7598, device='cuda:0')
episode: 118 training return: tensor(-1697.9601, device='cuda:0')
episode: 119 training return: tensor(-1703.1395, device='cuda:0')
epoch: 30 test_true_pfm: 7.686720486322986
episode: 120 training return: tensor(-1703.8783, device='cuda:0')
episode: 121 training return: tensor(-1684.0662, device='cuda:0')
episode: 122 training return: tensor(-1687.8878, device='cuda:0')
episode: 123 training return: tensor(-1709.5569, device='cuda:0')
epoch: 31 test_true_pfm: -0.1106054882968482
episode: 124 training return: tensor(-1691.2041, device='cuda:0')
episode: 125 training return: tensor(-1696.1696, device='cuda:0')
episode: 126 training return: tensor(-1691.5146, device='cuda:0')
episode: 127 training return: tensor(-1704.7898, device='cuda:0')
epoch: 32 test_true_pfm: -4.411566342342323
episode: 128 training return: tensor(-1701.0713, device='cuda:0')
episode: 129 training return: tensor(-1715.5409, device='cuda:0')
episode: 130 training return: tensor(-1701.7173, device='cuda:0')
episode: 131 training return: tensor(-1699.5352, device='cuda:0')
epoch: 33 test_true_pfm: 1.472494022686656
episode: 132 training return: tensor(-1700.3840, device='cuda:0')
episode: 133 training return: tensor(-1706.3986, device='cuda:0')
episode: 134 training return: tensor(-1700.7686, device='cuda:0')
episode: 135 training return: tensor(-1688.6849, device='cuda:0')
epoch: 34 test_true_pfm: 2.9191355429949244
episode: 136 training return: tensor(-1704.8704, device='cuda:0')
episode: 137 training return: tensor(-1705.4979, device='cuda:0')
episode: 138 training return: tensor(-1706.7103, device='cuda:0')
episode: 139 training return: tensor(-1698.5059, device='cuda:0')
epoch: 35 test_true_pfm: 12.863005299703628
episode: 140 training return: tensor(-1694.4259, device='cuda:0')
episode: 141 training return: tensor(-1689.6478, device='cuda:0')
episode: 142 training return: tensor(-1684.3157, device='cuda:0')
episode: 143 training return: tensor(-1699.0176, device='cuda:0')
epoch: 36 test_true_pfm: -25.42613900003491
episode: 144 training return: tensor(-1693.1957, device='cuda:0')
episode: 145 training return: tensor(-1703.8876, device='cuda:0')
episode: 146 training return: tensor(-1698.0873, device='cuda:0')
episode: 147 training return: tensor(-1695.7505, device='cuda:0')
epoch: 37 test_true_pfm: 1.6245826551500937
episode: 148 training return: tensor(-1702.4808, device='cuda:0')
episode: 149 training return: tensor(-1698.7512, device='cuda:0')
episode: 150 training return: tensor(-1704.0834, device='cuda:0')
episode: 151 training return: tensor(-1697.0154, device='cuda:0')
epoch: 38 test_true_pfm: 14.493792543142249
episode: 152 training return: tensor(-1699.6843, device='cuda:0')
episode: 153 training return: tensor(-1698.0939, device='cuda:0')
episode: 154 training return: tensor(-1693.5533, device='cuda:0')
episode: 155 training return: tensor(-1698.5024, device='cuda:0')
epoch: 39 test_true_pfm: 12.371846920225002
episode: 156 training return: tensor(-1694.1036, device='cuda:0')
episode: 157 training return: tensor(-1689.6708, device='cuda:0')
episode: 158 training return: tensor(-1697.3811, device='cuda:0')
episode: 159 training return: tensor(-1688.3705, device='cuda:0')
epoch: 40 test_true_pfm: 6.6513454023714935
episode: 160 training return: tensor(-1681.9816, device='cuda:0')
episode: 161 training return: tensor(-1696.2700, device='cuda:0')
episode: 162 training return: tensor(-1694.2699, device='cuda:0')
episode: 163 training return: tensor(-1701.0613, device='cuda:0')
epoch: 41 test_true_pfm: 5.1312145741333435
episode: 164 training return: tensor(-1694.8938, device='cuda:0')
episode: 165 training return: tensor(-1689.9459, device='cuda:0')
episode: 166 training return: tensor(-1703.2874, device='cuda:0')
episode: 167 training return: tensor(-1691.4056, device='cuda:0')
epoch: 42 test_true_pfm: 16.2174729941718
episode: 168 training return: tensor(-1693.3704, device='cuda:0')
episode: 169 training return: tensor(-1684.9276, device='cuda:0')
episode: 170 training return: tensor(-1695.6713, device='cuda:0')
episode: 171 training return: tensor(-1702.7156, device='cuda:0')
epoch: 43 test_true_pfm: 13.512732012972366
episode: 172 training return: tensor(-1691.5807, device='cuda:0')
episode: 173 training return: tensor(-1702.8755, device='cuda:0')
episode: 174 training return: tensor(-1704.0243, device='cuda:0')
episode: 175 training return: tensor(-1697.9076, device='cuda:0')
epoch: 44 test_true_pfm: 3.9011296334139165
episode: 176 training return: tensor(-1694.2278, device='cuda:0')
episode: 177 training return: tensor(-1707.8645, device='cuda:0')
episode: 178 training return: tensor(-1706.6151, device='cuda:0')
episode: 179 training return: tensor(-1682.8861, device='cuda:0')
epoch: 45 test_true_pfm: 6.641658283243935
episode: 180 training return: tensor(-1698.6310, device='cuda:0')
episode: 181 training return: tensor(-1699.0066, device='cuda:0')
episode: 182 training return: tensor(-1693.7020, device='cuda:0')
episode: 183 training return: tensor(-1711.6847, device='cuda:0')
epoch: 46 test_true_pfm: 10.296220089754744
episode: 184 training return: tensor(-1695.2163, device='cuda:0')
episode: 185 training return: tensor(-1693.3815, device='cuda:0')
episode: 186 training return: tensor(-1687.8916, device='cuda:0')
episode: 187 training return: tensor(-1693.1598, device='cuda:0')
epoch: 47 test_true_pfm: 12.750454275558468
episode: 188 training return: tensor(-1696.8846, device='cuda:0')
episode: 189 training return: tensor(-1694.7908, device='cuda:0')
episode: 190 training return: tensor(-1702.8618, device='cuda:0')
episode: 191 training return: tensor(-1697.0396, device='cuda:0')
epoch: 48 test_true_pfm: 13.474511724698816
episode: 192 training return: tensor(-1692.9342, device='cuda:0')
episode: 193 training return: tensor(-1693.1163, device='cuda:0')
episode: 194 training return: tensor(-1692.0967, device='cuda:0')
episode: 195 training return: tensor(-1703.1259, device='cuda:0')
epoch: 49 test_true_pfm: 3.389481865167633
episode: 196 training return: tensor(-1697.6036, device='cuda:0')
episode: 197 training return: tensor(-1694.6362, device='cuda:0')
episode: 198 training return: tensor(-1699.2651, device='cuda:0')
episode: 199 training return: tensor(-1701.6427, device='cuda:0')
epoch: 50 test_true_pfm: 12.372056818343523
episode: 200 training return: tensor(-1696.4384, device='cuda:0')
episode: 201 training return: tensor(-1699.2152, device='cuda:0')
episode: 202 training return: tensor(-1701.5330, device='cuda:0')
episode: 203 training return: tensor(-1686.5389, device='cuda:0')
epoch: 51 test_true_pfm: 10.226190090405725
episode: 204 training return: tensor(-1694.6499, device='cuda:0')
episode: 205 training return: tensor(-1694.8304, device='cuda:0')
episode: 206 training return: tensor(-1689.0537, device='cuda:0')
episode: 207 training return: tensor(-1689.8066, device='cuda:0')
epoch: 52 test_true_pfm: 15.741527102914041
episode: 208 training return: tensor(-1706.8080, device='cuda:0')
episode: 209 training return: tensor(-1700.3470, device='cuda:0')
episode: 210 training return: tensor(-1698.4786, device='cuda:0')
episode: 211 training return: tensor(-1686.9985, device='cuda:0')
epoch: 53 test_true_pfm: 11.801581137868338
episode: 212 training return: tensor(-1706.8181, device='cuda:0')
episode: 213 training return: tensor(-1695.8899, device='cuda:0')
episode: 214 training return: tensor(-1691.9698, device='cuda:0')
episode: 215 training return: tensor(-1700.0929, device='cuda:0')
epoch: 54 test_true_pfm: 15.622058849113577
episode: 216 training return: tensor(-1697.6929, device='cuda:0')
episode: 217 training return: tensor(-1696.1561, device='cuda:0')
episode: 218 training return: tensor(-1693.9008, device='cuda:0')
episode: 219 training return: tensor(-1694.7148, device='cuda:0')
epoch: 55 test_true_pfm: 16.284560053834312
episode: 220 training return: tensor(-1688.2217, device='cuda:0')
episode: 221 training return: tensor(-1693.2039, device='cuda:0')
episode: 222 training return: tensor(-1703.1036, device='cuda:0')
episode: 223 training return: tensor(-1696.8612, device='cuda:0')
epoch: 56 test_true_pfm: -0.6549536859575876
episode: 224 training return: tensor(-1700.4535, device='cuda:0')
episode: 225 training return: tensor(-1685.1311, device='cuda:0')
episode: 226 training return: tensor(-1692.7751, device='cuda:0')
episode: 227 training return: tensor(-1703.3795, device='cuda:0')
epoch: 57 test_true_pfm: 19.579109072074193
episode: 228 training return: tensor(-1696.5151, device='cuda:0')
episode: 229 training return: tensor(-1692.0341, device='cuda:0')
episode: 230 training return: tensor(-1701.7091, device='cuda:0')
episode: 231 training return: tensor(-1688.9712, device='cuda:0')
epoch: 58 test_true_pfm: 14.954960285985626
episode: 232 training return: tensor(-1697.1647, device='cuda:0')
episode: 233 training return: tensor(-1694.3732, device='cuda:0')
episode: 234 training return: tensor(-1715.4779, device='cuda:0')
episode: 235 training return: tensor(-1700.4194, device='cuda:0')
epoch: 59 test_true_pfm: -0.2898357417051394
episode: 236 training return: tensor(-1700.8859, device='cuda:0')
episode: 237 training return: tensor(-1697.6331, device='cuda:0')
episode: 238 training return: tensor(-1689.3889, device='cuda:0')
episode: 239 training return: tensor(-1684.2334, device='cuda:0')
epoch: 60 test_true_pfm: 16.62145847664053
episode: 240 training return: tensor(-1698.4639, device='cuda:0')
episode: 241 training return: tensor(-1686.7780, device='cuda:0')
episode: 242 training return: tensor(-1692.7435, device='cuda:0')
episode: 243 training return: tensor(-1686.0732, device='cuda:0')
epoch: 61 test_true_pfm: 14.542363859817442
episode: 244 training return: tensor(-1694.4595, device='cuda:0')
episode: 245 training return: tensor(-1698.6881, device='cuda:0')
episode: 246 training return: tensor(-1695.2390, device='cuda:0')
episode: 247 training return: tensor(-1698.6721, device='cuda:0')
epoch: 62 test_true_pfm: 17.02749229637721
episode: 248 training return: tensor(-1696.9583, device='cuda:0')
episode: 249 training return: tensor(-1700.3368, device='cuda:0')
episode: 250 training return: tensor(-1696.3621, device='cuda:0')
episode: 251 training return: tensor(-1683.7274, device='cuda:0')
epoch: 63 test_true_pfm: 11.122363593722676
episode: 252 training return: tensor(-1685.6238, device='cuda:0')
episode: 253 training return: tensor(-1688.6375, device='cuda:0')
episode: 254 training return: tensor(-1694.8250, device='cuda:0')
episode: 255 training return: tensor(-1693.1378, device='cuda:0')
epoch: 64 test_true_pfm: -1.3636727868933471
episode: 256 training return: tensor(-1688.5428, device='cuda:0')
episode: 257 training return: tensor(-1695.1998, device='cuda:0')
episode: 258 training return: tensor(-1691.4912, device='cuda:0')
episode: 259 training return: tensor(-1706.3494, device='cuda:0')
epoch: 65 test_true_pfm: 14.160050249618592
episode: 260 training return: tensor(-1688.4719, device='cuda:0')
episode: 261 training return: tensor(-1695.8699, device='cuda:0')
episode: 262 training return: tensor(-1698.5588, device='cuda:0')
episode: 263 training return: tensor(-1697.7272, device='cuda:0')
epoch: 66 test_true_pfm: 8.748922593454353
episode: 264 training return: tensor(-1705.1927, device='cuda:0')
episode: 265 training return: tensor(-1700.0828, device='cuda:0')
episode: 266 training return: tensor(-1697.0006, device='cuda:0')
episode: 267 training return: tensor(-1691.2745, device='cuda:0')
epoch: 67 test_true_pfm: 11.131107225291414
episode: 268 training return: tensor(-1703.2725, device='cuda:0')
episode: 269 training return: tensor(-1701.2761, device='cuda:0')
episode: 270 training return: tensor(-1685.7673, device='cuda:0')
episode: 271 training return: tensor(-1717.1525, device='cuda:0')
epoch: 68 test_true_pfm: 5.141450958791507
episode: 272 training return: tensor(-1692.1033, device='cuda:0')
episode: 273 training return: tensor(-1700.9376, device='cuda:0')
episode: 274 training return: tensor(-1701.0366, device='cuda:0')
episode: 275 training return: tensor(-1685.5719, device='cuda:0')
epoch: 69 test_true_pfm: 11.980899209499569
episode: 276 training return: tensor(-1682.8484, device='cuda:0')
episode: 277 training return: tensor(-1700.8358, device='cuda:0')
episode: 278 training return: tensor(-1679.0240, device='cuda:0')
episode: 279 training return: tensor(-1688.1290, device='cuda:0')
epoch: 70 test_true_pfm: 16.20311078514544
episode: 280 training return: tensor(-1698.4227, device='cuda:0')
episode: 281 training return: tensor(-1699.2926, device='cuda:0')
episode: 282 training return: tensor(-1686.8896, device='cuda:0')
episode: 283 training return: tensor(-1687.8431, device='cuda:0')
epoch: 71 test_true_pfm: 2.6148769519474384
episode: 284 training return: tensor(-1695.5676, device='cuda:0')
episode: 285 training return: tensor(-1694.8765, device='cuda:0')
episode: 286 training return: tensor(-1690.1135, device='cuda:0')
episode: 287 training return: tensor(-1699.7474, device='cuda:0')
epoch: 72 test_true_pfm: 14.335982502562388
episode: 288 training return: tensor(-1686.8992, device='cuda:0')
episode: 289 training return: tensor(-1690.8693, device='cuda:0')
episode: 290 training return: tensor(-1693.1758, device='cuda:0')
episode: 291 training return: tensor(-1684.1421, device='cuda:0')
epoch: 73 test_true_pfm: 7.6221420057384925
episode: 292 training return: tensor(-1696.4415, device='cuda:0')
episode: 293 training return: tensor(-1694.3260, device='cuda:0')
episode: 294 training return: tensor(-1697.9510, device='cuda:0')
episode: 295 training return: tensor(-1699.7228, device='cuda:0')
epoch: 74 test_true_pfm: 14.737194212029323
episode: 296 training return: tensor(-1696.3531, device='cuda:0')
episode: 297 training return: tensor(-1704.3964, device='cuda:0')
episode: 298 training return: tensor(-1689.2512, device='cuda:0')
episode: 299 training return: tensor(-1693.7324, device='cuda:0')
epoch: 75 test_true_pfm: 4.320163175253332
episode: 300 training return: tensor(-1702.9833, device='cuda:0')
episode: 301 training return: tensor(-1698.5970, device='cuda:0')
episode: 302 training return: tensor(-1696.9315, device='cuda:0')
episode: 303 training return: tensor(-1698.8665, device='cuda:0')
epoch: 76 test_true_pfm: 5.3139146439145994
episode: 304 training return: tensor(-1687.7800, device='cuda:0')
episode: 305 training return: tensor(-1688.0023, device='cuda:0')
episode: 306 training return: tensor(-1685.4501, device='cuda:0')
episode: 307 training return: tensor(-1689.0527, device='cuda:0')
epoch: 77 test_true_pfm: 9.161042277617836
episode: 308 training return: tensor(-1699.5024, device='cuda:0')
episode: 309 training return: tensor(-1695.8763, device='cuda:0')
episode: 310 training return: tensor(-1696.8716, device='cuda:0')
episode: 311 training return: tensor(-1686.2507, device='cuda:0')
epoch: 78 test_true_pfm: 9.772262862412225
episode: 312 training return: tensor(-1691.9054, device='cuda:0')
episode: 313 training return: tensor(-1697.3374, device='cuda:0')
episode: 314 training return: tensor(-1700.9512, device='cuda:0')
episode: 315 training return: tensor(-1698.2582, device='cuda:0')
epoch: 79 test_true_pfm: 1.2681655680936714
episode: 316 training return: tensor(-1699.5583, device='cuda:0')
episode: 317 training return: tensor(-1699.3092, device='cuda:0')
episode: 318 training return: tensor(-1686.2209, device='cuda:0')
episode: 319 training return: tensor(-1688.0359, device='cuda:0')
epoch: 80 test_true_pfm: 0.4005839593878288
episode: 320 training return: tensor(-1692.1505, device='cuda:0')
episode: 321 training return: tensor(-1690.5439, device='cuda:0')
episode: 322 training return: tensor(-1699.0690, device='cuda:0')
episode: 323 training return: tensor(-1702.5452, device='cuda:0')
epoch: 81 test_true_pfm: 7.792662493178935
episode: 324 training return: tensor(-1700.1901, device='cuda:0')
episode: 325 training return: tensor(-1686.3711, device='cuda:0')
episode: 326 training return: tensor(-1693.6624, device='cuda:0')
episode: 327 training return: tensor(-1700.5809, device='cuda:0')
epoch: 82 test_true_pfm: 8.31387890654168
episode: 328 training return: tensor(-1686.0414, device='cuda:0')
episode: 329 training return: tensor(-1689.9854, device='cuda:0')
episode: 330 training return: tensor(-1707.4954, device='cuda:0')
episode: 331 training return: tensor(-1696.7141, device='cuda:0')
epoch: 83 test_true_pfm: 4.4208627953546555
episode: 332 training return: tensor(-1698.7113, device='cuda:0')
episode: 333 training return: tensor(-1687.4653, device='cuda:0')
episode: 334 training return: tensor(-1691.5903, device='cuda:0')
episode: 335 training return: tensor(-1691.8439, device='cuda:0')
epoch: 84 test_true_pfm: 2.9635074492704683
episode: 336 training return: tensor(-1688.9620, device='cuda:0')
episode: 337 training return: tensor(-1688.4255, device='cuda:0')
episode: 338 training return: tensor(-1690.7360, device='cuda:0')
episode: 339 training return: tensor(-1686.8418, device='cuda:0')
epoch: 85 test_true_pfm: 7.434103099597793
episode: 340 training return: tensor(-1690.5756, device='cuda:0')
episode: 341 training return: tensor(-1687.5917, device='cuda:0')
episode: 342 training return: tensor(-1699.6182, device='cuda:0')
episode: 343 training return: tensor(-1691.9688, device='cuda:0')
epoch: 86 test_true_pfm: 2.0098822679844446
episode: 344 training return: tensor(-1686.2222, device='cuda:0')
episode: 345 training return: tensor(-1695.8097, device='cuda:0')
episode: 346 training return: tensor(-1690.9296, device='cuda:0')
episode: 347 training return: tensor(-1695.0992, device='cuda:0')
epoch: 87 test_true_pfm: 10.672099574237322
episode: 348 training return: tensor(-1682.5747, device='cuda:0')
episode: 349 training return: tensor(-1694.2301, device='cuda:0')
episode: 350 training return: tensor(-1693.0532, device='cuda:0')
episode: 351 training return: tensor(-1689.0059, device='cuda:0')
epoch: 88 test_true_pfm: 12.867169203268881
episode: 352 training return: tensor(-1696.3358, device='cuda:0')
episode: 353 training return: tensor(-1694.5176, device='cuda:0')
episode: 354 training return: tensor(-1702.0485, device='cuda:0')
episode: 355 training return: tensor(-1692.7283, device='cuda:0')
epoch: 89 test_true_pfm: -1.0900231958824464
episode: 356 training return: tensor(-1694.6910, device='cuda:0')
episode: 357 training return: tensor(-1689.4512, device='cuda:0')
episode: 358 training return: tensor(-1690.7844, device='cuda:0')
episode: 359 training return: tensor(-1690.1891, device='cuda:0')
epoch: 90 test_true_pfm: -0.23693613088927043
episode: 360 training return: tensor(-1699.5354, device='cuda:0')
episode: 361 training return: tensor(-1694.2490, device='cuda:0')
episode: 362 training return: tensor(-1693.3287, device='cuda:0')
episode: 363 training return: tensor(-1691.9402, device='cuda:0')
epoch: 91 test_true_pfm: 5.523379459626038
episode: 364 training return: tensor(-1701.5886, device='cuda:0')
episode: 365 training return: tensor(-1688.7311, device='cuda:0')
episode: 366 training return: tensor(-1691.2502, device='cuda:0')
episode: 367 training return: tensor(-1695.9512, device='cuda:0')
epoch: 92 test_true_pfm: 6.224395247771589
episode: 368 training return: tensor(-1689.0948, device='cuda:0')
episode: 369 training return: tensor(-1692.1575, device='cuda:0')
episode: 370 training return: tensor(-1694.4102, device='cuda:0')
episode: 371 training return: tensor(-1690.5286, device='cuda:0')
epoch: 93 test_true_pfm: 6.331335122715815
episode: 372 training return: tensor(-1678.5636, device='cuda:0')
episode: 373 training return: tensor(-1683.7572, device='cuda:0')
episode: 374 training return: tensor(-1691.8812, device='cuda:0')
episode: 375 training return: tensor(-1688.9485, device='cuda:0')
epoch: 94 test_true_pfm: 6.5428168060434615
episode: 376 training return: tensor(-1692.4109, device='cuda:0')
episode: 377 training return: tensor(-1698.9688, device='cuda:0')
episode: 378 training return: tensor(-1687.9755, device='cuda:0')
episode: 379 training return: tensor(-1686.0581, device='cuda:0')
epoch: 95 test_true_pfm: 3.412329933530612
episode: 380 training return: tensor(-1693.3085, device='cuda:0')
episode: 381 training return: tensor(-1686.7247, device='cuda:0')
episode: 382 training return: tensor(-1706.6771, device='cuda:0')
episode: 383 training return: tensor(-1680.9215, device='cuda:0')
epoch: 96 test_true_pfm: -0.6323647172416611
episode: 384 training return: tensor(-1688.9695, device='cuda:0')
episode: 385 training return: tensor(-1696.1816, device='cuda:0')
episode: 386 training return: tensor(-1681.0641, device='cuda:0')
episode: 387 training return: tensor(-1691.9308, device='cuda:0')
epoch: 97 test_true_pfm: 3.587536480454753
episode: 388 training return: tensor(-1703.0062, device='cuda:0')
episode: 389 training return: tensor(-1692.6210, device='cuda:0')
episode: 390 training return: tensor(-1701.4089, device='cuda:0')
episode: 391 training return: tensor(-1694.9247, device='cuda:0')
epoch: 98 test_true_pfm: 13.959641691776007
episode: 392 training return: tensor(-1694.4620, device='cuda:0')
episode: 393 training return: tensor(-1692.3999, device='cuda:0')
episode: 394 training return: tensor(-1690.8354, device='cuda:0')
episode: 395 training return: tensor(-1689.4214, device='cuda:0')
epoch: 99 test_true_pfm: 3.2490442056181035
episode: 396 training return: tensor(-1694.6422, device='cuda:0')
episode: 397 training return: tensor(-1687.9716, device='cuda:0')
episode: 398 training return: tensor(-1692.5729, device='cuda:0')
episode: 399 training return: tensor(-1705.3198, device='cuda:0')
epoch: 100 test_true_pfm: 9.118321219531948
episode: 400 training return: tensor(-1692.9496, device='cuda:0')
episode: 401 training return: tensor(-1688.1072, device='cuda:0')
episode: 402 training return: tensor(-1688.5825, device='cuda:0')
episode: 403 training return: tensor(-1679.1594, device='cuda:0')
epoch: 101 test_true_pfm: 1.3499698227191956
episode: 404 training return: tensor(-1698.0656, device='cuda:0')
episode: 405 training return: tensor(-1694.0222, device='cuda:0')
episode: 406 training return: tensor(-1689.6190, device='cuda:0')
episode: 407 training return: tensor(-1684.1948, device='cuda:0')
epoch: 102 test_true_pfm: 7.5025517023160075
episode: 408 training return: tensor(-1686.5875, device='cuda:0')
episode: 409 training return: tensor(-1702.2115, device='cuda:0')
episode: 410 training return: tensor(-1678.8199, device='cuda:0')
episode: 411 training return: tensor(-1682.7777, device='cuda:0')
epoch: 103 test_true_pfm: 11.293258024432381
episode: 412 training return: tensor(-1691.0291, device='cuda:0')
episode: 413 training return: tensor(-1677.5553, device='cuda:0')
episode: 414 training return: tensor(-1689.8446, device='cuda:0')
episode: 415 training return: tensor(-1685.7153, device='cuda:0')
epoch: 104 test_true_pfm: 6.310709626910069
episode: 416 training return: tensor(-1692.9491, device='cuda:0')
episode: 417 training return: tensor(-1690.8949, device='cuda:0')
episode: 418 training return: tensor(-1690.0066, device='cuda:0')
episode: 419 training return: tensor(-1696.4967, device='cuda:0')
epoch: 105 test_true_pfm: 2.5016725106488535
episode: 420 training return: tensor(-1699.6849, device='cuda:0')
episode: 421 training return: tensor(-1691.7151, device='cuda:0')
episode: 422 training return: tensor(-1692.5312, device='cuda:0')
episode: 423 training return: tensor(-1694.1517, device='cuda:0')
epoch: 106 test_true_pfm: 7.406112913097282
episode: 424 training return: tensor(-1684.3125, device='cuda:0')
episode: 425 training return: tensor(-1691.0920, device='cuda:0')
episode: 426 training return: tensor(-1692.9418, device='cuda:0')
episode: 427 training return: tensor(-1685.2410, device='cuda:0')
epoch: 107 test_true_pfm: 6.9228346931754805
episode: 428 training return: tensor(-1690.5968, device='cuda:0')
episode: 429 training return: tensor(-1698.6541, device='cuda:0')
episode: 430 training return: tensor(-1698.1826, device='cuda:0')
episode: 431 training return: tensor(-1688.3179, device='cuda:0')
epoch: 108 test_true_pfm: 2.7440263421002284
episode: 432 training return: tensor(-1695.3881, device='cuda:0')
episode: 433 training return: tensor(-1684.9021, device='cuda:0')
episode: 434 training return: tensor(-1690.8776, device='cuda:0')
episode: 435 training return: tensor(-1695.0074, device='cuda:0')
epoch: 109 test_true_pfm: 3.759910449278935
episode: 436 training return: tensor(-1693.4346, device='cuda:0')
episode: 437 training return: tensor(-1699.9624, device='cuda:0')
episode: 438 training return: tensor(-1692.8385, device='cuda:0')
episode: 439 training return: tensor(-1685.1869, device='cuda:0')
epoch: 110 test_true_pfm: 4.135104296606703
episode: 440 training return: tensor(-1674.7394, device='cuda:0')
episode: 441 training return: tensor(-1706.3912, device='cuda:0')
episode: 442 training return: tensor(-1694.9072, device='cuda:0')
episode: 443 training return: tensor(-1683.1971, device='cuda:0')
epoch: 111 test_true_pfm: -2.5551696323477917
episode: 444 training return: tensor(-1696.9874, device='cuda:0')
episode: 445 training return: tensor(-1686.9584, device='cuda:0')
episode: 446 training return: tensor(-1687.0403, device='cuda:0')
episode: 447 training return: tensor(-1690.1663, device='cuda:0')
epoch: 112 test_true_pfm: 16.287989534559085
episode: 448 training return: tensor(-1694.0891, device='cuda:0')
episode: 449 training return: tensor(-1693.8247, device='cuda:0')
episode: 450 training return: tensor(-1685.8420, device='cuda:0')
episode: 451 training return: tensor(-1683.6194, device='cuda:0')
epoch: 113 test_true_pfm: 2.466782537577791
episode: 452 training return: tensor(-1684.0415, device='cuda:0')
episode: 453 training return: tensor(-1706.8853, device='cuda:0')
episode: 454 training return: tensor(-1673.3241, device='cuda:0')
episode: 455 training return: tensor(-1703.6146, device='cuda:0')
epoch: 114 test_true_pfm: 0.5366446121682386
episode: 456 training return: tensor(-1695.5256, device='cuda:0')
episode: 457 training return: tensor(-1690.0400, device='cuda:0')
episode: 458 training return: tensor(-1698.4271, device='cuda:0')
episode: 459 training return: tensor(-1692.0719, device='cuda:0')
epoch: 115 test_true_pfm: 1.7173833041274567
episode: 460 training return: tensor(-1691.6969, device='cuda:0')
episode: 461 training return: tensor(-1677.3921, device='cuda:0')
episode: 462 training return: tensor(-1690.1902, device='cuda:0')
episode: 463 training return: tensor(-1692.7074, device='cuda:0')
epoch: 116 test_true_pfm: 5.44729761025182
episode: 464 training return: tensor(-1694.1790, device='cuda:0')
episode: 465 training return: tensor(-1684.7933, device='cuda:0')
episode: 466 training return: tensor(-1698.0800, device='cuda:0')
episode: 467 training return: tensor(-1695.2543, device='cuda:0')
epoch: 117 test_true_pfm: 10.105107920063345
episode: 468 training return: tensor(-1695.8514, device='cuda:0')
episode: 469 training return: tensor(-1696.9120, device='cuda:0')
episode: 470 training return: tensor(-1696.0924, device='cuda:0')
episode: 471 training return: tensor(-1689.3242, device='cuda:0')
epoch: 118 test_true_pfm: 6.926544124595346
episode: 472 training return: tensor(-1703.3258, device='cuda:0')
episode: 473 training return: tensor(-1687.1958, device='cuda:0')
episode: 474 training return: tensor(-1690.4110, device='cuda:0')
episode: 475 training return: tensor(-1686.8357, device='cuda:0')
epoch: 119 test_true_pfm: 12.675700472084491
episode: 476 training return: tensor(-1691.1302, device='cuda:0')
episode: 477 training return: tensor(-1696.9893, device='cuda:0')
episode: 478 training return: tensor(-1698.6569, device='cuda:0')
episode: 479 training return: tensor(-1685.8685, device='cuda:0')
epoch: 120 test_true_pfm: 9.78340075818183
episode: 480 training return: tensor(-1696.8274, device='cuda:0')
episode: 481 training return: tensor(-1698.3177, device='cuda:0')
episode: 482 training return: tensor(-1688.2714, device='cuda:0')
episode: 483 training return: tensor(-1694.7590, device='cuda:0')
epoch: 121 test_true_pfm: 2.9234555749525035
episode: 484 training return: tensor(-1685.7290, device='cuda:0')
episode: 485 training return: tensor(-1684.8962, device='cuda:0')
episode: 486 training return: tensor(-1692.2777, device='cuda:0')
episode: 487 training return: tensor(-1691.1187, device='cuda:0')
epoch: 122 test_true_pfm: -1.8850822440477657
episode: 488 training return: tensor(-1690.9565, device='cuda:0')
episode: 489 training return: tensor(-1694.8085, device='cuda:0')
episode: 490 training return: tensor(-1682.3419, device='cuda:0')
episode: 491 training return: tensor(-1686.2780, device='cuda:0')
epoch: 123 test_true_pfm: -18.983013345286643
episode: 492 training return: tensor(-1693.1312, device='cuda:0')
episode: 493 training return: tensor(-1690.3508, device='cuda:0')
episode: 494 training return: tensor(-1687.2452, device='cuda:0')
episode: 495 training return: tensor(-1690.3553, device='cuda:0')
epoch: 124 test_true_pfm: 3.9183226003090064
episode: 496 training return: tensor(-1690.7900, device='cuda:0')
episode: 497 training return: tensor(-1689.5594, device='cuda:0')
episode: 498 training return: tensor(-1682.7731, device='cuda:0')
episode: 499 training return: tensor(-1693.3457, device='cuda:0')
epoch: 125 test_true_pfm: 3.110706107560739
episode: 500 training return: tensor(-1691.5428, device='cuda:0')
episode: 501 training return: tensor(-1686.5094, device='cuda:0')
episode: 502 training return: tensor(-1683.9937, device='cuda:0')
episode: 503 training return: tensor(-1685.7493, device='cuda:0')
epoch: 126 test_true_pfm: -5.884079077792243
episode: 504 training return: tensor(-1688.4569, device='cuda:0')
episode: 505 training return: tensor(-1694.3495, device='cuda:0')
episode: 506 training return: tensor(-1685.0586, device='cuda:0')
episode: 507 training return: tensor(-1695.0741, device='cuda:0')
epoch: 127 test_true_pfm: 8.823441263687991
episode: 508 training return: tensor(-1694.7682, device='cuda:0')
episode: 509 training return: tensor(-1696.2535, device='cuda:0')
episode: 510 training return: tensor(-1693.3076, device='cuda:0')
episode: 511 training return: tensor(-1685.2131, device='cuda:0')
epoch: 128 test_true_pfm: -3.8783160558774337
episode: 512 training return: tensor(-1684.0806, device='cuda:0')
episode: 513 training return: tensor(-1682.9752, device='cuda:0')
episode: 514 training return: tensor(-1687.6329, device='cuda:0')
episode: 515 training return: tensor(-1691.5424, device='cuda:0')
epoch: 129 test_true_pfm: 2.912164780011267
episode: 516 training return: tensor(-1701.1035, device='cuda:0')
episode: 517 training return: tensor(-1679.6692, device='cuda:0')
episode: 518 training return: tensor(-1691.3448, device='cuda:0')
episode: 519 training return: tensor(-1689.1451, device='cuda:0')
epoch: 130 test_true_pfm: 8.13195982980074
episode: 520 training return: tensor(-1697.0164, device='cuda:0')
episode: 521 training return: tensor(-1686.3579, device='cuda:0')
episode: 522 training return: tensor(-1683.7499, device='cuda:0')
episode: 523 training return: tensor(-1698.7443, device='cuda:0')
epoch: 131 test_true_pfm: 5.939094415873675
episode: 524 training return: tensor(-1682.4916, device='cuda:0')
episode: 525 training return: tensor(-1690.4672, device='cuda:0')
episode: 526 training return: tensor(-1681.7211, device='cuda:0')
episode: 527 training return: tensor(-1690.4708, device='cuda:0')
epoch: 132 test_true_pfm: 7.060051753873176
episode: 528 training return: tensor(-1688.9786, device='cuda:0')
episode: 529 training return: tensor(-1688.3207, device='cuda:0')
episode: 530 training return: tensor(-1683.3314, device='cuda:0')
episode: 531 training return: tensor(-1717.6606, device='cuda:0')
epoch: 133 test_true_pfm: 16.141930937489665
episode: 532 training return: tensor(-1693.1904, device='cuda:0')
episode: 533 training return: tensor(-1681.5430, device='cuda:0')
episode: 534 training return: tensor(-1687.7206, device='cuda:0')
episode: 535 training return: tensor(-1688.9398, device='cuda:0')
epoch: 134 test_true_pfm: 5.461670371144137
episode: 536 training return: tensor(-1692.8300, device='cuda:0')
episode: 537 training return: tensor(-1684.5516, device='cuda:0')
episode: 538 training return: tensor(-1681.3126, device='cuda:0')
episode: 539 training return: tensor(-1684.1558, device='cuda:0')
epoch: 135 test_true_pfm: -0.37832197878424684
episode: 540 training return: tensor(-1694.1414, device='cuda:0')
episode: 541 training return: tensor(-1687.2477, device='cuda:0')
episode: 542 training return: tensor(-1699.5649, device='cuda:0')
episode: 543 training return: tensor(-1693.3237, device='cuda:0')
epoch: 136 test_true_pfm: 6.883986207430657
episode: 544 training return: tensor(-1685.5190, device='cuda:0')
episode: 545 training return: tensor(-1692.7002, device='cuda:0')
episode: 546 training return: tensor(-1688.7567, device='cuda:0')
episode: 547 training return: tensor(-1689.3555, device='cuda:0')
epoch: 137 test_true_pfm: 4.476142529229361
episode: 548 training return: tensor(-1684.1533, device='cuda:0')
episode: 549 training return: tensor(-1683.5975, device='cuda:0')
episode: 550 training return: tensor(-1681.0499, device='cuda:0')
episode: 551 training return: tensor(-1679.6510, device='cuda:0')
epoch: 138 test_true_pfm: 1.1375920122722463
episode: 552 training return: tensor(-1681.5375, device='cuda:0')
episode: 553 training return: tensor(-1695.7151, device='cuda:0')
episode: 554 training return: tensor(-1683.6427, device='cuda:0')
episode: 555 training return: tensor(-1676.9626, device='cuda:0')
epoch: 139 test_true_pfm: 9.303197122471927
episode: 556 training return: tensor(-1691.5458, device='cuda:0')
episode: 557 training return: tensor(-1679.2954, device='cuda:0')
episode: 558 training return: tensor(-1688.3223, device='cuda:0')
episode: 559 training return: tensor(-1683.1478, device='cuda:0')
epoch: 140 test_true_pfm: 12.143798705856193
episode: 560 training return: tensor(-1681.6066, device='cuda:0')
episode: 561 training return: tensor(-1687.0884, device='cuda:0')
episode: 562 training return: tensor(-1687.7428, device='cuda:0')
episode: 563 training return: tensor(-1686.0117, device='cuda:0')
epoch: 141 test_true_pfm: 13.71678849502275
episode: 564 training return: tensor(-1689.4077, device='cuda:0')
episode: 565 training return: tensor(-1688.2794, device='cuda:0')
episode: 566 training return: tensor(-1684.4635, device='cuda:0')
episode: 567 training return: tensor(-1693.3101, device='cuda:0')
epoch: 142 test_true_pfm: 8.297313987597432
episode: 568 training return: tensor(-1676.8782, device='cuda:0')
episode: 569 training return: tensor(-1681.7295, device='cuda:0')
episode: 570 training return: tensor(-1693.4948, device='cuda:0')
episode: 571 training return: tensor(-1691.5005, device='cuda:0')
epoch: 143 test_true_pfm: 3.5410122108261195
episode: 572 training return: tensor(-1678.3071, device='cuda:0')
episode: 573 training return: tensor(-1685.6968, device='cuda:0')
episode: 574 training return: tensor(-1680.6508, device='cuda:0')
episode: 575 training return: tensor(-1689.4961, device='cuda:0')
epoch: 144 test_true_pfm: 6.621718951649799
episode: 576 training return: tensor(-1692.2117, device='cuda:0')
episode: 577 training return: tensor(-1681.2987, device='cuda:0')
episode: 578 training return: tensor(-1683.8484, device='cuda:0')
episode: 579 training return: tensor(-1691.2845, device='cuda:0')
epoch: 145 test_true_pfm: 4.934543270898517
episode: 580 training return: tensor(-1689.1235, device='cuda:0')
episode: 581 training return: tensor(-1689.3975, device='cuda:0')
episode: 582 training return: tensor(-1680.3732, device='cuda:0')
episode: 583 training return: tensor(-1685.5887, device='cuda:0')
epoch: 146 test_true_pfm: 0.6746107829230068
episode: 584 training return: tensor(-1680.2174, device='cuda:0')
episode: 585 training return: tensor(-1686.6820, device='cuda:0')
episode: 586 training return: tensor(-1687.6998, device='cuda:0')
episode: 587 training return: tensor(-1687.7416, device='cuda:0')
epoch: 147 test_true_pfm: 0.20928821080829607
episode: 588 training return: tensor(-1684.6682, device='cuda:0')
episode: 589 training return: tensor(-1682.1818, device='cuda:0')
episode: 590 training return: tensor(-1692.7855, device='cuda:0')
episode: 591 training return: tensor(-1688.9237, device='cuda:0')
epoch: 148 test_true_pfm: 11.513523683234338
episode: 592 training return: tensor(-1687.5216, device='cuda:0')
episode: 593 training return: tensor(-1681.1255, device='cuda:0')
episode: 594 training return: tensor(-1674.8436, device='cuda:0')
episode: 595 training return: tensor(-1683.2084, device='cuda:0')
epoch: 149 test_true_pfm: 9.52550808766101
episode: 596 training return: tensor(-1686.1293, device='cuda:0')
episode: 597 training return: tensor(-1691.9253, device='cuda:0')
episode: 598 training return: tensor(-1677.6809, device='cuda:0')
episode: 599 training return: tensor(-1689.6096, device='cuda:0')
epoch: 150 test_true_pfm: 7.662065438629425
