episode: 0 training return: tensor(-999.9905, device='cuda:0')
episode: 1 training return: tensor(-999.9891, device='cuda:0')
episode: 2 training return: tensor(-999.9918, device='cuda:0')
epoch: 1 test_true_pfm: -0.10263109239566615
episode: 3 training return: tensor(-999.9897, device='cuda:0')
episode: 4 training return: tensor(-999.9894, device='cuda:0')
episode: 5 training return: tensor(-999.9916, device='cuda:0')
epoch: 2 test_true_pfm: -0.14745030348702015
episode: 6 training return: tensor(-999.9882, device='cuda:0')
episode: 7 training return: tensor(-999.9935, device='cuda:0')
episode: 8 training return: tensor(-999.9855, device='cuda:0')
epoch: 3 test_true_pfm: -0.47919970512588445
episode: 9 training return: tensor(-999.9874, device='cuda:0')
episode: 10 training return: tensor(-999.9954, device='cuda:0')
episode: 11 training return: tensor(-999.9896, device='cuda:0')
epoch: 4 test_true_pfm: -1.0070635978297358
episode: 12 training return: tensor(-999.9927, device='cuda:0')
episode: 13 training return: tensor(-999.9879, device='cuda:0')
episode: 14 training return: tensor(-999.9920, device='cuda:0')
epoch: 5 test_true_pfm: -0.6831504068339633
episode: 15 training return: tensor(-999.9875, device='cuda:0')
episode: 16 training return: tensor(-999.9904, device='cuda:0')
episode: 17 training return: tensor(-999.9916, device='cuda:0')
epoch: 6 test_true_pfm: 0.015248968748888666
episode: 18 training return: tensor(-999.9891, device='cuda:0')
episode: 19 training return: tensor(-999.9905, device='cuda:0')
episode: 20 training return: tensor(-999.9894, device='cuda:0')
epoch: 7 test_true_pfm: -0.2268141190290799
episode: 21 training return: tensor(-999.9920, device='cuda:0')
episode: 22 training return: tensor(-999.9877, device='cuda:0')
episode: 23 training return: tensor(-999.9915, device='cuda:0')
epoch: 8 test_true_pfm: 0.17485859986519245
episode: 24 training return: tensor(-999.9919, device='cuda:0')
episode: 25 training return: tensor(-999.9930, device='cuda:0')
episode: 26 training return: tensor(-999.9930, device='cuda:0')
epoch: 9 test_true_pfm: -0.7348830962220884
episode: 27 training return: tensor(-999.9916, device='cuda:0')
episode: 28 training return: tensor(-999.9906, device='cuda:0')
episode: 29 training return: tensor(-999.9915, device='cuda:0')
epoch: 10 test_true_pfm: -0.45054715102610743
episode: 30 training return: tensor(-999.9892, device='cuda:0')
episode: 31 training return: tensor(-999.9893, device='cuda:0')
episode: 32 training return: tensor(-999.9935, device='cuda:0')
epoch: 11 test_true_pfm: -0.2218402106038061
episode: 33 training return: tensor(-999.9915, device='cuda:0')
episode: 34 training return: tensor(-999.9916, device='cuda:0')
episode: 35 training return: tensor(-999.9878, device='cuda:0')
epoch: 12 test_true_pfm: -0.31898123586510824
episode: 36 training return: tensor(-999.9932, device='cuda:0')
episode: 37 training return: tensor(-999.9921, device='cuda:0')
episode: 38 training return: tensor(-999.9922, device='cuda:0')
epoch: 13 test_true_pfm: -0.7076228157020669
episode: 39 training return: tensor(-999.9912, device='cuda:0')
episode: 40 training return: tensor(-999.9913, device='cuda:0')
episode: 41 training return: tensor(-999.9907, device='cuda:0')
epoch: 14 test_true_pfm: -0.37081932866184975
episode: 42 training return: tensor(-999.9904, device='cuda:0')
episode: 43 training return: tensor(-999.9903, device='cuda:0')
episode: 44 training return: tensor(-999.9905, device='cuda:0')
epoch: 15 test_true_pfm: -0.37044109086423843
episode: 45 training return: tensor(-999.9918, device='cuda:0')
episode: 46 training return: tensor(-999.9855, device='cuda:0')
episode: 47 training return: tensor(-999.9895, device='cuda:0')
epoch: 16 test_true_pfm: 0.15162059885583667
episode: 48 training return: tensor(-999.9885, device='cuda:0')
episode: 49 training return: tensor(-999.9916, device='cuda:0')
episode: 50 training return: tensor(-999.9887, device='cuda:0')
epoch: 17 test_true_pfm: -0.6711439842904786
episode: 51 training return: tensor(-999.9919, device='cuda:0')
episode: 52 training return: tensor(-999.9887, device='cuda:0')
episode: 53 training return: tensor(-999.9875, device='cuda:0')
epoch: 18 test_true_pfm: -0.41519369489002383
episode: 54 training return: tensor(-999.9879, device='cuda:0')
episode: 55 training return: tensor(-999.9909, device='cuda:0')
episode: 56 training return: tensor(-999.9858, device='cuda:0')
epoch: 19 test_true_pfm: -0.52431099164061
episode: 57 training return: tensor(-999.9918, device='cuda:0')
episode: 58 training return: tensor(-999.9935, device='cuda:0')
episode: 59 training return: tensor(-999.9910, device='cuda:0')
epoch: 20 test_true_pfm: -0.8310029651995183
episode: 60 training return: tensor(-999.9881, device='cuda:0')
episode: 61 training return: tensor(-999.9922, device='cuda:0')
episode: 62 training return: tensor(-999.9934, device='cuda:0')
epoch: 21 test_true_pfm: -0.37630577538033844
episode: 63 training return: tensor(-999.9926, device='cuda:0')
episode: 64 training return: tensor(-999.9861, device='cuda:0')
episode: 65 training return: tensor(-999.9915, device='cuda:0')
epoch: 22 test_true_pfm: -0.7084804771492822
episode: 66 training return: tensor(-999.9919, device='cuda:0')
episode: 67 training return: tensor(-999.9871, device='cuda:0')
episode: 68 training return: tensor(-999.9868, device='cuda:0')
epoch: 23 test_true_pfm: -0.6097192542931048
episode: 69 training return: tensor(-999.9924, device='cuda:0')
episode: 70 training return: tensor(-999.9897, device='cuda:0')
episode: 71 training return: tensor(-999.9891, device='cuda:0')
epoch: 24 test_true_pfm: -0.4030059829697332
episode: 72 training return: tensor(-999.9924, device='cuda:0')
episode: 73 training return: tensor(-999.9921, device='cuda:0')
episode: 74 training return: tensor(-999.9856, device='cuda:0')
epoch: 25 test_true_pfm: -0.375088254376915
episode: 75 training return: tensor(-999.9786, device='cuda:0')
episode: 76 training return: tensor(-999.9882, device='cuda:0')
episode: 77 training return: tensor(-999.9941, device='cuda:0')
epoch: 26 test_true_pfm: -0.29980442279884206
episode: 78 training return: tensor(-999.9902, device='cuda:0')
episode: 79 training return: tensor(-999.9932, device='cuda:0')
episode: 80 training return: tensor(-999.9890, device='cuda:0')
epoch: 27 test_true_pfm: -1.1521839504950748
episode: 81 training return: tensor(-999.9915, device='cuda:0')
episode: 82 training return: tensor(-999.9865, device='cuda:0')
episode: 83 training return: tensor(-999.9907, device='cuda:0')
epoch: 28 test_true_pfm: 0.2029929852427126
episode: 84 training return: tensor(-999.9906, device='cuda:0')
episode: 85 training return: tensor(-999.9933, device='cuda:0')
episode: 86 training return: tensor(-999.9929, device='cuda:0')
epoch: 29 test_true_pfm: 0.3384745067594173
episode: 87 training return: tensor(-999.9868, device='cuda:0')
episode: 88 training return: tensor(-999.9882, device='cuda:0')
episode: 89 training return: tensor(-999.9872, device='cuda:0')
epoch: 30 test_true_pfm: -0.7973869076071552
episode: 90 training return: tensor(-999.9875, device='cuda:0')
episode: 91 training return: tensor(-999.9935, device='cuda:0')
episode: 92 training return: tensor(-999.9902, device='cuda:0')
epoch: 31 test_true_pfm: 0.144710268275982
episode: 93 training return: tensor(-999.9925, device='cuda:0')
episode: 94 training return: tensor(-999.9865, device='cuda:0')
episode: 95 training return: tensor(-999.9903, device='cuda:0')
epoch: 32 test_true_pfm: -0.12505419970142773
episode: 96 training return: tensor(-999.9910, device='cuda:0')
episode: 97 training return: tensor(-999.9932, device='cuda:0')
episode: 98 training return: tensor(-999.9929, device='cuda:0')
epoch: 33 test_true_pfm: -0.5084051233457121
episode: 99 training return: tensor(-999.9868, device='cuda:0')
episode: 100 training return: tensor(-999.9905, device='cuda:0')
episode: 101 training return: tensor(-999.9933, device='cuda:0')
epoch: 34 test_true_pfm: 0.2444662482358321
episode: 102 training return: tensor(-999.9929, device='cuda:0')
episode: 103 training return: tensor(-999.9923, device='cuda:0')
episode: 104 training return: tensor(-999.9877, device='cuda:0')
epoch: 35 test_true_pfm: -0.23182556812883315
episode: 105 training return: tensor(-999.9864, device='cuda:0')
episode: 106 training return: tensor(-999.9798, device='cuda:0')
episode: 107 training return: tensor(-999.9906, device='cuda:0')
epoch: 36 test_true_pfm: -0.21649452395039806
episode: 108 training return: tensor(-999.9916, device='cuda:0')
episode: 109 training return: tensor(-999.9948, device='cuda:0')
episode: 110 training return: tensor(-999.9797, device='cuda:0')
epoch: 37 test_true_pfm: -0.29067346874556665
episode: 111 training return: tensor(-999.9927, device='cuda:0')
episode: 112 training return: tensor(-999.9922, device='cuda:0')
episode: 113 training return: tensor(-999.9922, device='cuda:0')
epoch: 38 test_true_pfm: -0.3545007016700368
episode: 114 training return: tensor(-999.9900, device='cuda:0')
episode: 115 training return: tensor(-999.9909, device='cuda:0')
episode: 116 training return: tensor(-999.9910, device='cuda:0')
epoch: 39 test_true_pfm: 0.20169233476565
episode: 117 training return: tensor(-999.9897, device='cuda:0')
episode: 118 training return: tensor(-999.9888, device='cuda:0')
episode: 119 training return: tensor(-999.9934, device='cuda:0')
epoch: 40 test_true_pfm: -0.5903563349501338
episode: 120 training return: tensor(-999.9929, device='cuda:0')
episode: 121 training return: tensor(-999.9907, device='cuda:0')
episode: 122 training return: tensor(-999.9913, device='cuda:0')
epoch: 41 test_true_pfm: -0.07342247813001916
episode: 123 training return: tensor(-999.9878, device='cuda:0')
episode: 124 training return: tensor(-999.9879, device='cuda:0')
episode: 125 training return: tensor(-999.9906, device='cuda:0')
epoch: 42 test_true_pfm: -0.10907184892753956
episode: 126 training return: tensor(-999.9904, device='cuda:0')
episode: 127 training return: tensor(-999.9932, device='cuda:0')
episode: 128 training return: tensor(-999.9924, device='cuda:0')
epoch: 43 test_true_pfm: -0.7028893980101536
episode: 129 training return: tensor(-999.9901, device='cuda:0')
episode: 130 training return: tensor(-999.9902, device='cuda:0')
episode: 131 training return: tensor(-999.9904, device='cuda:0')
epoch: 44 test_true_pfm: 0.10995987032207655
episode: 132 training return: tensor(-999.9851, device='cuda:0')
episode: 133 training return: tensor(-999.9897, device='cuda:0')
episode: 134 training return: tensor(-999.9911, device='cuda:0')
epoch: 45 test_true_pfm: 0.14802928057176348
episode: 135 training return: tensor(-999.9928, device='cuda:0')
episode: 136 training return: tensor(-999.9910, device='cuda:0')
episode: 137 training return: tensor(-999.9894, device='cuda:0')
epoch: 46 test_true_pfm: -0.5798560975909649
episode: 138 training return: tensor(-999.9927, device='cuda:0')
episode: 139 training return: tensor(-999.9893, device='cuda:0')
episode: 140 training return: tensor(-999.9874, device='cuda:0')
epoch: 47 test_true_pfm: -0.5965957931008917
episode: 141 training return: tensor(-999.9916, device='cuda:0')
episode: 142 training return: tensor(-999.9935, device='cuda:0')
episode: 143 training return: tensor(-999.9906, device='cuda:0')
epoch: 48 test_true_pfm: 0.43164273954410354
episode: 144 training return: tensor(-999.9923, device='cuda:0')
episode: 145 training return: tensor(-999.9919, device='cuda:0')
episode: 146 training return: tensor(-999.9874, device='cuda:0')
epoch: 49 test_true_pfm: 0.4967587772237785
episode: 147 training return: tensor(-999.9908, device='cuda:0')
episode: 148 training return: tensor(-999.9924, device='cuda:0')
episode: 149 training return: tensor(-999.9932, device='cuda:0')
epoch: 50 test_true_pfm: -0.7478836187679212
episode: 150 training return: tensor(-999.9915, device='cuda:0')
episode: 151 training return: tensor(-999.9860, device='cuda:0')
episode: 152 training return: tensor(-999.9901, device='cuda:0')
epoch: 51 test_true_pfm: -0.43295327209881945
episode: 153 training return: tensor(-999.9921, device='cuda:0')
episode: 154 training return: tensor(-999.9862, device='cuda:0')
episode: 155 training return: tensor(-999.9929, device='cuda:0')
epoch: 52 test_true_pfm: 0.1033516677745225
episode: 156 training return: tensor(-999.9870, device='cuda:0')
episode: 157 training return: tensor(-999.9903, device='cuda:0')
episode: 158 training return: tensor(-999.9911, device='cuda:0')
epoch: 53 test_true_pfm: 0.129657864809775
episode: 159 training return: tensor(-999.9911, device='cuda:0')
episode: 160 training return: tensor(-999.9935, device='cuda:0')
episode: 161 training return: tensor(-999.9861, device='cuda:0')
epoch: 54 test_true_pfm: -0.0840921431533725
episode: 162 training return: tensor(-999.9905, device='cuda:0')
episode: 163 training return: tensor(-999.9888, device='cuda:0')
episode: 164 training return: tensor(-999.9936, device='cuda:0')
epoch: 55 test_true_pfm: -0.7212383207021579
episode: 165 training return: tensor(-999.9913, device='cuda:0')
episode: 166 training return: tensor(-999.9938, device='cuda:0')
episode: 167 training return: tensor(-999.9899, device='cuda:0')
epoch: 56 test_true_pfm: 0.29450035257504636
episode: 168 training return: tensor(-999.9910, device='cuda:0')
episode: 169 training return: tensor(-999.9913, device='cuda:0')
episode: 170 training return: tensor(-999.9930, device='cuda:0')
epoch: 57 test_true_pfm: -0.2529412452276015
episode: 171 training return: tensor(-999.9926, device='cuda:0')
episode: 172 training return: tensor(-999.9871, device='cuda:0')
episode: 173 training return: tensor(-999.9879, device='cuda:0')
epoch: 58 test_true_pfm: -0.5059029627385156
episode: 174 training return: tensor(-999.9909, device='cuda:0')
episode: 175 training return: tensor(-999.9905, device='cuda:0')
episode: 176 training return: tensor(-999.9904, device='cuda:0')
epoch: 59 test_true_pfm: -1.0199593050932103
episode: 177 training return: tensor(-999.9927, device='cuda:0')
episode: 178 training return: tensor(-999.9921, device='cuda:0')
episode: 179 training return: tensor(-999.9907, device='cuda:0')
epoch: 60 test_true_pfm: -0.4408889216263079
episode: 180 training return: tensor(-999.9883, device='cuda:0')
episode: 181 training return: tensor(-999.9915, device='cuda:0')
episode: 182 training return: tensor(-999.9938, device='cuda:0')
epoch: 61 test_true_pfm: -0.10676568683460856
episode: 183 training return: tensor(-999.9914, device='cuda:0')
episode: 184 training return: tensor(-999.9913, device='cuda:0')
episode: 185 training return: tensor(-999.9899, device='cuda:0')
epoch: 62 test_true_pfm: -0.19106102167609576
episode: 186 training return: tensor(-999.9867, device='cuda:0')
episode: 187 training return: tensor(-999.9916, device='cuda:0')
episode: 188 training return: tensor(-999.9896, device='cuda:0')
epoch: 63 test_true_pfm: 0.08211961651716582
episode: 189 training return: tensor(-999.9899, device='cuda:0')
episode: 190 training return: tensor(-999.9775, device='cuda:0')
episode: 191 training return: tensor(-999.9863, device='cuda:0')
epoch: 64 test_true_pfm: -0.2480514752565212
episode: 192 training return: tensor(-999.9910, device='cuda:0')
episode: 193 training return: tensor(-999.9858, device='cuda:0')
episode: 194 training return: tensor(-999.9937, device='cuda:0')
epoch: 65 test_true_pfm: 0.3834386945367154
episode: 195 training return: tensor(-999.9858, device='cuda:0')
episode: 196 training return: tensor(-999.9904, device='cuda:0')
episode: 197 training return: tensor(-999.9919, device='cuda:0')
epoch: 66 test_true_pfm: -0.5996161078999148
episode: 198 training return: tensor(-999.9868, device='cuda:0')
episode: 199 training return: tensor(-999.9929, device='cuda:0')
episode: 200 training return: tensor(-999.9867, device='cuda:0')
epoch: 67 test_true_pfm: -0.8297342912466764
episode: 201 training return: tensor(-999.9915, device='cuda:0')
episode: 202 training return: tensor(-999.9931, device='cuda:0')
episode: 203 training return: tensor(-999.9911, device='cuda:0')
epoch: 68 test_true_pfm: 0.03057918129282977
episode: 204 training return: tensor(-999.9925, device='cuda:0')
episode: 205 training return: tensor(-999.9919, device='cuda:0')
episode: 206 training return: tensor(-999.9864, device='cuda:0')
epoch: 69 test_true_pfm: -0.32060564333255087
episode: 207 training return: tensor(-999.9863, device='cuda:0')
episode: 208 training return: tensor(-999.9898, device='cuda:0')
episode: 209 training return: tensor(-999.9880, device='cuda:0')
epoch: 70 test_true_pfm: -0.458632218568738
episode: 210 training return: tensor(-999.9904, device='cuda:0')
episode: 211 training return: tensor(-999.9891, device='cuda:0')
episode: 212 training return: tensor(-999.9886, device='cuda:0')
epoch: 71 test_true_pfm: -0.42350325918126924
episode: 213 training return: tensor(-999.9916, device='cuda:0')
episode: 214 training return: tensor(-999.9928, device='cuda:0')
episode: 215 training return: tensor(-999.9930, device='cuda:0')
epoch: 72 test_true_pfm: -0.16808139995600913
episode: 216 training return: tensor(-999.9916, device='cuda:0')
episode: 217 training return: tensor(-999.9882, device='cuda:0')
episode: 218 training return: tensor(-999.9891, device='cuda:0')
epoch: 73 test_true_pfm: -0.3087183948352125
episode: 219 training return: tensor(-999.9935, device='cuda:0')
episode: 220 training return: tensor(-999.9888, device='cuda:0')
episode: 221 training return: tensor(-999.9911, device='cuda:0')
epoch: 74 test_true_pfm: -0.0611430245826868
episode: 222 training return: tensor(-999.9878, device='cuda:0')
episode: 223 training return: tensor(-999.9900, device='cuda:0')
episode: 224 training return: tensor(-999.9886, device='cuda:0')
epoch: 75 test_true_pfm: -0.26164511265049134
episode: 225 training return: tensor(-999.9910, device='cuda:0')
episode: 226 training return: tensor(-999.9925, device='cuda:0')
episode: 227 training return: tensor(-999.9899, device='cuda:0')
epoch: 76 test_true_pfm: -0.6672137984415061
episode: 228 training return: tensor(-999.9908, device='cuda:0')
episode: 229 training return: tensor(-999.9884, device='cuda:0')
episode: 230 training return: tensor(-999.9911, device='cuda:0')
epoch: 77 test_true_pfm: -0.6714009467309996
episode: 231 training return: tensor(-999.9935, device='cuda:0')
episode: 232 training return: tensor(-999.9905, device='cuda:0')
episode: 233 training return: tensor(-999.9911, device='cuda:0')
epoch: 78 test_true_pfm: -0.32927162767918344
episode: 234 training return: tensor(-999.9904, device='cuda:0')
episode: 235 training return: tensor(-999.9929, device='cuda:0')
episode: 236 training return: tensor(-999.9893, device='cuda:0')
epoch: 79 test_true_pfm: -0.5879482205392231
episode: 237 training return: tensor(-999.9944, device='cuda:0')
episode: 238 training return: tensor(-999.9913, device='cuda:0')
episode: 239 training return: tensor(-999.9877, device='cuda:0')
epoch: 80 test_true_pfm: -0.5344720484620998
episode: 240 training return: tensor(-999.9865, device='cuda:0')
episode: 241 training return: tensor(-999.9916, device='cuda:0')
episode: 242 training return: tensor(-999.9936, device='cuda:0')
epoch: 81 test_true_pfm: -0.0369684206640287
episode: 243 training return: tensor(-999.9927, device='cuda:0')
episode: 244 training return: tensor(-999.9922, device='cuda:0')
episode: 245 training return: tensor(-999.9871, device='cuda:0')
epoch: 82 test_true_pfm: 0.28678002922415774
episode: 246 training return: tensor(-999.9910, device='cuda:0')
episode: 247 training return: tensor(-999.9912, device='cuda:0')
episode: 248 training return: tensor(-999.9907, device='cuda:0')
epoch: 83 test_true_pfm: -0.8152294718744786
episode: 249 training return: tensor(-999.9869, device='cuda:0')
episode: 250 training return: tensor(-999.9929, device='cuda:0')
episode: 251 training return: tensor(-999.9921, device='cuda:0')
epoch: 84 test_true_pfm: -1.084113447870802
episode: 252 training return: tensor(-999.9915, device='cuda:0')
episode: 253 training return: tensor(-999.9872, device='cuda:0')
episode: 254 training return: tensor(-999.9866, device='cuda:0')
epoch: 85 test_true_pfm: -0.5544598030197175
episode: 255 training return: tensor(-999.9936, device='cuda:0')
episode: 256 training return: tensor(-999.9888, device='cuda:0')
episode: 257 training return: tensor(-999.9908, device='cuda:0')
epoch: 86 test_true_pfm: 0.134927693525655
episode: 258 training return: tensor(-999.9926, device='cuda:0')
episode: 259 training return: tensor(-999.9875, device='cuda:0')
episode: 260 training return: tensor(-999.9895, device='cuda:0')
epoch: 87 test_true_pfm: -0.24122519296445577
episode: 261 training return: tensor(-999.9932, device='cuda:0')
episode: 262 training return: tensor(-999.9930, device='cuda:0')
episode: 263 training return: tensor(-999.9838, device='cuda:0')
epoch: 88 test_true_pfm: -0.1483836532907494
episode: 264 training return: tensor(-999.9867, device='cuda:0')
episode: 265 training return: tensor(-999.9883, device='cuda:0')
episode: 266 training return: tensor(-999.9902, device='cuda:0')
epoch: 89 test_true_pfm: -0.00021188563238588864
episode: 267 training return: tensor(-999.9905, device='cuda:0')
episode: 268 training return: tensor(-999.9889, device='cuda:0')
episode: 269 training return: tensor(-999.9821, device='cuda:0')
epoch: 90 test_true_pfm: -0.19703296865507183
episode: 270 training return: tensor(-999.9898, device='cuda:0')
episode: 271 training return: tensor(-999.9913, device='cuda:0')
episode: 272 training return: tensor(-999.9874, device='cuda:0')
epoch: 91 test_true_pfm: -0.7362577024350877
episode: 273 training return: tensor(-999.9929, device='cuda:0')
episode: 274 training return: tensor(-999.9918, device='cuda:0')
episode: 275 training return: tensor(-999.9905, device='cuda:0')
epoch: 92 test_true_pfm: 0.19601605693785254
episode: 276 training return: tensor(-999.9903, device='cuda:0')
episode: 277 training return: tensor(-999.9920, device='cuda:0')
episode: 278 training return: tensor(-999.9928, device='cuda:0')
epoch: 93 test_true_pfm: -0.01855487982675441
episode: 279 training return: tensor(-999.9933, device='cuda:0')
episode: 280 training return: tensor(-999.9908, device='cuda:0')
episode: 281 training return: tensor(-999.9927, device='cuda:0')
epoch: 94 test_true_pfm: -0.2538799613974349
episode: 282 training return: tensor(-999.9921, device='cuda:0')
episode: 283 training return: tensor(-999.9902, device='cuda:0')
episode: 284 training return: tensor(-999.9899, device='cuda:0')
epoch: 95 test_true_pfm: -0.22179278725568122
episode: 285 training return: tensor(-999.9913, device='cuda:0')
episode: 286 training return: tensor(-999.9906, device='cuda:0')
episode: 287 training return: tensor(-999.9866, device='cuda:0')
epoch: 96 test_true_pfm: -0.5205250783553821
episode: 288 training return: tensor(-999.9893, device='cuda:0')
episode: 289 training return: tensor(-999.9893, device='cuda:0')
episode: 290 training return: tensor(-999.9888, device='cuda:0')
epoch: 97 test_true_pfm: 0.35292185735817494
episode: 291 training return: tensor(-999.9892, device='cuda:0')
episode: 292 training return: tensor(-999.9899, device='cuda:0')
episode: 293 training return: tensor(-999.9910, device='cuda:0')
epoch: 98 test_true_pfm: -1.2481447384238515
episode: 294 training return: tensor(-999.9929, device='cuda:0')
episode: 295 training return: tensor(-999.9932, device='cuda:0')
episode: 296 training return: tensor(-999.9927, device='cuda:0')
epoch: 99 test_true_pfm: 0.36723382383116837
episode: 297 training return: tensor(-999.9867, device='cuda:0')
episode: 298 training return: tensor(-999.9857, device='cuda:0')
episode: 299 training return: tensor(-999.9855, device='cuda:0')
epoch: 100 test_true_pfm: -0.1535312288007409
