46.17861991760328
episode: 0 training return: tensor(-202.6149, device='cuda:0')
episode: 1 training return: tensor(-205.1924, device='cuda:0')
episode: 2 training return: tensor(-215.4538, device='cuda:0')
episode: 3 training return: tensor(-212.8797, device='cuda:0')
epoch: 1 test_true_pfm: 51.700402572746455 sim_pfm: -186.61393660991453
episode: 4 training return: tensor(-213.5762, device='cuda:0')
episode: 5 training return: tensor(-200.6974, device='cuda:0')
episode: 6 training return: tensor(-204.5887, device='cuda:0')
episode: 7 training return: tensor(-200.3202, device='cuda:0')
epoch: 2 test_true_pfm: 46.658821784555464 sim_pfm: -191.18344486476855
episode: 8 training return: tensor(-212.8966, device='cuda:0')
episode: 9 training return: tensor(-202.3731, device='cuda:0')
episode: 10 training return: tensor(-196.0425, device='cuda:0')
episode: 11 training return: tensor(-178.1920, device='cuda:0')
epoch: 3 test_true_pfm: 57.70382474901139 sim_pfm: -186.86403131405822
episode: 12 training return: tensor(-179.2204, device='cuda:0')
episode: 13 training return: tensor(-81.6626, device='cuda:0')
episode: 14 training return: tensor(-180.7392, device='cuda:0')
episode: 15 training return: tensor(-192.4642, device='cuda:0')
epoch: 4 test_true_pfm: 49.38719340928717 sim_pfm: -179.95177620239556
episode: 16 training return: tensor(-178.1689, device='cuda:0')
episode: 17 training return: tensor(-186.9265, device='cuda:0')
episode: 18 training return: tensor(-204.1695, device='cuda:0')
episode: 19 training return: tensor(-193.8543, device='cuda:0')
epoch: 5 test_true_pfm: 52.8261824610823 sim_pfm: -190.91251685220632
episode: 20 training return: tensor(-187.1836, device='cuda:0')
episode: 21 training return: tensor(-200.0482, device='cuda:0')
episode: 22 training return: tensor(-174.7199, device='cuda:0')
episode: 23 training return: tensor(-193.2691, device='cuda:0')
epoch: 6 test_true_pfm: 58.03175119891512 sim_pfm: -183.97196706979304
episode: 24 training return: tensor(-181.9161, device='cuda:0')
episode: 25 training return: tensor(-183.7006, device='cuda:0')
episode: 26 training return: tensor(-183.1657, device='cuda:0')
episode: 27 training return: tensor(-186.9489, device='cuda:0')
epoch: 7 test_true_pfm: 51.582484297921994 sim_pfm: -167.33093890048332
episode: 28 training return: tensor(-192.4723, device='cuda:0')
episode: 29 training return: tensor(-194.1437, device='cuda:0')
episode: 30 training return: tensor(-193.0827, device='cuda:0')
episode: 31 training return: tensor(-204.7302, device='cuda:0')
epoch: 8 test_true_pfm: 47.83873514611492 sim_pfm: -193.1749854172289
episode: 32 training return: tensor(-195.7599, device='cuda:0')
episode: 33 training return: tensor(-196.1732, device='cuda:0')
episode: 34 training return: tensor(-194.5951, device='cuda:0')
episode: 35 training return: tensor(-206.3697, device='cuda:0')
epoch: 9 test_true_pfm: 46.701162744553564 sim_pfm: -199.60267131258735
episode: 36 training return: tensor(-206.0577, device='cuda:0')
episode: 37 training return: tensor(-198.9478, device='cuda:0')
episode: 38 training return: tensor(-196.8235, device='cuda:0')
episode: 39 training return: tensor(-195.3684, device='cuda:0')
epoch: 10 test_true_pfm: 47.84231691578101 sim_pfm: -188.6553377878503
episode: 40 training return: tensor(-207.2526, device='cuda:0')
episode: 41 training return: tensor(-194.6323, device='cuda:0')
episode: 42 training return: tensor(-205.4040, device='cuda:0')
episode: 43 training return: tensor(-194.8084, device='cuda:0')
epoch: 11 test_true_pfm: 47.46991336668268 sim_pfm: -196.65581585793407
episode: 44 training return: tensor(-197.1066, device='cuda:0')
episode: 45 training return: tensor(-186.6106, device='cuda:0')
episode: 46 training return: tensor(-196.3457, device='cuda:0')
episode: 47 training return: tensor(-194.7815, device='cuda:0')
epoch: 12 test_true_pfm: 55.89372282692149 sim_pfm: -168.2041572713293
episode: 48 training return: tensor(-202.4207, device='cuda:0')
episode: 49 training return: tensor(-182.4064, device='cuda:0')
episode: 50 training return: tensor(-204.5692, device='cuda:0')
episode: 51 training return: tensor(-195.7152, device='cuda:0')
epoch: 13 test_true_pfm: 45.79067680266349 sim_pfm: -188.2205383147928
episode: 52 training return: tensor(-204.8704, device='cuda:0')
episode: 53 training return: tensor(-194.4478, device='cuda:0')
episode: 54 training return: tensor(-194.2188, device='cuda:0')
episode: 55 training return: tensor(-193.9612, device='cuda:0')
epoch: 14 test_true_pfm: 47.35958429144175 sim_pfm: -196.17021673167474
episode: 56 training return: tensor(-192.2699, device='cuda:0')
episode: 57 training return: tensor(-204.3153, device='cuda:0')
episode: 58 training return: tensor(-193.3538, device='cuda:0')
episode: 59 training return: tensor(-205.7173, device='cuda:0')
epoch: 15 test_true_pfm: 46.328294136590635 sim_pfm: -195.65918517495737
episode: 60 training return: tensor(-193.0903, device='cuda:0')
episode: 61 training return: tensor(-186.2157, device='cuda:0')
episode: 62 training return: tensor(-193.0851, device='cuda:0')
episode: 63 training return: tensor(-193.5823, device='cuda:0')
epoch: 16 test_true_pfm: 48.32217408547183 sim_pfm: -192.8479132998269
episode: 64 training return: tensor(-195.6291, device='cuda:0')
episode: 65 training return: tensor(-198.0747, device='cuda:0')
episode: 66 training return: tensor(-194.1674, device='cuda:0')
episode: 67 training return: tensor(-194.0270, device='cuda:0')
epoch: 17 test_true_pfm: 61.204333525398326 sim_pfm: -189.64684506760676
episode: 68 training return: tensor(-177.6649, device='cuda:0')
episode: 69 training return: tensor(-193.0102, device='cuda:0')
episode: 70 training return: tensor(-175.9790, device='cuda:0')
episode: 71 training return: tensor(-191.8122, device='cuda:0')
epoch: 18 test_true_pfm: 46.12822432993232 sim_pfm: -184.86145779928194
episode: 72 training return: tensor(-196.4334, device='cuda:0')
episode: 73 training return: tensor(-192.6640, device='cuda:0')
episode: 74 training return: tensor(-179.4037, device='cuda:0')
episode: 75 training return: tensor(-193.8027, device='cuda:0')
epoch: 19 test_true_pfm: 46.205527207247364 sim_pfm: -190.84021269145887
episode: 76 training return: tensor(-190.5783, device='cuda:0')
episode: 77 training return: tensor(-194.8453, device='cuda:0')
episode: 78 training return: tensor(-192.4636, device='cuda:0')
episode: 79 training return: tensor(-181.1132, device='cuda:0')
epoch: 20 test_true_pfm: 47.795437151440524 sim_pfm: -175.281833909289
episode: 80 training return: tensor(-187.4268, device='cuda:0')
episode: 81 training return: tensor(-200.4624, device='cuda:0')
episode: 82 training return: tensor(-178.3084, device='cuda:0')
episode: 83 training return: tensor(-201.2183, device='cuda:0')
epoch: 21 test_true_pfm: 47.40848862306836 sim_pfm: -189.06784124126426
episode: 84 training return: tensor(-193.8358, device='cuda:0')
episode: 85 training return: tensor(-164.5646, device='cuda:0')
episode: 86 training return: tensor(-190.8908, device='cuda:0')
episode: 87 training return: tensor(-193.0331, device='cuda:0')
epoch: 22 test_true_pfm: 47.08882244051903 sim_pfm: -197.3877649869537
episode: 88 training return: tensor(-194.2104, device='cuda:0')
episode: 89 training return: tensor(-188.8527, device='cuda:0')
episode: 90 training return: tensor(-194.1083, device='cuda:0')
episode: 91 training return: tensor(-206.6248, device='cuda:0')
epoch: 23 test_true_pfm: 45.930071357381166 sim_pfm: -199.8448155048536
episode: 92 training return: tensor(-202.2614, device='cuda:0')
episode: 93 training return: tensor(-196.6393, device='cuda:0')
episode: 94 training return: tensor(-194.3921, device='cuda:0')
episode: 95 training return: tensor(-173.1088, device='cuda:0')
epoch: 24 test_true_pfm: 42.92859614786066 sim_pfm: -193.25994555114303
episode: 96 training return: tensor(-206.9284, device='cuda:0')
episode: 97 training return: tensor(-195.1625, device='cuda:0')
episode: 98 training return: tensor(-194.7943, device='cuda:0')
episode: 99 training return: tensor(-194.0872, device='cuda:0')
epoch: 25 test_true_pfm: 47.47366510008646 sim_pfm: -188.50823602024465
episode: 100 training return: tensor(-194.4499, device='cuda:0')
episode: 101 training return: tensor(-183.4998, device='cuda:0')
episode: 102 training return: tensor(-67.2330, device='cuda:0')
episode: 103 training return: tensor(-202.0545, device='cuda:0')
epoch: 26 test_true_pfm: 46.66352003380446 sim_pfm: -196.36721432735794
episode: 104 training return: tensor(-187.6732, device='cuda:0')
episode: 105 training return: tensor(-184.0622, device='cuda:0')
episode: 106 training return: tensor(-204.2514, device='cuda:0')
episode: 107 training return: tensor(-179.8623, device='cuda:0')
epoch: 27 test_true_pfm: 48.395818665629946 sim_pfm: -186.0723296013428
episode: 108 training return: tensor(-194.6234, device='cuda:0')
episode: 109 training return: tensor(-180.7532, device='cuda:0')
episode: 110 training return: tensor(-180.2756, device='cuda:0')
episode: 111 training return: tensor(-190.7647, device='cuda:0')
epoch: 28 test_true_pfm: 49.15117964687822 sim_pfm: -192.0602858570288
episode: 112 training return: tensor(-203.7153, device='cuda:0')
episode: 113 training return: tensor(-201.4452, device='cuda:0')
episode: 114 training return: tensor(-188.7759, device='cuda:0')
episode: 115 training return: tensor(-189.0969, device='cuda:0')
epoch: 29 test_true_pfm: 46.49592685917139 sim_pfm: -187.9897506582085
episode: 116 training return: tensor(-199.9651, device='cuda:0')
episode: 117 training return: tensor(-197.8752, device='cuda:0')
episode: 118 training return: tensor(-180.7553, device='cuda:0')
episode: 119 training return: tensor(-192.0708, device='cuda:0')
epoch: 30 test_true_pfm: 42.946815649958694 sim_pfm: -190.13416567505337
episode: 120 training return: tensor(-192.0304, device='cuda:0')
episode: 121 training return: tensor(-189.0026, device='cuda:0')
episode: 122 training return: tensor(-187.3452, device='cuda:0')
episode: 123 training return: tensor(-196.9799, device='cuda:0')
epoch: 31 test_true_pfm: 47.45577922229504 sim_pfm: -194.2353714934492
episode: 124 training return: tensor(-188.5578, device='cuda:0')
episode: 125 training return: tensor(-192.1890, device='cuda:0')
episode: 126 training return: tensor(-191.6039, device='cuda:0')
episode: 127 training return: tensor(-201.3898, device='cuda:0')
epoch: 32 test_true_pfm: 50.542842211642025 sim_pfm: -194.99510323997237
episode: 128 training return: tensor(-198.9468, device='cuda:0')
episode: 129 training return: tensor(-188.7855, device='cuda:0')
episode: 130 training return: tensor(-194.2393, device='cuda:0')
episode: 131 training return: tensor(-199.8760, device='cuda:0')
epoch: 33 test_true_pfm: 47.071118366592 sim_pfm: -190.25428737228503
episode: 132 training return: tensor(-191.5915, device='cuda:0')
episode: 133 training return: tensor(-177.8071, device='cuda:0')
episode: 134 training return: tensor(-193.0901, device='cuda:0')
episode: 135 training return: tensor(-179.2991, device='cuda:0')
epoch: 34 test_true_pfm: 50.48805976879056 sim_pfm: -179.66782576106488
episode: 136 training return: tensor(-202.4879, device='cuda:0')
episode: 137 training return: tensor(-189.9473, device='cuda:0')
episode: 138 training return: tensor(-200.5394, device='cuda:0')
episode: 139 training return: tensor(-201.4998, device='cuda:0')
epoch: 35 test_true_pfm: 46.249857755072966 sim_pfm: -185.06236124573041
episode: 140 training return: tensor(-184.1212, device='cuda:0')
episode: 141 training return: tensor(-181.2077, device='cuda:0')
episode: 142 training return: tensor(-200.9040, device='cuda:0')
episode: 143 training return: tensor(-177.6185, device='cuda:0')
epoch: 36 test_true_pfm: 50.007550143020744 sim_pfm: -184.58901786507923
episode: 144 training return: tensor(-191.0955, device='cuda:0')
episode: 145 training return: tensor(-194.1800, device='cuda:0')
episode: 146 training return: tensor(-189.0522, device='cuda:0')
episode: 147 training return: tensor(-201.1279, device='cuda:0')
epoch: 37 test_true_pfm: 49.052038068074026 sim_pfm: -188.19757555032848
episode: 148 training return: tensor(-162.9112, device='cuda:0')
episode: 149 training return: tensor(-202.3463, device='cuda:0')
episode: 150 training return: tensor(-189.9580, device='cuda:0')
episode: 151 training return: tensor(-189.9908, device='cuda:0')
epoch: 38 test_true_pfm: 46.842911878072954 sim_pfm: -190.7967761625594
episode: 152 training return: tensor(-187.9260, device='cuda:0')
episode: 153 training return: tensor(-177.4200, device='cuda:0')
episode: 154 training return: tensor(-189.7263, device='cuda:0')
episode: 155 training return: tensor(-187.8512, device='cuda:0')
epoch: 39 test_true_pfm: 48.30209296431728 sim_pfm: -187.8713756868383
episode: 156 training return: tensor(-186.6251, device='cuda:0')
episode: 157 training return: tensor(-195.8050, device='cuda:0')
episode: 158 training return: tensor(-186.6648, device='cuda:0')
episode: 159 training return: tensor(-205.1129, device='cuda:0')
epoch: 40 test_true_pfm: 48.31116816276076 sim_pfm: -179.03981972212205
episode: 160 training return: tensor(-191.0895, device='cuda:0')
episode: 161 training return: tensor(-202.4862, device='cuda:0')
episode: 162 training return: tensor(-190.0748, device='cuda:0')
episode: 163 training return: tensor(-181.4641, device='cuda:0')
epoch: 41 test_true_pfm: 49.469932851304854 sim_pfm: -162.08957570557249
episode: 164 training return: tensor(-201.8566, device='cuda:0')
episode: 165 training return: tensor(-197.5210, device='cuda:0')
episode: 166 training return: tensor(-174.0380, device='cuda:0')
episode: 167 training return: tensor(-183.0316, device='cuda:0')
epoch: 42 test_true_pfm: 48.04316012945469 sim_pfm: -186.20586298611016
episode: 168 training return: tensor(-171.3044, device='cuda:0')
episode: 169 training return: tensor(-178.5306, device='cuda:0')
episode: 170 training return: tensor(-185.7066, device='cuda:0')
episode: 171 training return: tensor(-191.9743, device='cuda:0')
epoch: 43 test_true_pfm: 44.03976487096291 sim_pfm: -191.41922245055903
episode: 172 training return: tensor(-190.5247, device='cuda:0')
episode: 173 training return: tensor(-184.1881, device='cuda:0')
episode: 174 training return: tensor(-175.7672, device='cuda:0')
episode: 175 training return: tensor(-188.9466, device='cuda:0')
epoch: 44 test_true_pfm: 49.614148332974295 sim_pfm: -183.95510432653245
episode: 176 training return: tensor(-191.3234, device='cuda:0')
episode: 177 training return: tensor(-68.5613, device='cuda:0')
episode: 178 training return: tensor(-179.1863, device='cuda:0')
episode: 179 training return: tensor(-178.7325, device='cuda:0')
epoch: 45 test_true_pfm: 43.56386969834771 sim_pfm: -168.8973484124872
episode: 180 training return: tensor(-179.4301, device='cuda:0')
episode: 181 training return: tensor(-201.6833, device='cuda:0')
episode: 182 training return: tensor(-177.6970, device='cuda:0')
episode: 183 training return: tensor(-178.8304, device='cuda:0')
epoch: 46 test_true_pfm: 45.70944054717508 sim_pfm: -200.35088779047948
episode: 184 training return: tensor(-189.3503, device='cuda:0')
episode: 185 training return: tensor(-188.7921, device='cuda:0')
episode: 186 training return: tensor(-199.6673, device='cuda:0')
episode: 187 training return: tensor(-190.9686, device='cuda:0')
epoch: 47 test_true_pfm: 45.397596699479365 sim_pfm: -184.64950823402032
episode: 188 training return: tensor(-190.0383, device='cuda:0')
episode: 189 training return: tensor(-200.7801, device='cuda:0')
episode: 190 training return: tensor(-179.4290, device='cuda:0')
episode: 191 training return: tensor(-200.9833, device='cuda:0')
epoch: 48 test_true_pfm: 45.6885426835627 sim_pfm: -189.23830870685634
episode: 192 training return: tensor(-188.2631, device='cuda:0')
episode: 193 training return: tensor(-178.6088, device='cuda:0')
episode: 194 training return: tensor(-188.7698, device='cuda:0')
episode: 195 training return: tensor(-200.7671, device='cuda:0')
epoch: 49 test_true_pfm: 47.158584396296625 sim_pfm: -191.35443282641353
episode: 196 training return: tensor(-186.0396, device='cuda:0')
episode: 197 training return: tensor(-181.7306, device='cuda:0')
episode: 198 training return: tensor(-201.0247, device='cuda:0')
episode: 199 training return: tensor(-193.5328, device='cuda:0')
epoch: 50 test_true_pfm: 47.93113422167081 sim_pfm: -192.3357578258845
episode: 200 training return: tensor(-190.6417, device='cuda:0')
episode: 201 training return: tensor(-173.9172, device='cuda:0')
episode: 202 training return: tensor(-177.2603, device='cuda:0')
episode: 203 training return: tensor(-175.3119, device='cuda:0')
epoch: 51 test_true_pfm: 43.3722401994501 sim_pfm: -190.79761953924316
episode: 204 training return: tensor(-200.3841, device='cuda:0')
episode: 205 training return: tensor(-190.0409, device='cuda:0')
episode: 206 training return: tensor(-175.9648, device='cuda:0')
episode: 207 training return: tensor(-199.6284, device='cuda:0')
epoch: 52 test_true_pfm: 54.08854180139167 sim_pfm: -194.3602544428082
episode: 208 training return: tensor(-177.5792, device='cuda:0')
episode: 209 training return: tensor(-198.8443, device='cuda:0')
episode: 210 training return: tensor(-192.5291, device='cuda:0')
episode: 211 training return: tensor(-200.7717, device='cuda:0')
epoch: 53 test_true_pfm: 43.72463337700888 sim_pfm: -161.02743847183302
episode: 212 training return: tensor(-184.8012, device='cuda:0')
episode: 213 training return: tensor(-179.7359, device='cuda:0')
episode: 214 training return: tensor(-180.6754, device='cuda:0')
episode: 215 training return: tensor(-198.4535, device='cuda:0')
epoch: 54 test_true_pfm: 55.71901490649251 sim_pfm: -178.31072196737514
episode: 216 training return: tensor(-188.1679, device='cuda:0')
episode: 217 training return: tensor(-195.3209, device='cuda:0')
episode: 218 training return: tensor(-190.5298, device='cuda:0')
episode: 219 training return: tensor(-180.7646, device='cuda:0')
epoch: 55 test_true_pfm: 43.32272429928848 sim_pfm: -167.18816162624861
episode: 220 training return: tensor(-189.9910, device='cuda:0')
episode: 221 training return: tensor(-176.6919, device='cuda:0')
episode: 222 training return: tensor(-175.4117, device='cuda:0')
episode: 223 training return: tensor(-180.2076, device='cuda:0')
epoch: 56 test_true_pfm: 46.39078976005605 sim_pfm: -191.18587480280547
episode: 224 training return: tensor(-183.4044, device='cuda:0')
episode: 225 training return: tensor(-210.6295, device='cuda:0')
episode: 226 training return: tensor(-197.7975, device='cuda:0')
episode: 227 training return: tensor(-201.2416, device='cuda:0')
epoch: 57 test_true_pfm: 44.398752601402784 sim_pfm: -199.15357961534173
episode: 228 training return: tensor(-86.4620, device='cuda:0')
episode: 229 training return: tensor(-184.2931, device='cuda:0')
episode: 230 training return: tensor(-177.7005, device='cuda:0')
episode: 231 training return: tensor(-198.0407, device='cuda:0')
epoch: 58 test_true_pfm: 41.59783312441296 sim_pfm: -196.83824046021329
episode: 232 training return: tensor(-183.7715, device='cuda:0')
episode: 233 training return: tensor(-212.1701, device='cuda:0')
episode: 234 training return: tensor(-210.4235, device='cuda:0')
episode: 235 training return: tensor(-179.2338, device='cuda:0')
epoch: 59 test_true_pfm: 45.85779168100672 sim_pfm: -202.32193178988527
episode: 236 training return: tensor(-208.6288, device='cuda:0')
episode: 237 training return: tensor(-179.1090, device='cuda:0')
episode: 238 training return: tensor(-177.5798, device='cuda:0')
episode: 239 training return: tensor(-209.6846, device='cuda:0')
epoch: 60 test_true_pfm: 53.680442242190324 sim_pfm: -198.40363573858048
episode: 240 training return: tensor(-198.3776, device='cuda:0')
episode: 241 training return: tensor(-180.0368, device='cuda:0')
episode: 242 training return: tensor(-184.3545, device='cuda:0')
episode: 243 training return: tensor(-198.0954, device='cuda:0')
epoch: 61 test_true_pfm: 51.01589145018155 sim_pfm: -165.76968777368893
episode: 244 training return: tensor(-179.1083, device='cuda:0')
episode: 245 training return: tensor(-181.7245, device='cuda:0')
episode: 246 training return: tensor(-176.8583, device='cuda:0')
episode: 247 training return: tensor(-178.8624, device='cuda:0')
epoch: 62 test_true_pfm: 48.779355932834946 sim_pfm: -186.1752007294097
episode: 248 training return: tensor(-187.9222, device='cuda:0')
episode: 249 training return: tensor(-177.3989, device='cuda:0')
episode: 250 training return: tensor(-198.8667, device='cuda:0')
episode: 251 training return: tensor(-180.0600, device='cuda:0')
epoch: 63 test_true_pfm: 43.31907238554845 sim_pfm: -174.4356841676694
episode: 252 training return: tensor(-187.1084, device='cuda:0')
episode: 253 training return: tensor(-177.8813, device='cuda:0')
episode: 254 training return: tensor(-178.7896, device='cuda:0')
episode: 255 training return: tensor(-199.0199, device='cuda:0')
epoch: 64 test_true_pfm: 46.86575669585535 sim_pfm: -190.7477635115036
episode: 256 training return: tensor(-196.9671, device='cuda:0')
episode: 257 training return: tensor(-199.7799, device='cuda:0')
episode: 258 training return: tensor(-175.7733, device='cuda:0')
episode: 259 training return: tensor(-188.3602, device='cuda:0')
epoch: 65 test_true_pfm: 46.2435699321788 sim_pfm: -186.45737417304773
episode: 260 training return: tensor(-199.5243, device='cuda:0')
episode: 261 training return: tensor(-188.7706, device='cuda:0')
episode: 262 training return: tensor(-200.0614, device='cuda:0')
episode: 263 training return: tensor(-178.8087, device='cuda:0')
epoch: 66 test_true_pfm: 56.50001368077128 sim_pfm: -189.31850948425708
episode: 264 training return: tensor(-199.2157, device='cuda:0')
episode: 265 training return: tensor(-200.6596, device='cuda:0')
episode: 266 training return: tensor(-189.1712, device='cuda:0')
episode: 267 training return: tensor(-176.7067, device='cuda:0')
epoch: 67 test_true_pfm: 43.0615302687674 sim_pfm: -164.82565618362278
episode: 268 training return: tensor(-88.3386, device='cuda:0')
episode: 269 training return: tensor(-179.2855, device='cuda:0')
episode: 270 training return: tensor(-201.0694, device='cuda:0')
episode: 271 training return: tensor(-189.5474, device='cuda:0')
epoch: 68 test_true_pfm: 48.89427798363998 sim_pfm: -166.57555669340655
episode: 272 training return: tensor(-199.8092, device='cuda:0')
episode: 273 training return: tensor(-197.1834, device='cuda:0')
episode: 274 training return: tensor(-175.8777, device='cuda:0')
episode: 275 training return: tensor(-181.5189, device='cuda:0')
epoch: 69 test_true_pfm: 44.811577221145924 sim_pfm: -194.32630379607434
episode: 276 training return: tensor(-175.6335, device='cuda:0')
episode: 277 training return: tensor(-176.7034, device='cuda:0')
episode: 278 training return: tensor(-197.8706, device='cuda:0')
episode: 279 training return: tensor(-199.3148, device='cuda:0')
epoch: 70 test_true_pfm: 48.804451918009455 sim_pfm: -191.01226064522635
episode: 280 training return: tensor(-179.9675, device='cuda:0')
episode: 281 training return: tensor(-176.8266, device='cuda:0')
episode: 282 training return: tensor(-186.1505, device='cuda:0')
episode: 283 training return: tensor(-177.5914, device='cuda:0')
epoch: 71 test_true_pfm: 44.126030981842874 sim_pfm: -185.3900181812234
episode: 284 training return: tensor(-186.6019, device='cuda:0')
episode: 285 training return: tensor(-178.2908, device='cuda:0')
episode: 286 training return: tensor(-181.2248, device='cuda:0')
episode: 287 training return: tensor(-197.4522, device='cuda:0')
epoch: 72 test_true_pfm: 45.286611358670505 sim_pfm: -184.54081964963115
episode: 288 training return: tensor(-183.0502, device='cuda:0')
episode: 289 training return: tensor(-176.6254, device='cuda:0')
episode: 290 training return: tensor(-176.3750, device='cuda:0')
episode: 291 training return: tensor(-179.3406, device='cuda:0')
epoch: 73 test_true_pfm: 44.86385449069168 sim_pfm: -187.05977118125884
episode: 292 training return: tensor(-197.9203, device='cuda:0')
episode: 293 training return: tensor(-199.4615, device='cuda:0')
episode: 294 training return: tensor(-185.4804, device='cuda:0')
episode: 295 training return: tensor(-189.3383, device='cuda:0')
epoch: 74 test_true_pfm: 44.55371412375831 sim_pfm: -190.99084880145966
episode: 296 training return: tensor(-177.5665, device='cuda:0')
episode: 297 training return: tensor(-178.8753, device='cuda:0')
episode: 298 training return: tensor(-197.0593, device='cuda:0')
episode: 299 training return: tensor(-188.1539, device='cuda:0')
epoch: 75 test_true_pfm: 46.27215483451987 sim_pfm: -187.17809871583012
episode: 300 training return: tensor(-198.0617, device='cuda:0')
episode: 301 training return: tensor(-200.3099, device='cuda:0')
episode: 302 training return: tensor(-198.5907, device='cuda:0')
episode: 303 training return: tensor(-197.1333, device='cuda:0')
epoch: 76 test_true_pfm: 46.145828217155255 sim_pfm: -195.5107595346053
episode: 304 training return: tensor(-177.2585, device='cuda:0')
episode: 305 training return: tensor(-189.1551, device='cuda:0')
episode: 306 training return: tensor(-184.2474, device='cuda:0')
episode: 307 training return: tensor(-174.0686, device='cuda:0')
epoch: 77 test_true_pfm: 45.559409201054805 sim_pfm: -188.7123036862118
episode: 308 training return: tensor(-198.7554, device='cuda:0')
episode: 309 training return: tensor(-201.4442, device='cuda:0')
episode: 310 training return: tensor(-189.9833, device='cuda:0')
episode: 311 training return: tensor(-182.7206, device='cuda:0')
epoch: 78 test_true_pfm: 41.724782467685934 sim_pfm: -186.40929689997574
episode: 312 training return: tensor(-200.1312, device='cuda:0')
episode: 313 training return: tensor(-177.8368, device='cuda:0')
episode: 314 training return: tensor(-176.2984, device='cuda:0')
episode: 315 training return: tensor(-180.1225, device='cuda:0')
epoch: 79 test_true_pfm: 48.45779035458418 sim_pfm: -195.87274195238714
episode: 316 training return: tensor(-177.4068, device='cuda:0')
episode: 317 training return: tensor(-179.8859, device='cuda:0')
episode: 318 training return: tensor(-198.2392, device='cuda:0')
episode: 319 training return: tensor(-189.2500, device='cuda:0')
epoch: 80 test_true_pfm: 49.482252072451864 sim_pfm: -184.5135382369859
episode: 320 training return: tensor(-183.8386, device='cuda:0')
episode: 321 training return: tensor(-177.8664, device='cuda:0')
episode: 322 training return: tensor(-189.8142, device='cuda:0')
episode: 323 training return: tensor(-198.6005, device='cuda:0')
epoch: 81 test_true_pfm: 43.54539291639915 sim_pfm: -186.44021586634918
episode: 324 training return: tensor(-187.2287, device='cuda:0')
episode: 325 training return: tensor(-201.0923, device='cuda:0')
episode: 326 training return: tensor(-180.2737, device='cuda:0')
episode: 327 training return: tensor(-196.4305, device='cuda:0')
epoch: 82 test_true_pfm: 45.23114608995742 sim_pfm: -161.68483946122578
episode: 328 training return: tensor(-190.1850, device='cuda:0')
episode: 329 training return: tensor(-188.3896, device='cuda:0')
episode: 330 training return: tensor(-196.1189, device='cuda:0')
episode: 331 training return: tensor(-191.3360, device='cuda:0')
epoch: 83 test_true_pfm: 46.14058919884619 sim_pfm: -183.73677421951433
episode: 332 training return: tensor(-195.4384, device='cuda:0')
episode: 333 training return: tensor(-173.0806, device='cuda:0')
episode: 334 training return: tensor(-177.8896, device='cuda:0')
episode: 335 training return: tensor(-199.2040, device='cuda:0')
epoch: 84 test_true_pfm: 41.78584820333747 sim_pfm: -181.98892054477474
episode: 336 training return: tensor(-197.8542, device='cuda:0')
episode: 337 training return: tensor(-198.8656, device='cuda:0')
episode: 338 training return: tensor(-187.9559, device='cuda:0')
episode: 339 training return: tensor(-181.5193, device='cuda:0')
epoch: 85 test_true_pfm: 53.57274931371717 sim_pfm: -158.676912212983
episode: 340 training return: tensor(-198.4211, device='cuda:0')
episode: 341 training return: tensor(-175.6849, device='cuda:0')
episode: 342 training return: tensor(-187.4329, device='cuda:0')
episode: 343 training return: tensor(-199.0074, device='cuda:0')
epoch: 86 test_true_pfm: 43.608038997619474 sim_pfm: -178.35824050994125
episode: 344 training return: tensor(-179.1207, device='cuda:0')
episode: 345 training return: tensor(-187.8988, device='cuda:0')
episode: 346 training return: tensor(-178.3749, device='cuda:0')
episode: 347 training return: tensor(-179.5953, device='cuda:0')
epoch: 87 test_true_pfm: 52.49438109398979 sim_pfm: -177.06651386329904
episode: 348 training return: tensor(-188.9760, device='cuda:0')
episode: 349 training return: tensor(-177.4661, device='cuda:0')
episode: 350 training return: tensor(-183.1614, device='cuda:0')
episode: 351 training return: tensor(-184.0376, device='cuda:0')
epoch: 88 test_true_pfm: 45.47803820680981 sim_pfm: -188.5376743465662
episode: 352 training return: tensor(-188.8567, device='cuda:0')
episode: 353 training return: tensor(-179.1234, device='cuda:0')
episode: 354 training return: tensor(-186.6742, device='cuda:0')
episode: 355 training return: tensor(-182.2589, device='cuda:0')
epoch: 89 test_true_pfm: 48.16251312285861 sim_pfm: -162.32990964076598
episode: 356 training return: tensor(-174.3848, device='cuda:0')
episode: 357 training return: tensor(-198.9944, device='cuda:0')
episode: 358 training return: tensor(-200.5151, device='cuda:0')
episode: 359 training return: tensor(-176.3935, device='cuda:0')
epoch: 90 test_true_pfm: 48.537190957008306 sim_pfm: -189.55169210184832
episode: 360 training return: tensor(-189.9594, device='cuda:0')
episode: 361 training return: tensor(-197.8893, device='cuda:0')
episode: 362 training return: tensor(-188.9399, device='cuda:0')
episode: 363 training return: tensor(-192.2739, device='cuda:0')
epoch: 91 test_true_pfm: 41.492734480818946 sim_pfm: -184.32504864154618
episode: 364 training return: tensor(-185.6911, device='cuda:0')
episode: 365 training return: tensor(-189.1292, device='cuda:0')
episode: 366 training return: tensor(-194.6726, device='cuda:0')
episode: 367 training return: tensor(-181.1413, device='cuda:0')
epoch: 92 test_true_pfm: 46.24193677870605 sim_pfm: -189.74392971976778
episode: 368 training return: tensor(-175.7572, device='cuda:0')
episode: 369 training return: tensor(-196.0329, device='cuda:0')
episode: 370 training return: tensor(-189.0623, device='cuda:0')
episode: 371 training return: tensor(-188.3511, device='cuda:0')
epoch: 93 test_true_pfm: 47.91290417714584 sim_pfm: -189.59927344426978
episode: 372 training return: tensor(-185.6284, device='cuda:0')
episode: 373 training return: tensor(-179.3835, device='cuda:0')
episode: 374 training return: tensor(-191.2206, device='cuda:0')
episode: 375 training return: tensor(-201.0424, device='cuda:0')
epoch: 94 test_true_pfm: 45.889133755842764 sim_pfm: -195.30291091017426
episode: 376 training return: tensor(-177.0210, device='cuda:0')
episode: 377 training return: tensor(-201.0318, device='cuda:0')
episode: 378 training return: tensor(-189.8206, device='cuda:0')
episode: 379 training return: tensor(-186.6236, device='cuda:0')
epoch: 95 test_true_pfm: 45.229304727192286 sim_pfm: -185.66091622553648
episode: 380 training return: tensor(-201.4657, device='cuda:0')
episode: 381 training return: tensor(-185.5462, device='cuda:0')
episode: 382 training return: tensor(-198.9826, device='cuda:0')
episode: 383 training return: tensor(-187.3140, device='cuda:0')
epoch: 96 test_true_pfm: 48.38946729194949 sim_pfm: -187.52534645720152
episode: 384 training return: tensor(-179.7466, device='cuda:0')
episode: 385 training return: tensor(-189.6121, device='cuda:0')
episode: 386 training return: tensor(-177.8863, device='cuda:0')
episode: 387 training return: tensor(-198.5557, device='cuda:0')
epoch: 97 test_true_pfm: 45.15319756269651 sim_pfm: -186.50029251448578
episode: 388 training return: tensor(-183.0817, device='cuda:0')
episode: 389 training return: tensor(-188.0582, device='cuda:0')
episode: 390 training return: tensor(-176.7859, device='cuda:0')
episode: 391 training return: tensor(-194.9631, device='cuda:0')
epoch: 98 test_true_pfm: 44.094410723650945 sim_pfm: -186.70793130644597
episode: 392 training return: tensor(-188.7812, device='cuda:0')
episode: 393 training return: tensor(-189.2865, device='cuda:0')
episode: 394 training return: tensor(-184.1146, device='cuda:0')
episode: 395 training return: tensor(-196.8953, device='cuda:0')
epoch: 99 test_true_pfm: 46.00644686602961 sim_pfm: -187.8614269239828
episode: 396 training return: tensor(-186.4026, device='cuda:0')
episode: 397 training return: tensor(-188.4888, device='cuda:0')
episode: 398 training return: tensor(-200.4285, device='cuda:0')
episode: 399 training return: tensor(-181.8312, device='cuda:0')
epoch: 100 test_true_pfm: 48.054426539633866 sim_pfm: -194.11270875114715
episode: 400 training return: tensor(-187.9280, device='cuda:0')
episode: 401 training return: tensor(-198.9764, device='cuda:0')
episode: 402 training return: tensor(-198.2487, device='cuda:0')
episode: 403 training return: tensor(-180.6677, device='cuda:0')
epoch: 101 test_true_pfm: 45.40872170049989 sim_pfm: -185.54599581782242
episode: 404 training return: tensor(-189.9112, device='cuda:0')
episode: 405 training return: tensor(-190.2389, device='cuda:0')
episode: 406 training return: tensor(-188.3018, device='cuda:0')
episode: 407 training return: tensor(-174.7625, device='cuda:0')
epoch: 102 test_true_pfm: 47.87383041927915 sim_pfm: -191.7460540839471
episode: 408 training return: tensor(-199.9402, device='cuda:0')
episode: 409 training return: tensor(-191.1169, device='cuda:0')
episode: 410 training return: tensor(-176.0272, device='cuda:0')
episode: 411 training return: tensor(-191.0927, device='cuda:0')
epoch: 103 test_true_pfm: 45.105426916390755 sim_pfm: -186.2738725332427
episode: 412 training return: tensor(-176.7791, device='cuda:0')
episode: 413 training return: tensor(-187.2987, device='cuda:0')
episode: 414 training return: tensor(-188.9982, device='cuda:0')
episode: 415 training return: tensor(-176.5906, device='cuda:0')
epoch: 104 test_true_pfm: 55.94772417671818 sim_pfm: -188.2957482283353
episode: 416 training return: tensor(-186.8740, device='cuda:0')
episode: 417 training return: tensor(-199.7830, device='cuda:0')
episode: 418 training return: tensor(-182.4227, device='cuda:0')
episode: 419 training return: tensor(-198.2522, device='cuda:0')
epoch: 105 test_true_pfm: 47.65033499084618 sim_pfm: -187.47004626204725
episode: 420 training return: tensor(-198.5714, device='cuda:0')
episode: 421 training return: tensor(-199.8778, device='cuda:0')
episode: 422 training return: tensor(-188.9620, device='cuda:0')
episode: 423 training return: tensor(-199.0135, device='cuda:0')
epoch: 106 test_true_pfm: 47.24058239669383 sim_pfm: -184.61680264137686
episode: 424 training return: tensor(-189.6138, device='cuda:0')
episode: 425 training return: tensor(-187.3517, device='cuda:0')
episode: 426 training return: tensor(-185.8188, device='cuda:0')
episode: 427 training return: tensor(-190.0339, device='cuda:0')
epoch: 107 test_true_pfm: 49.64192619717946 sim_pfm: -186.20654982888374
episode: 428 training return: tensor(-174.6325, device='cuda:0')
episode: 429 training return: tensor(-189.0889, device='cuda:0')
episode: 430 training return: tensor(-189.9186, device='cuda:0')
episode: 431 training return: tensor(-200.2417, device='cuda:0')
epoch: 108 test_true_pfm: 45.939724035737854 sim_pfm: -193.2242093487177
episode: 432 training return: tensor(-199.4664, device='cuda:0')
episode: 433 training return: tensor(-186.3241, device='cuda:0')
episode: 434 training return: tensor(-198.1781, device='cuda:0')
episode: 435 training return: tensor(-182.5009, device='cuda:0')
epoch: 109 test_true_pfm: 42.90847111011816 sim_pfm: -189.51495305142598
episode: 436 training return: tensor(-175.4568, device='cuda:0')
episode: 437 training return: tensor(-198.0913, device='cuda:0')
episode: 438 training return: tensor(-187.7455, device='cuda:0')
episode: 439 training return: tensor(-179.7867, device='cuda:0')
epoch: 110 test_true_pfm: 45.43693993374311 sim_pfm: -185.80078076981007
episode: 440 training return: tensor(-192.1797, device='cuda:0')
episode: 441 training return: tensor(-192.1264, device='cuda:0')
episode: 442 training return: tensor(-191.1867, device='cuda:0')
episode: 443 training return: tensor(-197.5443, device='cuda:0')
epoch: 111 test_true_pfm: 46.1543516791199 sim_pfm: -191.58741213751492
episode: 444 training return: tensor(-200.6984, device='cuda:0')
episode: 445 training return: tensor(-183.1873, device='cuda:0')
episode: 446 training return: tensor(-188.2935, device='cuda:0')
episode: 447 training return: tensor(-183.9557, device='cuda:0')
epoch: 112 test_true_pfm: 45.92945083544155 sim_pfm: -193.99338482392486
episode: 448 training return: tensor(-191.4009, device='cuda:0')
episode: 449 training return: tensor(-183.5570, device='cuda:0')
episode: 450 training return: tensor(-198.6925, device='cuda:0')
episode: 451 training return: tensor(-185.5589, device='cuda:0')
epoch: 113 test_true_pfm: 45.25669234931127 sim_pfm: -187.01918607499684
episode: 452 training return: tensor(-192.9958, device='cuda:0')
episode: 453 training return: tensor(-186.9823, device='cuda:0')
episode: 454 training return: tensor(-186.7890, device='cuda:0')
episode: 455 training return: tensor(-187.0622, device='cuda:0')
epoch: 114 test_true_pfm: 45.132542670446455 sim_pfm: -181.80796211949783
episode: 456 training return: tensor(-187.9984, device='cuda:0')
episode: 457 training return: tensor(-190.2902, device='cuda:0')
episode: 458 training return: tensor(-199.2298, device='cuda:0')
episode: 459 training return: tensor(-188.3018, device='cuda:0')
epoch: 115 test_true_pfm: 43.463113516063686 sim_pfm: -189.2331282333238
episode: 460 training return: tensor(-184.6447, device='cuda:0')
episode: 461 training return: tensor(-190.6005, device='cuda:0')
episode: 462 training return: tensor(-188.8066, device='cuda:0')
episode: 463 training return: tensor(-184.9131, device='cuda:0')
epoch: 116 test_true_pfm: 45.4055118053576 sim_pfm: -190.57950051496738
episode: 464 training return: tensor(-188.6164, device='cuda:0')
episode: 465 training return: tensor(-197.7281, device='cuda:0')
episode: 466 training return: tensor(-189.5042, device='cuda:0')
episode: 467 training return: tensor(-187.4743, device='cuda:0')
epoch: 117 test_true_pfm: 41.79582463256061 sim_pfm: -192.3722913099802
episode: 468 training return: tensor(-199.3891, device='cuda:0')
episode: 469 training return: tensor(-199.1755, device='cuda:0')
episode: 470 training return: tensor(-199.9595, device='cuda:0')
episode: 471 training return: tensor(-187.6451, device='cuda:0')
epoch: 118 test_true_pfm: 46.24626354258872 sim_pfm: -183.91600527339614
episode: 472 training return: tensor(-188.7612, device='cuda:0')
episode: 473 training return: tensor(-195.7357, device='cuda:0')
episode: 474 training return: tensor(-189.3203, device='cuda:0')
episode: 475 training return: tensor(-189.1447, device='cuda:0')
epoch: 119 test_true_pfm: 48.19397420669116 sim_pfm: -189.50245355977677
episode: 476 training return: tensor(-184.0936, device='cuda:0')
episode: 477 training return: tensor(-188.2238, device='cuda:0')
episode: 478 training return: tensor(-189.4980, device='cuda:0')
episode: 479 training return: tensor(-199.0419, device='cuda:0')
epoch: 120 test_true_pfm: 49.85005503155698 sim_pfm: -192.74775971597992
episode: 480 training return: tensor(-192.0890, device='cuda:0')
episode: 481 training return: tensor(-200.3417, device='cuda:0')
episode: 482 training return: tensor(-199.1272, device='cuda:0')
episode: 483 training return: tensor(-189.3195, device='cuda:0')
epoch: 121 test_true_pfm: 47.449999812172244 sim_pfm: -189.88132855596487
episode: 484 training return: tensor(-189.3879, device='cuda:0')
episode: 485 training return: tensor(-189.1750, device='cuda:0')
episode: 486 training return: tensor(-188.2328, device='cuda:0')
episode: 487 training return: tensor(-185.2575, device='cuda:0')
epoch: 122 test_true_pfm: 46.09057576345767 sim_pfm: -188.92173892769497
episode: 488 training return: tensor(-167.9298, device='cuda:0')
episode: 489 training return: tensor(-174.8318, device='cuda:0')
episode: 490 training return: tensor(-192.6096, device='cuda:0')
episode: 491 training return: tensor(-198.1742, device='cuda:0')
epoch: 123 test_true_pfm: 46.18412804550614 sim_pfm: -183.92747748593683
episode: 492 training return: tensor(-188.3908, device='cuda:0')
episode: 493 training return: tensor(-188.7285, device='cuda:0')
episode: 494 training return: tensor(-198.5753, device='cuda:0')
episode: 495 training return: tensor(-180.1190, device='cuda:0')
epoch: 124 test_true_pfm: 47.14008970366584 sim_pfm: -189.62370250229725
episode: 496 training return: tensor(-183.7463, device='cuda:0')
episode: 497 training return: tensor(-188.9275, device='cuda:0')
episode: 498 training return: tensor(-190.2835, device='cuda:0')
episode: 499 training return: tensor(-189.0497, device='cuda:0')
epoch: 125 test_true_pfm: 48.059630279685095 sim_pfm: -188.09310254101874
episode: 500 training return: tensor(-199.9492, device='cuda:0')
episode: 501 training return: tensor(-189.4477, device='cuda:0')
episode: 502 training return: tensor(-197.0054, device='cuda:0')
episode: 503 training return: tensor(-182.0712, device='cuda:0')
epoch: 126 test_true_pfm: 48.6452483000402 sim_pfm: -186.71328403005026
episode: 504 training return: tensor(-187.9355, device='cuda:0')
episode: 505 training return: tensor(-194.4337, device='cuda:0')
episode: 506 training return: tensor(-186.7280, device='cuda:0')
episode: 507 training return: tensor(-178.0204, device='cuda:0')
epoch: 127 test_true_pfm: 46.928874465183455 sim_pfm: -181.69897193134528
episode: 508 training return: tensor(-188.4476, device='cuda:0')
episode: 509 training return: tensor(-186.6960, device='cuda:0')
episode: 510 training return: tensor(-194.7401, device='cuda:0')
episode: 511 training return: tensor(-197.6767, device='cuda:0')
epoch: 128 test_true_pfm: 44.66564585682359 sim_pfm: -196.60043116363232
episode: 512 training return: tensor(-195.9069, device='cuda:0')
episode: 513 training return: tensor(-186.7424, device='cuda:0')
episode: 514 training return: tensor(-198.2482, device='cuda:0')
episode: 515 training return: tensor(-197.5976, device='cuda:0')
epoch: 129 test_true_pfm: 42.89807154111425 sim_pfm: -186.9670344280079
episode: 516 training return: tensor(-199.0588, device='cuda:0')
episode: 517 training return: tensor(-187.6704, device='cuda:0')
episode: 518 training return: tensor(-188.0168, device='cuda:0')
episode: 519 training return: tensor(-193.8483, device='cuda:0')
epoch: 130 test_true_pfm: 43.920679688995385 sim_pfm: -191.33850873699413
episode: 520 training return: tensor(-187.9700, device='cuda:0')
episode: 521 training return: tensor(-187.8600, device='cuda:0')
episode: 522 training return: tensor(-197.1766, device='cuda:0')
episode: 523 training return: tensor(-189.5713, device='cuda:0')
epoch: 131 test_true_pfm: 49.57180015956955 sim_pfm: -188.86512066012946
episode: 524 training return: tensor(-190.1293, device='cuda:0')
episode: 525 training return: tensor(-189.3808, device='cuda:0')
episode: 526 training return: tensor(-185.2828, device='cuda:0')
episode: 527 training return: tensor(-188.9534, device='cuda:0')
epoch: 132 test_true_pfm: 43.03527128746183 sim_pfm: -187.85479857065948
episode: 528 training return: tensor(-197.4548, device='cuda:0')
episode: 529 training return: tensor(-177.4446, device='cuda:0')
episode: 530 training return: tensor(-198.9495, device='cuda:0')
episode: 531 training return: tensor(-186.2183, device='cuda:0')
epoch: 133 test_true_pfm: 44.299898725263844 sim_pfm: -185.99709885236808
episode: 532 training return: tensor(-190.1969, device='cuda:0')
episode: 533 training return: tensor(-189.3584, device='cuda:0')
episode: 534 training return: tensor(-198.3054, device='cuda:0')
episode: 535 training return: tensor(-182.3512, device='cuda:0')
epoch: 134 test_true_pfm: 45.196625157390294 sim_pfm: -189.60622614755994
episode: 536 training return: tensor(-180.6331, device='cuda:0')
episode: 537 training return: tensor(-190.2751, device='cuda:0')
episode: 538 training return: tensor(-199.4609, device='cuda:0')
episode: 539 training return: tensor(-195.6955, device='cuda:0')
epoch: 135 test_true_pfm: 45.126504551082995 sim_pfm: -194.73899622763275
episode: 540 training return: tensor(-175.7335, device='cuda:0')
episode: 541 training return: tensor(-187.9733, device='cuda:0')
episode: 542 training return: tensor(-188.7788, device='cuda:0')
episode: 543 training return: tensor(-176.8672, device='cuda:0')
epoch: 136 test_true_pfm: 45.39477628655505 sim_pfm: -187.4499199432088
episode: 544 training return: tensor(-176.0301, device='cuda:0')
episode: 545 training return: tensor(-186.9463, device='cuda:0')
episode: 546 training return: tensor(-188.2373, device='cuda:0')
episode: 547 training return: tensor(-187.8294, device='cuda:0')
epoch: 137 test_true_pfm: 45.40743378656536 sim_pfm: -185.56253677770147
episode: 548 training return: tensor(-186.9461, device='cuda:0')
episode: 549 training return: tensor(-187.2046, device='cuda:0')
episode: 550 training return: tensor(-179.5642, device='cuda:0')
episode: 551 training return: tensor(-187.2112, device='cuda:0')
epoch: 138 test_true_pfm: 46.03814698432541 sim_pfm: -187.43766311421058
episode: 552 training return: tensor(-184.7430, device='cuda:0')
episode: 553 training return: tensor(-187.9852, device='cuda:0')
episode: 554 training return: tensor(-199.3482, device='cuda:0')
episode: 555 training return: tensor(-188.9081, device='cuda:0')
epoch: 139 test_true_pfm: 45.5478561327845 sim_pfm: -189.76547638202902
episode: 556 training return: tensor(-199.1125, device='cuda:0')
episode: 557 training return: tensor(-185.7091, device='cuda:0')
episode: 558 training return: tensor(-188.1767, device='cuda:0')
episode: 559 training return: tensor(-202.2267, device='cuda:0')
epoch: 140 test_true_pfm: 42.865247172585576 sim_pfm: -192.99336554519832
episode: 560 training return: tensor(-200.2782, device='cuda:0')
episode: 561 training return: tensor(-194.0507, device='cuda:0')
episode: 562 training return: tensor(-185.2160, device='cuda:0')
episode: 563 training return: tensor(-188.9095, device='cuda:0')
epoch: 141 test_true_pfm: 44.019863627732455 sim_pfm: -194.42921981069375
episode: 564 training return: tensor(-185.1190, device='cuda:0')
episode: 565 training return: tensor(-198.4177, device='cuda:0')
episode: 566 training return: tensor(-200.1664, device='cuda:0')
episode: 567 training return: tensor(-200.2412, device='cuda:0')
epoch: 142 test_true_pfm: 45.01619542908595 sim_pfm: -184.14188898227877
episode: 568 training return: tensor(-193.6151, device='cuda:0')
episode: 569 training return: tensor(-198.2522, device='cuda:0')
episode: 570 training return: tensor(-188.5329, device='cuda:0')
episode: 571 training return: tensor(-189.1312, device='cuda:0')
epoch: 143 test_true_pfm: 47.113851136399795 sim_pfm: -191.49649135267245
episode: 572 training return: tensor(-164.6759, device='cuda:0')
episode: 573 training return: tensor(-197.8853, device='cuda:0')
episode: 574 training return: tensor(-176.8506, device='cuda:0')
episode: 575 training return: tensor(-189.9523, device='cuda:0')
epoch: 144 test_true_pfm: 45.23382112952679 sim_pfm: -187.8853942997579
episode: 576 training return: tensor(-198.7266, device='cuda:0')
episode: 577 training return: tensor(-189.0330, device='cuda:0')
episode: 578 training return: tensor(-190.0917, device='cuda:0')
episode: 579 training return: tensor(-188.6345, device='cuda:0')
epoch: 145 test_true_pfm: 47.035621694769496 sim_pfm: -183.9377806484059
episode: 580 training return: tensor(-177.0632, device='cuda:0')
episode: 581 training return: tensor(-175.5586, device='cuda:0')
episode: 582 training return: tensor(-197.7404, device='cuda:0')
episode: 583 training return: tensor(-177.0665, device='cuda:0')
epoch: 146 test_true_pfm: 48.04669637177285 sim_pfm: -187.71856865821172
episode: 584 training return: tensor(-191.2107, device='cuda:0')
episode: 585 training return: tensor(-188.9209, device='cuda:0')
episode: 586 training return: tensor(-197.6877, device='cuda:0')
episode: 587 training return: tensor(-190.3173, device='cuda:0')
epoch: 147 test_true_pfm: 43.0777996678884 sim_pfm: -186.62181361836846
episode: 588 training return: tensor(-187.8888, device='cuda:0')
episode: 589 training return: tensor(-194.7499, device='cuda:0')
episode: 590 training return: tensor(-181.8157, device='cuda:0')
episode: 591 training return: tensor(-187.8412, device='cuda:0')
epoch: 148 test_true_pfm: 47.59711122746001 sim_pfm: -181.66844407405006
episode: 592 training return: tensor(-178.0725, device='cuda:0')
episode: 593 training return: tensor(-178.6297, device='cuda:0')
episode: 594 training return: tensor(-199.7936, device='cuda:0')
episode: 595 training return: tensor(-179.7534, device='cuda:0')
epoch: 149 test_true_pfm: 43.88389176344116 sim_pfm: -189.41113261729478
episode: 596 training return: tensor(-178.3357, device='cuda:0')
episode: 597 training return: tensor(-197.7182, device='cuda:0')
episode: 598 training return: tensor(-184.2628, device='cuda:0')
episode: 599 training return: tensor(-189.5014, device='cuda:0')
epoch: 150 test_true_pfm: 49.023633648302656 sim_pfm: -188.3225218589243
