['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'brac', '--traj', 'medium', '--seed', '0', '--data', '100000']
1062.6214634397943
episode: 0 training return: tensor(-1.5107e+21, device='cuda:0')
episode: 1 training return: tensor(-1.9571e+21, device='cuda:0')
episode: 2 training return: tensor(-8.3105e+20, device='cuda:0')
episode: 3 training return: tensor(-1.7557e+21, device='cuda:0')
epoch: 1 test_true_pfm: 91.47622794858971
episode: 4 training return: tensor(-3.3234e+18, device='cuda:0')
episode: 5 training return: tensor(-1.8525e+21, device='cuda:0')
episode: 6 training return: tensor(-1.8793e+21, device='cuda:0')
episode: 7 training return: tensor(-2.0598e+21, device='cuda:0')
epoch: 2 test_true_pfm: 91.64754249304711
episode: 8 training return: tensor(-4.0515e+21, device='cuda:0')
episode: 9 training return: tensor(-1.2538e+11, device='cuda:0')
episode: 10 training return: tensor(-1.2932e+09, device='cuda:0')
episode: 11 training return: tensor(-1.4683e+09, device='cuda:0')
epoch: 3 test_true_pfm: 91.78255929201849
episode: 12 training return: tensor(-1.7807e+09, device='cuda:0')
episode: 13 training return: tensor(-1.8168e+09, device='cuda:0')
episode: 14 training return: tensor(-1.3950e+09, device='cuda:0')
episode: 15 training return: tensor(-1.8314e+09, device='cuda:0')
epoch: 4 test_true_pfm: 90.71995248128468
episode: 16 training return: tensor(-1.9396e+09, device='cuda:0')
episode: 17 training return: tensor(-1.4095e+09, device='cuda:0')
episode: 18 training return: tensor(-1.4508e+09, device='cuda:0')
episode: 19 training return: tensor(-1.4745e+09, device='cuda:0')
epoch: 5 test_true_pfm: 91.18136421107204
episode: 20 training return: tensor(-1.6049e+09, device='cuda:0')
episode: 21 training return: tensor(-1.4033e+09, device='cuda:0')
episode: 22 training return: tensor(-1.5390e+09, device='cuda:0')
episode: 23 training return: tensor(-1.5820e+09, device='cuda:0')
epoch: 6 test_true_pfm: 90.27318751225205
episode: 24 training return: tensor(-1.7940e+09, device='cuda:0')
episode: 25 training return: tensor(-1.5018e+09, device='cuda:0')
episode: 26 training return: tensor(-1.8317e+09, device='cuda:0')
episode: 27 training return: tensor(-1.4905e+09, device='cuda:0')
epoch: 7 test_true_pfm: 90.59895533150667
episode: 28 training return: tensor(-1.5534e+09, device='cuda:0')
episode: 29 training return: tensor(-1.8914e+09, device='cuda:0')
episode: 30 training return: tensor(-1.8783e+09, device='cuda:0')
episode: 31 training return: tensor(-1.9859e+09, device='cuda:0')
epoch: 8 test_true_pfm: 92.39661299934743
episode: 32 training return: tensor(-1.5448e+09, device='cuda:0')
episode: 33 training return: tensor(-1.8235e+09, device='cuda:0')
episode: 34 training return: tensor(-1.4178e+09, device='cuda:0')
episode: 35 training return: tensor(-1.5444e+09, device='cuda:0')
epoch: 9 test_true_pfm: 90.27197249636895
episode: 36 training return: tensor(-1.6259e+09, device='cuda:0')
episode: 37 training return: tensor(-1.4823e+09, device='cuda:0')
episode: 38 training return: tensor(-1.8611e+09, device='cuda:0')
episode: 39 training return: tensor(-1.2939e+09, device='cuda:0')
epoch: 10 test_true_pfm: 91.35238426723136
episode: 40 training return: tensor(-1.5867e+09, device='cuda:0')
episode: 41 training return: tensor(-1.2200e+09, device='cuda:0')
episode: 42 training return: tensor(-1.6154e+09, device='cuda:0')
episode: 43 training return: tensor(-1.8584e+09, device='cuda:0')
epoch: 11 test_true_pfm: 91.46590607625234
episode: 44 training return: tensor(-1.7229e+09, device='cuda:0')
episode: 45 training return: tensor(-1.8079e+09, device='cuda:0')
episode: 46 training return: tensor(-1.8440e+09, device='cuda:0')
episode: 47 training return: tensor(-1.6410e+09, device='cuda:0')
epoch: 12 test_true_pfm: 90.28136982774748
episode: 48 training return: tensor(-1.6627e+09, device='cuda:0')
episode: 49 training return: tensor(-1.3998e+09, device='cuda:0')
episode: 50 training return: tensor(-1.9675e+09, device='cuda:0')
episode: 51 training return: tensor(-1.5249e+09, device='cuda:0')
epoch: 13 test_true_pfm: 92.5233986449435
episode: 52 training return: tensor(-1.6223e+09, device='cuda:0')
episode: 53 training return: tensor(-1.6097e+09, device='cuda:0')
episode: 54 training return: tensor(-1.6179e+09, device='cuda:0')
episode: 55 training return: tensor(-1.4917e+09, device='cuda:0')
epoch: 14 test_true_pfm: 92.37218461873748
episode: 56 training return: tensor(-1.6471e+09, device='cuda:0')
episode: 57 training return: tensor(-1.8506e+09, device='cuda:0')
episode: 58 training return: tensor(-1.7857e+09, device='cuda:0')
episode: 59 training return: tensor(-1.5301e+09, device='cuda:0')
epoch: 15 test_true_pfm: 92.23374723345698
episode: 60 training return: tensor(-1.6904e+09, device='cuda:0')
episode: 61 training return: tensor(-1.4760e+09, device='cuda:0')
episode: 62 training return: tensor(-1.5135e+09, device='cuda:0')
episode: 63 training return: tensor(-1.6224e+09, device='cuda:0')
epoch: 16 test_true_pfm: 90.02518479015157
episode: 64 training return: tensor(-1.8849e+09, device='cuda:0')
episode: 65 training return: tensor(-1.6489e+09, device='cuda:0')
episode: 66 training return: tensor(-1.6738e+09, device='cuda:0')
episode: 67 training return: tensor(-2.1469e+09, device='cuda:0')
epoch: 17 test_true_pfm: 91.89231586796029
episode: 68 training return: tensor(-2.2049e+09, device='cuda:0')
episode: 69 training return: tensor(-1.7194e+09, device='cuda:0')
episode: 70 training return: tensor(-2.1791e+09, device='cuda:0')
episode: 71 training return: tensor(-2.1771e+09, device='cuda:0')
epoch: 18 test_true_pfm: 309.90262765420624
episode: 72 training return: tensor(-1.8123e+09, device='cuda:0')
episode: 73 training return: tensor(-2.2538e+09, device='cuda:0')
episode: 74 training return: tensor(-1.6616e+09, device='cuda:0')
episode: 75 training return: tensor(-1.4984e+13, device='cuda:0')
epoch: 19 test_true_pfm: 96.76842084501584
episode: 76 training return: tensor(-2.2204e+09, device='cuda:0')
episode: 77 training return: tensor(-2.9827e+11, device='cuda:0')
episode: 78 training return: tensor(-1.6721e+11, device='cuda:0')
episode: 79 training return: tensor(-8.1351e+10, device='cuda:0')
epoch: 20 test_true_pfm: 355.7185982253504
episode: 80 training return: tensor(-1.5290e+11, device='cuda:0')
episode: 81 training return: tensor(-1.3571e+11, device='cuda:0')
episode: 82 training return: tensor(-6.0602e+10, device='cuda:0')
episode: 83 training return: tensor(-1.6570e+11, device='cuda:0')
epoch: 21 test_true_pfm: 387.15306838696193
episode: 84 training return: tensor(-1.2143e+11, device='cuda:0')
episode: 85 training return: tensor(-9.3034e+10, device='cuda:0')
episode: 86 training return: tensor(-5.7589e+10, device='cuda:0')
episode: 87 training return: tensor(-1.3589e+11, device='cuda:0')
epoch: 22 test_true_pfm: 149.6248165731835
episode: 88 training return: tensor(-1.3788e+11, device='cuda:0')
episode: 89 training return: tensor(-8.7440e+10, device='cuda:0')
episode: 90 training return: tensor(-1.6211e+11, device='cuda:0')
episode: 91 training return: tensor(-1.7972e+11, device='cuda:0')
epoch: 23 test_true_pfm: 265.4698181569084
episode: 92 training return: tensor(-1.1268e+11, device='cuda:0')
episode: 93 training return: tensor(-8.1994e+10, device='cuda:0')
episode: 94 training return: tensor(-1.0922e+11, device='cuda:0')
episode: 95 training return: tensor(-1.5705e+11, device='cuda:0')
epoch: 24 test_true_pfm: 196.4136337397711
episode: 96 training return: tensor(-1.4123e+11, device='cuda:0')
episode: 97 training return: tensor(-1.0399e+11, device='cuda:0')
episode: 98 training return: tensor(-1.5977e+11, device='cuda:0')
episode: 99 training return: tensor(-1.0144e+12, device='cuda:0')
epoch: 25 test_true_pfm: 203.31047266236087
episode: 100 training return: tensor(-1.7935e+11, device='cuda:0')
episode: 101 training return: tensor(-1.6263e+11, device='cuda:0')
episode: 102 training return: tensor(-6.6022e+10, device='cuda:0')
episode: 103 training return: tensor(-1.2554e+10, device='cuda:0')
epoch: 26 test_true_pfm: 950.4065084808062
episode: 104 training return: tensor(-1.6776e+10, device='cuda:0')
episode: 105 training return: tensor(-4.5426e+11, device='cuda:0')
episode: 106 training return: tensor(-1.8425e+11, device='cuda:0')
episode: 107 training return: tensor(-4.0892e+12, device='cuda:0')
epoch: 27 test_true_pfm: 468.47927110119645
episode: 108 training return: tensor(-1.9961e+10, device='cuda:0')
episode: 109 training return: tensor(-9.3413e+11, device='cuda:0')
episode: 110 training return: tensor(-4.1585e+11, device='cuda:0')
episode: 111 training return: tensor(-8.5091e+10, device='cuda:0')
epoch: 28 test_true_pfm: 231.95985432067553
episode: 112 training return: tensor(-1.6553e+13, device='cuda:0')
episode: 113 training return: tensor(-1.4447e+13, device='cuda:0')
episode: 114 training return: tensor(-2.3066e+10, device='cuda:0')
episode: 115 training return: tensor(-4.5542e+12, device='cuda:0')
epoch: 29 test_true_pfm: 474.56610478281283
episode: 116 training return: tensor(-5.9841e+09, device='cuda:0')
episode: 117 training return: tensor(-5.4328e+11, device='cuda:0')
episode: 118 training return: tensor(-5.2690e+18, device='cuda:0')
episode: 119 training return: tensor(-2.1841e+10, device='cuda:0')
epoch: 30 test_true_pfm: 366.5628791919368
episode: 120 training return: tensor(-2.0789e+09, device='cuda:0')
episode: 121 training return: tensor(-4.5921e+09, device='cuda:0')
episode: 122 training return: tensor(-1.2727e+09, device='cuda:0')
episode: 123 training return: tensor(-7.2078e+08, device='cuda:0')
epoch: 31 test_true_pfm: 240.8435612306333
episode: 124 training return: tensor(-7.3599e+09, device='cuda:0')
episode: 125 training return: tensor(-2.6215e+09, device='cuda:0')
episode: 126 training return: tensor(-4.1716e+09, device='cuda:0')
episode: 127 training return: tensor(-4.0871e+10, device='cuda:0')
epoch: 32 test_true_pfm: 236.176330948731
episode: 128 training return: tensor(-3.6341e+09, device='cuda:0')
episode: 129 training return: tensor(-98399808., device='cuda:0')
episode: 130 training return: tensor(-3.1758e+08, device='cuda:0')
episode: 131 training return: tensor(-1.8364e+09, device='cuda:0')
epoch: 33 test_true_pfm: 267.03597677110497
episode: 132 training return: tensor(-1.9003e+08, device='cuda:0')
episode: 133 training return: tensor(-1.4210e+09, device='cuda:0')
episode: 134 training return: tensor(-1.2405e+09, device='cuda:0')
episode: 135 training return: tensor(-71440456., device='cuda:0')
epoch: 34 test_true_pfm: 225.27118414948518
episode: 136 training return: tensor(-3.1637e+08, device='cuda:0')
episode: 137 training return: tensor(-9.8263e+10, device='cuda:0')
episode: 138 training return: tensor(-7.0953e+09, device='cuda:0')
episode: 139 training return: tensor(-3.0671e+08, device='cuda:0')
epoch: 35 test_true_pfm: 226.1330840528437
episode: 140 training return: tensor(-52984404., device='cuda:0')
episode: 141 training return: tensor(-56777940., device='cuda:0')
episode: 142 training return: tensor(-7.9590e+08, device='cuda:0')
episode: 143 training return: tensor(-5.3514e+08, device='cuda:0')
epoch: 36 test_true_pfm: 218.83024951747666
episode: 144 training return: tensor(-4.8713e+08, device='cuda:0')
episode: 145 training return: tensor(-37858336., device='cuda:0')
episode: 146 training return: tensor(-39796460., device='cuda:0')
episode: 147 training return: tensor(-43014796., device='cuda:0')
epoch: 37 test_true_pfm: 211.38573405586263
episode: 148 training return: tensor(-66405244., device='cuda:0')
episode: 149 training return: tensor(-68367312., device='cuda:0')
episode: 150 training return: tensor(-17823074., device='cuda:0')
episode: 151 training return: tensor(-66917328., device='cuda:0')
epoch: 38 test_true_pfm: 209.29548926852064
episode: 152 training return: tensor(-57883192., device='cuda:0')
episode: 153 training return: tensor(-1.3701e+08, device='cuda:0')
episode: 154 training return: tensor(-49527024., device='cuda:0')
episode: 155 training return: tensor(-47185364., device='cuda:0')
epoch: 39 test_true_pfm: 215.75043311617523
episode: 156 training return: tensor(-87980088., device='cuda:0')
episode: 157 training return: tensor(-57023912., device='cuda:0')
episode: 158 training return: tensor(-61420916., device='cuda:0')
episode: 159 training return: tensor(-20209686., device='cuda:0')
epoch: 40 test_true_pfm: 213.27968867350896
episode: 160 training return: tensor(-73397336., device='cuda:0')
episode: 161 training return: tensor(-52007632., device='cuda:0')
episode: 162 training return: tensor(-28233016., device='cuda:0')
episode: 163 training return: tensor(-61669824., device='cuda:0')
epoch: 41 test_true_pfm: 206.797806912073
episode: 164 training return: tensor(-39324144., device='cuda:0')
episode: 165 training return: tensor(-16684549., device='cuda:0')
episode: 166 training return: tensor(-33631416., device='cuda:0')
episode: 167 training return: tensor(-35799596., device='cuda:0')
epoch: 42 test_true_pfm: 214.90602293736686
episode: 168 training return: tensor(-42418788., device='cuda:0')
episode: 169 training return: tensor(-32928766., device='cuda:0')
episode: 170 training return: tensor(-33745352., device='cuda:0')
episode: 171 training return: tensor(-96178640., device='cuda:0')
epoch: 43 test_true_pfm: 219.23919743371133
episode: 172 training return: tensor(-27013136., device='cuda:0')
episode: 173 training return: tensor(-37285336., device='cuda:0')
episode: 174 training return: tensor(-26305520., device='cuda:0')
episode: 175 training return: tensor(-22208546., device='cuda:0')
epoch: 44 test_true_pfm: 223.27752841999927
episode: 176 training return: tensor(-60124876., device='cuda:0')
episode: 177 training return: tensor(-39070880., device='cuda:0')
episode: 178 training return: tensor(-19043424., device='cuda:0')
episode: 179 training return: tensor(-27507198., device='cuda:0')
epoch: 45 test_true_pfm: 212.22216516680692
episode: 180 training return: tensor(-20361514., device='cuda:0')
episode: 181 training return: tensor(-23307542., device='cuda:0')
episode: 182 training return: tensor(-35188376., device='cuda:0')
episode: 183 training return: tensor(-39368628., device='cuda:0')
epoch: 46 test_true_pfm: 217.60272158148302
episode: 184 training return: tensor(-16490504., device='cuda:0')
episode: 185 training return: tensor(-24740306., device='cuda:0')
episode: 186 training return: tensor(-15531338., device='cuda:0')
episode: 187 training return: tensor(-24022548., device='cuda:0')
epoch: 47 test_true_pfm: 201.36922473298492
episode: 188 training return: tensor(-45323580., device='cuda:0')
episode: 189 training return: tensor(-15770691., device='cuda:0')
episode: 190 training return: tensor(-15854473., device='cuda:0')
episode: 191 training return: tensor(-14514618., device='cuda:0')
epoch: 48 test_true_pfm: 199.04140385408024
episode: 192 training return: tensor(-16784842., device='cuda:0')
episode: 193 training return: tensor(-19386156., device='cuda:0')
episode: 194 training return: tensor(-16523696., device='cuda:0')
episode: 195 training return: tensor(-13736535., device='cuda:0')
epoch: 49 test_true_pfm: 195.22341048858206
episode: 196 training return: tensor(-14369268., device='cuda:0')
episode: 197 training return: tensor(-11700382., device='cuda:0')
episode: 198 training return: tensor(-12969591., device='cuda:0')
episode: 199 training return: tensor(-12745705., device='cuda:0')
epoch: 50 test_true_pfm: 187.60630739686238
episode: 200 training return: tensor(-14166593., device='cuda:0')
episode: 201 training return: tensor(-12971531., device='cuda:0')
episode: 202 training return: tensor(-12515996., device='cuda:0')
episode: 203 training return: tensor(-11313500., device='cuda:0')
epoch: 51 test_true_pfm: 219.5984938647458
episode: 204 training return: tensor(-12338104., device='cuda:0')
episode: 205 training return: tensor(-11752960., device='cuda:0')
episode: 206 training return: tensor(-11032452., device='cuda:0')
episode: 207 training return: tensor(-12074171., device='cuda:0')
epoch: 52 test_true_pfm: 193.54636326621562
episode: 208 training return: tensor(-13305154., device='cuda:0')
episode: 209 training return: tensor(-12898972., device='cuda:0')
episode: 210 training return: tensor(-11106431., device='cuda:0')
episode: 211 training return: tensor(-11581930., device='cuda:0')
epoch: 53 test_true_pfm: 355.3373479529753
episode: 212 training return: tensor(-9814420., device='cuda:0')
episode: 213 training return: tensor(-10743887., device='cuda:0')
episode: 214 training return: tensor(-5049694., device='cuda:0')
episode: 215 training return: tensor(-3529004.2500, device='cuda:0')
epoch: 54 test_true_pfm: 158.3497586548692
episode: 216 training return: tensor(-4087803.7500, device='cuda:0')
episode: 217 training return: tensor(-4352094., device='cuda:0')
episode: 218 training return: tensor(-2898241.2500, device='cuda:0')
episode: 219 training return: tensor(-3515733.2500, device='cuda:0')
epoch: 55 test_true_pfm: 77.89238361542726
episode: 220 training return: tensor(-4366570., device='cuda:0')
episode: 221 training return: tensor(-2351425., device='cuda:0')
episode: 222 training return: tensor(-2814066.7500, device='cuda:0')
episode: 223 training return: tensor(-2673915.5000, device='cuda:0')
epoch: 56 test_true_pfm: 79.53484981788561
episode: 224 training return: tensor(-5119776.5000, device='cuda:0')
episode: 225 training return: tensor(-2965751.7500, device='cuda:0')
episode: 226 training return: tensor(-2550271., device='cuda:0')
episode: 227 training return: tensor(-2867701.7500, device='cuda:0')
epoch: 57 test_true_pfm: 167.15867677276538
episode: 228 training return: tensor(-7.6585e+09, device='cuda:0')
episode: 229 training return: tensor(-2325696.7500, device='cuda:0')
episode: 230 training return: tensor(-3394700.7500, device='cuda:0')
episode: 231 training return: tensor(-7.8118e+09, device='cuda:0')
epoch: 58 test_true_pfm: 80.02959763560575
episode: 232 training return: tensor(-2455449., device='cuda:0')
episode: 233 training return: tensor(-4.0091e+09, device='cuda:0')
episode: 234 training return: tensor(-19682174., device='cuda:0')
episode: 235 training return: tensor(-4.7877e+09, device='cuda:0')
epoch: 59 test_true_pfm: 86.02073922634128
episode: 236 training return: tensor(-3226700.2500, device='cuda:0')
episode: 237 training return: tensor(-2393736., device='cuda:0')
episode: 238 training return: tensor(-1958208.7500, device='cuda:0')
episode: 239 training return: tensor(-4711438., device='cuda:0')
epoch: 60 test_true_pfm: 88.98776711653822
episode: 240 training return: tensor(-1820157.1250, device='cuda:0')
episode: 241 training return: tensor(-1937023.7500, device='cuda:0')
episode: 242 training return: tensor(-1.1001e+10, device='cuda:0')
episode: 243 training return: tensor(-2981739., device='cuda:0')
epoch: 61 test_true_pfm: 82.12012002559408
episode: 244 training return: tensor(-3.7158e+09, device='cuda:0')
episode: 245 training return: tensor(-6.5169e+09, device='cuda:0')
episode: 246 training return: tensor(-1851734.6250, device='cuda:0')
episode: 247 training return: tensor(-2093838., device='cuda:0')
epoch: 62 test_true_pfm: 91.11891241576505
episode: 248 training return: tensor(-8.1272e+09, device='cuda:0')
episode: 249 training return: tensor(-3.4414e+09, device='cuda:0')
episode: 250 training return: tensor(-6.7440e+09, device='cuda:0')
episode: 251 training return: tensor(-3888847.7500, device='cuda:0')
epoch: 63 test_true_pfm: 81.60824133294902
episode: 252 training return: tensor(-2313435.7500, device='cuda:0')
episode: 253 training return: tensor(-2214353.2500, device='cuda:0')
episode: 254 training return: tensor(-38600564., device='cuda:0')
episode: 255 training return: tensor(-7.7703e+09, device='cuda:0')
epoch: 64 test_true_pfm: 78.46238883862448
episode: 256 training return: tensor(-3.1678e+09, device='cuda:0')
episode: 257 training return: tensor(-4.1685e+09, device='cuda:0')
episode: 258 training return: tensor(-4.2115e+09, device='cuda:0')
episode: 259 training return: tensor(-2490775.2500, device='cuda:0')
epoch: 65 test_true_pfm: 78.99803654424569
episode: 260 training return: tensor(-1591049., device='cuda:0')
episode: 261 training return: tensor(-9.1405e+08, device='cuda:0')
episode: 262 training return: tensor(-2030127.1250, device='cuda:0')
episode: 263 training return: tensor(-8.5989e+08, device='cuda:0')
epoch: 66 test_true_pfm: 70.75988458822734
episode: 264 training return: tensor(-1.4533e+09, device='cuda:0')
episode: 265 training return: tensor(-3.0484e+09, device='cuda:0')
episode: 266 training return: tensor(-1.4999e+10, device='cuda:0')
episode: 267 training return: tensor(-9.7320e+08, device='cuda:0')
epoch: 67 test_true_pfm: 75.09740357245006
episode: 268 training return: tensor(-2.2089e+10, device='cuda:0')
episode: 269 training return: tensor(-8.8567e+09, device='cuda:0')
episode: 270 training return: tensor(-3.0129e+09, device='cuda:0')
episode: 271 training return: tensor(-3.2455e+09, device='cuda:0')
epoch: 68 test_true_pfm: 75.81845159325034
episode: 272 training return: tensor(-4.6404e+09, device='cuda:0')
episode: 273 training return: tensor(-1.2061e+10, device='cuda:0')
episode: 274 training return: tensor(-5.4654e+09, device='cuda:0')
episode: 275 training return: tensor(-1.1445e+10, device='cuda:0')
epoch: 69 test_true_pfm: 78.14854565565845
episode: 276 training return: tensor(-5.5991e+09, device='cuda:0')
episode: 277 training return: tensor(-1.5910e+10, device='cuda:0')
episode: 278 training return: tensor(-1.6794e+10, device='cuda:0')
episode: 279 training return: tensor(-3.6771e+10, device='cuda:0')
epoch: 70 test_true_pfm: 96.26054609750548
episode: 280 training return: tensor(-4.2292e+09, device='cuda:0')
episode: 281 training return: tensor(-4048843., device='cuda:0')
episode: 282 training return: tensor(-6730335.5000, device='cuda:0')
episode: 283 training return: tensor(-1.6405e+10, device='cuda:0')
epoch: 71 test_true_pfm: 178.7961210850892
episode: 284 training return: tensor(-11945258., device='cuda:0')
episode: 285 training return: tensor(-6342569., device='cuda:0')
episode: 286 training return: tensor(-14205013., device='cuda:0')
episode: 287 training return: tensor(-11260770., device='cuda:0')
epoch: 72 test_true_pfm: 182.61247735027072
episode: 288 training return: tensor(-1.5212e+08, device='cuda:0')
episode: 289 training return: tensor(-1.7097e+09, device='cuda:0')
episode: 290 training return: tensor(-20557478., device='cuda:0')
episode: 291 training return: tensor(-81002696., device='cuda:0')
epoch: 73 test_true_pfm: 200.3784543637696
episode: 292 training return: tensor(-19744388., device='cuda:0')
episode: 293 training return: tensor(-53601812., device='cuda:0')
episode: 294 training return: tensor(-14658692., device='cuda:0')
episode: 295 training return: tensor(-17059498., device='cuda:0')
epoch: 74 test_true_pfm: 214.64322917753623
episode: 296 training return: tensor(-1.1479e+08, device='cuda:0')
episode: 297 training return: tensor(-4.0610e+09, device='cuda:0')
episode: 298 training return: tensor(-28266932., device='cuda:0')
episode: 299 training return: tensor(-83490088., device='cuda:0')
epoch: 75 test_true_pfm: 220.28538756052134
episode: 300 training return: tensor(-2.5209e+09, device='cuda:0')
episode: 301 training return: tensor(-41718520., device='cuda:0')
episode: 302 training return: tensor(-13433793., device='cuda:0')
episode: 303 training return: tensor(-12900882., device='cuda:0')
epoch: 76 test_true_pfm: 223.59523216975853
episode: 304 training return: tensor(-2.9151e+09, device='cuda:0')
episode: 305 training return: tensor(-1.0206e+08, device='cuda:0')
episode: 306 training return: tensor(-44282136., device='cuda:0')
episode: 307 training return: tensor(-2.1416e+08, device='cuda:0')
epoch: 77 test_true_pfm: 222.9983349932951
episode: 308 training return: tensor(-27827802., device='cuda:0')
episode: 309 training return: tensor(-2.2523e+08, device='cuda:0')
episode: 310 training return: tensor(-2.1261e+09, device='cuda:0')
episode: 311 training return: tensor(-3.4090e+08, device='cuda:0')
epoch: 78 test_true_pfm: 221.99752777329684
episode: 312 training return: tensor(-73682776., device='cuda:0')
episode: 313 training return: tensor(-3.0278e+09, device='cuda:0')
episode: 314 training return: tensor(-13164992., device='cuda:0')
episode: 315 training return: tensor(-26135574., device='cuda:0')
epoch: 79 test_true_pfm: 212.5232162903375
episode: 316 training return: tensor(-1.2451e+08, device='cuda:0')
episode: 317 training return: tensor(-5.9353e+08, device='cuda:0')
episode: 318 training return: tensor(-53739720., device='cuda:0')
episode: 319 training return: tensor(-1.9537e+08, device='cuda:0')
epoch: 80 test_true_pfm: 218.78985767779807
episode: 320 training return: tensor(-1.4897e+08, device='cuda:0')
episode: 321 training return: tensor(-7.5352e+08, device='cuda:0')
episode: 322 training return: tensor(-34338820., device='cuda:0')
episode: 323 training return: tensor(-20439252., device='cuda:0')
epoch: 81 test_true_pfm: 215.44940253409928
episode: 324 training return: tensor(-2.6381e+09, device='cuda:0')
episode: 325 training return: tensor(-56341984., device='cuda:0')
episode: 326 training return: tensor(-77949016., device='cuda:0')
episode: 327 training return: tensor(-2.2375e+09, device='cuda:0')
epoch: 82 test_true_pfm: 201.46819869249248
episode: 328 training return: tensor(-39542024., device='cuda:0')
episode: 329 training return: tensor(-2.7563e+08, device='cuda:0')
episode: 330 training return: tensor(-1.5859e+08, device='cuda:0')
episode: 331 training return: tensor(-2.8174e+08, device='cuda:0')
epoch: 83 test_true_pfm: 216.65146316622312
episode: 332 training return: tensor(-1.1641e+08, device='cuda:0')
episode: 333 training return: tensor(-1.1435e+08, device='cuda:0')
episode: 334 training return: tensor(-2.0439e+09, device='cuda:0')
episode: 335 training return: tensor(-1.4831e+08, device='cuda:0')
epoch: 84 test_true_pfm: 218.48865750643782
episode: 336 training return: tensor(-1.3625e+08, device='cuda:0')
episode: 337 training return: tensor(-4.1672e+09, device='cuda:0')
episode: 338 training return: tensor(-1.9487e+09, device='cuda:0')
episode: 339 training return: tensor(-2.0178e+08, device='cuda:0')
epoch: 85 test_true_pfm: 216.95508140697052
episode: 340 training return: tensor(-5.8954e+09, device='cuda:0')
episode: 341 training return: tensor(-3.0177e+08, device='cuda:0')
episode: 342 training return: tensor(-3.3818e+09, device='cuda:0')
episode: 343 training return: tensor(-1.0999e+08, device='cuda:0')
epoch: 86 test_true_pfm: 215.55105813322425
episode: 344 training return: tensor(-1.1361e+08, device='cuda:0')
episode: 345 training return: tensor(-1.5032e+08, device='cuda:0')
episode: 346 training return: tensor(-75466608., device='cuda:0')
episode: 347 training return: tensor(-34226976., device='cuda:0')
epoch: 87 test_true_pfm: 227.07179669806706
episode: 348 training return: tensor(-4.9931e+09, device='cuda:0')
episode: 349 training return: tensor(-1.4638e+09, device='cuda:0')
episode: 350 training return: tensor(-5.0226e+08, device='cuda:0')
episode: 351 training return: tensor(-67654160., device='cuda:0')
epoch: 88 test_true_pfm: 225.13825162366803
episode: 352 training return: tensor(-7.8783e+08, device='cuda:0')
episode: 353 training return: tensor(-3.3230e+09, device='cuda:0')
episode: 354 training return: tensor(-1.0041e+08, device='cuda:0')
episode: 355 training return: tensor(-8.3685e+09, device='cuda:0')
epoch: 89 test_true_pfm: 215.11826047276384
episode: 356 training return: tensor(-1.0828e+08, device='cuda:0')
episode: 357 training return: tensor(-1.4259e+09, device='cuda:0')
episode: 358 training return: tensor(-97379712., device='cuda:0')
episode: 359 training return: tensor(-1.2306e+09, device='cuda:0')
epoch: 90 test_true_pfm: 221.73470686579648
episode: 360 training return: tensor(-60904888., device='cuda:0')
episode: 361 training return: tensor(-1.1067e+08, device='cuda:0')
episode: 362 training return: tensor(-1.4238e+08, device='cuda:0')
episode: 363 training return: tensor(-6.7782e+09, device='cuda:0')
epoch: 91 test_true_pfm: 219.9412585146704
episode: 364 training return: tensor(-1.9386e+08, device='cuda:0')
episode: 365 training return: tensor(-1.5992e+09, device='cuda:0')
episode: 366 training return: tensor(-3.2435e+08, device='cuda:0')
episode: 367 training return: tensor(-4.8200e+09, device='cuda:0')
epoch: 92 test_true_pfm: 219.28893065835203
episode: 368 training return: tensor(-1.2782e+09, device='cuda:0')
episode: 369 training return: tensor(-1.0493e+10, device='cuda:0')
episode: 370 training return: tensor(-4.9844e+09, device='cuda:0')
episode: 371 training return: tensor(-6.3521e+09, device='cuda:0')
epoch: 93 test_true_pfm: 217.8920652365252
episode: 372 training return: tensor(-4.9351e+08, device='cuda:0')
episode: 373 training return: tensor(-2.7781e+08, device='cuda:0')
episode: 374 training return: tensor(-51324952., device='cuda:0')
episode: 375 training return: tensor(-8.9830e+08, device='cuda:0')
epoch: 94 test_true_pfm: 215.7501092805784
episode: 376 training return: tensor(-4.0147e+08, device='cuda:0')
episode: 377 training return: tensor(-9.2298e+08, device='cuda:0')
episode: 378 training return: tensor(-4.0708e+08, device='cuda:0')
episode: 379 training return: tensor(-3.9463e+08, device='cuda:0')
epoch: 95 test_true_pfm: 219.09458576690508
episode: 380 training return: tensor(-4.0432e+08, device='cuda:0')
episode: 381 training return: tensor(-3.0960e+08, device='cuda:0')
episode: 382 training return: tensor(-3.4725e+08, device='cuda:0')
episode: 383 training return: tensor(-3.1559e+08, device='cuda:0')
epoch: 96 test_true_pfm: 213.71574621664809
episode: 384 training return: tensor(-3.4082e+08, device='cuda:0')
episode: 385 training return: tensor(-2.9441e+09, device='cuda:0')
episode: 386 training return: tensor(-8.7502e+08, device='cuda:0')
episode: 387 training return: tensor(-4.3792e+09, device='cuda:0')
epoch: 97 test_true_pfm: 219.62908020923692
episode: 388 training return: tensor(-2.0343e+08, device='cuda:0')
episode: 389 training return: tensor(-1.6289e+08, device='cuda:0')
episode: 390 training return: tensor(-6.3396e+09, device='cuda:0')
episode: 391 training return: tensor(-4.4456e+08, device='cuda:0')
epoch: 98 test_true_pfm: 213.42310037915055
episode: 392 training return: tensor(-4.6397e+08, device='cuda:0')
episode: 393 training return: tensor(-4.7248e+08, device='cuda:0')
episode: 394 training return: tensor(-4.3603e+08, device='cuda:0')
episode: 395 training return: tensor(-4.4700e+09, device='cuda:0')
epoch: 99 test_true_pfm: 218.72109173906702
episode: 396 training return: tensor(-7.1521e+08, device='cuda:0')
episode: 397 training return: tensor(-1.1673e+08, device='cuda:0')
episode: 398 training return: tensor(-1.0767e+10, device='cuda:0')
episode: 399 training return: tensor(-3.4561e+09, device='cuda:0')
epoch: 100 test_true_pfm: 217.89394521165204
episode: 400 training return: tensor(-2.1426e+09, device='cuda:0')
episode: 401 training return: tensor(-1.2957e+08, device='cuda:0')
episode: 402 training return: tensor(-2.3771e+08, device='cuda:0')
episode: 403 training return: tensor(-6.1297e+08, device='cuda:0')
epoch: 101 test_true_pfm: 215.7187320972332
episode: 404 training return: tensor(-5.5393e+08, device='cuda:0')
episode: 405 training return: tensor(-6.0221e+09, device='cuda:0')
episode: 406 training return: tensor(-2.7911e+08, device='cuda:0')
episode: 407 training return: tensor(-6.2839e+08, device='cuda:0')
epoch: 102 test_true_pfm: 218.77725795454396
episode: 408 training return: tensor(-1.1932e+09, device='cuda:0')
episode: 409 training return: tensor(-1.1217e+08, device='cuda:0')
episode: 410 training return: tensor(-2.7689e+08, device='cuda:0')
episode: 411 training return: tensor(-2.1291e+09, device='cuda:0')
epoch: 103 test_true_pfm: 215.57070177369528
episode: 412 training return: tensor(-4.3793e+08, device='cuda:0')
episode: 413 training return: tensor(-1.0509e+08, device='cuda:0')
episode: 414 training return: tensor(-7.1992e+08, device='cuda:0')
episode: 415 training return: tensor(-7.0433e+08, device='cuda:0')
epoch: 104 test_true_pfm: 218.32794645506783
episode: 416 training return: tensor(-69913064., device='cuda:0')
episode: 417 training return: tensor(-3.1242e+08, device='cuda:0')
episode: 418 training return: tensor(-2.6546e+08, device='cuda:0')
episode: 419 training return: tensor(-1.9246e+09, device='cuda:0')
epoch: 105 test_true_pfm: 215.99394887367362
episode: 420 training return: tensor(-3.6507e+09, device='cuda:0')
episode: 421 training return: tensor(-3.9034e+09, device='cuda:0')
episode: 422 training return: tensor(-3.1115e+09, device='cuda:0')
episode: 423 training return: tensor(-3.4654e+08, device='cuda:0')
epoch: 106 test_true_pfm: 218.4389340485975
episode: 424 training return: tensor(-4.2165e+08, device='cuda:0')
episode: 425 training return: tensor(-1.0473e+09, device='cuda:0')
episode: 426 training return: tensor(-3.9930e+08, device='cuda:0')
episode: 427 training return: tensor(-1.5690e+08, device='cuda:0')
epoch: 107 test_true_pfm: 217.33627551500908
episode: 428 training return: tensor(-2.7385e+08, device='cuda:0')
episode: 429 training return: tensor(-4.1860e+09, device='cuda:0')
episode: 430 training return: tensor(-1.1395e+09, device='cuda:0')
episode: 431 training return: tensor(-1.1071e+09, device='cuda:0')
epoch: 108 test_true_pfm: 222.10840107731508
episode: 432 training return: tensor(-1.8013e+09, device='cuda:0')
episode: 433 training return: tensor(-4.7631e+08, device='cuda:0')
episode: 434 training return: tensor(-3.5424e+08, device='cuda:0')
episode: 435 training return: tensor(-1.4608e+10, device='cuda:0')
epoch: 109 test_true_pfm: 218.73329766835704
episode: 436 training return: tensor(-4.6930e+08, device='cuda:0')
episode: 437 training return: tensor(-2.1770e+08, device='cuda:0')
episode: 438 training return: tensor(-5.8060e+09, device='cuda:0')
episode: 439 training return: tensor(-2.8191e+09, device='cuda:0')
epoch: 110 test_true_pfm: 228.79674056616014
episode: 440 training return: tensor(-1.0245e+08, device='cuda:0')
episode: 441 training return: tensor(-1.5649e+08, device='cuda:0')
episode: 442 training return: tensor(-1.5574e+09, device='cuda:0')
episode: 443 training return: tensor(-1.9651e+08, device='cuda:0')
epoch: 111 test_true_pfm: 218.10672040974296
episode: 444 training return: tensor(-4.3451e+08, device='cuda:0')
episode: 445 training return: tensor(-6.6068e+08, device='cuda:0')
episode: 446 training return: tensor(-4.6180e+08, device='cuda:0')
episode: 447 training return: tensor(-6.9445e+08, device='cuda:0')
epoch: 112 test_true_pfm: 227.3071398977119
episode: 448 training return: tensor(-4.1530e+08, device='cuda:0')
episode: 449 training return: tensor(-9.5890e+08, device='cuda:0')
episode: 450 training return: tensor(-5.0147e+08, device='cuda:0')
episode: 451 training return: tensor(-1.1269e+09, device='cuda:0')
epoch: 113 test_true_pfm: 211.22699466172722
episode: 452 training return: tensor(-1.5780e+09, device='cuda:0')
episode: 453 training return: tensor(-1.6390e+08, device='cuda:0')
episode: 454 training return: tensor(-1.4631e+09, device='cuda:0')
episode: 455 training return: tensor(-1.0772e+09, device='cuda:0')
epoch: 114 test_true_pfm: 221.19125970369
episode: 456 training return: tensor(-2.6859e+08, device='cuda:0')
episode: 457 training return: tensor(-1.1223e+09, device='cuda:0')
episode: 458 training return: tensor(-2.6068e+09, device='cuda:0')
episode: 459 training return: tensor(-2.8287e+09, device='cuda:0')
epoch: 115 test_true_pfm: 219.74805098027284
episode: 460 training return: tensor(-4.1542e+09, device='cuda:0')
episode: 461 training return: tensor(-3.8671e+08, device='cuda:0')
episode: 462 training return: tensor(-1.5616e+09, device='cuda:0')
episode: 463 training return: tensor(-1.8115e+08, device='cuda:0')
epoch: 116 test_true_pfm: 220.36033140722728
episode: 464 training return: tensor(-5.3205e+08, device='cuda:0')
episode: 465 training return: tensor(-2.1908e+08, device='cuda:0')
episode: 466 training return: tensor(-4.9554e+08, device='cuda:0')
episode: 467 training return: tensor(-5.1384e+08, device='cuda:0')
epoch: 117 test_true_pfm: 216.30983332229127
episode: 468 training return: tensor(-7.7803e+08, device='cuda:0')
episode: 469 training return: tensor(-5.4674e+08, device='cuda:0')
episode: 470 training return: tensor(-1.8428e+09, device='cuda:0')
episode: 471 training return: tensor(-8.0285e+08, device='cuda:0')
epoch: 118 test_true_pfm: 217.01724710181824
episode: 472 training return: tensor(-59839992., device='cuda:0')
episode: 473 training return: tensor(-3.0705e+09, device='cuda:0')
episode: 474 training return: tensor(-1.3510e+10, device='cuda:0')
episode: 475 training return: tensor(-2.0128e+09, device='cuda:0')
epoch: 119 test_true_pfm: 216.79610996494657
episode: 476 training return: tensor(-5.9196e+09, device='cuda:0')
episode: 477 training return: tensor(-2.6919e+08, device='cuda:0')
episode: 478 training return: tensor(-6.3070e+08, device='cuda:0')
episode: 479 training return: tensor(-3.1932e+09, device='cuda:0')
epoch: 120 test_true_pfm: 217.3219071314755
episode: 480 training return: tensor(-3.0093e+08, device='cuda:0')
episode: 481 training return: tensor(-8.7193e+08, device='cuda:0')
episode: 482 training return: tensor(-9.6443e+08, device='cuda:0')
episode: 483 training return: tensor(-3.0160e+08, device='cuda:0')
epoch: 121 test_true_pfm: 217.65971775179923
episode: 484 training return: tensor(-6.8888e+08, device='cuda:0')
episode: 485 training return: tensor(-1.1696e+09, device='cuda:0')
episode: 486 training return: tensor(-1.0076e+09, device='cuda:0')
episode: 487 training return: tensor(-5.9626e+08, device='cuda:0')
epoch: 122 test_true_pfm: 215.87894152374727
episode: 488 training return: tensor(-4.8103e+08, device='cuda:0')
episode: 489 training return: tensor(-2.4660e+08, device='cuda:0')
episode: 490 training return: tensor(-5.0296e+08, device='cuda:0')
episode: 491 training return: tensor(-8.4306e+09, device='cuda:0')
epoch: 123 test_true_pfm: 216.15979834821928
episode: 492 training return: tensor(-1.5152e+08, device='cuda:0')
episode: 493 training return: tensor(-2.8637e+08, device='cuda:0')
episode: 494 training return: tensor(-5.9310e+08, device='cuda:0')
episode: 495 training return: tensor(-2.5033e+08, device='cuda:0')
epoch: 124 test_true_pfm: 233.07888821139954
episode: 496 training return: tensor(-3.2447e+08, device='cuda:0')
episode: 497 training return: tensor(-3.5697e+09, device='cuda:0')
episode: 498 training return: tensor(-2.4344e+09, device='cuda:0')
episode: 499 training return: tensor(-1.2945e+08, device='cuda:0')
epoch: 125 test_true_pfm: 215.98006123866162
episode: 500 training return: tensor(-2.5592e+09, device='cuda:0')
episode: 501 training return: tensor(-2.6651e+09, device='cuda:0')
episode: 502 training return: tensor(-2.4418e+09, device='cuda:0')
episode: 503 training return: tensor(-1.3783e+10, device='cuda:0')
epoch: 126 test_true_pfm: 216.12465014123828
episode: 504 training return: tensor(-9.4576e+09, device='cuda:0')
episode: 505 training return: tensor(-1.4800e+08, device='cuda:0')
episode: 506 training return: tensor(-2.2404e+09, device='cuda:0')
episode: 507 training return: tensor(-1.3678e+09, device='cuda:0')
epoch: 127 test_true_pfm: 220.45702133317778
episode: 508 training return: tensor(-8.4202e+08, device='cuda:0')
episode: 509 training return: tensor(-4.3936e+08, device='cuda:0')
episode: 510 training return: tensor(-7.3616e+09, device='cuda:0')
episode: 511 training return: tensor(-2.5430e+09, device='cuda:0')
epoch: 128 test_true_pfm: 219.2272587205906
episode: 512 training return: tensor(-6.1007e+08, device='cuda:0')
episode: 513 training return: tensor(-2.0655e+10, device='cuda:0')
episode: 514 training return: tensor(-6.8381e+09, device='cuda:0')
episode: 515 training return: tensor(-2.3131e+08, device='cuda:0')
epoch: 129 test_true_pfm: 234.61060574665757
episode: 516 training return: tensor(-1.5727e+08, device='cuda:0')
episode: 517 training return: tensor(-6.5678e+08, device='cuda:0')
episode: 518 training return: tensor(-6.6042e+10, device='cuda:0')
episode: 519 training return: tensor(-3.5059e+09, device='cuda:0')
epoch: 130 test_true_pfm: 215.36390733387233
episode: 520 training return: tensor(-6.4776e+09, device='cuda:0')
episode: 521 training return: tensor(-4.9719e+08, device='cuda:0')
episode: 522 training return: tensor(-2.8224e+08, device='cuda:0')
episode: 523 training return: tensor(-69726152., device='cuda:0')
epoch: 131 test_true_pfm: 218.5535874330559
episode: 524 training return: tensor(-1.6988e+09, device='cuda:0')
episode: 525 training return: tensor(-7.5913e+09, device='cuda:0')
episode: 526 training return: tensor(-9.1834e+08, device='cuda:0')
episode: 527 training return: tensor(-4.3733e+09, device='cuda:0')
epoch: 132 test_true_pfm: 220.72577634201105
episode: 528 training return: tensor(-5.3088e+08, device='cuda:0')
episode: 529 training return: tensor(-1.2225e+09, device='cuda:0')
episode: 530 training return: tensor(-4.1351e+08, device='cuda:0')
episode: 531 training return: tensor(-4.9606e+08, device='cuda:0')
epoch: 133 test_true_pfm: 219.591352719395
episode: 532 training return: tensor(-2.1183e+09, device='cuda:0')
episode: 533 training return: tensor(-4.2465e+08, device='cuda:0')
episode: 534 training return: tensor(-1.6769e+09, device='cuda:0')
episode: 535 training return: tensor(-1.0864e+09, device='cuda:0')
epoch: 134 test_true_pfm: 217.47269654684035
episode: 536 training return: tensor(-1.5387e+09, device='cuda:0')
episode: 537 training return: tensor(-1.8097e+08, device='cuda:0')
episode: 538 training return: tensor(-2.6996e+09, device='cuda:0')
episode: 539 training return: tensor(-1.7511e+10, device='cuda:0')
epoch: 135 test_true_pfm: 219.39484340355034
episode: 540 training return: tensor(-2.6601e+09, device='cuda:0')
episode: 541 training return: tensor(-6.6323e+09, device='cuda:0')
episode: 542 training return: tensor(-2.9304e+09, device='cuda:0')
episode: 543 training return: tensor(-2.5582e+08, device='cuda:0')
epoch: 136 test_true_pfm: 215.36868012389107
episode: 544 training return: tensor(-5.6913e+08, device='cuda:0')
episode: 545 training return: tensor(-5.1233e+08, device='cuda:0')
episode: 546 training return: tensor(-5.9269e+09, device='cuda:0')
episode: 547 training return: tensor(-2.6642e+09, device='cuda:0')
epoch: 137 test_true_pfm: 222.1633778444445
episode: 548 training return: tensor(-9.4059e+08, device='cuda:0')
episode: 549 training return: tensor(-1.3732e+08, device='cuda:0')
episode: 550 training return: tensor(-1.1644e+09, device='cuda:0')
episode: 551 training return: tensor(-5.6109e+08, device='cuda:0')
epoch: 138 test_true_pfm: 217.1277727779112
episode: 552 training return: tensor(-1.4591e+09, device='cuda:0')
episode: 553 training return: tensor(-2.1547e+09, device='cuda:0')
episode: 554 training return: tensor(-3.8293e+08, device='cuda:0')
episode: 555 training return: tensor(-1.1210e+09, device='cuda:0')
epoch: 139 test_true_pfm: 222.71925364874411
episode: 556 training return: tensor(-3.0219e+09, device='cuda:0')
episode: 557 training return: tensor(-1.3075e+08, device='cuda:0')
episode: 558 training return: tensor(-58163968., device='cuda:0')
episode: 559 training return: tensor(-6.1429e+08, device='cuda:0')
epoch: 140 test_true_pfm: 214.61510331704983
episode: 560 training return: tensor(-2.9153e+08, device='cuda:0')
episode: 561 training return: tensor(-2.8333e+09, device='cuda:0')
episode: 562 training return: tensor(-4.0617e+09, device='cuda:0')
episode: 563 training return: tensor(-3.3914e+09, device='cuda:0')
epoch: 141 test_true_pfm: 218.74755865307824
episode: 564 training return: tensor(-7.6873e+09, device='cuda:0')
episode: 565 training return: tensor(-2.5470e+08, device='cuda:0')
episode: 566 training return: tensor(-3.1636e+08, device='cuda:0')
episode: 567 training return: tensor(-3.8818e+09, device='cuda:0')
epoch: 142 test_true_pfm: 218.6603653437115
episode: 568 training return: tensor(-4.5309e+09, device='cuda:0')
episode: 569 training return: tensor(-9.6122e+08, device='cuda:0')
episode: 570 training return: tensor(-3.6919e+09, device='cuda:0')
episode: 571 training return: tensor(-5.0377e+08, device='cuda:0')
epoch: 143 test_true_pfm: 219.98143705423095
episode: 572 training return: tensor(-6.6653e+09, device='cuda:0')
episode: 573 training return: tensor(-5.8133e+08, device='cuda:0')
episode: 574 training return: tensor(-3.5828e+08, device='cuda:0')
episode: 575 training return: tensor(-7.3504e+08, device='cuda:0')
epoch: 144 test_true_pfm: 217.1220368421291
episode: 576 training return: tensor(-2.5071e+10, device='cuda:0')
episode: 577 training return: tensor(-1.1122e+09, device='cuda:0')
episode: 578 training return: tensor(-8.7098e+08, device='cuda:0')
episode: 579 training return: tensor(-6.3674e+08, device='cuda:0')
epoch: 145 test_true_pfm: 218.62260474050504
episode: 580 training return: tensor(-2.6826e+09, device='cuda:0')
episode: 581 training return: tensor(-3.0577e+10, device='cuda:0')
episode: 582 training return: tensor(-2.5171e+09, device='cuda:0')
episode: 583 training return: tensor(-8.2750e+09, device='cuda:0')
epoch: 146 test_true_pfm: 219.0098176570109
episode: 584 training return: tensor(-3.1310e+08, device='cuda:0')
episode: 585 training return: tensor(-7.2641e+08, device='cuda:0')
episode: 586 training return: tensor(-4.2970e+08, device='cuda:0')
episode: 587 training return: tensor(-5.7962e+08, device='cuda:0')
epoch: 147 test_true_pfm: 226.47096598223274
episode: 588 training return: tensor(-3.9613e+09, device='cuda:0')
episode: 589 training return: tensor(-2.5673e+08, device='cuda:0')
episode: 590 training return: tensor(-1.7585e+09, device='cuda:0')
episode: 591 training return: tensor(-2.9973e+09, device='cuda:0')
epoch: 148 test_true_pfm: 222.73605863854314
episode: 592 training return: tensor(-5.6931e+08, device='cuda:0')
episode: 593 training return: tensor(-2.9178e+08, device='cuda:0')
episode: 594 training return: tensor(-5.7482e+08, device='cuda:0')
episode: 595 training return: tensor(-1.2003e+09, device='cuda:0')
epoch: 149 test_true_pfm: 217.04327652371953
episode: 596 training return: tensor(-20131698., device='cuda:0')
episode: 597 training return: tensor(-19494624., device='cuda:0')
episode: 598 training return: tensor(-4.2240e+08, device='cuda:0')
episode: 599 training return: tensor(-1.2072e+10, device='cuda:0')
epoch: 150 test_true_pfm: 220.3761687948908
