['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'brac', '--traj', 'medium', '--seed', '2', '--data', '100000']
2399.9795499642414
episode: 0 training return: tensor(-26625224., device='cuda:0')
episode: 1 training return: tensor(-14353625., device='cuda:0')
episode: 2 training return: tensor(-73184176., device='cuda:0')
episode: 3 training return: tensor(-2.3161e+09, device='cuda:0')
epoch: 1 test_true_pfm: 43.77800095833813
episode: 4 training return: tensor(-51524040., device='cuda:0')
episode: 5 training return: tensor(-3.8191e+09, device='cuda:0')
episode: 6 training return: tensor(-47037028., device='cuda:0')
episode: 7 training return: tensor(-51640912., device='cuda:0')
epoch: 2 test_true_pfm: -17.32956199269368
episode: 8 training return: tensor(-43736968., device='cuda:0')
episode: 9 training return: tensor(-30716090., device='cuda:0')
episode: 10 training return: tensor(-58385928., device='cuda:0')
episode: 11 training return: tensor(-40352.3906, device='cuda:0')
epoch: 3 test_true_pfm: -105.17965199121289
episode: 12 training return: tensor(-86755.8672, device='cuda:0')
episode: 13 training return: tensor(-60084.8594, device='cuda:0')
episode: 14 training return: tensor(-59880656., device='cuda:0')
episode: 15 training return: tensor(-51167.4883, device='cuda:0')
epoch: 4 test_true_pfm: -135.16516934334308
episode: 16 training return: tensor(-92301.1172, device='cuda:0')
episode: 17 training return: tensor(-20501890., device='cuda:0')
episode: 18 training return: tensor(-157538.9688, device='cuda:0')
episode: 19 training return: tensor(-1.2353e+08, device='cuda:0')
epoch: 5 test_true_pfm: 81.35536638857218
episode: 20 training return: tensor(-91595992., device='cuda:0')
episode: 21 training return: tensor(-8678013., device='cuda:0')
episode: 22 training return: tensor(-1.3623e+08, device='cuda:0')
episode: 23 training return: tensor(-1.1140e+08, device='cuda:0')
epoch: 6 test_true_pfm: 74.84779465294856
episode: 24 training return: tensor(-69408136., device='cuda:0')
episode: 25 training return: tensor(-43972.1836, device='cuda:0')
episode: 26 training return: tensor(-108089.7734, device='cuda:0')
episode: 27 training return: tensor(-57372.6680, device='cuda:0')
epoch: 7 test_true_pfm: 57.38593598997077
episode: 28 training return: tensor(-58166.3125, device='cuda:0')
episode: 29 training return: tensor(-5.7353e+08, device='cuda:0')
episode: 30 training return: tensor(-45547.8828, device='cuda:0')
episode: 31 training return: tensor(-62480.4375, device='cuda:0')
epoch: 8 test_true_pfm: 120.00135512877534
episode: 32 training return: tensor(-4424389., device='cuda:0')
episode: 33 training return: tensor(-88110.3281, device='cuda:0')
episode: 34 training return: tensor(-169018.2031, device='cuda:0')
episode: 35 training return: tensor(-115834.6328, device='cuda:0')
epoch: 9 test_true_pfm: 89.04833231573978
episode: 36 training return: tensor(-4452902.5000, device='cuda:0')
episode: 37 training return: tensor(-911520.8750, device='cuda:0')
episode: 38 training return: tensor(-5570687.5000, device='cuda:0')
episode: 39 training return: tensor(-62515.0430, device='cuda:0')
epoch: 10 test_true_pfm: -48.96795396578421
episode: 40 training return: tensor(-5151939.5000, device='cuda:0')
episode: 41 training return: tensor(-41474.9688, device='cuda:0')
episode: 42 training return: tensor(-1.0165e+08, device='cuda:0')
episode: 43 training return: tensor(-1.7249e+08, device='cuda:0')
epoch: 11 test_true_pfm: 80.97046011546331
episode: 44 training return: tensor(-4791073., device='cuda:0')
episode: 45 training return: tensor(-49177.7539, device='cuda:0')
episode: 46 training return: tensor(-2.2943e+08, device='cuda:0')
episode: 47 training return: tensor(-51369572., device='cuda:0')
epoch: 12 test_true_pfm: -48.41284208717055
episode: 48 training return: tensor(-6171309.5000, device='cuda:0')
episode: 49 training return: tensor(-46433.3555, device='cuda:0')
episode: 50 training return: tensor(-185754.7812, device='cuda:0')
episode: 51 training return: tensor(-198232.8750, device='cuda:0')
epoch: 13 test_true_pfm: -39.47309899131451
episode: 52 training return: tensor(-193701.1094, device='cuda:0')
episode: 53 training return: tensor(-153472.5156, device='cuda:0')
episode: 54 training return: tensor(-48974.5938, device='cuda:0')
episode: 55 training return: tensor(-50208248., device='cuda:0')
epoch: 14 test_true_pfm: -13.804517093878085
episode: 56 training return: tensor(-56903328., device='cuda:0')
episode: 57 training return: tensor(-5.3889e+08, device='cuda:0')
episode: 58 training return: tensor(-47309800., device='cuda:0')
episode: 59 training return: tensor(-47660396., device='cuda:0')
epoch: 15 test_true_pfm: 51.72318966776182
episode: 60 training return: tensor(-46231588., device='cuda:0')
episode: 61 training return: tensor(-2864898.5000, device='cuda:0')
episode: 62 training return: tensor(-22889802., device='cuda:0')
episode: 63 training return: tensor(-51173632., device='cuda:0')
epoch: 16 test_true_pfm: -46.49736400071807
episode: 64 training return: tensor(-95851.8750, device='cuda:0')
episode: 65 training return: tensor(-96584.4609, device='cuda:0')
episode: 66 training return: tensor(-28260086., device='cuda:0')
episode: 67 training return: tensor(-33518.6445, device='cuda:0')
epoch: 17 test_true_pfm: -24.380538472888627
episode: 68 training return: tensor(-1505617.6250, device='cuda:0')
episode: 69 training return: tensor(-3635870., device='cuda:0')
episode: 70 training return: tensor(-49908824., device='cuda:0')
episode: 71 training return: tensor(-34234.6914, device='cuda:0')
epoch: 18 test_true_pfm: 28.107866654317007
episode: 72 training return: tensor(-150138.0156, device='cuda:0')
episode: 73 training return: tensor(-63831.4023, device='cuda:0')
episode: 74 training return: tensor(-76640.1875, device='cuda:0')
episode: 75 training return: tensor(-113692.7422, device='cuda:0')
epoch: 19 test_true_pfm: 77.08519095620126
episode: 76 training return: tensor(-50691.3477, device='cuda:0')
episode: 77 training return: tensor(-45945896., device='cuda:0')
episode: 78 training return: tensor(-4.4485e+08, device='cuda:0')
episode: 79 training return: tensor(-1280485.3750, device='cuda:0')
epoch: 20 test_true_pfm: 127.80082485639265
episode: 80 training return: tensor(-10321069., device='cuda:0')
episode: 81 training return: tensor(-73046.9453, device='cuda:0')
episode: 82 training return: tensor(-32539.1367, device='cuda:0')
episode: 83 training return: tensor(-75695.3203, device='cuda:0')
epoch: 21 test_true_pfm: 227.70826786286352
episode: 84 training return: tensor(-68410336., device='cuda:0')
episode: 85 training return: tensor(-231969.7344, device='cuda:0')
episode: 86 training return: tensor(-76734.6406, device='cuda:0')
episode: 87 training return: tensor(-119502.4609, device='cuda:0')
epoch: 22 test_true_pfm: -46.63850016881916
episode: 88 training return: tensor(-1.3490e+08, device='cuda:0')
episode: 89 training return: tensor(-22987.9473, device='cuda:0')
episode: 90 training return: tensor(-45753.5859, device='cuda:0')
episode: 91 training return: tensor(-1316580.1250, device='cuda:0')
epoch: 23 test_true_pfm: 193.79319076397928
episode: 92 training return: tensor(-4195581., device='cuda:0')
episode: 93 training return: tensor(-188786.9219, device='cuda:0')
episode: 94 training return: tensor(-3774197., device='cuda:0')
episode: 95 training return: tensor(-116833.7500, device='cuda:0')
epoch: 24 test_true_pfm: -221.03427712982514
episode: 96 training return: tensor(-57292736., device='cuda:0')
episode: 97 training return: tensor(-2724802., device='cuda:0')
episode: 98 training return: tensor(-7044744., device='cuda:0')
episode: 99 training return: tensor(-58958.5586, device='cuda:0')
epoch: 25 test_true_pfm: 341.83643223095856
episode: 100 training return: tensor(-72291696., device='cuda:0')
episode: 101 training return: tensor(-39185064., device='cuda:0')
episode: 102 training return: tensor(-7.8700e+08, device='cuda:0')
episode: 103 training return: tensor(-82396.1562, device='cuda:0')
epoch: 26 test_true_pfm: -63.03357758159172
episode: 104 training return: tensor(-2.8778e+08, device='cuda:0')
episode: 105 training return: tensor(-87646.9922, device='cuda:0')
episode: 106 training return: tensor(-52664.8906, device='cuda:0')
episode: 107 training return: tensor(-1.9244e+08, device='cuda:0')
epoch: 27 test_true_pfm: -59.68292128521367
episode: 108 training return: tensor(-61564608., device='cuda:0')
episode: 109 training return: tensor(-3.0729e+10, device='cuda:0')
episode: 110 training return: tensor(-3.1698e+08, device='cuda:0')
episode: 111 training return: tensor(-8.4751e+08, device='cuda:0')
epoch: 28 test_true_pfm: -2.904976529729216
episode: 112 training return: tensor(-46235104., device='cuda:0')
episode: 113 training return: tensor(-3355268., device='cuda:0')
episode: 114 training return: tensor(-7.2489e+09, device='cuda:0')
episode: 115 training return: tensor(-171054.1406, device='cuda:0')
epoch: 29 test_true_pfm: 41.307986582363434
episode: 116 training return: tensor(-1.0982e+16, device='cuda:0')
episode: 117 training return: tensor(-1145478.3750, device='cuda:0')
episode: 118 training return: tensor(-80450.9375, device='cuda:0')
episode: 119 training return: tensor(-183439.9375, device='cuda:0')
epoch: 30 test_true_pfm: 162.3879009909377
episode: 120 training return: tensor(-36284.9766, device='cuda:0')
episode: 121 training return: tensor(-55741.4414, device='cuda:0')
episode: 122 training return: tensor(-54229.4883, device='cuda:0')
episode: 123 training return: tensor(-48279.1250, device='cuda:0')
epoch: 31 test_true_pfm: -83.87583191543548
episode: 124 training return: tensor(-62635236., device='cuda:0')
episode: 125 training return: tensor(-23571.6836, device='cuda:0')
episode: 126 training return: tensor(-17478958., device='cuda:0')
episode: 127 training return: tensor(-18403.0918, device='cuda:0')
epoch: 32 test_true_pfm: 145.91428325513985
episode: 128 training return: tensor(-74962552., device='cuda:0')
episode: 129 training return: tensor(-26151.7207, device='cuda:0')
episode: 130 training return: tensor(-15475.2100, device='cuda:0')
episode: 131 training return: tensor(-21377.3867, device='cuda:0')
epoch: 33 test_true_pfm: -31.669131445833642
episode: 132 training return: tensor(-17133.7773, device='cuda:0')
episode: 133 training return: tensor(-589738.8750, device='cuda:0')
episode: 134 training return: tensor(-78146200., device='cuda:0')
episode: 135 training return: tensor(-60956692., device='cuda:0')
epoch: 34 test_true_pfm: -164.72821700826626
episode: 136 training return: tensor(-939393.2500, device='cuda:0')
episode: 137 training return: tensor(-7176587.5000, device='cuda:0')
episode: 138 training return: tensor(-7779757., device='cuda:0')
episode: 139 training return: tensor(-248718.4844, device='cuda:0')
epoch: 35 test_true_pfm: -9.181903725201558
episode: 140 training return: tensor(-16386648., device='cuda:0')
episode: 141 training return: tensor(-12929.2832, device='cuda:0')
episode: 142 training return: tensor(-15123.1973, device='cuda:0')
episode: 143 training return: tensor(-16319.2217, device='cuda:0')
epoch: 36 test_true_pfm: 68.06923986042813
episode: 144 training return: tensor(-30516848., device='cuda:0')
episode: 145 training return: tensor(-22510.2188, device='cuda:0')
episode: 146 training return: tensor(-124690.4141, device='cuda:0')
episode: 147 training return: tensor(-34724964., device='cuda:0')
epoch: 37 test_true_pfm: -32.83116910111756
episode: 148 training return: tensor(-101976.3047, device='cuda:0')
episode: 149 training return: tensor(-20199.6621, device='cuda:0')
episode: 150 training return: tensor(-38876760., device='cuda:0')
episode: 151 training return: tensor(-92273.0781, device='cuda:0')
epoch: 38 test_true_pfm: -15.234746855858733
episode: 152 training return: tensor(-107349.7812, device='cuda:0')
episode: 153 training return: tensor(-107398.3672, device='cuda:0')
episode: 154 training return: tensor(-32969478., device='cuda:0')
episode: 155 training return: tensor(-13830.4375, device='cuda:0')
epoch: 39 test_true_pfm: -11.484082535720523
episode: 156 training return: tensor(-91113.5625, device='cuda:0')
episode: 157 training return: tensor(-13761.5146, device='cuda:0')
episode: 158 training return: tensor(-15557.1699, device='cuda:0')
episode: 159 training return: tensor(-23264.5156, device='cuda:0')
epoch: 40 test_true_pfm: 128.55952839310905
episode: 160 training return: tensor(-18304.4199, device='cuda:0')
episode: 161 training return: tensor(-104107.8906, device='cuda:0')
episode: 162 training return: tensor(-106995.4922, device='cuda:0')
episode: 163 training return: tensor(-17359.3418, device='cuda:0')
epoch: 41 test_true_pfm: -107.97757788369375
episode: 164 training return: tensor(-12377.7021, device='cuda:0')
episode: 165 training return: tensor(-32593362., device='cuda:0')
episode: 166 training return: tensor(-36731272., device='cuda:0')
episode: 167 training return: tensor(-110135.1172, device='cuda:0')
epoch: 42 test_true_pfm: 119.47049942818235
episode: 168 training return: tensor(-108180.1562, device='cuda:0')
episode: 169 training return: tensor(-91095.2344, device='cuda:0')
episode: 170 training return: tensor(-103847.5859, device='cuda:0')
episode: 171 training return: tensor(-16782.5938, device='cuda:0')
epoch: 43 test_true_pfm: 98.95949139051639
episode: 172 training return: tensor(-89987.2422, device='cuda:0')
episode: 173 training return: tensor(-12995.3213, device='cuda:0')
episode: 174 training return: tensor(-122251.3438, device='cuda:0')
episode: 175 training return: tensor(-146922.2031, device='cuda:0')
epoch: 44 test_true_pfm: -28.544850995159994
episode: 176 training return: tensor(-617969.3750, device='cuda:0')
episode: 177 training return: tensor(-6.4531e+08, device='cuda:0')
episode: 178 training return: tensor(-16103.6719, device='cuda:0')
episode: 179 training return: tensor(-15339.9678, device='cuda:0')
epoch: 45 test_true_pfm: 204.43134233035175
episode: 180 training return: tensor(-17324.1289, device='cuda:0')
episode: 181 training return: tensor(-100155.1484, device='cuda:0')
episode: 182 training return: tensor(-27327.8457, device='cuda:0')
episode: 183 training return: tensor(-17683.2129, device='cuda:0')
epoch: 46 test_true_pfm: -5.975993197901137
episode: 184 training return: tensor(-22239.7910, device='cuda:0')
episode: 185 training return: tensor(-15222.0605, device='cuda:0')
episode: 186 training return: tensor(-23534.3379, device='cuda:0')
episode: 187 training return: tensor(-22842.9844, device='cuda:0')
epoch: 47 test_true_pfm: -142.02577023829988
episode: 188 training return: tensor(-12289.1592, device='cuda:0')
episode: 189 training return: tensor(-4.1034e+08, device='cuda:0')
episode: 190 training return: tensor(-21134.6055, device='cuda:0')
episode: 191 training return: tensor(-14505.4121, device='cuda:0')
epoch: 48 test_true_pfm: 37.65509816117473
episode: 192 training return: tensor(-13169.3906, device='cuda:0')
episode: 193 training return: tensor(-13126.0176, device='cuda:0')
episode: 194 training return: tensor(-30241.0508, device='cuda:0')
episode: 195 training return: tensor(-34638.9570, device='cuda:0')
epoch: 49 test_true_pfm: -82.8778495411396
episode: 196 training return: tensor(-1.7316e+09, device='cuda:0')
episode: 197 training return: tensor(-6.8325e+09, device='cuda:0')
episode: 198 training return: tensor(-46187.6133, device='cuda:0')
episode: 199 training return: tensor(-1.7215e+08, device='cuda:0')
epoch: 50 test_true_pfm: -392.6840088972994
episode: 200 training return: tensor(-2.7470e+08, device='cuda:0')
episode: 201 training return: tensor(-4.2759e+09, device='cuda:0')
episode: 202 training return: tensor(-1.0111e+10, device='cuda:0')
episode: 203 training return: tensor(-66243972., device='cuda:0')
epoch: 51 test_true_pfm: -79.61762566038733
episode: 204 training return: tensor(-62988844., device='cuda:0')
episode: 205 training return: tensor(-1.1808e+10, device='cuda:0')
episode: 206 training return: tensor(-1.1249e+10, device='cuda:0')
episode: 207 training return: tensor(-1.1977e+10, device='cuda:0')
epoch: 52 test_true_pfm: -24.92537457599244
episode: 208 training return: tensor(-1.2424e+10, device='cuda:0')
episode: 209 training return: tensor(-62691760., device='cuda:0')
episode: 210 training return: tensor(-47446116., device='cuda:0')
episode: 211 training return: tensor(-73941872., device='cuda:0')
epoch: 53 test_true_pfm: 32.770315962354346
episode: 212 training return: tensor(-48150020., device='cuda:0')
episode: 213 training return: tensor(-41831196., device='cuda:0')
episode: 214 training return: tensor(-53124272., device='cuda:0')
episode: 215 training return: tensor(-47276488., device='cuda:0')
epoch: 54 test_true_pfm: 37.94516194575534
episode: 216 training return: tensor(-52626296., device='cuda:0')
episode: 217 training return: tensor(-49941784., device='cuda:0')
episode: 218 training return: tensor(-63298912., device='cuda:0')
episode: 219 training return: tensor(-52546256., device='cuda:0')
epoch: 55 test_true_pfm: 29.386572436671162
episode: 220 training return: tensor(-42968044., device='cuda:0')
episode: 221 training return: tensor(-1.2432e+10, device='cuda:0')
episode: 222 training return: tensor(-54780304., device='cuda:0')
episode: 223 training return: tensor(-44147716., device='cuda:0')
epoch: 56 test_true_pfm: -23.782355868028162
episode: 224 training return: tensor(-1.2525e+10, device='cuda:0')
episode: 225 training return: tensor(-1.1754e+10, device='cuda:0')
episode: 226 training return: tensor(-1.2546e+10, device='cuda:0')
episode: 227 training return: tensor(-1.2463e+10, device='cuda:0')
epoch: 57 test_true_pfm: -40.83302091934044
episode: 228 training return: tensor(-1.2225e+10, device='cuda:0')
episode: 229 training return: tensor(-1.2395e+10, device='cuda:0')
episode: 230 training return: tensor(-1.2485e+10, device='cuda:0')
episode: 231 training return: tensor(-1.2450e+10, device='cuda:0')
epoch: 58 test_true_pfm: -25.431364549761454
episode: 232 training return: tensor(-1.2520e+10, device='cuda:0')
episode: 233 training return: tensor(-1.2254e+10, device='cuda:0')
episode: 234 training return: tensor(-1.2324e+10, device='cuda:0')
episode: 235 training return: tensor(-1.2214e+10, device='cuda:0')
epoch: 59 test_true_pfm: -41.23596522716419
episode: 236 training return: tensor(-1.2438e+10, device='cuda:0')
episode: 237 training return: tensor(-1.2324e+10, device='cuda:0')
episode: 238 training return: tensor(-1.2579e+10, device='cuda:0')
episode: 239 training return: tensor(-43730664., device='cuda:0')
epoch: 60 test_true_pfm: -23.879434174636685
episode: 240 training return: tensor(-1.2502e+10, device='cuda:0')
episode: 241 training return: tensor(-41066984., device='cuda:0')
episode: 242 training return: tensor(-1.2241e+10, device='cuda:0')
episode: 243 training return: tensor(-1.2529e+10, device='cuda:0')
epoch: 61 test_true_pfm: -37.672653341378364
episode: 244 training return: tensor(-1.2321e+10, device='cuda:0')
episode: 245 training return: tensor(-1.2305e+10, device='cuda:0')
episode: 246 training return: tensor(-49391764., device='cuda:0')
episode: 247 training return: tensor(-1.2534e+10, device='cuda:0')
epoch: 62 test_true_pfm: -41.204924503546295
episode: 248 training return: tensor(-1.2449e+10, device='cuda:0')
episode: 249 training return: tensor(-39701240., device='cuda:0')
episode: 250 training return: tensor(-39273616., device='cuda:0')
episode: 251 training return: tensor(-1.2154e+10, device='cuda:0')
epoch: 63 test_true_pfm: -35.446211121858596
episode: 252 training return: tensor(-1.2582e+10, device='cuda:0')
episode: 253 training return: tensor(-1.2521e+10, device='cuda:0')
episode: 254 training return: tensor(-1.1214e+10, device='cuda:0')
episode: 255 training return: tensor(-1.2378e+10, device='cuda:0')
epoch: 64 test_true_pfm: -44.566931020801654
episode: 256 training return: tensor(-1.2417e+10, device='cuda:0')
episode: 257 training return: tensor(-1.2175e+10, device='cuda:0')
episode: 258 training return: tensor(-1.2481e+10, device='cuda:0')
episode: 259 training return: tensor(-1.2419e+10, device='cuda:0')
epoch: 65 test_true_pfm: -34.681149178125736
episode: 260 training return: tensor(-2.8697e+12, device='cuda:0')
episode: 261 training return: tensor(-1.1994e+10, device='cuda:0')
episode: 262 training return: tensor(-1.0528e+08, device='cuda:0')
episode: 263 training return: tensor(-1.2025e+10, device='cuda:0')
epoch: 66 test_true_pfm: -40.23894880728375
episode: 264 training return: tensor(-1.2208e+10, device='cuda:0')
episode: 265 training return: tensor(-1.2369e+10, device='cuda:0')
episode: 266 training return: tensor(-1.2482e+10, device='cuda:0')
episode: 267 training return: tensor(-1.2170e+10, device='cuda:0')
epoch: 67 test_true_pfm: -38.18603999844286
episode: 268 training return: tensor(-1.2099e+10, device='cuda:0')
episode: 269 training return: tensor(-1.2405e+10, device='cuda:0')
episode: 270 training return: tensor(-1.2414e+10, device='cuda:0')
episode: 271 training return: tensor(-39361692., device='cuda:0')
epoch: 68 test_true_pfm: -38.949065349313
episode: 272 training return: tensor(-1.2476e+10, device='cuda:0')
episode: 273 training return: tensor(-1.2579e+10, device='cuda:0')
episode: 274 training return: tensor(-1.2629e+10, device='cuda:0')
episode: 275 training return: tensor(-1.2365e+10, device='cuda:0')
epoch: 69 test_true_pfm: -41.81172950567992
episode: 276 training return: tensor(-1.2150e+10, device='cuda:0')
episode: 277 training return: tensor(-1.2481e+10, device='cuda:0')
episode: 278 training return: tensor(-39433392., device='cuda:0')
episode: 279 training return: tensor(-1.2528e+10, device='cuda:0')
epoch: 70 test_true_pfm: -42.66197016724059
episode: 280 training return: tensor(-1.2694e+10, device='cuda:0')
episode: 281 training return: tensor(-1.2543e+10, device='cuda:0')
episode: 282 training return: tensor(-1.2469e+10, device='cuda:0')
episode: 283 training return: tensor(-1.2552e+10, device='cuda:0')
epoch: 71 test_true_pfm: -41.434227941981476
episode: 284 training return: tensor(-1.2370e+10, device='cuda:0')
episode: 285 training return: tensor(-1.2662e+10, device='cuda:0')
episode: 286 training return: tensor(-1.2654e+10, device='cuda:0')
episode: 287 training return: tensor(-1.2505e+10, device='cuda:0')
epoch: 72 test_true_pfm: -39.103519349625834
episode: 288 training return: tensor(-38571220., device='cuda:0')
episode: 289 training return: tensor(-1.2243e+10, device='cuda:0')
episode: 290 training return: tensor(-1.2500e+10, device='cuda:0')
episode: 291 training return: tensor(-1.2209e+10, device='cuda:0')
epoch: 73 test_true_pfm: -15.968599492451782
episode: 292 training return: tensor(-54305932., device='cuda:0')
episode: 293 training return: tensor(-1.2495e+10, device='cuda:0')
episode: 294 training return: tensor(-1.2093e+10, device='cuda:0')
episode: 295 training return: tensor(-1.4473e+08, device='cuda:0')
epoch: 74 test_true_pfm: -40.135644968580515
episode: 296 training return: tensor(-1.2393e+10, device='cuda:0')
episode: 297 training return: tensor(-1.1478e+10, device='cuda:0')
episode: 298 training return: tensor(-42942768., device='cuda:0')
episode: 299 training return: tensor(-1.2558e+10, device='cuda:0')
epoch: 75 test_true_pfm: -39.75151674787393
episode: 300 training return: tensor(-1.2429e+10, device='cuda:0')
episode: 301 training return: tensor(-1.2218e+10, device='cuda:0')
episode: 302 training return: tensor(-1.2267e+10, device='cuda:0')
episode: 303 training return: tensor(-1.2246e+10, device='cuda:0')
epoch: 76 test_true_pfm: 23.843317909977817
episode: 304 training return: tensor(-1.2399e+10, device='cuda:0')
episode: 305 training return: tensor(-56110924., device='cuda:0')
episode: 306 training return: tensor(-1.2391e+10, device='cuda:0')
episode: 307 training return: tensor(-1.2481e+10, device='cuda:0')
epoch: 77 test_true_pfm: -37.951648670715
episode: 308 training return: tensor(-1.2315e+10, device='cuda:0')
episode: 309 training return: tensor(-1.2295e+10, device='cuda:0')
episode: 310 training return: tensor(-44208744., device='cuda:0')
episode: 311 training return: tensor(-1.2233e+10, device='cuda:0')
epoch: 78 test_true_pfm: -40.047672887306256
episode: 312 training return: tensor(-1.2453e+10, device='cuda:0')
episode: 313 training return: tensor(-1.2486e+10, device='cuda:0')
episode: 314 training return: tensor(-1.2616e+10, device='cuda:0')
episode: 315 training return: tensor(-1.2383e+10, device='cuda:0')
epoch: 79 test_true_pfm: -42.24168328873241
episode: 316 training return: tensor(-1.2524e+10, device='cuda:0')
episode: 317 training return: tensor(-43717800., device='cuda:0')
episode: 318 training return: tensor(-5.2525e+12, device='cuda:0')
episode: 319 training return: tensor(-1.2575e+10, device='cuda:0')
epoch: 80 test_true_pfm: -14.115102602820215
episode: 320 training return: tensor(-1.2585e+10, device='cuda:0')
episode: 321 training return: tensor(-1.2323e+10, device='cuda:0')
episode: 322 training return: tensor(-71684696., device='cuda:0')
episode: 323 training return: tensor(-42057788., device='cuda:0')
epoch: 81 test_true_pfm: -40.32660184457844
episode: 324 training return: tensor(-1.2791e+10, device='cuda:0')
episode: 325 training return: tensor(-1.2485e+10, device='cuda:0')
episode: 326 training return: tensor(-1.2385e+10, device='cuda:0')
episode: 327 training return: tensor(-1.1956e+10, device='cuda:0')
epoch: 82 test_true_pfm: -38.46151837597574
episode: 328 training return: tensor(-1.2383e+10, device='cuda:0')
episode: 329 training return: tensor(-1.2534e+10, device='cuda:0')
episode: 330 training return: tensor(-39999584., device='cuda:0')
episode: 331 training return: tensor(-1.2453e+10, device='cuda:0')
epoch: 83 test_true_pfm: -40.29758712239652
episode: 332 training return: tensor(-1.2596e+10, device='cuda:0')
episode: 333 training return: tensor(-1.2286e+10, device='cuda:0')
episode: 334 training return: tensor(-1.1960e+10, device='cuda:0')
episode: 335 training return: tensor(-1.2277e+10, device='cuda:0')
epoch: 84 test_true_pfm: -38.87652364196873
episode: 336 training return: tensor(-1.2474e+10, device='cuda:0')
episode: 337 training return: tensor(-1.1371e+10, device='cuda:0')
episode: 338 training return: tensor(-41777352., device='cuda:0')
episode: 339 training return: tensor(-39427804., device='cuda:0')
epoch: 85 test_true_pfm: -47.581418048735806
episode: 340 training return: tensor(-1.2481e+10, device='cuda:0')
episode: 341 training return: tensor(-1.2314e+10, device='cuda:0')
episode: 342 training return: tensor(-62384696., device='cuda:0')
episode: 343 training return: tensor(-49577508., device='cuda:0')
epoch: 86 test_true_pfm: -37.80131686945631
episode: 344 training return: tensor(-1.2746e+10, device='cuda:0')
episode: 345 training return: tensor(-39250344., device='cuda:0')
episode: 346 training return: tensor(-38490504., device='cuda:0')
episode: 347 training return: tensor(-1.1916e+10, device='cuda:0')
epoch: 87 test_true_pfm: -22.73494248431
episode: 348 training return: tensor(-2773910.7500, device='cuda:0')
episode: 349 training return: tensor(-38036176., device='cuda:0')
episode: 350 training return: tensor(-1.2483e+10, device='cuda:0')
episode: 351 training return: tensor(-2718256.7500, device='cuda:0')
epoch: 88 test_true_pfm: 28.90401459223098
episode: 352 training return: tensor(-41662124., device='cuda:0')
episode: 353 training return: tensor(-1.2439e+10, device='cuda:0')
episode: 354 training return: tensor(-1.2676e+10, device='cuda:0')
episode: 355 training return: tensor(-1.1173e+10, device='cuda:0')
epoch: 89 test_true_pfm: -42.133948552781014
episode: 356 training return: tensor(-1.2337e+10, device='cuda:0')
episode: 357 training return: tensor(-1.2435e+10, device='cuda:0')
episode: 358 training return: tensor(-1.2386e+10, device='cuda:0')
episode: 359 training return: tensor(-1.2499e+10, device='cuda:0')
epoch: 90 test_true_pfm: -32.77571482165657
episode: 360 training return: tensor(-1.2542e+10, device='cuda:0')
episode: 361 training return: tensor(-10193819., device='cuda:0')
episode: 362 training return: tensor(-1.2371e+10, device='cuda:0')
episode: 363 training return: tensor(-66306980., device='cuda:0')
epoch: 91 test_true_pfm: -43.00260132464684
episode: 364 training return: tensor(-39278968., device='cuda:0')
episode: 365 training return: tensor(-1.2646e+10, device='cuda:0')
episode: 366 training return: tensor(-1.2245e+10, device='cuda:0')
episode: 367 training return: tensor(-1.2178e+10, device='cuda:0')
epoch: 92 test_true_pfm: -39.23089466694476
episode: 368 training return: tensor(-1.2236e+10, device='cuda:0')
episode: 369 training return: tensor(-1.2463e+10, device='cuda:0')
episode: 370 training return: tensor(-42928752., device='cuda:0')
episode: 371 training return: tensor(-1.2456e+10, device='cuda:0')
epoch: 93 test_true_pfm: -31.893893423244794
episode: 372 training return: tensor(-1.2257e+10, device='cuda:0')
episode: 373 training return: tensor(-1.2500e+10, device='cuda:0')
episode: 374 training return: tensor(-1.2646e+10, device='cuda:0')
episode: 375 training return: tensor(-40050320., device='cuda:0')
epoch: 94 test_true_pfm: 25.24140926298074
episode: 376 training return: tensor(-46596864., device='cuda:0')
episode: 377 training return: tensor(-58778036., device='cuda:0')
episode: 378 training return: tensor(-1.2449e+10, device='cuda:0')
episode: 379 training return: tensor(-39641396., device='cuda:0')
epoch: 95 test_true_pfm: 36.42067440822309
episode: 380 training return: tensor(-45854084., device='cuda:0')
episode: 381 training return: tensor(-1.1771e+10, device='cuda:0')
episode: 382 training return: tensor(-1.2263e+10, device='cuda:0')
episode: 383 training return: tensor(-1.2450e+10, device='cuda:0')
epoch: 96 test_true_pfm: 0.642978384553975
episode: 384 training return: tensor(-46280680., device='cuda:0')
episode: 385 training return: tensor(-1.2211e+10, device='cuda:0')
episode: 386 training return: tensor(-44544832., device='cuda:0')
episode: 387 training return: tensor(-39596936., device='cuda:0')
epoch: 97 test_true_pfm: -4.279382868241774
episode: 388 training return: tensor(-39781484., device='cuda:0')
episode: 389 training return: tensor(-40516396., device='cuda:0')
episode: 390 training return: tensor(-11893929., device='cuda:0')
episode: 391 training return: tensor(-38570556., device='cuda:0')
epoch: 98 test_true_pfm: -10.072211667368004
episode: 392 training return: tensor(-39815644., device='cuda:0')
episode: 393 training return: tensor(-39292884., device='cuda:0')
episode: 394 training return: tensor(-40703876., device='cuda:0')
episode: 395 training return: tensor(-1.0251e+08, device='cuda:0')
epoch: 99 test_true_pfm: -22.928540520777144
episode: 396 training return: tensor(-44058856., device='cuda:0')
episode: 397 training return: tensor(-40160724., device='cuda:0')
episode: 398 training return: tensor(-40225164., device='cuda:0')
episode: 399 training return: tensor(-1.2184e+10, device='cuda:0')
epoch: 100 test_true_pfm: -50.33528739920863
episode: 400 training return: tensor(-40347220., device='cuda:0')
episode: 401 training return: tensor(-1.0682e+10, device='cuda:0')
episode: 402 training return: tensor(-8.9089e+08, device='cuda:0')
episode: 403 training return: tensor(-9.9672e+08, device='cuda:0')
epoch: 101 test_true_pfm: -233.65840152410564
episode: 404 training return: tensor(-1.8998e+18, device='cuda:0')
episode: 405 training return: tensor(-1.4253e+10, device='cuda:0')
episode: 406 training return: tensor(-2.6183e+18, device='cuda:0')
episode: 407 training return: tensor(-3.7236e+11, device='cuda:0')
epoch: 102 test_true_pfm: -636.7679130503769
episode: 408 training return: tensor(-4.1803e+08, device='cuda:0')
episode: 409 training return: tensor(-3.1668e+08, device='cuda:0')
episode: 410 training return: tensor(-6.1204e+12, device='cuda:0')
episode: 411 training return: tensor(-1.7906e+13, device='cuda:0')
epoch: 103 test_true_pfm: -33.218510050624246
episode: 412 training return: tensor(-1.0390e+10, device='cuda:0')
episode: 413 training return: tensor(-1.1621e+10, device='cuda:0')
episode: 414 training return: tensor(-2.4701e+08, device='cuda:0')
episode: 415 training return: tensor(-39724192., device='cuda:0')
epoch: 104 test_true_pfm: -36.36347871173387
episode: 416 training return: tensor(-1.2082e+10, device='cuda:0')
episode: 417 training return: tensor(-1.2360e+10, device='cuda:0')
episode: 418 training return: tensor(-1.4521e+08, device='cuda:0')
episode: 419 training return: tensor(-36152.8945, device='cuda:0')
epoch: 105 test_true_pfm: -44.006805969434026
episode: 420 training return: tensor(-1.2595e+10, device='cuda:0')
episode: 421 training return: tensor(-1.2287e+10, device='cuda:0')
episode: 422 training return: tensor(-39878088., device='cuda:0')
episode: 423 training return: tensor(-36270376., device='cuda:0')
epoch: 106 test_true_pfm: 10.29333487191237
episode: 424 training return: tensor(-1.3057e+09, device='cuda:0')
episode: 425 training return: tensor(-1.3607e+08, device='cuda:0')
episode: 426 training return: tensor(-1.2341e+09, device='cuda:0')
episode: 427 training return: tensor(-2.1301e+08, device='cuda:0')
epoch: 107 test_true_pfm: -133.07691877936472
episode: 428 training return: tensor(-41431912., device='cuda:0')
episode: 429 training return: tensor(-2.4446e+08, device='cuda:0')
episode: 430 training return: tensor(-6.7974e+09, device='cuda:0')
episode: 431 training return: tensor(-39863396., device='cuda:0')
epoch: 108 test_true_pfm: -8.457240399922048
episode: 432 training return: tensor(-36757644., device='cuda:0')
episode: 433 training return: tensor(-39249356., device='cuda:0')
episode: 434 training return: tensor(-40777960., device='cuda:0')
episode: 435 training return: tensor(-44437884., device='cuda:0')
epoch: 109 test_true_pfm: -5.67700191372175
episode: 436 training return: tensor(-38953592., device='cuda:0')
episode: 437 training return: tensor(-1.1424e+10, device='cuda:0')
episode: 438 training return: tensor(-39469132., device='cuda:0')
episode: 439 training return: tensor(-70774216., device='cuda:0')
epoch: 110 test_true_pfm: -34.65200998528246
episode: 440 training return: tensor(-73719184., device='cuda:0')
episode: 441 training return: tensor(-1.2825e+10, device='cuda:0')
episode: 442 training return: tensor(-1.2001e+10, device='cuda:0')
episode: 443 training return: tensor(-1.3378e+10, device='cuda:0')
epoch: 111 test_true_pfm: -69.61163248159698
episode: 444 training return: tensor(-1.2907e+10, device='cuda:0')
episode: 445 training return: tensor(-1.2074e+10, device='cuda:0')
episode: 446 training return: tensor(-72009896., device='cuda:0')
episode: 447 training return: tensor(-46557048., device='cuda:0')
epoch: 112 test_true_pfm: -43.557112767964156
episode: 448 training return: tensor(-1.2736e+10, device='cuda:0')
episode: 449 training return: tensor(-1.2691e+10, device='cuda:0')
episode: 450 training return: tensor(-3.8665e+09, device='cuda:0')
episode: 451 training return: tensor(-3.7951e+09, device='cuda:0')
epoch: 113 test_true_pfm: 202.74805418085103
episode: 452 training return: tensor(-376801.0312, device='cuda:0')
episode: 453 training return: tensor(-10010557., device='cuda:0')
episode: 454 training return: tensor(-1.3770e+08, device='cuda:0')
episode: 455 training return: tensor(-48698368., device='cuda:0')
epoch: 114 test_true_pfm: -28.41082544741327
episode: 456 training return: tensor(-51873624., device='cuda:0')
episode: 457 training return: tensor(-1.9643e+10, device='cuda:0')
episode: 458 training return: tensor(-29727972., device='cuda:0')
episode: 459 training return: tensor(-66210156., device='cuda:0')
epoch: 115 test_true_pfm: 8.420694738369964
episode: 460 training return: tensor(-39380364., device='cuda:0')
episode: 461 training return: tensor(-42579620., device='cuda:0')
episode: 462 training return: tensor(-58483140., device='cuda:0')
episode: 463 training return: tensor(-45026284., device='cuda:0')
epoch: 116 test_true_pfm: -9.182993067646871
episode: 464 training return: tensor(-79549816., device='cuda:0')
episode: 465 training return: tensor(-2.4498e+08, device='cuda:0')
episode: 466 training return: tensor(-40136244., device='cuda:0')
episode: 467 training return: tensor(-41152140., device='cuda:0')
epoch: 117 test_true_pfm: -112.20317705934973
episode: 468 training return: tensor(-2.6509e+14, device='cuda:0')
episode: 469 training return: tensor(-1.0838e+10, device='cuda:0')
episode: 470 training return: tensor(-1.2210e+10, device='cuda:0')
episode: 471 training return: tensor(-51648592., device='cuda:0')
epoch: 118 test_true_pfm: -163.2083498900204
episode: 472 training return: tensor(-1.3413e+10, device='cuda:0')
episode: 473 training return: tensor(-1.2612e+10, device='cuda:0')
episode: 474 training return: tensor(-1.1648e+10, device='cuda:0')
episode: 475 training return: tensor(-1.2207e+10, device='cuda:0')
epoch: 119 test_true_pfm: -41.0585603092143
episode: 476 training return: tensor(-1.2122e+10, device='cuda:0')
episode: 477 training return: tensor(-1.2095e+10, device='cuda:0')
episode: 478 training return: tensor(-41886904., device='cuda:0')
episode: 479 training return: tensor(-1.2498e+10, device='cuda:0')
epoch: 120 test_true_pfm: 23.27851695947714
episode: 480 training return: tensor(-3.0645e+11, device='cuda:0')
episode: 481 training return: tensor(-41667596., device='cuda:0')
episode: 482 training return: tensor(-4.0947e+09, device='cuda:0')
episode: 483 training return: tensor(-4.8608e+12, device='cuda:0')
epoch: 121 test_true_pfm: -19.82114262514179
episode: 484 training return: tensor(-41305104., device='cuda:0')
episode: 485 training return: tensor(-44171280., device='cuda:0')
episode: 486 training return: tensor(-1.9671e+08, device='cuda:0')
episode: 487 training return: tensor(-42427228., device='cuda:0')
epoch: 122 test_true_pfm: -38.15927070554176
episode: 488 training return: tensor(-1.2361e+10, device='cuda:0')
episode: 489 training return: tensor(-8.7332e+09, device='cuda:0')
episode: 490 training return: tensor(-2.0153e+09, device='cuda:0')
episode: 491 training return: tensor(-1.1979e+10, device='cuda:0')
epoch: 123 test_true_pfm: -35.56773720938495
episode: 492 training return: tensor(-1.1862e+10, device='cuda:0')
episode: 493 training return: tensor(-1.2093e+10, device='cuda:0')
episode: 494 training return: tensor(-2.2574e+09, device='cuda:0')
episode: 495 training return: tensor(-41082432., device='cuda:0')
epoch: 124 test_true_pfm: 258.6337749535947
episode: 496 training return: tensor(-43746068., device='cuda:0')
episode: 497 training return: tensor(-3.9174e+12, device='cuda:0')
episode: 498 training return: tensor(-1.2421e+10, device='cuda:0')
episode: 499 training return: tensor(-1.2081e+10, device='cuda:0')
epoch: 125 test_true_pfm: 186.6927779015133
episode: 500 training return: tensor(-5.6990e+12, device='cuda:0')
episode: 501 training return: tensor(-1.1660e+10, device='cuda:0')
episode: 502 training return: tensor(-1.2378e+10, device='cuda:0')
episode: 503 training return: tensor(-1.1358e+08, device='cuda:0')
epoch: 126 test_true_pfm: -97.20103326602573
episode: 504 training return: tensor(-1.2304e+10, device='cuda:0')
episode: 505 training return: tensor(-37032276., device='cuda:0')
episode: 506 training return: tensor(-35996028., device='cuda:0')
episode: 507 training return: tensor(-1.1253e+10, device='cuda:0')
epoch: 127 test_true_pfm: -17.128262382344314
episode: 508 training return: tensor(-82308104., device='cuda:0')
episode: 509 training return: tensor(-1.2320e+10, device='cuda:0')
episode: 510 training return: tensor(-60934252., device='cuda:0')
episode: 511 training return: tensor(-31580358., device='cuda:0')
epoch: 128 test_true_pfm: -21.65751738841711
episode: 512 training return: tensor(-46949960., device='cuda:0')
episode: 513 training return: tensor(-58228000., device='cuda:0')
episode: 514 training return: tensor(-39771516., device='cuda:0')
episode: 515 training return: tensor(-1.2369e+10, device='cuda:0')
epoch: 129 test_true_pfm: -15.873418120582778
episode: 516 training return: tensor(-30619078., device='cuda:0')
episode: 517 training return: tensor(-1.3858e+10, device='cuda:0')
episode: 518 training return: tensor(-47889880., device='cuda:0')
episode: 519 training return: tensor(-40028448., device='cuda:0')
epoch: 130 test_true_pfm: -46.03573891768257
episode: 520 training return: tensor(-48890712., device='cuda:0')
episode: 521 training return: tensor(-1.9209e+09, device='cuda:0')
episode: 522 training return: tensor(-1.1907e+08, device='cuda:0')
episode: 523 training return: tensor(-54536264., device='cuda:0')
epoch: 131 test_true_pfm: -123.4413761906555
episode: 524 training return: tensor(-3.8008e+09, device='cuda:0')
episode: 525 training return: tensor(-2.5364e+09, device='cuda:0')
episode: 526 training return: tensor(-1.1958e+10, device='cuda:0')
episode: 527 training return: tensor(-3.3702e+09, device='cuda:0')
epoch: 132 test_true_pfm: -36.40799478386557
episode: 528 training return: tensor(-67847448., device='cuda:0')
episode: 529 training return: tensor(-2.2501e+11, device='cuda:0')
episode: 530 training return: tensor(-1.0166e+10, device='cuda:0')
episode: 531 training return: tensor(-2.8397e+12, device='cuda:0')
epoch: 133 test_true_pfm: -20.97801159706879
episode: 532 training return: tensor(-253336.0469, device='cuda:0')
episode: 533 training return: tensor(-2.9046e+10, device='cuda:0')
episode: 534 training return: tensor(-1.6046e+12, device='cuda:0')
episode: 535 training return: tensor(-229113.7344, device='cuda:0')
epoch: 134 test_true_pfm: -57.33886702996735
episode: 536 training return: tensor(-48938588., device='cuda:0')
episode: 537 training return: tensor(-27294832., device='cuda:0')
episode: 538 training return: tensor(-1.2165e+10, device='cuda:0')
episode: 539 training return: tensor(-1.2269e+10, device='cuda:0')
epoch: 135 test_true_pfm: -23.61583597292395
episode: 540 training return: tensor(-42550848., device='cuda:0')
episode: 541 training return: tensor(-13958382., device='cuda:0')
episode: 542 training return: tensor(-2.5025e+10, device='cuda:0')
episode: 543 training return: tensor(-1.2545e+10, device='cuda:0')
epoch: 136 test_true_pfm: -9.604537420772731
episode: 544 training return: tensor(-39036472., device='cuda:0')
episode: 545 training return: tensor(-1.2321e+10, device='cuda:0')
episode: 546 training return: tensor(-1.1494e+10, device='cuda:0')
episode: 547 training return: tensor(-70414840., device='cuda:0')
epoch: 137 test_true_pfm: -39.8645099275719
episode: 548 training return: tensor(-1.2162e+10, device='cuda:0')
episode: 549 training return: tensor(-1.2112e+10, device='cuda:0')
episode: 550 training return: tensor(-6.2900e+09, device='cuda:0')
episode: 551 training return: tensor(-1.2188e+10, device='cuda:0')
epoch: 138 test_true_pfm: -23.77591120142145
episode: 552 training return: tensor(-1.2261e+10, device='cuda:0')
episode: 553 training return: tensor(-1.2616e+10, device='cuda:0')
episode: 554 training return: tensor(-41123572., device='cuda:0')
episode: 555 training return: tensor(-45658644., device='cuda:0')
epoch: 139 test_true_pfm: -39.629839309807835
episode: 556 training return: tensor(-1.5751e+10, device='cuda:0')
episode: 557 training return: tensor(-49108872., device='cuda:0')
episode: 558 training return: tensor(-1.9316e+10, device='cuda:0')
episode: 559 training return: tensor(-53772588., device='cuda:0')
epoch: 140 test_true_pfm: -3.5544599791767646
episode: 560 training return: tensor(-1.2114e+10, device='cuda:0')
episode: 561 training return: tensor(-1.2977e+10, device='cuda:0')
episode: 562 training return: tensor(-45426648., device='cuda:0')
episode: 563 training return: tensor(-1.2834e+10, device='cuda:0')
epoch: 141 test_true_pfm: -103.70180200292809
episode: 564 training return: tensor(-7.0817e+12, device='cuda:0')
episode: 565 training return: tensor(-1.1016e+10, device='cuda:0')
episode: 566 training return: tensor(-3.7662e+10, device='cuda:0')
episode: 567 training return: tensor(-50422028., device='cuda:0')
epoch: 142 test_true_pfm: -47.36378180054535
episode: 568 training return: tensor(-6.1259e+12, device='cuda:0')
episode: 569 training return: tensor(-1.2195e+10, device='cuda:0')
episode: 570 training return: tensor(-1.1948e+10, device='cuda:0')
episode: 571 training return: tensor(-43241004., device='cuda:0')
epoch: 143 test_true_pfm: 8.85014332941307
episode: 572 training return: tensor(-3.5436e+12, device='cuda:0')
episode: 573 training return: tensor(-1.1259e+10, device='cuda:0')
episode: 574 training return: tensor(-41067280., device='cuda:0')
episode: 575 training return: tensor(-1.2263e+10, device='cuda:0')
epoch: 144 test_true_pfm: -22.26359003212777
episode: 576 training return: tensor(-1.2145e+10, device='cuda:0')
episode: 577 training return: tensor(-1.0771e+10, device='cuda:0')
episode: 578 training return: tensor(-1.1986e+10, device='cuda:0')
episode: 579 training return: tensor(-1.2506e+10, device='cuda:0')
epoch: 145 test_true_pfm: 30.97838885899991
episode: 580 training return: tensor(-51277748., device='cuda:0')
episode: 581 training return: tensor(-51259872., device='cuda:0')
episode: 582 training return: tensor(-42403460., device='cuda:0')
episode: 583 training return: tensor(-44782796., device='cuda:0')
epoch: 146 test_true_pfm: 9.945473941810418
episode: 584 training return: tensor(-1.3059e+10, device='cuda:0')
episode: 585 training return: tensor(-40757800., device='cuda:0')
episode: 586 training return: tensor(-1.2477e+10, device='cuda:0')
episode: 587 training return: tensor(-6.9077e+09, device='cuda:0')
epoch: 147 test_true_pfm: -47.20575107299438
episode: 588 training return: tensor(-2.0593e+12, device='cuda:0')
episode: 589 training return: tensor(-1.6503e+10, device='cuda:0')
episode: 590 training return: tensor(-1.2785e+10, device='cuda:0')
episode: 591 training return: tensor(-1.2464e+10, device='cuda:0')
epoch: 148 test_true_pfm: -61.092193375455935
episode: 592 training return: tensor(-42520648., device='cuda:0')
episode: 593 training return: tensor(-72402112., device='cuda:0')
episode: 594 training return: tensor(-1.0739e+10, device='cuda:0')
episode: 595 training return: tensor(-46678996., device='cuda:0')
epoch: 149 test_true_pfm: -72.46138803015526
episode: 596 training return: tensor(-36231872., device='cuda:0')
episode: 597 training return: tensor(-41963968., device='cuda:0')
episode: 598 training return: tensor(-42376136., device='cuda:0')
episode: 599 training return: tensor(-45396968., device='cuda:0')
epoch: 150 test_true_pfm: -46.930870113003586
