['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'baseline', '--traj', 'expert', '--seed', '4']
episode: 0 training return: tensor(-804.6204, device='cuda:0')
episode: 1 training return: tensor(-913.0395, device='cuda:0')
episode: 2 training return: tensor(-875.6512, device='cuda:0')
episode: 3 training return: tensor(-896.1638, device='cuda:0')
epoch: 1 test_true_pfm: 253.53970315956155 sim_pfm: -876.0681964286292
episode: 4 training return: tensor(-875.4551, device='cuda:0')
episode: 5 training return: tensor(-935.9131, device='cuda:0')
episode: 6 training return: tensor(-748.7537, device='cuda:0')
episode: 7 training return: tensor(-617.7718, device='cuda:0')
epoch: 2 test_true_pfm: 53.07058058368393 sim_pfm: -761.0372348396728
episode: 8 training return: tensor(-788.7026, device='cuda:0')
episode: 9 training return: tensor(-826.0874, device='cuda:0')
episode: 10 training return: tensor(-757.5464, device='cuda:0')
episode: 11 training return: tensor(-858.5863, device='cuda:0')
epoch: 3 test_true_pfm: -474.408295937206 sim_pfm: -168.79967599664815
episode: 12 training return: tensor(-547.3773, device='cuda:0')
episode: 13 training return: tensor(-949.3181, device='cuda:0')
episode: 14 training return: tensor(-890.4496, device='cuda:0')
episode: 15 training return: tensor(-877.6298, device='cuda:0')
epoch: 4 test_true_pfm: 70.28016985743635 sim_pfm: -950.2998180352151
episode: 16 training return: tensor(-877.0513, device='cuda:0')
episode: 17 training return: tensor(-892.7781, device='cuda:0')
episode: 18 training return: tensor(-823.7330, device='cuda:0')
episode: 19 training return: tensor(-793.7832, device='cuda:0')
epoch: 5 test_true_pfm: -43.528592455169765 sim_pfm: -804.3220717981458
episode: 20 training return: tensor(-865.1385, device='cuda:0')
episode: 21 training return: tensor(-860.6224, device='cuda:0')
episode: 22 training return: tensor(-858.6388, device='cuda:0')
episode: 23 training return: tensor(-756.3279, device='cuda:0')
epoch: 6 test_true_pfm: 3.763880583803875 sim_pfm: -712.3248021756299
episode: 24 training return: tensor(-834.9144, device='cuda:0')
episode: 25 training return: tensor(-864.0416, device='cuda:0')
episode: 26 training return: tensor(-759.7648, device='cuda:0')
episode: 27 training return: tensor(-784.4902, device='cuda:0')
epoch: 7 test_true_pfm: -87.16080874347206 sim_pfm: -878.9251074194908
episode: 28 training return: tensor(-744.0244, device='cuda:0')
episode: 29 training return: tensor(-933.2913, device='cuda:0')
episode: 30 training return: tensor(-700.7704, device='cuda:0')
episode: 31 training return: tensor(-158.1429, device='cuda:0')
epoch: 8 test_true_pfm: 11.097362140154354 sim_pfm: -676.3879277311886
episode: 32 training return: tensor(-683.9465, device='cuda:0')
episode: 33 training return: tensor(-719.2144, device='cuda:0')
episode: 34 training return: tensor(-929.4747, device='cuda:0')
episode: 35 training return: tensor(-613.9628, device='cuda:0')
epoch: 9 test_true_pfm: -117.69469300103837 sim_pfm: -340.04345131308463
episode: 36 training return: tensor(-898.1862, device='cuda:0')
episode: 37 training return: tensor(-670.6157, device='cuda:0')
episode: 38 training return: tensor(296.6686, device='cuda:0')
episode: 39 training return: tensor(-855.2204, device='cuda:0')
epoch: 10 test_true_pfm: -163.4821738218317 sim_pfm: -708.571526118399
episode: 40 training return: tensor(-550.6987, device='cuda:0')
episode: 41 training return: tensor(-811.5533, device='cuda:0')
episode: 42 training return: tensor(171.7683, device='cuda:0')
episode: 43 training return: tensor(-635.4415, device='cuda:0')
epoch: 11 test_true_pfm: -158.2956152734142 sim_pfm: -894.6886716162165
episode: 44 training return: tensor(-854.0338, device='cuda:0')
episode: 45 training return: tensor(-883.4793, device='cuda:0')
episode: 46 training return: tensor(-764.0943, device='cuda:0')
episode: 47 training return: tensor(-763.8925, device='cuda:0')
epoch: 12 test_true_pfm: 102.03590437717264 sim_pfm: -867.9185893908143
episode: 48 training return: tensor(-712.7064, device='cuda:0')
episode: 49 training return: tensor(-789.9081, device='cuda:0')
episode: 50 training return: tensor(-765.7755, device='cuda:0')
episode: 51 training return: tensor(-854.3375, device='cuda:0')
epoch: 13 test_true_pfm: 235.1560589831647 sim_pfm: -690.3420410977172
episode: 52 training return: tensor(-792.0552, device='cuda:0')
episode: 53 training return: tensor(-467.5337, device='cuda:0')
episode: 54 training return: tensor(-375.7820, device='cuda:0')
episode: 55 training return: tensor(-339.4843, device='cuda:0')
epoch: 14 test_true_pfm: 47.7754203516408 sim_pfm: -917.4546276361992
episode: 56 training return: tensor(-598.2854, device='cuda:0')
episode: 57 training return: tensor(-871.3597, device='cuda:0')
episode: 58 training return: tensor(337.4232, device='cuda:0')
episode: 59 training return: tensor(-551.9937, device='cuda:0')
epoch: 15 test_true_pfm: 71.78516766739534 sim_pfm: -673.644881267605
episode: 60 training return: tensor(-505.4738, device='cuda:0')
episode: 61 training return: tensor(-763.6895, device='cuda:0')
episode: 62 training return: tensor(-845.3741, device='cuda:0')
episode: 63 training return: tensor(-578.9330, device='cuda:0')
epoch: 16 test_true_pfm: -56.60609165737984 sim_pfm: -503.05471327357617
episode: 64 training return: tensor(-817.9331, device='cuda:0')
episode: 65 training return: tensor(-549.1727, device='cuda:0')
episode: 66 training return: tensor(-916.8743, device='cuda:0')
episode: 67 training return: tensor(-433.4370, device='cuda:0')
epoch: 17 test_true_pfm: -60.884354382926425 sim_pfm: -413.97459251897334
episode: 68 training return: tensor(-716.9037, device='cuda:0')
episode: 69 training return: tensor(-728.6923, device='cuda:0')
episode: 70 training return: tensor(-572.9580, device='cuda:0')
episode: 71 training return: tensor(-582.5690, device='cuda:0')
epoch: 18 test_true_pfm: 66.92749093234103 sim_pfm: -691.091678129218
episode: 72 training return: tensor(-490.7453, device='cuda:0')
episode: 73 training return: tensor(-754.9213, device='cuda:0')
episode: 74 training return: tensor(-487.6160, device='cuda:0')
episode: 75 training return: tensor(186.7953, device='cuda:0')
epoch: 19 test_true_pfm: 145.08932133094473 sim_pfm: -818.2209463901818
episode: 76 training return: tensor(-283.1895, device='cuda:0')
episode: 77 training return: tensor(-507.2578, device='cuda:0')
episode: 78 training return: tensor(-698.8041, device='cuda:0')
episode: 79 training return: tensor(-760.5510, device='cuda:0')
epoch: 20 test_true_pfm: 123.35799515842159 sim_pfm: -810.7982792550077
episode: 80 training return: tensor(-689.3168, device='cuda:0')
episode: 81 training return: tensor(-666.3457, device='cuda:0')
episode: 82 training return: tensor(-652.6051, device='cuda:0')
episode: 83 training return: tensor(-465.0635, device='cuda:0')
epoch: 21 test_true_pfm: 119.74375412413086 sim_pfm: -755.1987932967022
episode: 84 training return: tensor(-467.5624, device='cuda:0')
episode: 85 training return: tensor(-626.2285, device='cuda:0')
episode: 86 training return: tensor(-659.0417, device='cuda:0')
episode: 87 training return: tensor(-792.0784, device='cuda:0')
epoch: 22 test_true_pfm: -633.4973186774628 sim_pfm: -276.58932607865427
episode: 88 training return: tensor(-436.3481, device='cuda:0')
episode: 89 training return: tensor(-802.6273, device='cuda:0')
episode: 90 training return: tensor(-301.4830, device='cuda:0')
episode: 91 training return: tensor(0.1393, device='cuda:0')
epoch: 23 test_true_pfm: -135.0010180777443 sim_pfm: -671.9566688683117
episode: 92 training return: tensor(-172.0467, device='cuda:0')
episode: 93 training return: tensor(-652.4384, device='cuda:0')
episode: 94 training return: tensor(-627.5236, device='cuda:0')
episode: 95 training return: tensor(-628.5809, device='cuda:0')
epoch: 24 test_true_pfm: 34.899353601413964 sim_pfm: -679.6025758666607
episode: 96 training return: tensor(-703.6107, device='cuda:0')
episode: 97 training return: tensor(-693.5493, device='cuda:0')
episode: 98 training return: tensor(-643.1769, device='cuda:0')
episode: 99 training return: tensor(-654.0700, device='cuda:0')
epoch: 25 test_true_pfm: 17.637887012519844 sim_pfm: -642.0777395344339
episode: 100 training return: tensor(-561.6560, device='cuda:0')
episode: 101 training return: tensor(-361.1795, device='cuda:0')
episode: 102 training return: tensor(-486.6047, device='cuda:0')
episode: 103 training return: tensor(-775.1917, device='cuda:0')
epoch: 26 test_true_pfm: -44.2273645644319 sim_pfm: -533.4756709467038
episode: 104 training return: tensor(-847.3271, device='cuda:0')
episode: 105 training return: tensor(-637.1699, device='cuda:0')
episode: 106 training return: tensor(-632.1265, device='cuda:0')
episode: 107 training return: tensor(-711.9900, device='cuda:0')
epoch: 27 test_true_pfm: -65.9517159159855 sim_pfm: -838.1383855800765
episode: 108 training return: tensor(-108.5948, device='cuda:0')
episode: 109 training return: tensor(-801.2221, device='cuda:0')
episode: 110 training return: tensor(-652.6141, device='cuda:0')
episode: 111 training return: tensor(-471.0564, device='cuda:0')
epoch: 28 test_true_pfm: -206.89049806645994 sim_pfm: -609.8949627708256
episode: 112 training return: tensor(-696.5191, device='cuda:0')
episode: 113 training return: tensor(-650.0996, device='cuda:0')
episode: 114 training return: tensor(-777.1874, device='cuda:0')
episode: 115 training return: tensor(-718.8781, device='cuda:0')
epoch: 29 test_true_pfm: -166.51387667024645 sim_pfm: -727.5226237740135
episode: 116 training return: tensor(-381.8669, device='cuda:0')
episode: 117 training return: tensor(74.0534, device='cuda:0')
episode: 118 training return: tensor(-793.6438, device='cuda:0')
episode: 119 training return: tensor(-455.9976, device='cuda:0')
epoch: 30 test_true_pfm: -147.56493501679645 sim_pfm: -610.5571223456258
episode: 120 training return: tensor(-624.7189, device='cuda:0')
episode: 121 training return: tensor(-747.8587, device='cuda:0')
episode: 122 training return: tensor(-784.9464, device='cuda:0')
episode: 123 training return: tensor(-712.2305, device='cuda:0')
epoch: 31 test_true_pfm: -407.9195924410856 sim_pfm: -633.4261900782585
episode: 124 training return: tensor(-642.9857, device='cuda:0')
episode: 125 training return: tensor(-626.3438, device='cuda:0')
episode: 126 training return: tensor(-657.7717, device='cuda:0')
episode: 127 training return: tensor(-731.1639, device='cuda:0')
epoch: 32 test_true_pfm: -422.11696307606985 sim_pfm: -502.14962366751087
episode: 128 training return: tensor(-578.2502, device='cuda:0')
episode: 129 training return: tensor(-629.0296, device='cuda:0')
episode: 130 training return: tensor(-196.7116, device='cuda:0')
episode: 131 training return: tensor(-843.2061, device='cuda:0')
epoch: 33 test_true_pfm: -271.7576662878673 sim_pfm: -565.160564569834
episode: 132 training return: tensor(-757.7037, device='cuda:0')
episode: 133 training return: tensor(-685.4285, device='cuda:0')
episode: 134 training return: tensor(-666.6717, device='cuda:0')
episode: 135 training return: tensor(-792.8792, device='cuda:0')
epoch: 34 test_true_pfm: -336.9552366259938 sim_pfm: -673.7310181618668
episode: 136 training return: tensor(-517.3815, device='cuda:0')
episode: 137 training return: tensor(-678.5616, device='cuda:0')
episode: 138 training return: tensor(-766.7994, device='cuda:0')
episode: 139 training return: tensor(-745.1757, device='cuda:0')
epoch: 35 test_true_pfm: -221.2370700049024 sim_pfm: -701.6856235219942
episode: 140 training return: tensor(-677.7160, device='cuda:0')
episode: 141 training return: tensor(-639.8157, device='cuda:0')
episode: 142 training return: tensor(-674.1562, device='cuda:0')
episode: 143 training return: tensor(-684.3741, device='cuda:0')
epoch: 36 test_true_pfm: -236.05274500376723 sim_pfm: -601.2574887148415
episode: 144 training return: tensor(-667.9479, device='cuda:0')
episode: 145 training return: tensor(-642.6378, device='cuda:0')
episode: 146 training return: tensor(-407.6300, device='cuda:0')
episode: 147 training return: tensor(-694.2411, device='cuda:0')
epoch: 37 test_true_pfm: -260.7377176002554 sim_pfm: -606.1528499053675
episode: 148 training return: tensor(-545.9346, device='cuda:0')
episode: 149 training return: tensor(-637.1788, device='cuda:0')
episode: 150 training return: tensor(-651.6221, device='cuda:0')
episode: 151 training return: tensor(-213.6485, device='cuda:0')
epoch: 38 test_true_pfm: -111.1189186493332 sim_pfm: -746.1534068678351
episode: 152 training return: tensor(-544.7852, device='cuda:0')
episode: 153 training return: tensor(-613.1499, device='cuda:0')
episode: 154 training return: tensor(-726.0687, device='cuda:0')
episode: 155 training return: tensor(-675.6473, device='cuda:0')
epoch: 39 test_true_pfm: -229.14170233136852 sim_pfm: -599.5737016334897
episode: 156 training return: tensor(-602.9712, device='cuda:0')
episode: 157 training return: tensor(-660.0574, device='cuda:0')
episode: 158 training return: tensor(-614.0783, device='cuda:0')
episode: 159 training return: tensor(-641.5378, device='cuda:0')
epoch: 40 test_true_pfm: -439.5607153800799 sim_pfm: -685.9359621580807
episode: 160 training return: tensor(-646.0114, device='cuda:0')
episode: 161 training return: tensor(-688.9891, device='cuda:0')
episode: 162 training return: tensor(-614.0014, device='cuda:0')
episode: 163 training return: tensor(-644.3657, device='cuda:0')
epoch: 41 test_true_pfm: -74.39583881016443 sim_pfm: -660.8929067702653
episode: 164 training return: tensor(-629.3838, device='cuda:0')
episode: 165 training return: tensor(-646.4855, device='cuda:0')
episode: 166 training return: tensor(-636.1355, device='cuda:0')
episode: 167 training return: tensor(-719.8158, device='cuda:0')
epoch: 42 test_true_pfm: -59.36937430183517 sim_pfm: -629.603702069105
episode: 168 training return: tensor(-865.7600, device='cuda:0')
episode: 169 training return: tensor(-698.9995, device='cuda:0')
episode: 170 training return: tensor(-629.5513, device='cuda:0')
episode: 171 training return: tensor(-104.4254, device='cuda:0')
epoch: 43 test_true_pfm: -3.191891593960936 sim_pfm: -779.755888276423
episode: 172 training return: tensor(-682.2058, device='cuda:0')
episode: 173 training return: tensor(-713.3533, device='cuda:0')
episode: 174 training return: tensor(-541.9466, device='cuda:0')
episode: 175 training return: tensor(-692.3054, device='cuda:0')
epoch: 44 test_true_pfm: -178.2441966045146 sim_pfm: -566.6018841881305
episode: 176 training return: tensor(-671.2011, device='cuda:0')
episode: 177 training return: tensor(-634.0659, device='cuda:0')
episode: 178 training return: tensor(-703.3622, device='cuda:0')
episode: 179 training return: tensor(-607.0795, device='cuda:0')
epoch: 45 test_true_pfm: 63.075251465821424 sim_pfm: -599.0908584239951
episode: 180 training return: tensor(-657.8522, device='cuda:0')
episode: 181 training return: tensor(-676.4084, device='cuda:0')
episode: 182 training return: tensor(-646.6777, device='cuda:0')
episode: 183 training return: tensor(-677.6812, device='cuda:0')
epoch: 46 test_true_pfm: 49.05875140441009 sim_pfm: -570.9971764554502
episode: 184 training return: tensor(-667.8493, device='cuda:0')
episode: 185 training return: tensor(-691.3480, device='cuda:0')
episode: 186 training return: tensor(-606.8291, device='cuda:0')
episode: 187 training return: tensor(-652.6082, device='cuda:0')
epoch: 47 test_true_pfm: -228.7636601575832 sim_pfm: -598.5569098715399
episode: 188 training return: tensor(-645.8345, device='cuda:0')
episode: 189 training return: tensor(-634.1992, device='cuda:0')
episode: 190 training return: tensor(-666.8048, device='cuda:0')
episode: 191 training return: tensor(-634.0090, device='cuda:0')
epoch: 48 test_true_pfm: 22.46445684733212 sim_pfm: -623.9960435638204
episode: 192 training return: tensor(-656.4964, device='cuda:0')
episode: 193 training return: tensor(-756.1296, device='cuda:0')
episode: 194 training return: tensor(-673.1977, device='cuda:0')
episode: 195 training return: tensor(-636.4775, device='cuda:0')
epoch: 49 test_true_pfm: -116.12129315364005 sim_pfm: -584.5730409295065
episode: 196 training return: tensor(-626.0547, device='cuda:0')
episode: 197 training return: tensor(-622.2908, device='cuda:0')
episode: 198 training return: tensor(-563.8953, device='cuda:0')
episode: 199 training return: tensor(-626.8181, device='cuda:0')
epoch: 50 test_true_pfm: -43.8851270274742 sim_pfm: -594.9391068145633
episode: 200 training return: tensor(-716.9587, device='cuda:0')
episode: 201 training return: tensor(-846.6440, device='cuda:0')
episode: 202 training return: tensor(-654.4841, device='cuda:0')
episode: 203 training return: tensor(-665.7299, device='cuda:0')
epoch: 51 test_true_pfm: -117.26577398515023 sim_pfm: -584.0432764030993
episode: 204 training return: tensor(-687.6129, device='cuda:0')
episode: 205 training return: tensor(-640.0367, device='cuda:0')
episode: 206 training return: tensor(-690.0027, device='cuda:0')
episode: 207 training return: tensor(-652.6673, device='cuda:0')
epoch: 52 test_true_pfm: -59.65791263242878 sim_pfm: -623.2746249123787
episode: 208 training return: tensor(-308.9842, device='cuda:0')
episode: 209 training return: tensor(-651.2786, device='cuda:0')
episode: 210 training return: tensor(-614.6038, device='cuda:0')
episode: 211 training return: tensor(-616.6901, device='cuda:0')
epoch: 53 test_true_pfm: -201.47153946363713 sim_pfm: -615.4216082591253
episode: 212 training return: tensor(-611.7272, device='cuda:0')
episode: 213 training return: tensor(-673.9424, device='cuda:0')
episode: 214 training return: tensor(-645.8691, device='cuda:0')
episode: 215 training return: tensor(-613.7385, device='cuda:0')
epoch: 54 test_true_pfm: -60.372716257538826 sim_pfm: -610.3300110009732
episode: 216 training return: tensor(-712.0340, device='cuda:0')
episode: 217 training return: tensor(-606.5416, device='cuda:0')
episode: 218 training return: tensor(-318.8061, device='cuda:0')
episode: 219 training return: tensor(-630.0321, device='cuda:0')
epoch: 55 test_true_pfm: 2.924358084900131 sim_pfm: -602.9828487701403
episode: 220 training return: tensor(-666.8807, device='cuda:0')
episode: 221 training return: tensor(-693.9931, device='cuda:0')
episode: 222 training return: tensor(-693.3991, device='cuda:0')
episode: 223 training return: tensor(-656.9122, device='cuda:0')
epoch: 56 test_true_pfm: -98.75317835550675 sim_pfm: -617.8142881125872
episode: 224 training return: tensor(-610.0821, device='cuda:0')
episode: 225 training return: tensor(-651.0900, device='cuda:0')
episode: 226 training return: tensor(-656.0321, device='cuda:0')
episode: 227 training return: tensor(-607.8032, device='cuda:0')
epoch: 57 test_true_pfm: -9.318507589742191 sim_pfm: -575.2952579399571
episode: 228 training return: tensor(-612.3232, device='cuda:0')
episode: 229 training return: tensor(-689.9980, device='cuda:0')
episode: 230 training return: tensor(-670.9599, device='cuda:0')
episode: 231 training return: tensor(-683.7059, device='cuda:0')
epoch: 58 test_true_pfm: -4.218533449363217 sim_pfm: -565.438089749155
episode: 232 training return: tensor(-669.7205, device='cuda:0')
episode: 233 training return: tensor(-671.7031, device='cuda:0')
episode: 234 training return: tensor(-645.1778, device='cuda:0')
episode: 235 training return: tensor(-578.7247, device='cuda:0')
epoch: 59 test_true_pfm: -31.308801202403746 sim_pfm: -588.855078034879
episode: 236 training return: tensor(-660.2947, device='cuda:0')
episode: 237 training return: tensor(-620.2906, device='cuda:0')
episode: 238 training return: tensor(-623.7227, device='cuda:0')
episode: 239 training return: tensor(-666.6480, device='cuda:0')
epoch: 60 test_true_pfm: -158.9051046502285 sim_pfm: -591.7034469379578
episode: 240 training return: tensor(-681.1470, device='cuda:0')
episode: 241 training return: tensor(-657.2136, device='cuda:0')
episode: 242 training return: tensor(-603.4290, device='cuda:0')
episode: 243 training return: tensor(-607.1328, device='cuda:0')
epoch: 61 test_true_pfm: -177.2305465278254 sim_pfm: -551.8851768392682
episode: 244 training return: tensor(-622.0549, device='cuda:0')
episode: 245 training return: tensor(-610.9300, device='cuda:0')
episode: 246 training return: tensor(-634.0832, device='cuda:0')
episode: 247 training return: tensor(-637.9393, device='cuda:0')
epoch: 62 test_true_pfm: -72.68830289943281 sim_pfm: -631.1360938572325
episode: 248 training return: tensor(-598.1232, device='cuda:0')
episode: 249 training return: tensor(-684.3480, device='cuda:0')
episode: 250 training return: tensor(-690.6354, device='cuda:0')
episode: 251 training return: tensor(-641.9139, device='cuda:0')
epoch: 63 test_true_pfm: -181.6213005806001 sim_pfm: -634.966221324401
episode: 252 training return: tensor(-563.2319, device='cuda:0')
episode: 253 training return: tensor(-710.5773, device='cuda:0')
episode: 254 training return: tensor(-621.6480, device='cuda:0')
episode: 255 training return: tensor(-668.9565, device='cuda:0')
epoch: 64 test_true_pfm: -129.4223725369457 sim_pfm: -537.244655150144
episode: 256 training return: tensor(-651.3332, device='cuda:0')
episode: 257 training return: tensor(-602.8547, device='cuda:0')
episode: 258 training return: tensor(-596.3211, device='cuda:0')
episode: 259 training return: tensor(-671.5898, device='cuda:0')
epoch: 65 test_true_pfm: -330.3359953698544 sim_pfm: -588.7138727570418
episode: 260 training return: tensor(-757.3307, device='cuda:0')
episode: 261 training return: tensor(-607.9714, device='cuda:0')
episode: 262 training return: tensor(-678.9317, device='cuda:0')
episode: 263 training return: tensor(-691.8204, device='cuda:0')
epoch: 66 test_true_pfm: -171.7255355173139 sim_pfm: -557.7161722668292
episode: 264 training return: tensor(-608.9954, device='cuda:0')
episode: 265 training return: tensor(-701.0708, device='cuda:0')
episode: 266 training return: tensor(-643.9853, device='cuda:0')
episode: 267 training return: tensor(-664.4361, device='cuda:0')
epoch: 67 test_true_pfm: 8.418940830630708 sim_pfm: -543.6830112307216
episode: 268 training return: tensor(-589.4413, device='cuda:0')
episode: 269 training return: tensor(-612.3600, device='cuda:0')
episode: 270 training return: tensor(-573.3708, device='cuda:0')
episode: 271 training return: tensor(-673.0454, device='cuda:0')
epoch: 68 test_true_pfm: 37.044905513434855 sim_pfm: -548.0034407135487
episode: 272 training return: tensor(-676.6690, device='cuda:0')
episode: 273 training return: tensor(-638.3528, device='cuda:0')
episode: 274 training return: tensor(-636.6735, device='cuda:0')
episode: 275 training return: tensor(-592.7001, device='cuda:0')
epoch: 69 test_true_pfm: -288.3899622119752 sim_pfm: -579.9654076655861
episode: 276 training return: tensor(-619.1888, device='cuda:0')
episode: 277 training return: tensor(-749.2081, device='cuda:0')
episode: 278 training return: tensor(-627.1376, device='cuda:0')
episode: 279 training return: tensor(-613.2895, device='cuda:0')
epoch: 70 test_true_pfm: 32.96926467038144 sim_pfm: -546.678868685849
episode: 280 training return: tensor(-642.9362, device='cuda:0')
episode: 281 training return: tensor(-611.3730, device='cuda:0')
episode: 282 training return: tensor(-625.6108, device='cuda:0')
episode: 283 training return: tensor(-652.1869, device='cuda:0')
epoch: 71 test_true_pfm: 85.66087573931024 sim_pfm: -572.4811224717026
episode: 284 training return: tensor(-631.5703, device='cuda:0')
episode: 285 training return: tensor(-612.9246, device='cuda:0')
episode: 286 training return: tensor(-587.5518, device='cuda:0')
episode: 287 training return: tensor(-758.2214, device='cuda:0')
epoch: 72 test_true_pfm: -235.81445883623837 sim_pfm: -515.3961400642584
episode: 288 training return: tensor(-606.3163, device='cuda:0')
episode: 289 training return: tensor(-599.4075, device='cuda:0')
episode: 290 training return: tensor(-662.7153, device='cuda:0')
episode: 291 training return: tensor(-647.6605, device='cuda:0')
epoch: 73 test_true_pfm: -142.60674771328632 sim_pfm: -577.5162154243638
episode: 292 training return: tensor(-569.5089, device='cuda:0')
episode: 293 training return: tensor(-659.1376, device='cuda:0')
episode: 294 training return: tensor(-627.2204, device='cuda:0')
episode: 295 training return: tensor(-644.2451, device='cuda:0')
epoch: 74 test_true_pfm: 159.25579237443128 sim_pfm: -563.0547109452697
episode: 296 training return: tensor(-610.7887, device='cuda:0')
episode: 297 training return: tensor(-620.8323, device='cuda:0')
episode: 298 training return: tensor(-639.5505, device='cuda:0')
episode: 299 training return: tensor(-688.1252, device='cuda:0')
epoch: 75 test_true_pfm: 109.50544879822382 sim_pfm: -549.2338884378938
episode: 300 training return: tensor(-623.2709, device='cuda:0')
episode: 301 training return: tensor(-621.4090, device='cuda:0')
episode: 302 training return: tensor(-604.7723, device='cuda:0')
episode: 303 training return: tensor(-617.5546, device='cuda:0')
epoch: 76 test_true_pfm: 6.764596076387353 sim_pfm: -596.1032922770052
episode: 304 training return: tensor(-582.2949, device='cuda:0')
episode: 305 training return: tensor(-644.6328, device='cuda:0')
episode: 306 training return: tensor(-644.9009, device='cuda:0')
episode: 307 training return: tensor(-646.8049, device='cuda:0')
epoch: 77 test_true_pfm: 34.8620935661826 sim_pfm: -543.9936293778786
episode: 308 training return: tensor(-611.2302, device='cuda:0')
episode: 309 training return: tensor(-615.0951, device='cuda:0')
episode: 310 training return: tensor(-617.5721, device='cuda:0')
episode: 311 training return: tensor(-663.5231, device='cuda:0')
epoch: 78 test_true_pfm: -14.067137595061013 sim_pfm: -630.4246718967333
episode: 312 training return: tensor(-677.1935, device='cuda:0')
episode: 313 training return: tensor(-646.7728, device='cuda:0')
episode: 314 training return: tensor(-623.5615, device='cuda:0')
episode: 315 training return: tensor(-636.4250, device='cuda:0')
epoch: 79 test_true_pfm: 52.97160994669298 sim_pfm: -581.7016091072
episode: 316 training return: tensor(-638.7341, device='cuda:0')
episode: 317 training return: tensor(-589.9426, device='cuda:0')
episode: 318 training return: tensor(-595.7841, device='cuda:0')
episode: 319 training return: tensor(-632.7247, device='cuda:0')
epoch: 80 test_true_pfm: -33.95428199530223 sim_pfm: -551.7725819322901
episode: 320 training return: tensor(-582.5816, device='cuda:0')
episode: 321 training return: tensor(-669.1780, device='cuda:0')
episode: 322 training return: tensor(-886.9716, device='cuda:0')
episode: 323 training return: tensor(-623.8331, device='cuda:0')
epoch: 81 test_true_pfm: -77.13337198116223 sim_pfm: -580.3402871342842
episode: 324 training return: tensor(-644.4879, device='cuda:0')
episode: 325 training return: tensor(-662.3900, device='cuda:0')
episode: 326 training return: tensor(-641.9797, device='cuda:0')
episode: 327 training return: tensor(-590.5152, device='cuda:0')
epoch: 82 test_true_pfm: -118.49914571169226 sim_pfm: -558.6869585783764
episode: 328 training return: tensor(-601.1519, device='cuda:0')
episode: 329 training return: tensor(-608.2802, device='cuda:0')
episode: 330 training return: tensor(-654.1892, device='cuda:0')
episode: 331 training return: tensor(-594.1450, device='cuda:0')
epoch: 83 test_true_pfm: 37.67671052727108 sim_pfm: -566.2765641727796
episode: 332 training return: tensor(-628.1471, device='cuda:0')
episode: 333 training return: tensor(-604.2945, device='cuda:0')
episode: 334 training return: tensor(-612.2985, device='cuda:0')
episode: 335 training return: tensor(-640.0148, device='cuda:0')
epoch: 84 test_true_pfm: 50.04640871522226 sim_pfm: -770.3262415019175
episode: 336 training return: tensor(-624.5739, device='cuda:0')
episode: 337 training return: tensor(-652.4424, device='cuda:0')
episode: 338 training return: tensor(-594.4918, device='cuda:0')
episode: 339 training return: tensor(-640.5444, device='cuda:0')
epoch: 85 test_true_pfm: 69.33329377760701 sim_pfm: -555.1055525147822
episode: 340 training return: tensor(-654.6230, device='cuda:0')
episode: 341 training return: tensor(-608.2973, device='cuda:0')
episode: 342 training return: tensor(-731.1224, device='cuda:0')
episode: 343 training return: tensor(-676.9644, device='cuda:0')
epoch: 86 test_true_pfm: -19.76934067817084 sim_pfm: -533.5675022296297
episode: 344 training return: tensor(-618.1829, device='cuda:0')
episode: 345 training return: tensor(-506.2603, device='cuda:0')
episode: 346 training return: tensor(-489.7706, device='cuda:0')
episode: 347 training return: tensor(-632.6712, device='cuda:0')
epoch: 87 test_true_pfm: 94.23102734372769 sim_pfm: -557.5256048409113
episode: 348 training return: tensor(-602.7271, device='cuda:0')
episode: 349 training return: tensor(-603.2183, device='cuda:0')
episode: 350 training return: tensor(-589.7919, device='cuda:0')
episode: 351 training return: tensor(-608.3696, device='cuda:0')
epoch: 88 test_true_pfm: 30.904223044768987 sim_pfm: -542.5054086213544
episode: 352 training return: tensor(-734.3594, device='cuda:0')
episode: 353 training return: tensor(-677.8258, device='cuda:0')
episode: 354 training return: tensor(-590.6639, device='cuda:0')
episode: 355 training return: tensor(-593.3130, device='cuda:0')
epoch: 89 test_true_pfm: -17.54730633581221 sim_pfm: -525.8949387711473
episode: 356 training return: tensor(-642.3715, device='cuda:0')
episode: 357 training return: tensor(-625.1449, device='cuda:0')
episode: 358 training return: tensor(-614.2033, device='cuda:0')
episode: 359 training return: tensor(-918.3734, device='cuda:0')
epoch: 90 test_true_pfm: 39.719616185083346 sim_pfm: -510.87776938744355
episode: 360 training return: tensor(-621.9796, device='cuda:0')
episode: 361 training return: tensor(-587.4823, device='cuda:0')
episode: 362 training return: tensor(-597.3426, device='cuda:0')
episode: 363 training return: tensor(-593.8194, device='cuda:0')
epoch: 91 test_true_pfm: -34.93166770721024 sim_pfm: -533.6008100723848
episode: 364 training return: tensor(-607.8887, device='cuda:0')
episode: 365 training return: tensor(-782.4258, device='cuda:0')
episode: 366 training return: tensor(-588.6061, device='cuda:0')
episode: 367 training return: tensor(-863.9499, device='cuda:0')
epoch: 92 test_true_pfm: 32.49238866854419 sim_pfm: -548.2472357367709
episode: 368 training return: tensor(-619.0200, device='cuda:0')
episode: 369 training return: tensor(-639.8063, device='cuda:0')
episode: 370 training return: tensor(-631.0094, device='cuda:0')
episode: 371 training return: tensor(-591.6403, device='cuda:0')
epoch: 93 test_true_pfm: 102.81763482964618 sim_pfm: -546.7833312116563
episode: 372 training return: tensor(-610.7918, device='cuda:0')
episode: 373 training return: tensor(-625.2893, device='cuda:0')
episode: 374 training return: tensor(-617.1006, device='cuda:0')
episode: 375 training return: tensor(-644.5176, device='cuda:0')
epoch: 94 test_true_pfm: 27.303548248806223 sim_pfm: -539.212178974878
episode: 376 training return: tensor(-592.5662, device='cuda:0')
episode: 377 training return: tensor(-616.4804, device='cuda:0')
episode: 378 training return: tensor(-808.1976, device='cuda:0')
episode: 379 training return: tensor(-612.4655, device='cuda:0')
epoch: 95 test_true_pfm: 99.85993929042313 sim_pfm: -520.1007044557967
episode: 380 training return: tensor(-602.8943, device='cuda:0')
episode: 381 training return: tensor(-611.0344, device='cuda:0')
episode: 382 training return: tensor(-596.5105, device='cuda:0')
episode: 383 training return: tensor(-612.6210, device='cuda:0')
epoch: 96 test_true_pfm: 60.789986136741874 sim_pfm: -542.0368026516904
episode: 384 training return: tensor(-661.5266, device='cuda:0')
episode: 385 training return: tensor(-621.5369, device='cuda:0')
episode: 386 training return: tensor(-628.6240, device='cuda:0')
episode: 387 training return: tensor(-615.2026, device='cuda:0')
epoch: 97 test_true_pfm: 43.25004996139466 sim_pfm: -527.4021265905661
episode: 388 training return: tensor(-599.2634, device='cuda:0')
episode: 389 training return: tensor(-627.5257, device='cuda:0')
episode: 390 training return: tensor(-676.1337, device='cuda:0')
episode: 391 training return: tensor(-587.4500, device='cuda:0')
epoch: 98 test_true_pfm: 96.67484374865161 sim_pfm: -540.1279822633757
episode: 392 training return: tensor(-619.9197, device='cuda:0')
episode: 393 training return: tensor(-580.9618, device='cuda:0')
episode: 394 training return: tensor(-628.7029, device='cuda:0')
episode: 395 training return: tensor(-616.5282, device='cuda:0')
epoch: 99 test_true_pfm: 36.04467197232719 sim_pfm: -536.2666191085397
episode: 396 training return: tensor(-656.8879, device='cuda:0')
episode: 397 training return: tensor(-538.7067, device='cuda:0')
episode: 398 training return: tensor(-558.2521, device='cuda:0')
episode: 399 training return: tensor(-592.2627, device='cuda:0')
epoch: 100 test_true_pfm: -33.844114519543815 sim_pfm: -502.67137304170564
episode: 400 training return: tensor(-611.6986, device='cuda:0')
episode: 401 training return: tensor(-563.4909, device='cuda:0')
episode: 402 training return: tensor(-613.9709, device='cuda:0')
episode: 403 training return: tensor(-596.7795, device='cuda:0')
epoch: 101 test_true_pfm: 179.2742908710303 sim_pfm: -505.8081749562795
episode: 404 training return: tensor(-568.0071, device='cuda:0')
episode: 405 training return: tensor(-634.6046, device='cuda:0')
episode: 406 training return: tensor(-612.8065, device='cuda:0')
episode: 407 training return: tensor(-437.1963, device='cuda:0')
epoch: 102 test_true_pfm: 203.40723432475917 sim_pfm: -529.5368725848579
episode: 408 training return: tensor(-571.7740, device='cuda:0')
episode: 409 training return: tensor(-609.6846, device='cuda:0')
episode: 410 training return: tensor(-617.8654, device='cuda:0')
episode: 411 training return: tensor(-640.4592, device='cuda:0')
epoch: 103 test_true_pfm: 35.767285649627375 sim_pfm: -542.3083740098131
episode: 412 training return: tensor(-592.8119, device='cuda:0')
episode: 413 training return: tensor(-614.1633, device='cuda:0')
episode: 414 training return: tensor(-604.0508, device='cuda:0')
episode: 415 training return: tensor(-586.9514, device='cuda:0')
epoch: 104 test_true_pfm: 103.3487245996904 sim_pfm: -520.2330336941328
episode: 416 training return: tensor(-861.8530, device='cuda:0')
episode: 417 training return: tensor(-598.0056, device='cuda:0')
episode: 418 training return: tensor(-617.7155, device='cuda:0')
episode: 419 training return: tensor(-597.8856, device='cuda:0')
epoch: 105 test_true_pfm: 63.54963160261661 sim_pfm: -612.9344844707133
episode: 420 training return: tensor(-587.4700, device='cuda:0')
episode: 421 training return: tensor(-588.8062, device='cuda:0')
episode: 422 training return: tensor(-657.7234, device='cuda:0')
episode: 423 training return: tensor(-579.0233, device='cuda:0')
epoch: 106 test_true_pfm: 114.06876732596515 sim_pfm: -559.4819397929047
episode: 424 training return: tensor(-580.5443, device='cuda:0')
episode: 425 training return: tensor(-610.5832, device='cuda:0')
episode: 426 training return: tensor(-584.5740, device='cuda:0')
episode: 427 training return: tensor(-645.1525, device='cuda:0')
epoch: 107 test_true_pfm: 178.4005443451238 sim_pfm: -492.5278644593588
episode: 428 training return: tensor(-618.9498, device='cuda:0')
episode: 429 training return: tensor(-584.5322, device='cuda:0')
episode: 430 training return: tensor(-606.8329, device='cuda:0')
episode: 431 training return: tensor(-628.7532, device='cuda:0')
epoch: 108 test_true_pfm: 18.84438184828865 sim_pfm: -541.1974147607107
episode: 432 training return: tensor(-616.2440, device='cuda:0')
episode: 433 training return: tensor(-613.0235, device='cuda:0')
episode: 434 training return: tensor(-554.1763, device='cuda:0')
episode: 435 training return: tensor(-599.9801, device='cuda:0')
epoch: 109 test_true_pfm: -91.97852875804993 sim_pfm: -535.3637102027424
episode: 436 training return: tensor(-631.1082, device='cuda:0')
episode: 437 training return: tensor(-602.7741, device='cuda:0')
episode: 438 training return: tensor(-603.2737, device='cuda:0')
episode: 439 training return: tensor(-615.5724, device='cuda:0')
epoch: 110 test_true_pfm: 141.8442513634559 sim_pfm: -536.2773190617639
episode: 440 training return: tensor(-630.0650, device='cuda:0')
episode: 441 training return: tensor(-855.9077, device='cuda:0')
episode: 442 training return: tensor(-735.6729, device='cuda:0')
episode: 443 training return: tensor(-650.5477, device='cuda:0')
epoch: 111 test_true_pfm: 143.75209368290209 sim_pfm: -517.0649836324155
episode: 444 training return: tensor(-611.4204, device='cuda:0')
episode: 445 training return: tensor(-634.6334, device='cuda:0')
episode: 446 training return: tensor(-610.8016, device='cuda:0')
episode: 447 training return: tensor(-582.6817, device='cuda:0')
epoch: 112 test_true_pfm: 36.331297295519626 sim_pfm: -542.0750199843509
episode: 448 training return: tensor(-623.3188, device='cuda:0')
episode: 449 training return: tensor(-657.3827, device='cuda:0')
episode: 450 training return: tensor(-581.4512, device='cuda:0')
episode: 451 training return: tensor(-573.4200, device='cuda:0')
epoch: 113 test_true_pfm: 135.2276748080856 sim_pfm: -516.1524155308725
episode: 452 training return: tensor(-614.4824, device='cuda:0')
episode: 453 training return: tensor(-858.1481, device='cuda:0')
episode: 454 training return: tensor(-590.7651, device='cuda:0')
episode: 455 training return: tensor(-597.8860, device='cuda:0')
epoch: 114 test_true_pfm: 169.6272314762541 sim_pfm: -544.8782804789176
episode: 456 training return: tensor(-571.0828, device='cuda:0')
episode: 457 training return: tensor(-656.1060, device='cuda:0')
episode: 458 training return: tensor(-587.8672, device='cuda:0')
episode: 459 training return: tensor(-640.1117, device='cuda:0')
epoch: 115 test_true_pfm: 175.72427350954925 sim_pfm: -496.81648122953874
episode: 460 training return: tensor(-581.0626, device='cuda:0')
episode: 461 training return: tensor(-590.4514, device='cuda:0')
episode: 462 training return: tensor(-620.5446, device='cuda:0')
episode: 463 training return: tensor(-619.7191, device='cuda:0')
epoch: 116 test_true_pfm: 128.93808304321888 sim_pfm: -521.1289439390724
episode: 464 training return: tensor(-697.2120, device='cuda:0')
episode: 465 training return: tensor(-627.7752, device='cuda:0')
episode: 466 training return: tensor(-659.9028, device='cuda:0')
episode: 467 training return: tensor(-627.3257, device='cuda:0')
epoch: 117 test_true_pfm: 309.75521592489304 sim_pfm: -521.9926283784831
episode: 468 training return: tensor(-611.8383, device='cuda:0')
episode: 469 training return: tensor(-568.5202, device='cuda:0')
episode: 470 training return: tensor(-606.2263, device='cuda:0')
episode: 471 training return: tensor(-607.0691, device='cuda:0')
epoch: 118 test_true_pfm: 57.26141314453687 sim_pfm: -500.5531935640611
episode: 472 training return: tensor(-582.0404, device='cuda:0')
episode: 473 training return: tensor(-638.7012, device='cuda:0')
episode: 474 training return: tensor(-625.1784, device='cuda:0')
episode: 475 training return: tensor(-598.4611, device='cuda:0')
epoch: 119 test_true_pfm: 268.30407957279573 sim_pfm: -480.4136166595854
episode: 476 training return: tensor(-601.1287, device='cuda:0')
episode: 477 training return: tensor(-571.5513, device='cuda:0')
episode: 478 training return: tensor(-611.7830, device='cuda:0')
episode: 479 training return: tensor(-572.7542, device='cuda:0')
epoch: 120 test_true_pfm: 231.7631015717623 sim_pfm: -513.5674331057817
episode: 480 training return: tensor(-617.6226, device='cuda:0')
episode: 481 training return: tensor(-584.9350, device='cuda:0')
episode: 482 training return: tensor(-621.5958, device='cuda:0')
episode: 483 training return: tensor(-579.9099, device='cuda:0')
epoch: 121 test_true_pfm: 74.96982635934934 sim_pfm: -506.32730866008205
episode: 484 training return: tensor(-629.6453, device='cuda:0')
episode: 485 training return: tensor(-578.5747, device='cuda:0')
episode: 486 training return: tensor(-627.0372, device='cuda:0')
episode: 487 training return: tensor(-594.4343, device='cuda:0')
epoch: 122 test_true_pfm: 132.98125125671245 sim_pfm: -517.1193678270405
episode: 488 training return: tensor(-603.8231, device='cuda:0')
episode: 489 training return: tensor(-573.1261, device='cuda:0')
episode: 490 training return: tensor(-595.1774, device='cuda:0')
episode: 491 training return: tensor(-608.9190, device='cuda:0')
epoch: 123 test_true_pfm: 97.72595962317943 sim_pfm: -512.7496862907816
episode: 492 training return: tensor(-652.3304, device='cuda:0')
episode: 493 training return: tensor(-595.0248, device='cuda:0')
episode: 494 training return: tensor(-649.9651, device='cuda:0')
episode: 495 training return: tensor(-599.6423, device='cuda:0')
epoch: 124 test_true_pfm: 204.87403293483365 sim_pfm: -475.66640534965944
episode: 496 training return: tensor(-565.9938, device='cuda:0')
episode: 497 training return: tensor(-627.7236, device='cuda:0')
episode: 498 training return: tensor(-614.0004, device='cuda:0')
episode: 499 training return: tensor(-581.6605, device='cuda:0')
epoch: 125 test_true_pfm: -2.5772794457395682 sim_pfm: -528.6174184433961
episode: 500 training return: tensor(-579.2964, device='cuda:0')
episode: 501 training return: tensor(-624.9468, device='cuda:0')
episode: 502 training return: tensor(-549.6240, device='cuda:0')
episode: 503 training return: tensor(-638.7565, device='cuda:0')
epoch: 126 test_true_pfm: 258.0802657155932 sim_pfm: -539.2274634035615
episode: 504 training return: tensor(-766.7261, device='cuda:0')
episode: 505 training return: tensor(-558.3932, device='cuda:0')
episode: 506 training return: tensor(-633.4453, device='cuda:0')
episode: 507 training return: tensor(-572.3904, device='cuda:0')
epoch: 127 test_true_pfm: 217.45741535325632 sim_pfm: -512.9567331576254
episode: 508 training return: tensor(-621.9991, device='cuda:0')
episode: 509 training return: tensor(-598.4108, device='cuda:0')
episode: 510 training return: tensor(-710.6188, device='cuda:0')
episode: 511 training return: tensor(-571.2028, device='cuda:0')
epoch: 128 test_true_pfm: 42.08986474254863 sim_pfm: -493.594457998639
episode: 512 training return: tensor(-880.8524, device='cuda:0')
episode: 513 training return: tensor(-607.6955, device='cuda:0')
episode: 514 training return: tensor(-618.7640, device='cuda:0')
episode: 515 training return: tensor(-608.0845, device='cuda:0')
epoch: 129 test_true_pfm: 42.35729737372099 sim_pfm: -502.5713916978178
episode: 516 training return: tensor(-569.1982, device='cuda:0')
episode: 517 training return: tensor(-624.2286, device='cuda:0')
episode: 518 training return: tensor(-575.8857, device='cuda:0')
episode: 519 training return: tensor(-616.2349, device='cuda:0')
epoch: 130 test_true_pfm: 45.30678379289478 sim_pfm: -531.0691151331799
episode: 520 training return: tensor(-619.4308, device='cuda:0')
episode: 521 training return: tensor(-615.5242, device='cuda:0')
episode: 522 training return: tensor(-590.0826, device='cuda:0')
episode: 523 training return: tensor(-579.3595, device='cuda:0')
epoch: 131 test_true_pfm: 100.04585160066598 sim_pfm: -484.37741308407084
episode: 524 training return: tensor(-572.5513, device='cuda:0')
episode: 525 training return: tensor(-601.3560, device='cuda:0')
episode: 526 training return: tensor(-591.9351, device='cuda:0')
episode: 527 training return: tensor(-619.1694, device='cuda:0')
epoch: 132 test_true_pfm: 158.5616592616268 sim_pfm: -544.1230782150524
episode: 528 training return: tensor(-609.3633, device='cuda:0')
episode: 529 training return: tensor(-597.7392, device='cuda:0')
episode: 530 training return: tensor(-566.2819, device='cuda:0')
episode: 531 training return: tensor(-631.2233, device='cuda:0')
epoch: 133 test_true_pfm: 10.500610352235697 sim_pfm: -521.546673365403
episode: 532 training return: tensor(-584.3309, device='cuda:0')
episode: 533 training return: tensor(-590.1090, device='cuda:0')
episode: 534 training return: tensor(-586.2947, device='cuda:0')
episode: 535 training return: tensor(-616.0959, device='cuda:0')
epoch: 134 test_true_pfm: 210.1329337844909 sim_pfm: -474.1616246079793
episode: 536 training return: tensor(-609.8621, device='cuda:0')
episode: 537 training return: tensor(-597.3945, device='cuda:0')
episode: 538 training return: tensor(-613.4461, device='cuda:0')
episode: 539 training return: tensor(-627.2858, device='cuda:0')
epoch: 135 test_true_pfm: -107.04107254064657 sim_pfm: -538.6834229789674
episode: 540 training return: tensor(-601.7074, device='cuda:0')
episode: 541 training return: tensor(-609.7561, device='cuda:0')
episode: 542 training return: tensor(-592.3329, device='cuda:0')
episode: 543 training return: tensor(-614.5317, device='cuda:0')
epoch: 136 test_true_pfm: 192.794246362684 sim_pfm: -478.6468249236544
episode: 544 training return: tensor(-640.7591, device='cuda:0')
episode: 545 training return: tensor(-592.0153, device='cuda:0')
episode: 546 training return: tensor(-603.5745, device='cuda:0')
episode: 547 training return: tensor(-574.0958, device='cuda:0')
epoch: 137 test_true_pfm: 57.728828410754424 sim_pfm: -539.5964881842374
episode: 548 training return: tensor(-594.6281, device='cuda:0')
episode: 549 training return: tensor(-593.4496, device='cuda:0')
episode: 550 training return: tensor(-602.2814, device='cuda:0')
episode: 551 training return: tensor(-557.7776, device='cuda:0')
epoch: 138 test_true_pfm: 186.89049959212085 sim_pfm: -506.8513866038993
episode: 552 training return: tensor(-635.3396, device='cuda:0')
episode: 553 training return: tensor(-903.4888, device='cuda:0')
episode: 554 training return: tensor(-613.3359, device='cuda:0')
episode: 555 training return: tensor(-554.3690, device='cuda:0')
epoch: 139 test_true_pfm: 118.59300953781583 sim_pfm: -487.28074814443244
episode: 556 training return: tensor(-631.5504, device='cuda:0')
episode: 557 training return: tensor(-567.6003, device='cuda:0')
episode: 558 training return: tensor(-589.0035, device='cuda:0')
episode: 559 training return: tensor(-584.7075, device='cuda:0')
epoch: 140 test_true_pfm: 116.133715122987 sim_pfm: -538.7027467628164
episode: 560 training return: tensor(-589.1915, device='cuda:0')
episode: 561 training return: tensor(-612.2859, device='cuda:0')
episode: 562 training return: tensor(-603.7825, device='cuda:0')
episode: 563 training return: tensor(-620.2910, device='cuda:0')
epoch: 141 test_true_pfm: 193.97880004660453 sim_pfm: -492.58456100445864
episode: 564 training return: tensor(-554.3690, device='cuda:0')
episode: 565 training return: tensor(-579.4598, device='cuda:0')
episode: 566 training return: tensor(-612.4955, device='cuda:0')
episode: 567 training return: tensor(-639.0792, device='cuda:0')
epoch: 142 test_true_pfm: 153.32615705060158 sim_pfm: -531.4598950242313
episode: 568 training return: tensor(-651.2550, device='cuda:0')
episode: 569 training return: tensor(-586.5262, device='cuda:0')
episode: 570 training return: tensor(-590.9253, device='cuda:0')
episode: 571 training return: tensor(-588.7355, device='cuda:0')
epoch: 143 test_true_pfm: 79.3488761228959 sim_pfm: -494.48664820054546
episode: 572 training return: tensor(-630.2781, device='cuda:0')
episode: 573 training return: tensor(-593.0981, device='cuda:0')
episode: 574 training return: tensor(-605.4511, device='cuda:0')
episode: 575 training return: tensor(-559.2055, device='cuda:0')
epoch: 144 test_true_pfm: 28.187999648258767 sim_pfm: -510.9975928038669
episode: 576 training return: tensor(-574.7998, device='cuda:0')
episode: 577 training return: tensor(-638.7447, device='cuda:0')
episode: 578 training return: tensor(-577.5675, device='cuda:0')
episode: 579 training return: tensor(-611.4754, device='cuda:0')
epoch: 145 test_true_pfm: 196.9185473962009 sim_pfm: -475.7979337652214
episode: 580 training return: tensor(-644.7667, device='cuda:0')
episode: 581 training return: tensor(-621.7915, device='cuda:0')
episode: 582 training return: tensor(-628.9250, device='cuda:0')
episode: 583 training return: tensor(-602.7195, device='cuda:0')
epoch: 146 test_true_pfm: 60.05691051845927 sim_pfm: -533.3547748395649
episode: 584 training return: tensor(-630.4501, device='cuda:0')
episode: 585 training return: tensor(-622.2758, device='cuda:0')
episode: 586 training return: tensor(-593.1075, device='cuda:0')
episode: 587 training return: tensor(-594.7284, device='cuda:0')
epoch: 147 test_true_pfm: 110.2771207855843 sim_pfm: -536.8992326587128
episode: 588 training return: tensor(-575.9877, device='cuda:0')
episode: 589 training return: tensor(-605.9378, device='cuda:0')
episode: 590 training return: tensor(-596.4866, device='cuda:0')
episode: 591 training return: tensor(-590.3378, device='cuda:0')
epoch: 148 test_true_pfm: 49.184082484138436 sim_pfm: -489.2054167823905
episode: 592 training return: tensor(-613.6886, device='cuda:0')
episode: 593 training return: tensor(-861.6235, device='cuda:0')
episode: 594 training return: tensor(-601.4611, device='cuda:0')
episode: 595 training return: tensor(-565.1429, device='cuda:0')
epoch: 149 test_true_pfm: 191.9675446824101 sim_pfm: -524.5828260689353
episode: 596 training return: tensor(-555.1008, device='cuda:0')
episode: 597 training return: tensor(-568.6216, device='cuda:0')
episode: 598 training return: tensor(-584.4956, device='cuda:0')
episode: 599 training return: tensor(-616.1182, device='cuda:0')
epoch: 150 test_true_pfm: -64.79117846467557 sim_pfm: -507.99934330691275
