['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '2', '--data', '100000', '--regu', '0.3']
2413.8189750680767
episode: 0 training return: tensor(-273.0562, device='cuda:0')
episode: 1 training return: tensor(-288.6471, device='cuda:0')
episode: 2 training return: tensor(150.3298, device='cuda:0')
episode: 3 training return: tensor(-412.7799, device='cuda:0')
epoch: 1 test_true_pfm: 1991.0840641677332 sim_pfm: -169.1659997034585
episode: 4 training return: tensor(-360.3850, device='cuda:0')
episode: 5 training return: tensor(-338.1680, device='cuda:0')
episode: 6 training return: tensor(-306.6862, device='cuda:0')
episode: 7 training return: tensor(-124.6266, device='cuda:0')
epoch: 2 test_true_pfm: 1974.0298379923654 sim_pfm: -196.01110345645188
episode: 8 training return: tensor(51.0250, device='cuda:0')
episode: 9 training return: tensor(-221.8703, device='cuda:0')
episode: 10 training return: tensor(-469.8126, device='cuda:0')
episode: 11 training return: tensor(-466.5768, device='cuda:0')
epoch: 3 test_true_pfm: 1273.4125815729137 sim_pfm: -349.54138394476223
episode: 12 training return: tensor(-396.9460, device='cuda:0')
episode: 13 training return: tensor(-388.1908, device='cuda:0')
episode: 14 training return: tensor(-352.6789, device='cuda:0')
episode: 15 training return: tensor(289.1001, device='cuda:0')
epoch: 4 test_true_pfm: 1277.9427113072466 sim_pfm: -351.63288939309615
episode: 16 training return: tensor(-418.9156, device='cuda:0')
episode: 17 training return: tensor(-177.3954, device='cuda:0')
episode: 18 training return: tensor(-372.0464, device='cuda:0')
episode: 19 training return: tensor(-433.7953, device='cuda:0')
epoch: 5 test_true_pfm: 2866.6999921640586 sim_pfm: 59.829033595897876
episode: 20 training return: tensor(-437.9186, device='cuda:0')
episode: 21 training return: tensor(150.6192, device='cuda:0')
episode: 22 training return: tensor(-513.2138, device='cuda:0')
episode: 23 training return: tensor(-460.5649, device='cuda:0')
epoch: 6 test_true_pfm: 1658.610872071018 sim_pfm: -395.3569242461429
episode: 24 training return: tensor(-323.4999, device='cuda:0')
episode: 25 training return: tensor(-356.0896, device='cuda:0')
episode: 26 training return: tensor(-188.9436, device='cuda:0')
episode: 27 training return: tensor(-458.5518, device='cuda:0')
epoch: 7 test_true_pfm: 1362.3189136619912 sim_pfm: -326.49473457434215
episode: 28 training return: tensor(-415.7182, device='cuda:0')
episode: 29 training return: tensor(-459.0241, device='cuda:0')
episode: 30 training return: tensor(-387.4608, device='cuda:0')
episode: 31 training return: tensor(-235.9257, device='cuda:0')
epoch: 8 test_true_pfm: 1635.968402837314 sim_pfm: -208.4099362262059
episode: 32 training return: tensor(-431.4393, device='cuda:0')
episode: 33 training return: tensor(-364.0075, device='cuda:0')
episode: 34 training return: tensor(-464.8734, device='cuda:0')
episode: 35 training return: tensor(-239.9842, device='cuda:0')
epoch: 9 test_true_pfm: 1634.8620985302296 sim_pfm: -256.0893241382825
episode: 36 training return: tensor(-227.5898, device='cuda:0')
episode: 37 training return: tensor(-280.0439, device='cuda:0')
episode: 38 training return: tensor(-489.6894, device='cuda:0')
episode: 39 training return: tensor(-414.5514, device='cuda:0')
epoch: 10 test_true_pfm: 1189.6444942837913 sim_pfm: -361.4006901968581
episode: 40 training return: tensor(-209.2481, device='cuda:0')
episode: 41 training return: tensor(-300.5417, device='cuda:0')
episode: 42 training return: tensor(-316.1512, device='cuda:0')
episode: 43 training return: tensor(-375.4308, device='cuda:0')
epoch: 11 test_true_pfm: 2398.281676633427 sim_pfm: -33.035777383406334
episode: 44 training return: tensor(-393.5021, device='cuda:0')
episode: 45 training return: tensor(-310.0128, device='cuda:0')
episode: 46 training return: tensor(-368.2187, device='cuda:0')
episode: 47 training return: tensor(-282.6251, device='cuda:0')
epoch: 12 test_true_pfm: 1538.5048754858879 sim_pfm: -223.27033560435908
episode: 48 training return: tensor(-240.7818, device='cuda:0')
episode: 49 training return: tensor(-308.5309, device='cuda:0')
episode: 50 training return: tensor(-308.4223, device='cuda:0')
episode: 51 training return: tensor(-44.6451, device='cuda:0')
epoch: 13 test_true_pfm: 1647.8168492699133 sim_pfm: -150.63438206651094
episode: 52 training return: tensor(-359.8846, device='cuda:0')
episode: 53 training return: tensor(-279.9021, device='cuda:0')
episode: 54 training return: tensor(-322.1172, device='cuda:0')
episode: 55 training return: tensor(-319.3629, device='cuda:0')
epoch: 14 test_true_pfm: 2076.5599147141006 sim_pfm: -102.17811430625927
episode: 56 training return: tensor(-70.7122, device='cuda:0')
episode: 57 training return: tensor(-359.1985, device='cuda:0')
episode: 58 training return: tensor(-152.8140, device='cuda:0')
episode: 59 training return: tensor(-288.0387, device='cuda:0')
epoch: 15 test_true_pfm: 1550.0653854452719 sim_pfm: -287.9446504896817
episode: 60 training return: tensor(-258.8374, device='cuda:0')
episode: 61 training return: tensor(-238.6016, device='cuda:0')
episode: 62 training return: tensor(-302.5828, device='cuda:0')
episode: 63 training return: tensor(-316.4774, device='cuda:0')
epoch: 16 test_true_pfm: 1934.5516728556283 sim_pfm: 18.480558328640956
episode: 64 training return: tensor(29.1636, device='cuda:0')
episode: 65 training return: tensor(-343.1677, device='cuda:0')
episode: 66 training return: tensor(-64.0753, device='cuda:0')
episode: 67 training return: tensor(-338.1299, device='cuda:0')
epoch: 17 test_true_pfm: 2459.552772444545 sim_pfm: -142.89796528530619
episode: 68 training return: tensor(89.2090, device='cuda:0')
episode: 69 training return: tensor(-130.8452, device='cuda:0')
episode: 70 training return: tensor(-21.6579, device='cuda:0')
episode: 71 training return: tensor(-208.1366, device='cuda:0')
epoch: 18 test_true_pfm: 2989.1804200417196 sim_pfm: -6.275440065888688
episode: 72 training return: tensor(-109.7984, device='cuda:0')
episode: 73 training return: tensor(-168.1578, device='cuda:0')
episode: 74 training return: tensor(82.1512, device='cuda:0')
episode: 75 training return: tensor(-319.2910, device='cuda:0')
epoch: 19 test_true_pfm: 2296.3001814769464 sim_pfm: -53.96201105539998
episode: 76 training return: tensor(-416.2031, device='cuda:0')
episode: 77 training return: tensor(217.6743, device='cuda:0')
episode: 78 training return: tensor(-349.8701, device='cuda:0')
episode: 79 training return: tensor(177.2435, device='cuda:0')
epoch: 20 test_true_pfm: 2233.73257255649 sim_pfm: -155.13202236282328
episode: 80 training return: tensor(-75.4738, device='cuda:0')
episode: 81 training return: tensor(-285.5215, device='cuda:0')
episode: 82 training return: tensor(-290.8805, device='cuda:0')
episode: 83 training return: tensor(-300.0103, device='cuda:0')
epoch: 21 test_true_pfm: 2192.500387045147 sim_pfm: -100.27732759458013
episode: 84 training return: tensor(-69.0207, device='cuda:0')
episode: 85 training return: tensor(-392.1265, device='cuda:0')
episode: 86 training return: tensor(-187.6847, device='cuda:0')
episode: 87 training return: tensor(-355.7190, device='cuda:0')
epoch: 22 test_true_pfm: 2327.518278349247 sim_pfm: -238.20064913171032
episode: 88 training return: tensor(-281.4088, device='cuda:0')
episode: 89 training return: tensor(-248.4806, device='cuda:0')
episode: 90 training return: tensor(-357.0942, device='cuda:0')
episode: 91 training return: tensor(340.1702, device='cuda:0')
epoch: 23 test_true_pfm: 2734.097154285481 sim_pfm: 366.98967398284003
episode: 92 training return: tensor(31.9849, device='cuda:0')
episode: 93 training return: tensor(324.6203, device='cuda:0')
episode: 94 training return: tensor(-196.0200, device='cuda:0')
episode: 95 training return: tensor(161.9429, device='cuda:0')
epoch: 24 test_true_pfm: 1528.8385997973362 sim_pfm: -192.83050396014005
episode: 96 training return: tensor(-96.4775, device='cuda:0')
episode: 97 training return: tensor(-294.0096, device='cuda:0')
episode: 98 training return: tensor(169.3926, device='cuda:0')
episode: 99 training return: tensor(70.1888, device='cuda:0')
epoch: 25 test_true_pfm: 2008.826566761193 sim_pfm: -189.04714542763153
episode: 100 training return: tensor(-132.8460, device='cuda:0')
episode: 101 training return: tensor(-15.6991, device='cuda:0')
episode: 102 training return: tensor(119.5135, device='cuda:0')
episode: 103 training return: tensor(-231.1245, device='cuda:0')
epoch: 26 test_true_pfm: 1617.6410808607127 sim_pfm: -84.90964732738212
episode: 104 training return: tensor(247.9082, device='cuda:0')
episode: 105 training return: tensor(38.9813, device='cuda:0')
episode: 106 training return: tensor(-366.2277, device='cuda:0')
episode: 107 training return: tensor(36.1426, device='cuda:0')
epoch: 27 test_true_pfm: 2102.2610423410774 sim_pfm: -228.47979783344394
episode: 108 training return: tensor(-199.7194, device='cuda:0')
episode: 109 training return: tensor(-350.2973, device='cuda:0')
episode: 110 training return: tensor(-167.6857, device='cuda:0')
episode: 111 training return: tensor(78.2572, device='cuda:0')
epoch: 28 test_true_pfm: 2187.965522838832 sim_pfm: 186.2742782413261
episode: 112 training return: tensor(-63.9664, device='cuda:0')
episode: 113 training return: tensor(-263.9171, device='cuda:0')
episode: 114 training return: tensor(371.5704, device='cuda:0')
episode: 115 training return: tensor(-151.7511, device='cuda:0')
epoch: 29 test_true_pfm: 1669.4424046129068 sim_pfm: -7.903163489342357
episode: 116 training return: tensor(-349.4248, device='cuda:0')
episode: 117 training return: tensor(-240.3340, device='cuda:0')
episode: 118 training return: tensor(-48.3210, device='cuda:0')
episode: 119 training return: tensor(-174.9660, device='cuda:0')
epoch: 30 test_true_pfm: 1828.2630228420603 sim_pfm: -24.676509987873334
episode: 120 training return: tensor(171.8805, device='cuda:0')
episode: 121 training return: tensor(-107.6066, device='cuda:0')
episode: 122 training return: tensor(26.5470, device='cuda:0')
episode: 123 training return: tensor(-131.8813, device='cuda:0')
epoch: 31 test_true_pfm: 1904.612890002627 sim_pfm: 64.40739853233875
episode: 124 training return: tensor(343.3676, device='cuda:0')
episode: 125 training return: tensor(-155.0972, device='cuda:0')
episode: 126 training return: tensor(-316.5697, device='cuda:0')
episode: 127 training return: tensor(-401.8244, device='cuda:0')
epoch: 32 test_true_pfm: 3190.3502057789156 sim_pfm: -82.07895912214492
episode: 128 training return: tensor(-239.5481, device='cuda:0')
episode: 129 training return: tensor(-217.4994, device='cuda:0')
episode: 130 training return: tensor(-311.8192, device='cuda:0')
episode: 131 training return: tensor(164.9870, device='cuda:0')
epoch: 33 test_true_pfm: 2293.2767434372495 sim_pfm: 198.82764712752154
episode: 132 training return: tensor(-105.1967, device='cuda:0')
episode: 133 training return: tensor(263.1186, device='cuda:0')
episode: 134 training return: tensor(-217.6567, device='cuda:0')
episode: 135 training return: tensor(55.3239, device='cuda:0')
epoch: 34 test_true_pfm: 1777.023119693924 sim_pfm: -14.635699035522217
episode: 136 training return: tensor(-72.7147, device='cuda:0')
episode: 137 training return: tensor(-409.9907, device='cuda:0')
episode: 138 training return: tensor(-159.5885, device='cuda:0')
episode: 139 training return: tensor(316.3036, device='cuda:0')
epoch: 35 test_true_pfm: 2915.028131969017 sim_pfm: 37.57111599311853
episode: 140 training return: tensor(-119.3973, device='cuda:0')
episode: 141 training return: tensor(-344.8195, device='cuda:0')
episode: 142 training return: tensor(-326.8113, device='cuda:0')
episode: 143 training return: tensor(261.7039, device='cuda:0')
epoch: 36 test_true_pfm: 2841.1415293165574 sim_pfm: 203.65422443735102
episode: 144 training return: tensor(-287.1184, device='cuda:0')
episode: 145 training return: tensor(-286.5006, device='cuda:0')
episode: 146 training return: tensor(-98.0394, device='cuda:0')
episode: 147 training return: tensor(-50.8294, device='cuda:0')
epoch: 37 test_true_pfm: 2277.726278742203 sim_pfm: -214.1240277075558
episode: 148 training return: tensor(-81.8063, device='cuda:0')
episode: 149 training return: tensor(-391.5404, device='cuda:0')
episode: 150 training return: tensor(-270.1916, device='cuda:0')
episode: 151 training return: tensor(-383.1012, device='cuda:0')
epoch: 38 test_true_pfm: 3007.8974326329967 sim_pfm: 259.1043823158058
episode: 152 training return: tensor(-303.5392, device='cuda:0')
episode: 153 training return: tensor(-216.4383, device='cuda:0')
episode: 154 training return: tensor(-264.5709, device='cuda:0')
episode: 155 training return: tensor(-404.0823, device='cuda:0')
epoch: 39 test_true_pfm: 3108.238709498463 sim_pfm: 216.80359227648782
episode: 156 training return: tensor(-302.4516, device='cuda:0')
episode: 157 training return: tensor(-197.2301, device='cuda:0')
episode: 158 training return: tensor(-341.6581, device='cuda:0')
episode: 159 training return: tensor(-18.7804, device='cuda:0')
epoch: 40 test_true_pfm: 2370.04275912428 sim_pfm: -75.3702570523601
episode: 160 training return: tensor(-349.9984, device='cuda:0')
episode: 161 training return: tensor(-201.9188, device='cuda:0')
episode: 162 training return: tensor(-91.4575, device='cuda:0')
episode: 163 training return: tensor(-128.7546, device='cuda:0')
epoch: 41 test_true_pfm: 1966.5681110602027 sim_pfm: 29.882802987781663
episode: 164 training return: tensor(-146.7101, device='cuda:0')
episode: 165 training return: tensor(93.9767, device='cuda:0')
episode: 166 training return: tensor(-31.0457, device='cuda:0')
episode: 167 training return: tensor(-188.1283, device='cuda:0')
epoch: 42 test_true_pfm: 2757.3906256531955 sim_pfm: -178.52501334001622
episode: 168 training return: tensor(-127.9710, device='cuda:0')
episode: 169 training return: tensor(-366.8849, device='cuda:0')
episode: 170 training return: tensor(-315.1906, device='cuda:0')
episode: 171 training return: tensor(181.0539, device='cuda:0')
epoch: 43 test_true_pfm: 2851.02980852369 sim_pfm: 158.39826250941647
episode: 172 training return: tensor(-22.3529, device='cuda:0')
episode: 173 training return: tensor(-10.2054, device='cuda:0')
episode: 174 training return: tensor(194.5700, device='cuda:0')
episode: 175 training return: tensor(-66.4472, device='cuda:0')
epoch: 44 test_true_pfm: 2776.462502416216 sim_pfm: 157.74606473495564
episode: 176 training return: tensor(-16.9226, device='cuda:0')
episode: 177 training return: tensor(231.7199, device='cuda:0')
episode: 178 training return: tensor(-229.8004, device='cuda:0')
episode: 179 training return: tensor(-408.1032, device='cuda:0')
epoch: 45 test_true_pfm: 3246.044111270488 sim_pfm: 165.20408740856996
episode: 180 training return: tensor(-315.4940, device='cuda:0')
episode: 181 training return: tensor(-253.9320, device='cuda:0')
episode: 182 training return: tensor(-213.3995, device='cuda:0')
episode: 183 training return: tensor(-94.5047, device='cuda:0')
epoch: 46 test_true_pfm: 2527.9763139882675 sim_pfm: 148.7053662373995
episode: 184 training return: tensor(-37.2751, device='cuda:0')
episode: 185 training return: tensor(-200.6825, device='cuda:0')
episode: 186 training return: tensor(-40.2383, device='cuda:0')
episode: 187 training return: tensor(-204.7467, device='cuda:0')
epoch: 47 test_true_pfm: 2580.2404071638534 sim_pfm: 209.76705152155287
episode: 188 training return: tensor(-399.6228, device='cuda:0')
episode: 189 training return: tensor(-168.7799, device='cuda:0')
episode: 190 training return: tensor(181.9475, device='cuda:0')
episode: 191 training return: tensor(51.0089, device='cuda:0')
epoch: 48 test_true_pfm: 3093.2022007484325 sim_pfm: 384.6318023435306
episode: 192 training return: tensor(-333.9406, device='cuda:0')
episode: 193 training return: tensor(-393.2466, device='cuda:0')
episode: 194 training return: tensor(-330.1145, device='cuda:0')
episode: 195 training return: tensor(340.1634, device='cuda:0')
epoch: 49 test_true_pfm: 2713.916721501025 sim_pfm: 270.2611922021412
episode: 196 training return: tensor(-98.5956, device='cuda:0')
episode: 197 training return: tensor(-116.7031, device='cuda:0')
episode: 198 training return: tensor(86.7074, device='cuda:0')
episode: 199 training return: tensor(176.2242, device='cuda:0')
epoch: 50 test_true_pfm: 2748.574128629894 sim_pfm: 213.2770681156544
episode: 200 training return: tensor(-304.8349, device='cuda:0')
episode: 201 training return: tensor(101.5167, device='cuda:0')
episode: 202 training return: tensor(351.1678, device='cuda:0')
episode: 203 training return: tensor(365.9461, device='cuda:0')
epoch: 51 test_true_pfm: 1793.7625566648446 sim_pfm: -67.57350779122983
episode: 204 training return: tensor(-218.6113, device='cuda:0')
episode: 205 training return: tensor(-351.9902, device='cuda:0')
episode: 206 training return: tensor(-132.8416, device='cuda:0')
episode: 207 training return: tensor(-190.5062, device='cuda:0')
epoch: 52 test_true_pfm: 2720.515433588545 sim_pfm: 229.41479386232095
episode: 208 training return: tensor(-50.6242, device='cuda:0')
episode: 209 training return: tensor(-315.5161, device='cuda:0')
episode: 210 training return: tensor(-189.1160, device='cuda:0')
episode: 211 training return: tensor(-396.7316, device='cuda:0')
epoch: 53 test_true_pfm: 3143.810005867712 sim_pfm: 217.8555005300635
episode: 212 training return: tensor(-324.0685, device='cuda:0')
episode: 213 training return: tensor(265.1759, device='cuda:0')
episode: 214 training return: tensor(-58.6045, device='cuda:0')
episode: 215 training return: tensor(-395.7470, device='cuda:0')
epoch: 54 test_true_pfm: 3279.4124316883845 sim_pfm: 255.18933597633927
episode: 216 training return: tensor(-335.9554, device='cuda:0')
episode: 217 training return: tensor(-403.0330, device='cuda:0')
episode: 218 training return: tensor(6.4343, device='cuda:0')
episode: 219 training return: tensor(236.1183, device='cuda:0')
epoch: 55 test_true_pfm: 2321.3466736247033 sim_pfm: -139.87287829622315
episode: 220 training return: tensor(-21.1961, device='cuda:0')
episode: 221 training return: tensor(-239.6754, device='cuda:0')
episode: 222 training return: tensor(-292.7150, device='cuda:0')
episode: 223 training return: tensor(-113.9509, device='cuda:0')
epoch: 56 test_true_pfm: 2925.9162049002457 sim_pfm: 339.14962086174637
episode: 224 training return: tensor(-466.1696, device='cuda:0')
episode: 225 training return: tensor(-365.0251, device='cuda:0')
episode: 226 training return: tensor(-191.4004, device='cuda:0')
episode: 227 training return: tensor(-242.5199, device='cuda:0')
epoch: 57 test_true_pfm: 2447.993490021561 sim_pfm: 225.52631032676436
episode: 228 training return: tensor(-223.4110, device='cuda:0')
episode: 229 training return: tensor(-200.5803, device='cuda:0')
episode: 230 training return: tensor(-274.0516, device='cuda:0')
episode: 231 training return: tensor(-186.0745, device='cuda:0')
epoch: 58 test_true_pfm: 3341.5728729099374 sim_pfm: 378.50339593583095
episode: 232 training return: tensor(-404.2141, device='cuda:0')
episode: 233 training return: tensor(-305.8753, device='cuda:0')
episode: 234 training return: tensor(-189.4066, device='cuda:0')
episode: 235 training return: tensor(-180.0092, device='cuda:0')
epoch: 59 test_true_pfm: 3213.5237291319577 sim_pfm: 322.8052639490925
episode: 236 training return: tensor(-124.2351, device='cuda:0')
episode: 237 training return: tensor(-23.6831, device='cuda:0')
episode: 238 training return: tensor(-381.9631, device='cuda:0')
episode: 239 training return: tensor(-397.0718, device='cuda:0')
epoch: 60 test_true_pfm: 2491.2558163077533 sim_pfm: 164.03528835243196
episode: 240 training return: tensor(37.8563, device='cuda:0')
episode: 241 training return: tensor(-159.0600, device='cuda:0')
episode: 242 training return: tensor(-84.5317, device='cuda:0')
episode: 243 training return: tensor(351.2048, device='cuda:0')
epoch: 61 test_true_pfm: 2909.4861588464314 sim_pfm: 256.8168206674357
episode: 244 training return: tensor(-176.6304, device='cuda:0')
episode: 245 training return: tensor(-151.4160, device='cuda:0')
episode: 246 training return: tensor(-346.5396, device='cuda:0')
episode: 247 training return: tensor(284.1166, device='cuda:0')
epoch: 62 test_true_pfm: 3114.5391556630625 sim_pfm: 375.74523987745243
episode: 248 training return: tensor(40.7374, device='cuda:0')
episode: 249 training return: tensor(-134.2131, device='cuda:0')
episode: 250 training return: tensor(-199.4720, device='cuda:0')
episode: 251 training return: tensor(-216.4373, device='cuda:0')
epoch: 63 test_true_pfm: 3289.9443603648638 sim_pfm: 324.3911451178913
episode: 252 training return: tensor(-119.5844, device='cuda:0')
episode: 253 training return: tensor(347.8025, device='cuda:0')
episode: 254 training return: tensor(-254.6169, device='cuda:0')
episode: 255 training return: tensor(-234.8046, device='cuda:0')
epoch: 64 test_true_pfm: 2607.3607055711086 sim_pfm: 165.48482918800437
episode: 256 training return: tensor(45.0846, device='cuda:0')
episode: 257 training return: tensor(-327.4445, device='cuda:0')
episode: 258 training return: tensor(-281.8288, device='cuda:0')
episode: 259 training return: tensor(395.3596, device='cuda:0')
epoch: 65 test_true_pfm: 3180.215482523341 sim_pfm: 370.27909652384307
episode: 260 training return: tensor(-392.5046, device='cuda:0')
episode: 261 training return: tensor(-399.3877, device='cuda:0')
episode: 262 training return: tensor(-105.9427, device='cuda:0')
episode: 263 training return: tensor(-162.3255, device='cuda:0')
epoch: 66 test_true_pfm: 3301.4999958479425 sim_pfm: 368.0259032229272
episode: 264 training return: tensor(-220.9835, device='cuda:0')
episode: 265 training return: tensor(338.6853, device='cuda:0')
episode: 266 training return: tensor(-239.9713, device='cuda:0')
episode: 267 training return: tensor(375.6700, device='cuda:0')
epoch: 67 test_true_pfm: 2126.9994789534526 sim_pfm: 93.48971668250549
episode: 268 training return: tensor(-125.4912, device='cuda:0')
episode: 269 training return: tensor(-152.6676, device='cuda:0')
episode: 270 training return: tensor(18.5924, device='cuda:0')
episode: 271 training return: tensor(-396.1311, device='cuda:0')
epoch: 68 test_true_pfm: 3018.665368080264 sim_pfm: 347.8308958777149
episode: 272 training return: tensor(48.1044, device='cuda:0')
episode: 273 training return: tensor(41.5132, device='cuda:0')
episode: 274 training return: tensor(136.7165, device='cuda:0')
episode: 275 training return: tensor(-77.2929, device='cuda:0')
epoch: 69 test_true_pfm: 3018.1827986602225 sim_pfm: 385.13531386782415
episode: 276 training return: tensor(-159.0654, device='cuda:0')
episode: 277 training return: tensor(185.3818, device='cuda:0')
episode: 278 training return: tensor(-347.3330, device='cuda:0')
episode: 279 training return: tensor(-121.1760, device='cuda:0')
epoch: 70 test_true_pfm: 3042.884982517325 sim_pfm: 228.19155165133998
episode: 280 training return: tensor(-361.6626, device='cuda:0')
episode: 281 training return: tensor(-405.7437, device='cuda:0')
episode: 282 training return: tensor(83.4304, device='cuda:0')
episode: 283 training return: tensor(110.7208, device='cuda:0')
epoch: 71 test_true_pfm: 3305.848767919899 sim_pfm: 373.5360142305726
episode: 284 training return: tensor(-157.1699, device='cuda:0')
episode: 285 training return: tensor(22.9949, device='cuda:0')
episode: 286 training return: tensor(-163.4149, device='cuda:0')
episode: 287 training return: tensor(-289.7354, device='cuda:0')
epoch: 72 test_true_pfm: 2965.192823795221 sim_pfm: 162.16687064819658
episode: 288 training return: tensor(-84.1147, device='cuda:0')
episode: 289 training return: tensor(-194.2064, device='cuda:0')
episode: 290 training return: tensor(-362.0971, device='cuda:0')
episode: 291 training return: tensor(24.9564, device='cuda:0')
epoch: 73 test_true_pfm: 3297.618774301871 sim_pfm: 237.33508591908807
episode: 292 training return: tensor(322.6028, device='cuda:0')
episode: 293 training return: tensor(-209.0995, device='cuda:0')
episode: 294 training return: tensor(-1.4585, device='cuda:0')
episode: 295 training return: tensor(-51.9309, device='cuda:0')
epoch: 74 test_true_pfm: 3345.147806311498 sim_pfm: 399.8293630264234
episode: 296 training return: tensor(-196.4685, device='cuda:0')
episode: 297 training return: tensor(-360.6401, device='cuda:0')
episode: 298 training return: tensor(-373.4453, device='cuda:0')
episode: 299 training return: tensor(-450.1839, device='cuda:0')
epoch: 75 test_true_pfm: 2950.9363783426184 sim_pfm: 216.7198170036912
episode: 300 training return: tensor(-334.5362, device='cuda:0')
episode: 301 training return: tensor(-116.5454, device='cuda:0')
episode: 302 training return: tensor(-220.3924, device='cuda:0')
episode: 303 training return: tensor(7.2269, device='cuda:0')
epoch: 76 test_true_pfm: 3299.478253487056 sim_pfm: 246.2866839952767
episode: 304 training return: tensor(-141.7045, device='cuda:0')
episode: 305 training return: tensor(-346.4402, device='cuda:0')
episode: 306 training return: tensor(-91.1210, device='cuda:0')
episode: 307 training return: tensor(-115.5748, device='cuda:0')
epoch: 77 test_true_pfm: 2978.0603774878637 sim_pfm: 345.6647539675857
episode: 308 training return: tensor(-186.9191, device='cuda:0')
episode: 309 training return: tensor(-72.4344, device='cuda:0')
episode: 310 training return: tensor(-93.2823, device='cuda:0')
episode: 311 training return: tensor(-404.3926, device='cuda:0')
epoch: 78 test_true_pfm: 2973.7294528987245 sim_pfm: 375.9860417818806
episode: 312 training return: tensor(-401.3882, device='cuda:0')
episode: 313 training return: tensor(-192.9589, device='cuda:0')
episode: 314 training return: tensor(-203.3663, device='cuda:0')
episode: 315 training return: tensor(-18.7092, device='cuda:0')
epoch: 79 test_true_pfm: 2856.1388201438895 sim_pfm: 390.96156681127223
episode: 316 training return: tensor(344.3492, device='cuda:0')
episode: 317 training return: tensor(-66.6306, device='cuda:0')
episode: 318 training return: tensor(-201.8298, device='cuda:0')
episode: 319 training return: tensor(-374.7026, device='cuda:0')
epoch: 80 test_true_pfm: 3220.692169500009 sim_pfm: 369.9377438214918
episode: 320 training return: tensor(-157.3433, device='cuda:0')
episode: 321 training return: tensor(-79.3445, device='cuda:0')
episode: 322 training return: tensor(226.5912, device='cuda:0')
episode: 323 training return: tensor(-205.4641, device='cuda:0')
epoch: 81 test_true_pfm: 3320.901674529377 sim_pfm: 301.9926961082965
episode: 324 training return: tensor(-119.8821, device='cuda:0')
episode: 325 training return: tensor(-248.9041, device='cuda:0')
episode: 326 training return: tensor(-321.9932, device='cuda:0')
episode: 327 training return: tensor(-309.7630, device='cuda:0')
epoch: 82 test_true_pfm: 3334.7463846288956 sim_pfm: 277.9377260155355
episode: 328 training return: tensor(-310.8434, device='cuda:0')
episode: 329 training return: tensor(-297.5924, device='cuda:0')
episode: 330 training return: tensor(105.7578, device='cuda:0')
episode: 331 training return: tensor(5.4281, device='cuda:0')
epoch: 83 test_true_pfm: 2844.074924870945 sim_pfm: 370.64026392491843
episode: 332 training return: tensor(91.7309, device='cuda:0')
episode: 333 training return: tensor(-237.5972, device='cuda:0')
episode: 334 training return: tensor(238.8050, device='cuda:0')
episode: 335 training return: tensor(-167.1838, device='cuda:0')
epoch: 84 test_true_pfm: 3108.5721621472 sim_pfm: 375.7720220221284
episode: 336 training return: tensor(-204.7706, device='cuda:0')
episode: 337 training return: tensor(-60.7045, device='cuda:0')
episode: 338 training return: tensor(-20.6351, device='cuda:0')
episode: 339 training return: tensor(-221.7945, device='cuda:0')
epoch: 85 test_true_pfm: 3301.7335117140215 sim_pfm: 393.33683226568
episode: 340 training return: tensor(25.1926, device='cuda:0')
episode: 341 training return: tensor(-209.6761, device='cuda:0')
episode: 342 training return: tensor(-230.8226, device='cuda:0')
episode: 343 training return: tensor(-333.9333, device='cuda:0')
epoch: 86 test_true_pfm: 3332.828564334451 sim_pfm: 381.245439265389
episode: 344 training return: tensor(61.7880, device='cuda:0')
episode: 345 training return: tensor(-351.2513, device='cuda:0')
episode: 346 training return: tensor(-97.3050, device='cuda:0')
episode: 347 training return: tensor(87.4839, device='cuda:0')
epoch: 87 test_true_pfm: 3012.159306579426 sim_pfm: 246.00114890994155
episode: 348 training return: tensor(-281.5806, device='cuda:0')
episode: 349 training return: tensor(36.0015, device='cuda:0')
episode: 350 training return: tensor(216.5923, device='cuda:0')
episode: 351 training return: tensor(-429.8735, device='cuda:0')
epoch: 88 test_true_pfm: 2910.8486098080853 sim_pfm: 282.5645253939244
episode: 352 training return: tensor(-317.8121, device='cuda:0')
episode: 353 training return: tensor(-319.5100, device='cuda:0')
episode: 354 training return: tensor(-201.4166, device='cuda:0')
episode: 355 training return: tensor(397.6798, device='cuda:0')
epoch: 89 test_true_pfm: 3157.508426296161 sim_pfm: 379.57964526015957
episode: 356 training return: tensor(-342.9040, device='cuda:0')
episode: 357 training return: tensor(-439.0433, device='cuda:0')
episode: 358 training return: tensor(-190.8940, device='cuda:0')
episode: 359 training return: tensor(-215.9035, device='cuda:0')
epoch: 90 test_true_pfm: 2711.6212838615297 sim_pfm: 285.2870875981559
episode: 360 training return: tensor(188.5281, device='cuda:0')
episode: 361 training return: tensor(-4.5275, device='cuda:0')
episode: 362 training return: tensor(-90.0635, device='cuda:0')
episode: 363 training return: tensor(-267.6831, device='cuda:0')
epoch: 91 test_true_pfm: 3383.5145153184535 sim_pfm: 252.06646812482117
episode: 364 training return: tensor(-158.3448, device='cuda:0')
episode: 365 training return: tensor(-88.5180, device='cuda:0')
episode: 366 training return: tensor(-380.9261, device='cuda:0')
episode: 367 training return: tensor(-248.2256, device='cuda:0')
epoch: 92 test_true_pfm: 2777.1883244597425 sim_pfm: 240.79067554371431
episode: 368 training return: tensor(229.9130, device='cuda:0')
episode: 369 training return: tensor(-432.2511, device='cuda:0')
episode: 370 training return: tensor(-423.9329, device='cuda:0')
episode: 371 training return: tensor(-126.6525, device='cuda:0')
epoch: 93 test_true_pfm: 3275.6382038463203 sim_pfm: 373.01649816536036
episode: 372 training return: tensor(-185.8361, device='cuda:0')
episode: 373 training return: tensor(-163.1797, device='cuda:0')
episode: 374 training return: tensor(-321.6534, device='cuda:0')
episode: 375 training return: tensor(112.9593, device='cuda:0')
epoch: 94 test_true_pfm: 2933.966639669324 sim_pfm: 240.32003239542246
episode: 376 training return: tensor(-211.8320, device='cuda:0')
episode: 377 training return: tensor(-128.6920, device='cuda:0')
episode: 378 training return: tensor(-275.9543, device='cuda:0')
episode: 379 training return: tensor(-132.2543, device='cuda:0')
epoch: 95 test_true_pfm: 3191.4432243062624 sim_pfm: 255.4119326992271
episode: 380 training return: tensor(147.7715, device='cuda:0')
episode: 381 training return: tensor(-151.0379, device='cuda:0')
episode: 382 training return: tensor(-202.9131, device='cuda:0')
episode: 383 training return: tensor(-112.1003, device='cuda:0')
epoch: 96 test_true_pfm: 2914.9528098934243 sim_pfm: 387.11388375198777
episode: 384 training return: tensor(-293.3435, device='cuda:0')
episode: 385 training return: tensor(277.3290, device='cuda:0')
episode: 386 training return: tensor(-346.3466, device='cuda:0')
episode: 387 training return: tensor(-48.2398, device='cuda:0')
epoch: 97 test_true_pfm: 3295.4334115371407 sim_pfm: 362.5976966776264
episode: 388 training return: tensor(43.4659, device='cuda:0')
episode: 389 training return: tensor(107.0829, device='cuda:0')
episode: 390 training return: tensor(-90.4205, device='cuda:0')
episode: 391 training return: tensor(-386.0561, device='cuda:0')
epoch: 98 test_true_pfm: 3307.317429533729 sim_pfm: 214.42356678727083
episode: 392 training return: tensor(18.4463, device='cuda:0')
episode: 393 training return: tensor(-389.1904, device='cuda:0')
episode: 394 training return: tensor(-203.7964, device='cuda:0')
episode: 395 training return: tensor(-317.3089, device='cuda:0')
epoch: 99 test_true_pfm: 2937.1535857234753 sim_pfm: 383.5953059688521
episode: 396 training return: tensor(-19.9300, device='cuda:0')
episode: 397 training return: tensor(52.2764, device='cuda:0')
episode: 398 training return: tensor(-384.4715, device='cuda:0')
episode: 399 training return: tensor(-415.5721, device='cuda:0')
epoch: 100 test_true_pfm: 2998.083656595074 sim_pfm: 263.23451703367755
episode: 400 training return: tensor(247.0207, device='cuda:0')
episode: 401 training return: tensor(-187.3321, device='cuda:0')
episode: 402 training return: tensor(-194.5456, device='cuda:0')
episode: 403 training return: tensor(-353.7844, device='cuda:0')
epoch: 101 test_true_pfm: 3238.1934260533467 sim_pfm: 376.8340020804123
episode: 404 training return: tensor(-193.3953, device='cuda:0')
episode: 405 training return: tensor(-369.0840, device='cuda:0')
episode: 406 training return: tensor(-321.7833, device='cuda:0')
episode: 407 training return: tensor(60.0454, device='cuda:0')
epoch: 102 test_true_pfm: 3326.993935990901 sim_pfm: 375.68655590565567
episode: 408 training return: tensor(-137.4518, device='cuda:0')
episode: 409 training return: tensor(-228.7910, device='cuda:0')
episode: 410 training return: tensor(-319.1995, device='cuda:0')
episode: 411 training return: tensor(-108.4274, device='cuda:0')
epoch: 103 test_true_pfm: 2599.560910750693 sim_pfm: 395.69337764879066
episode: 412 training return: tensor(-231.2013, device='cuda:0')
episode: 413 training return: tensor(24.1054, device='cuda:0')
episode: 414 training return: tensor(-354.0998, device='cuda:0')
episode: 415 training return: tensor(-105.5279, device='cuda:0')
epoch: 104 test_true_pfm: 2943.987927101001 sim_pfm: 373.3410950000398
episode: 416 training return: tensor(-372.6703, device='cuda:0')
episode: 417 training return: tensor(192.0817, device='cuda:0')
episode: 418 training return: tensor(-368.0424, device='cuda:0')
episode: 419 training return: tensor(-348.0450, device='cuda:0')
epoch: 105 test_true_pfm: 2752.6157211506797 sim_pfm: 246.15406847619064
episode: 420 training return: tensor(-16.7809, device='cuda:0')
episode: 421 training return: tensor(-205.9790, device='cuda:0')
episode: 422 training return: tensor(-379.0113, device='cuda:0')
episode: 423 training return: tensor(-213.3515, device='cuda:0')
epoch: 106 test_true_pfm: 3098.2024630333867 sim_pfm: 370.6980343677569
episode: 424 training return: tensor(-309.7319, device='cuda:0')
episode: 425 training return: tensor(-188.3873, device='cuda:0')
episode: 426 training return: tensor(-378.8830, device='cuda:0')
episode: 427 training return: tensor(-194.1665, device='cuda:0')
epoch: 107 test_true_pfm: 3032.8568254274383 sim_pfm: 274.62003925753135
episode: 428 training return: tensor(-173.2766, device='cuda:0')
episode: 429 training return: tensor(-196.2135, device='cuda:0')
episode: 430 training return: tensor(-192.1448, device='cuda:0')
episode: 431 training return: tensor(-304.0189, device='cuda:0')
epoch: 108 test_true_pfm: 3063.447498777851 sim_pfm: 372.426717816561
episode: 432 training return: tensor(-99.9588, device='cuda:0')
episode: 433 training return: tensor(-309.0193, device='cuda:0')
episode: 434 training return: tensor(-406.9283, device='cuda:0')
episode: 435 training return: tensor(-400.4164, device='cuda:0')
epoch: 109 test_true_pfm: 3305.1916276751126 sim_pfm: 375.39711314936477
episode: 436 training return: tensor(-329.0950, device='cuda:0')
episode: 437 training return: tensor(-294.9298, device='cuda:0')
episode: 438 training return: tensor(184.1400, device='cuda:0')
episode: 439 training return: tensor(-127.5293, device='cuda:0')
epoch: 110 test_true_pfm: 3045.9777388949865 sim_pfm: 373.6539615580502
episode: 440 training return: tensor(104.4125, device='cuda:0')
episode: 441 training return: tensor(-192.9063, device='cuda:0')
episode: 442 training return: tensor(-380.8615, device='cuda:0')
episode: 443 training return: tensor(-199.2866, device='cuda:0')
epoch: 111 test_true_pfm: 2649.055297129622 sim_pfm: 356.7991001191549
episode: 444 training return: tensor(-301.7950, device='cuda:0')
episode: 445 training return: tensor(-79.2524, device='cuda:0')
episode: 446 training return: tensor(-369.1481, device='cuda:0')
episode: 447 training return: tensor(158.3882, device='cuda:0')
epoch: 112 test_true_pfm: 3227.3018419409614 sim_pfm: 378.5711096525095
episode: 448 training return: tensor(-324.0485, device='cuda:0')
episode: 449 training return: tensor(-348.5967, device='cuda:0')
episode: 450 training return: tensor(-199.1369, device='cuda:0')
episode: 451 training return: tensor(-83.9082, device='cuda:0')
epoch: 113 test_true_pfm: 2996.978616545806 sim_pfm: 109.91138845697667
episode: 452 training return: tensor(-413.7629, device='cuda:0')
episode: 453 training return: tensor(-258.4794, device='cuda:0')
episode: 454 training return: tensor(-229.7972, device='cuda:0')
episode: 455 training return: tensor(-382.7358, device='cuda:0')
epoch: 114 test_true_pfm: 2826.0096977883254 sim_pfm: 354.043956999667
episode: 456 training return: tensor(-392.7429, device='cuda:0')
episode: 457 training return: tensor(-137.2911, device='cuda:0')
episode: 458 training return: tensor(-371.9114, device='cuda:0')
episode: 459 training return: tensor(44.9194, device='cuda:0')
epoch: 115 test_true_pfm: 3323.94939485853 sim_pfm: 368.75653416865197
episode: 460 training return: tensor(-326.6515, device='cuda:0')
episode: 461 training return: tensor(-273.2346, device='cuda:0')
episode: 462 training return: tensor(-160.4868, device='cuda:0')
episode: 463 training return: tensor(-275.9727, device='cuda:0')
epoch: 116 test_true_pfm: 3300.6608589989937 sim_pfm: 284.62013707018923
episode: 464 training return: tensor(-79.1714, device='cuda:0')
episode: 465 training return: tensor(-209.3576, device='cuda:0')
episode: 466 training return: tensor(-290.7897, device='cuda:0')
episode: 467 training return: tensor(-410.8175, device='cuda:0')
epoch: 117 test_true_pfm: 3223.6520634745125 sim_pfm: 377.87817769571365
episode: 468 training return: tensor(181.6160, device='cuda:0')
episode: 469 training return: tensor(-263.5540, device='cuda:0')
episode: 470 training return: tensor(-211.9500, device='cuda:0')
episode: 471 training return: tensor(-220.9995, device='cuda:0')
epoch: 118 test_true_pfm: 2818.8973526725767 sim_pfm: 194.55189726369767
episode: 472 training return: tensor(-87.4694, device='cuda:0')
episode: 473 training return: tensor(59.0093, device='cuda:0')
episode: 474 training return: tensor(-377.3256, device='cuda:0')
episode: 475 training return: tensor(-216.1356, device='cuda:0')
epoch: 119 test_true_pfm: 3354.6525140938065 sim_pfm: 187.59756146386886
episode: 476 training return: tensor(52.4132, device='cuda:0')
episode: 477 training return: tensor(-299.8812, device='cuda:0')
episode: 478 training return: tensor(-190.2602, device='cuda:0')
episode: 479 training return: tensor(-137.1576, device='cuda:0')
epoch: 120 test_true_pfm: 3344.765416288146 sim_pfm: 360.43215909274295
episode: 480 training return: tensor(-369.9468, device='cuda:0')
episode: 481 training return: tensor(152.5327, device='cuda:0')
episode: 482 training return: tensor(-347.5540, device='cuda:0')
episode: 483 training return: tensor(4.9148, device='cuda:0')
epoch: 121 test_true_pfm: 2999.347582441705 sim_pfm: 277.1641097614386
episode: 484 training return: tensor(-285.7470, device='cuda:0')
episode: 485 training return: tensor(-231.7175, device='cuda:0')
episode: 486 training return: tensor(-291.1932, device='cuda:0')
episode: 487 training return: tensor(-246.3893, device='cuda:0')
epoch: 122 test_true_pfm: 3339.6468466766805 sim_pfm: 225.9165099161522
episode: 488 training return: tensor(-175.6811, device='cuda:0')
episode: 489 training return: tensor(6.9596, device='cuda:0')
episode: 490 training return: tensor(-110.2589, device='cuda:0')
episode: 491 training return: tensor(-285.3748, device='cuda:0')
epoch: 123 test_true_pfm: 3295.6475150527185 sim_pfm: 284.2017244983678
episode: 492 training return: tensor(-375.2423, device='cuda:0')
episode: 493 training return: tensor(-215.0078, device='cuda:0')
episode: 494 training return: tensor(-308.3445, device='cuda:0')
episode: 495 training return: tensor(-279.4303, device='cuda:0')
epoch: 124 test_true_pfm: 3295.65247337349 sim_pfm: 279.6294212077434
episode: 496 training return: tensor(76.2145, device='cuda:0')
episode: 497 training return: tensor(-194.5520, device='cuda:0')
episode: 498 training return: tensor(-144.9294, device='cuda:0')
episode: 499 training return: tensor(-283.2092, device='cuda:0')
epoch: 125 test_true_pfm: 3304.508280981299 sim_pfm: 373.5399199269402
episode: 500 training return: tensor(371.0304, device='cuda:0')
episode: 501 training return: tensor(-212.2870, device='cuda:0')
episode: 502 training return: tensor(-202.4785, device='cuda:0')
episode: 503 training return: tensor(-329.8509, device='cuda:0')
epoch: 126 test_true_pfm: 3311.354483951516 sim_pfm: 380.17069106952596
episode: 504 training return: tensor(-261.9765, device='cuda:0')
episode: 505 training return: tensor(-370.9772, device='cuda:0')
episode: 506 training return: tensor(-165.0960, device='cuda:0')
episode: 507 training return: tensor(-358.5720, device='cuda:0')
epoch: 127 test_true_pfm: 3302.3714413639996 sim_pfm: 379.03807218252524
episode: 508 training return: tensor(-400.1476, device='cuda:0')
episode: 509 training return: tensor(-290.7003, device='cuda:0')
episode: 510 training return: tensor(-380.4525, device='cuda:0')
episode: 511 training return: tensor(63.8001, device='cuda:0')
epoch: 128 test_true_pfm: 2954.356053741862 sim_pfm: 386.4575589649224
episode: 512 training return: tensor(-241.8148, device='cuda:0')
episode: 513 training return: tensor(-153.5596, device='cuda:0')
episode: 514 training return: tensor(-81.5583, device='cuda:0')
episode: 515 training return: tensor(-246.7366, device='cuda:0')
epoch: 129 test_true_pfm: 3318.684218760089 sim_pfm: 307.5183068027545
episode: 516 training return: tensor(123.8018, device='cuda:0')
episode: 517 training return: tensor(-92.4883, device='cuda:0')
episode: 518 training return: tensor(-329.8852, device='cuda:0')
episode: 519 training return: tensor(-270.2784, device='cuda:0')
epoch: 130 test_true_pfm: 2872.9881575709896 sim_pfm: 278.98874196518835
episode: 520 training return: tensor(-367.4940, device='cuda:0')
episode: 521 training return: tensor(-392.2638, device='cuda:0')
episode: 522 training return: tensor(-89.2273, device='cuda:0')
episode: 523 training return: tensor(-312.0873, device='cuda:0')
epoch: 131 test_true_pfm: 3045.1292309631626 sim_pfm: 371.9322560816072
episode: 524 training return: tensor(-304.9463, device='cuda:0')
episode: 525 training return: tensor(-390.5598, device='cuda:0')
episode: 526 training return: tensor(-340.2405, device='cuda:0')
episode: 527 training return: tensor(-185.8985, device='cuda:0')
epoch: 132 test_true_pfm: 3287.940118535011 sim_pfm: 352.3420314081013
episode: 528 training return: tensor(347.4921, device='cuda:0')
episode: 529 training return: tensor(-102.4089, device='cuda:0')
episode: 530 training return: tensor(-99.3783, device='cuda:0')
episode: 531 training return: tensor(-411.5271, device='cuda:0')
epoch: 133 test_true_pfm: 3038.20848097059 sim_pfm: 375.9187094651473
episode: 532 training return: tensor(170.2315, device='cuda:0')
episode: 533 training return: tensor(-407.4747, device='cuda:0')
episode: 534 training return: tensor(-350.2360, device='cuda:0')
episode: 535 training return: tensor(-344.4041, device='cuda:0')
epoch: 134 test_true_pfm: 3143.8514495576237 sim_pfm: 321.88766544157016
episode: 536 training return: tensor(-349.6631, device='cuda:0')
episode: 537 training return: tensor(-346.2803, device='cuda:0')
episode: 538 training return: tensor(-198.4739, device='cuda:0')
episode: 539 training return: tensor(293.3939, device='cuda:0')
epoch: 135 test_true_pfm: 3302.7101502763944 sim_pfm: 366.17217778864625
episode: 540 training return: tensor(-346.8610, device='cuda:0')
episode: 541 training return: tensor(-8.5805, device='cuda:0')
episode: 542 training return: tensor(-162.1612, device='cuda:0')
episode: 543 training return: tensor(-381.6678, device='cuda:0')
epoch: 136 test_true_pfm: 3303.6109081706695 sim_pfm: 375.36168372646597
episode: 544 training return: tensor(-152.8232, device='cuda:0')
episode: 545 training return: tensor(-106.0248, device='cuda:0')
episode: 546 training return: tensor(-353.3867, device='cuda:0')
episode: 547 training return: tensor(-216.8441, device='cuda:0')
epoch: 137 test_true_pfm: 3322.197477291549 sim_pfm: 380.37714373134077
episode: 548 training return: tensor(-242.5789, device='cuda:0')
episode: 549 training return: tensor(-347.4886, device='cuda:0')
episode: 550 training return: tensor(-127.4218, device='cuda:0')
episode: 551 training return: tensor(-343.8174, device='cuda:0')
epoch: 138 test_true_pfm: 3012.642663735212 sim_pfm: 207.35490969227007
episode: 552 training return: tensor(-349.3979, device='cuda:0')
episode: 553 training return: tensor(-412.1282, device='cuda:0')
episode: 554 training return: tensor(391.7990, device='cuda:0')
episode: 555 training return: tensor(-191.0293, device='cuda:0')
epoch: 139 test_true_pfm: 3336.985485831659 sim_pfm: 286.1255637387124
episode: 556 training return: tensor(-401.1409, device='cuda:0')
episode: 557 training return: tensor(-376.8024, device='cuda:0')
episode: 558 training return: tensor(-263.3515, device='cuda:0')
episode: 559 training return: tensor(-409.8989, device='cuda:0')
epoch: 140 test_true_pfm: 3044.702107945961 sim_pfm: 382.11415520232794
episode: 560 training return: tensor(-160.2193, device='cuda:0')
episode: 561 training return: tensor(-352.2570, device='cuda:0')
episode: 562 training return: tensor(-392.8120, device='cuda:0')
episode: 563 training return: tensor(-398.9168, device='cuda:0')
epoch: 141 test_true_pfm: 3303.3041734685485 sim_pfm: 375.90295399252017
episode: 564 training return: tensor(-21.9793, device='cuda:0')
episode: 565 training return: tensor(-349.6544, device='cuda:0')
episode: 566 training return: tensor(-239.7615, device='cuda:0')
episode: 567 training return: tensor(-392.6446, device='cuda:0')
epoch: 142 test_true_pfm: 3220.839975484391 sim_pfm: 250.66804547980428
episode: 568 training return: tensor(-411.6876, device='cuda:0')
episode: 569 training return: tensor(-185.4037, device='cuda:0')
episode: 570 training return: tensor(-162.0131, device='cuda:0')
episode: 571 training return: tensor(-4.5778, device='cuda:0')
epoch: 143 test_true_pfm: 3298.7520734851564 sim_pfm: 375.1437362986617
episode: 572 training return: tensor(-382.0019, device='cuda:0')
episode: 573 training return: tensor(154.1981, device='cuda:0')
episode: 574 training return: tensor(-363.4852, device='cuda:0')
episode: 575 training return: tensor(-409.2673, device='cuda:0')
epoch: 144 test_true_pfm: 3331.277629913268 sim_pfm: 320.5223649293184
episode: 576 training return: tensor(-337.5465, device='cuda:0')
episode: 577 training return: tensor(-361.6738, device='cuda:0')
episode: 578 training return: tensor(-393.7738, device='cuda:0')
episode: 579 training return: tensor(-386.6615, device='cuda:0')
epoch: 145 test_true_pfm: 3296.1669420139065 sim_pfm: 371.2298645027137
episode: 580 training return: tensor(-196.9140, device='cuda:0')
episode: 581 training return: tensor(-280.9633, device='cuda:0')
episode: 582 training return: tensor(-320.4483, device='cuda:0')
episode: 583 training return: tensor(-401.2593, device='cuda:0')
epoch: 146 test_true_pfm: 3306.3159172334385 sim_pfm: 215.07597573194653
episode: 584 training return: tensor(-112.7802, device='cuda:0')
episode: 585 training return: tensor(-391.5398, device='cuda:0')
episode: 586 training return: tensor(-210.4324, device='cuda:0')
episode: 587 training return: tensor(-125.1372, device='cuda:0')
epoch: 147 test_true_pfm: 3297.7403526775197 sim_pfm: 378.8687411691062
episode: 588 training return: tensor(-170.7694, device='cuda:0')
episode: 589 training return: tensor(-238.9168, device='cuda:0')
episode: 590 training return: tensor(-187.4304, device='cuda:0')
episode: 591 training return: tensor(-337.7333, device='cuda:0')
epoch: 148 test_true_pfm: 3320.137543141929 sim_pfm: 163.88746713443348
episode: 592 training return: tensor(-119.7575, device='cuda:0')
episode: 593 training return: tensor(-110.1926, device='cuda:0')
episode: 594 training return: tensor(-16.1706, device='cuda:0')
episode: 595 training return: tensor(-332.5902, device='cuda:0')
epoch: 149 test_true_pfm: 3295.7467791885597 sim_pfm: 376.9386800063464
episode: 596 training return: tensor(-328.1261, device='cuda:0')
episode: 597 training return: tensor(-141.4079, device='cuda:0')
episode: 598 training return: tensor(-129.4398, device='cuda:0')
episode: 599 training return: tensor(-233.4836, device='cuda:0')
epoch: 150 test_true_pfm: 2859.8146483316555 sim_pfm: 206.44016128976364
