['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'medium', '--seed', '2', '--data', '100000', '--regu', '0.3']
5052.02001030369
episode: 0 training return: tensor(-412.0622, device='cuda:0')
episode: 1 training return: tensor(-379.4510, device='cuda:0')
episode: 2 training return: tensor(-404.6235, device='cuda:0')
episode: 3 training return: tensor(-343.8809, device='cuda:0')
epoch: 1 test_true_pfm: 4573.054453537917 sim_pfm: -332.55051900460967
episode: 4 training return: tensor(-478.5725, device='cuda:0')
episode: 5 training return: tensor(-265.3848, device='cuda:0')
episode: 6 training return: tensor(-411.3394, device='cuda:0')
episode: 7 training return: tensor(-491.1930, device='cuda:0')
epoch: 2 test_true_pfm: 3680.172963723906 sim_pfm: -479.75555620276526
episode: 8 training return: tensor(-408.4566, device='cuda:0')
episode: 9 training return: tensor(-361.3523, device='cuda:0')
episode: 10 training return: tensor(-556.2486, device='cuda:0')
episode: 11 training return: tensor(-470.3929, device='cuda:0')
epoch: 3 test_true_pfm: 3037.7175938041387 sim_pfm: -555.644237109363
episode: 12 training return: tensor(-629.8334, device='cuda:0')
episode: 13 training return: tensor(-189.8869, device='cuda:0')
episode: 14 training return: tensor(-367.0940, device='cuda:0')
episode: 15 training return: tensor(-235.4230, device='cuda:0')
epoch: 4 test_true_pfm: 4881.368611606026 sim_pfm: -205.64021971740294
episode: 16 training return: tensor(-380.3769, device='cuda:0')
episode: 17 training return: tensor(-190.5876, device='cuda:0')
episode: 18 training return: tensor(-428.3622, device='cuda:0')
episode: 19 training return: tensor(-301.3310, device='cuda:0')
epoch: 5 test_true_pfm: 4964.990225774366 sim_pfm: -214.87252668626994
episode: 20 training return: tensor(-435.9297, device='cuda:0')
episode: 21 training return: tensor(-376.4782, device='cuda:0')
episode: 22 training return: tensor(-197.8322, device='cuda:0')
episode: 23 training return: tensor(-168.7876, device='cuda:0')
epoch: 6 test_true_pfm: 5015.889302641234 sim_pfm: -14.434992656402756
episode: 24 training return: tensor(-298.4434, device='cuda:0')
episode: 25 training return: tensor(-214.3681, device='cuda:0')
episode: 26 training return: tensor(-56.9454, device='cuda:0')
episode: 27 training return: tensor(-120.5560, device='cuda:0')
epoch: 7 test_true_pfm: 5226.655587471164 sim_pfm: 146.7213400323138
episode: 28 training return: tensor(-164.9999, device='cuda:0')
episode: 29 training return: tensor(-101.4919, device='cuda:0')
episode: 30 training return: tensor(-259.7706, device='cuda:0')
episode: 31 training return: tensor(-137.8249, device='cuda:0')
epoch: 8 test_true_pfm: 5109.950380533587 sim_pfm: 11.32720972339545
episode: 32 training return: tensor(-172.8166, device='cuda:0')
episode: 33 training return: tensor(-158.8216, device='cuda:0')
episode: 34 training return: tensor(-283.9406, device='cuda:0')
episode: 35 training return: tensor(-99.0666, device='cuda:0')
epoch: 9 test_true_pfm: 5290.810435336 sim_pfm: 71.11648313498397
episode: 36 training return: tensor(-124.5952, device='cuda:0')
episode: 37 training return: tensor(-187.1107, device='cuda:0')
episode: 38 training return: tensor(-162.3164, device='cuda:0')
episode: 39 training return: tensor(-73.1699, device='cuda:0')
epoch: 10 test_true_pfm: 5311.5641142503255 sim_pfm: 69.70666283649432
episode: 40 training return: tensor(-165.9325, device='cuda:0')
episode: 41 training return: tensor(-50.7409, device='cuda:0')
episode: 42 training return: tensor(19.6412, device='cuda:0')
episode: 43 training return: tensor(13.9476, device='cuda:0')
epoch: 11 test_true_pfm: 5377.554553636525 sim_pfm: 249.11329539034827
episode: 44 training return: tensor(-90.3680, device='cuda:0')
episode: 45 training return: tensor(-177.1230, device='cuda:0')
episode: 46 training return: tensor(-42.3529, device='cuda:0')
episode: 47 training return: tensor(25.4896, device='cuda:0')
epoch: 12 test_true_pfm: 5510.295818396701 sim_pfm: 158.24459205408735
episode: 48 training return: tensor(-34.8077, device='cuda:0')
episode: 49 training return: tensor(-254.9451, device='cuda:0')
episode: 50 training return: tensor(-27.2179, device='cuda:0')
episode: 51 training return: tensor(92.9367, device='cuda:0')
epoch: 13 test_true_pfm: 5359.619430296441 sim_pfm: 259.9484446704155
episode: 52 training return: tensor(-21.7412, device='cuda:0')
episode: 53 training return: tensor(-47.5217, device='cuda:0')
episode: 54 training return: tensor(24.0905, device='cuda:0')
episode: 55 training return: tensor(19.0913, device='cuda:0')
epoch: 14 test_true_pfm: 5402.961543335345 sim_pfm: 203.92907183010053
episode: 56 training return: tensor(-154.0232, device='cuda:0')
episode: 57 training return: tensor(50.3900, device='cuda:0')
episode: 58 training return: tensor(-25.8756, device='cuda:0')
episode: 59 training return: tensor(-118.2263, device='cuda:0')
epoch: 15 test_true_pfm: 5363.601501819766 sim_pfm: 259.62921607657336
episode: 60 training return: tensor(142.0538, device='cuda:0')
episode: 61 training return: tensor(88.0050, device='cuda:0')
episode: 62 training return: tensor(-43.8257, device='cuda:0')
episode: 63 training return: tensor(-87.0392, device='cuda:0')
epoch: 16 test_true_pfm: 5458.292163919534 sim_pfm: 297.0540921384236
episode: 64 training return: tensor(-23.2902, device='cuda:0')
episode: 65 training return: tensor(-13.0932, device='cuda:0')
episode: 66 training return: tensor(-100.3631, device='cuda:0')
episode: 67 training return: tensor(154.4236, device='cuda:0')
epoch: 17 test_true_pfm: 5416.039360261672 sim_pfm: 211.2347459133404
episode: 68 training return: tensor(99.5935, device='cuda:0')
episode: 69 training return: tensor(79.5421, device='cuda:0')
episode: 70 training return: tensor(136.3027, device='cuda:0')
episode: 71 training return: tensor(77.3344, device='cuda:0')
epoch: 18 test_true_pfm: 5628.783057832006 sim_pfm: 286.27913431324606
episode: 72 training return: tensor(-6.5850, device='cuda:0')
episode: 73 training return: tensor(-39.7866, device='cuda:0')
episode: 74 training return: tensor(10.8470, device='cuda:0')
episode: 75 training return: tensor(78.9874, device='cuda:0')
epoch: 19 test_true_pfm: 5581.07622217531 sim_pfm: 289.92215432443965
episode: 76 training return: tensor(119.6010, device='cuda:0')
episode: 77 training return: tensor(58.9439, device='cuda:0')
episode: 78 training return: tensor(210.4295, device='cuda:0')
episode: 79 training return: tensor(237.2722, device='cuda:0')
epoch: 20 test_true_pfm: 5637.856937942881 sim_pfm: 343.80343552083167
episode: 80 training return: tensor(127.5372, device='cuda:0')
episode: 81 training return: tensor(42.0876, device='cuda:0')
episode: 82 training return: tensor(108.0024, device='cuda:0')
episode: 83 training return: tensor(-8.9602, device='cuda:0')
epoch: 21 test_true_pfm: 5501.211425902519 sim_pfm: 289.77345921897603
episode: 84 training return: tensor(128.4312, device='cuda:0')
episode: 85 training return: tensor(128.3879, device='cuda:0')
episode: 86 training return: tensor(56.8667, device='cuda:0')
episode: 87 training return: tensor(160.5669, device='cuda:0')
epoch: 22 test_true_pfm: 5676.607106229894 sim_pfm: 395.5669033623611
episode: 88 training return: tensor(147.6902, device='cuda:0')
episode: 89 training return: tensor(163.4660, device='cuda:0')
episode: 90 training return: tensor(85.3285, device='cuda:0')
episode: 91 training return: tensor(96.1984, device='cuda:0')
epoch: 23 test_true_pfm: 5628.971197215237 sim_pfm: 399.4886976654719
episode: 92 training return: tensor(114.6007, device='cuda:0')
episode: 93 training return: tensor(145.6430, device='cuda:0')
episode: 94 training return: tensor(196.7125, device='cuda:0')
episode: 95 training return: tensor(262.4462, device='cuda:0')
epoch: 24 test_true_pfm: 5664.6499338972 sim_pfm: 392.3181357794674
episode: 96 training return: tensor(130.7695, device='cuda:0')
episode: 97 training return: tensor(146.6923, device='cuda:0')
episode: 98 training return: tensor(119.9482, device='cuda:0')
episode: 99 training return: tensor(153.4314, device='cuda:0')
epoch: 25 test_true_pfm: 5569.730554302611 sim_pfm: 377.10668200759875
episode: 100 training return: tensor(50.3307, device='cuda:0')
episode: 101 training return: tensor(189.8225, device='cuda:0')
episode: 102 training return: tensor(218.7222, device='cuda:0')
episode: 103 training return: tensor(154.7992, device='cuda:0')
epoch: 26 test_true_pfm: 5643.498085444092 sim_pfm: 388.4099664412982
episode: 104 training return: tensor(184.7110, device='cuda:0')
episode: 105 training return: tensor(259.9678, device='cuda:0')
episode: 106 training return: tensor(208.5280, device='cuda:0')
episode: 107 training return: tensor(317.1552, device='cuda:0')
epoch: 27 test_true_pfm: 5697.148024713021 sim_pfm: 447.80027251981664
episode: 108 training return: tensor(264.8751, device='cuda:0')
episode: 109 training return: tensor(282.2168, device='cuda:0')
episode: 110 training return: tensor(217.2707, device='cuda:0')
episode: 111 training return: tensor(280.6902, device='cuda:0')
epoch: 28 test_true_pfm: 5586.832858758534 sim_pfm: 406.8114321974572
episode: 112 training return: tensor(270.0129, device='cuda:0')
episode: 113 training return: tensor(167.3555, device='cuda:0')
episode: 114 training return: tensor(172.1206, device='cuda:0')
episode: 115 training return: tensor(216.9853, device='cuda:0')
epoch: 29 test_true_pfm: 5816.284388882411 sim_pfm: 481.1061281162935
episode: 116 training return: tensor(223.3408, device='cuda:0')
episode: 117 training return: tensor(309.9928, device='cuda:0')
episode: 118 training return: tensor(170.2569, device='cuda:0')
episode: 119 training return: tensor(173.1837, device='cuda:0')
epoch: 30 test_true_pfm: 5757.250112486615 sim_pfm: 442.18329968610004
episode: 120 training return: tensor(223.6308, device='cuda:0')
episode: 121 training return: tensor(251.5946, device='cuda:0')
episode: 122 training return: tensor(186.5840, device='cuda:0')
episode: 123 training return: tensor(257.6125, device='cuda:0')
epoch: 31 test_true_pfm: 5708.312702942693 sim_pfm: 443.7781817073895
episode: 124 training return: tensor(255.2826, device='cuda:0')
episode: 125 training return: tensor(270.6480, device='cuda:0')
episode: 126 training return: tensor(341.7863, device='cuda:0')
episode: 127 training return: tensor(349.8257, device='cuda:0')
epoch: 32 test_true_pfm: 5738.772996947683 sim_pfm: 457.5943017738561
episode: 128 training return: tensor(225.5570, device='cuda:0')
episode: 129 training return: tensor(244.2311, device='cuda:0')
episode: 130 training return: tensor(217.3246, device='cuda:0')
episode: 131 training return: tensor(166.5049, device='cuda:0')
epoch: 33 test_true_pfm: 4778.182971985626 sim_pfm: 416.60140988676966
episode: 132 training return: tensor(290.1902, device='cuda:0')
episode: 133 training return: tensor(206.5718, device='cuda:0')
episode: 134 training return: tensor(258.4286, device='cuda:0')
episode: 135 training return: tensor(345.6216, device='cuda:0')
epoch: 34 test_true_pfm: 5810.020897569902 sim_pfm: 445.16600210646476
episode: 136 training return: tensor(268.1015, device='cuda:0')
episode: 137 training return: tensor(320.1741, device='cuda:0')
episode: 138 training return: tensor(343.6374, device='cuda:0')
episode: 139 training return: tensor(284.4308, device='cuda:0')
epoch: 35 test_true_pfm: 5877.474176308555 sim_pfm: 495.54085529828444
episode: 140 training return: tensor(317.8204, device='cuda:0')
episode: 141 training return: tensor(256.7084, device='cuda:0')
episode: 142 training return: tensor(326.8738, device='cuda:0')
episode: 143 training return: tensor(395.3200, device='cuda:0')
epoch: 36 test_true_pfm: 5843.1301046634935 sim_pfm: 491.03395465804107
episode: 144 training return: tensor(310.0343, device='cuda:0')
episode: 145 training return: tensor(371.4047, device='cuda:0')
episode: 146 training return: tensor(321.2097, device='cuda:0')
episode: 147 training return: tensor(324.0654, device='cuda:0')
epoch: 37 test_true_pfm: 5805.692764843962 sim_pfm: 458.1357342157862
episode: 148 training return: tensor(215.8746, device='cuda:0')
episode: 149 training return: tensor(303.3692, device='cuda:0')
episode: 150 training return: tensor(321.7990, device='cuda:0')
episode: 151 training return: tensor(304.1529, device='cuda:0')
epoch: 38 test_true_pfm: 5755.388329390734 sim_pfm: 534.2217189112562
episode: 152 training return: tensor(339.1614, device='cuda:0')
episode: 153 training return: tensor(334.9979, device='cuda:0')
episode: 154 training return: tensor(298.3081, device='cuda:0')
episode: 155 training return: tensor(292.4760, device='cuda:0')
epoch: 39 test_true_pfm: 5132.443559447441 sim_pfm: 521.4914684563022
episode: 156 training return: tensor(415.1199, device='cuda:0')
episode: 157 training return: tensor(224.7742, device='cuda:0')
episode: 158 training return: tensor(333.2733, device='cuda:0')
episode: 159 training return: tensor(349.6393, device='cuda:0')
epoch: 40 test_true_pfm: 5921.248540077916 sim_pfm: 556.6396912139995
episode: 160 training return: tensor(302.4647, device='cuda:0')
episode: 161 training return: tensor(252.7713, device='cuda:0')
episode: 162 training return: tensor(297.7416, device='cuda:0')
episode: 163 training return: tensor(388.0189, device='cuda:0')
epoch: 41 test_true_pfm: 5900.426846819018 sim_pfm: 549.6993199306307
episode: 164 training return: tensor(356.7685, device='cuda:0')
episode: 165 training return: tensor(356.9684, device='cuda:0')
episode: 166 training return: tensor(336.0388, device='cuda:0')
episode: 167 training return: tensor(248.2644, device='cuda:0')
epoch: 42 test_true_pfm: 5817.46606766925 sim_pfm: 481.4329773380111
episode: 168 training return: tensor(393.5442, device='cuda:0')
episode: 169 training return: tensor(364.9874, device='cuda:0')
episode: 170 training return: tensor(372.4986, device='cuda:0')
episode: 171 training return: tensor(363.9804, device='cuda:0')
epoch: 43 test_true_pfm: 5957.27407487825 sim_pfm: 544.8137541312414
episode: 172 training return: tensor(469.5406, device='cuda:0')
episode: 173 training return: tensor(324.2183, device='cuda:0')
episode: 174 training return: tensor(200.9617, device='cuda:0')
episode: 175 training return: tensor(295.0882, device='cuda:0')
epoch: 44 test_true_pfm: 5953.9061383051885 sim_pfm: 561.4729541918108
episode: 176 training return: tensor(360.3356, device='cuda:0')
episode: 177 training return: tensor(292.8455, device='cuda:0')
episode: 178 training return: tensor(363.4627, device='cuda:0')
episode: 179 training return: tensor(405.4417, device='cuda:0')
epoch: 45 test_true_pfm: 5970.259513504289 sim_pfm: 582.8963057841271
episode: 180 training return: tensor(391.6467, device='cuda:0')
episode: 181 training return: tensor(362.0252, device='cuda:0')
episode: 182 training return: tensor(360.2242, device='cuda:0')
episode: 183 training return: tensor(300.9254, device='cuda:0')
epoch: 46 test_true_pfm: 5800.749099451133 sim_pfm: 524.0102055872945
episode: 184 training return: tensor(308.5846, device='cuda:0')
episode: 185 training return: tensor(447.0432, device='cuda:0')
episode: 186 training return: tensor(370.2568, device='cuda:0')
episode: 187 training return: tensor(361.5639, device='cuda:0')
epoch: 47 test_true_pfm: 6044.534853349146 sim_pfm: 574.2090898938865
episode: 188 training return: tensor(356.2885, device='cuda:0')
episode: 189 training return: tensor(280.8378, device='cuda:0')
episode: 190 training return: tensor(235.6282, device='cuda:0')
episode: 191 training return: tensor(351.6146, device='cuda:0')
epoch: 48 test_true_pfm: 5939.655229865596 sim_pfm: 570.6073652765481
episode: 192 training return: tensor(237.7621, device='cuda:0')
episode: 193 training return: tensor(381.6040, device='cuda:0')
episode: 194 training return: tensor(421.1985, device='cuda:0')
episode: 195 training return: tensor(424.1878, device='cuda:0')
epoch: 49 test_true_pfm: 5969.942409826039 sim_pfm: 596.891045028111
episode: 196 training return: tensor(409.1626, device='cuda:0')
episode: 197 training return: tensor(317.7821, device='cuda:0')
episode: 198 training return: tensor(471.7061, device='cuda:0')
episode: 199 training return: tensor(436.7185, device='cuda:0')
epoch: 50 test_true_pfm: 5944.0459776717535 sim_pfm: 581.7799915321908
episode: 200 training return: tensor(353.7631, device='cuda:0')
episode: 201 training return: tensor(415.8924, device='cuda:0')
episode: 202 training return: tensor(415.5611, device='cuda:0')
episode: 203 training return: tensor(382.6823, device='cuda:0')
epoch: 51 test_true_pfm: 6003.551139305052 sim_pfm: 572.1845573124398
episode: 204 training return: tensor(426.5341, device='cuda:0')
episode: 205 training return: tensor(294.7039, device='cuda:0')
episode: 206 training return: tensor(404.3201, device='cuda:0')
episode: 207 training return: tensor(460.2263, device='cuda:0')
epoch: 52 test_true_pfm: 5976.725886396333 sim_pfm: 614.3844398198029
episode: 208 training return: tensor(381.5153, device='cuda:0')
episode: 209 training return: tensor(369.4737, device='cuda:0')
episode: 210 training return: tensor(383.7828, device='cuda:0')
episode: 211 training return: tensor(405.6103, device='cuda:0')
epoch: 53 test_true_pfm: 5971.116086006707 sim_pfm: 552.0065596917024
episode: 212 training return: tensor(415.0696, device='cuda:0')
episode: 213 training return: tensor(455.5439, device='cuda:0')
episode: 214 training return: tensor(496.9538, device='cuda:0')
episode: 215 training return: tensor(373.2563, device='cuda:0')
epoch: 54 test_true_pfm: 5992.156444172841 sim_pfm: 638.556175175899
episode: 216 training return: tensor(501.3802, device='cuda:0')
episode: 217 training return: tensor(363.3176, device='cuda:0')
episode: 218 training return: tensor(427.8919, device='cuda:0')
episode: 219 training return: tensor(355.3491, device='cuda:0')
epoch: 55 test_true_pfm: 5961.9463152865 sim_pfm: 607.4085741131179
episode: 220 training return: tensor(465.4091, device='cuda:0')
episode: 221 training return: tensor(377.4900, device='cuda:0')
episode: 222 training return: tensor(427.8157, device='cuda:0')
episode: 223 training return: tensor(486.4829, device='cuda:0')
epoch: 56 test_true_pfm: 6005.8645955690545 sim_pfm: 601.9998346835879
episode: 224 training return: tensor(429.6770, device='cuda:0')
episode: 225 training return: tensor(325.4663, device='cuda:0')
episode: 226 training return: tensor(423.5338, device='cuda:0')
episode: 227 training return: tensor(461.7910, device='cuda:0')
epoch: 57 test_true_pfm: 6066.414668611817 sim_pfm: 587.0057999986069
episode: 228 training return: tensor(444.1712, device='cuda:0')
episode: 229 training return: tensor(463.9283, device='cuda:0')
episode: 230 training return: tensor(455.9507, device='cuda:0')
episode: 231 training return: tensor(427.2431, device='cuda:0')
epoch: 58 test_true_pfm: 6051.438052555233 sim_pfm: 612.707388564401
episode: 232 training return: tensor(414.2269, device='cuda:0')
episode: 233 training return: tensor(471.8653, device='cuda:0')
episode: 234 training return: tensor(426.7939, device='cuda:0')
episode: 235 training return: tensor(354.1264, device='cuda:0')
epoch: 59 test_true_pfm: 6064.1857889426765 sim_pfm: 619.5965777949896
episode: 236 training return: tensor(466.2538, device='cuda:0')
episode: 237 training return: tensor(407.5711, device='cuda:0')
episode: 238 training return: tensor(378.6932, device='cuda:0')
episode: 239 training return: tensor(452.2639, device='cuda:0')
epoch: 60 test_true_pfm: 6044.138531992775 sim_pfm: 638.0606154638032
episode: 240 training return: tensor(388.3392, device='cuda:0')
episode: 241 training return: tensor(468.0660, device='cuda:0')
episode: 242 training return: tensor(432.0004, device='cuda:0')
episode: 243 training return: tensor(388.9206, device='cuda:0')
epoch: 61 test_true_pfm: 6100.8387065204215 sim_pfm: 646.2130650807716
episode: 244 training return: tensor(478.8297, device='cuda:0')
episode: 245 training return: tensor(428.6406, device='cuda:0')
episode: 246 training return: tensor(514.5535, device='cuda:0')
episode: 247 training return: tensor(392.1088, device='cuda:0')
epoch: 62 test_true_pfm: 6031.92959844837 sim_pfm: 597.5034290493155
episode: 248 training return: tensor(479.0920, device='cuda:0')
episode: 249 training return: tensor(450.5489, device='cuda:0')
episode: 250 training return: tensor(510.6796, device='cuda:0')
episode: 251 training return: tensor(381.3973, device='cuda:0')
epoch: 63 test_true_pfm: 6028.993046919882 sim_pfm: 592.3229864409368
episode: 252 training return: tensor(481.3494, device='cuda:0')
episode: 253 training return: tensor(466.9584, device='cuda:0')
episode: 254 training return: tensor(417.3452, device='cuda:0')
episode: 255 training return: tensor(468.6219, device='cuda:0')
epoch: 64 test_true_pfm: 6009.152844115203 sim_pfm: 653.333768683408
episode: 256 training return: tensor(423.6711, device='cuda:0')
episode: 257 training return: tensor(450.7523, device='cuda:0')
episode: 258 training return: tensor(447.4572, device='cuda:0')
episode: 259 training return: tensor(417.1185, device='cuda:0')
epoch: 65 test_true_pfm: 5995.56651665249 sim_pfm: 577.5637890950311
episode: 260 training return: tensor(402.5356, device='cuda:0')
episode: 261 training return: tensor(478.9583, device='cuda:0')
episode: 262 training return: tensor(377.4016, device='cuda:0')
episode: 263 training return: tensor(458.9890, device='cuda:0')
epoch: 66 test_true_pfm: 6052.999440323743 sim_pfm: 611.5726004766766
episode: 264 training return: tensor(397.2757, device='cuda:0')
episode: 265 training return: tensor(436.8179, device='cuda:0')
episode: 266 training return: tensor(476.9536, device='cuda:0')
episode: 267 training return: tensor(515.3815, device='cuda:0')
epoch: 67 test_true_pfm: 6132.137082250036 sim_pfm: 687.1065707481079
episode: 268 training return: tensor(424.4330, device='cuda:0')
episode: 269 training return: tensor(479.7207, device='cuda:0')
episode: 270 training return: tensor(442.7899, device='cuda:0')
episode: 271 training return: tensor(498.8338, device='cuda:0')
epoch: 68 test_true_pfm: 6102.144981590686 sim_pfm: 659.2334106747876
episode: 272 training return: tensor(478.1851, device='cuda:0')
episode: 273 training return: tensor(448.0714, device='cuda:0')
episode: 274 training return: tensor(395.1715, device='cuda:0')
episode: 275 training return: tensor(415.1630, device='cuda:0')
epoch: 69 test_true_pfm: 6144.278356528556 sim_pfm: 661.4134803761262
episode: 276 training return: tensor(438.9524, device='cuda:0')
episode: 277 training return: tensor(445.7240, device='cuda:0')
episode: 278 training return: tensor(534.7176, device='cuda:0')
episode: 279 training return: tensor(425.2674, device='cuda:0')
epoch: 70 test_true_pfm: 6055.261937847113 sim_pfm: 631.1822200307312
episode: 280 training return: tensor(483.0287, device='cuda:0')
episode: 281 training return: tensor(467.0506, device='cuda:0')
episode: 282 training return: tensor(451.6880, device='cuda:0')
episode: 283 training return: tensor(429.1373, device='cuda:0')
epoch: 71 test_true_pfm: 6167.878828869897 sim_pfm: 638.3511457072067
episode: 284 training return: tensor(502.6497, device='cuda:0')
episode: 285 training return: tensor(451.1152, device='cuda:0')
episode: 286 training return: tensor(520.8206, device='cuda:0')
episode: 287 training return: tensor(-458.7200, device='cuda:0')
epoch: 72 test_true_pfm: 6145.13100189939 sim_pfm: 671.857700337132
episode: 288 training return: tensor(466.5848, device='cuda:0')
episode: 289 training return: tensor(424.5546, device='cuda:0')
episode: 290 training return: tensor(350.6906, device='cuda:0')
episode: 291 training return: tensor(456.9648, device='cuda:0')
epoch: 73 test_true_pfm: 6072.6147556843935 sim_pfm: 567.7468729810789
episode: 292 training return: tensor(397.2028, device='cuda:0')
episode: 293 training return: tensor(452.4423, device='cuda:0')
episode: 294 training return: tensor(518.9331, device='cuda:0')
episode: 295 training return: tensor(407.2272, device='cuda:0')
epoch: 74 test_true_pfm: 6123.527803371281 sim_pfm: 655.5573603992816
episode: 296 training return: tensor(475.3567, device='cuda:0')
episode: 297 training return: tensor(561.6560, device='cuda:0')
episode: 298 training return: tensor(446.6170, device='cuda:0')
episode: 299 training return: tensor(408.3716, device='cuda:0')
epoch: 75 test_true_pfm: 6131.664940539374 sim_pfm: 670.0260235861448
episode: 300 training return: tensor(548.4649, device='cuda:0')
episode: 301 training return: tensor(575.6581, device='cuda:0')
episode: 302 training return: tensor(438.0900, device='cuda:0')
episode: 303 training return: tensor(528.6653, device='cuda:0')
epoch: 76 test_true_pfm: 6104.191958080418 sim_pfm: 621.2882779463738
episode: 304 training return: tensor(523.4989, device='cuda:0')
episode: 305 training return: tensor(391.3403, device='cuda:0')
episode: 306 training return: tensor(492.9083, device='cuda:0')
episode: 307 training return: tensor(425.5154, device='cuda:0')
epoch: 77 test_true_pfm: 6132.9095175478915 sim_pfm: 644.1176870752824
episode: 308 training return: tensor(446.3576, device='cuda:0')
episode: 309 training return: tensor(423.5297, device='cuda:0')
episode: 310 training return: tensor(502.0342, device='cuda:0')
episode: 311 training return: tensor(560.7357, device='cuda:0')
epoch: 78 test_true_pfm: 6158.38793544779 sim_pfm: 685.4995101670889
episode: 312 training return: tensor(327.7358, device='cuda:0')
episode: 313 training return: tensor(482.7695, device='cuda:0')
episode: 314 training return: tensor(519.0714, device='cuda:0')
episode: 315 training return: tensor(511.2466, device='cuda:0')
epoch: 79 test_true_pfm: 6113.3189426188765 sim_pfm: 663.0775855668957
episode: 316 training return: tensor(516.1499, device='cuda:0')
episode: 317 training return: tensor(531.8275, device='cuda:0')
episode: 318 training return: tensor(514.8615, device='cuda:0')
episode: 319 training return: tensor(446.3281, device='cuda:0')
epoch: 80 test_true_pfm: 6118.400770074045 sim_pfm: 639.1856921553457
episode: 320 training return: tensor(457.6126, device='cuda:0')
episode: 321 training return: tensor(454.0072, device='cuda:0')
episode: 322 training return: tensor(557.8723, device='cuda:0')
episode: 323 training return: tensor(473.8521, device='cuda:0')
epoch: 81 test_true_pfm: 6098.286832548744 sim_pfm: 652.0296367372599
episode: 324 training return: tensor(518.9955, device='cuda:0')
episode: 325 training return: tensor(519.4233, device='cuda:0')
episode: 326 training return: tensor(495.8585, device='cuda:0')
episode: 327 training return: tensor(542.4312, device='cuda:0')
epoch: 82 test_true_pfm: 6160.601939760371 sim_pfm: 658.0648329233712
episode: 328 training return: tensor(489.4145, device='cuda:0')
episode: 329 training return: tensor(569.2408, device='cuda:0')
episode: 330 training return: tensor(485.2179, device='cuda:0')
episode: 331 training return: tensor(465.5911, device='cuda:0')
epoch: 83 test_true_pfm: 6215.659762373686 sim_pfm: 681.1076981984079
episode: 332 training return: tensor(435.1758, device='cuda:0')
episode: 333 training return: tensor(466.1732, device='cuda:0')
episode: 334 training return: tensor(465.6531, device='cuda:0')
episode: 335 training return: tensor(493.3478, device='cuda:0')
epoch: 84 test_true_pfm: 6203.03507183096 sim_pfm: 699.0899363774806
episode: 336 training return: tensor(464.0273, device='cuda:0')
episode: 337 training return: tensor(496.0695, device='cuda:0')
episode: 338 training return: tensor(448.4863, device='cuda:0')
episode: 339 training return: tensor(547.1479, device='cuda:0')
epoch: 85 test_true_pfm: 6169.9227198800245 sim_pfm: 680.7890041873712
episode: 340 training return: tensor(526.2385, device='cuda:0')
episode: 341 training return: tensor(521.2135, device='cuda:0')
episode: 342 training return: tensor(543.9395, device='cuda:0')
episode: 343 training return: tensor(495.5979, device='cuda:0')
epoch: 86 test_true_pfm: 6146.814943937506 sim_pfm: 681.6827126825616
episode: 344 training return: tensor(482.2120, device='cuda:0')
episode: 345 training return: tensor(473.3789, device='cuda:0')
episode: 346 training return: tensor(475.0429, device='cuda:0')
episode: 347 training return: tensor(539.1671, device='cuda:0')
epoch: 87 test_true_pfm: 6175.261896308046 sim_pfm: 632.1670558129068
episode: 348 training return: tensor(553.8208, device='cuda:0')
episode: 349 training return: tensor(518.1464, device='cuda:0')
episode: 350 training return: tensor(487.9043, device='cuda:0')
episode: 351 training return: tensor(537.8813, device='cuda:0')
epoch: 88 test_true_pfm: 6093.556997112249 sim_pfm: 623.9753064544639
episode: 352 training return: tensor(499.8190, device='cuda:0')
episode: 353 training return: tensor(499.9470, device='cuda:0')
episode: 354 training return: tensor(487.1295, device='cuda:0')
episode: 355 training return: tensor(587.3295, device='cuda:0')
epoch: 89 test_true_pfm: 6152.403444171742 sim_pfm: 698.1254050035495
episode: 356 training return: tensor(536.5004, device='cuda:0')
episode: 357 training return: tensor(496.9277, device='cuda:0')
episode: 358 training return: tensor(465.9700, device='cuda:0')
episode: 359 training return: tensor(463.6306, device='cuda:0')
epoch: 90 test_true_pfm: 6182.129287465662 sim_pfm: 646.3852484695672
episode: 360 training return: tensor(519.6832, device='cuda:0')
episode: 361 training return: tensor(547.4697, device='cuda:0')
episode: 362 training return: tensor(546.3656, device='cuda:0')
episode: 363 training return: tensor(520.1182, device='cuda:0')
epoch: 91 test_true_pfm: 6170.965026808521 sim_pfm: 656.8202574560419
episode: 364 training return: tensor(571.8753, device='cuda:0')
episode: 365 training return: tensor(451.2495, device='cuda:0')
episode: 366 training return: tensor(466.4593, device='cuda:0')
episode: 367 training return: tensor(506.4092, device='cuda:0')
epoch: 92 test_true_pfm: 6157.042165305299 sim_pfm: 688.4002723316662
episode: 368 training return: tensor(456.3247, device='cuda:0')
episode: 369 training return: tensor(459.7295, device='cuda:0')
episode: 370 training return: tensor(496.2243, device='cuda:0')
episode: 371 training return: tensor(510.7404, device='cuda:0')
epoch: 93 test_true_pfm: 6105.7468399108375 sim_pfm: 696.0873402826643
episode: 372 training return: tensor(449.3781, device='cuda:0')
episode: 373 training return: tensor(495.4673, device='cuda:0')
episode: 374 training return: tensor(173.9809, device='cuda:0')
episode: 375 training return: tensor(461.5369, device='cuda:0')
epoch: 94 test_true_pfm: 6189.0217894639745 sim_pfm: 654.1527925947254
episode: 376 training return: tensor(443.6005, device='cuda:0')
episode: 377 training return: tensor(541.5172, device='cuda:0')
episode: 378 training return: tensor(551.3789, device='cuda:0')
episode: 379 training return: tensor(570.3331, device='cuda:0')
epoch: 95 test_true_pfm: 6177.549965097799 sim_pfm: 668.412307807031
episode: 380 training return: tensor(389.9059, device='cuda:0')
episode: 381 training return: tensor(534.7824, device='cuda:0')
episode: 382 training return: tensor(549.3628, device='cuda:0')
episode: 383 training return: tensor(490.2346, device='cuda:0')
epoch: 96 test_true_pfm: 6117.497228659337 sim_pfm: 687.6132594493683
episode: 384 training return: tensor(550.2053, device='cuda:0')
episode: 385 training return: tensor(504.1102, device='cuda:0')
episode: 386 training return: tensor(539.7205, device='cuda:0')
episode: 387 training return: tensor(462.1956, device='cuda:0')
epoch: 97 test_true_pfm: 6130.805192607889 sim_pfm: 665.6705141988738
episode: 388 training return: tensor(539.6638, device='cuda:0')
episode: 389 training return: tensor(547.5958, device='cuda:0')
episode: 390 training return: tensor(504.8560, device='cuda:0')
episode: 391 training return: tensor(510.5966, device='cuda:0')
epoch: 98 test_true_pfm: 6202.2180328776385 sim_pfm: 675.6469593366686
episode: 392 training return: tensor(392.2871, device='cuda:0')
episode: 393 training return: tensor(495.1910, device='cuda:0')
episode: 394 training return: tensor(468.1890, device='cuda:0')
episode: 395 training return: tensor(549.3228, device='cuda:0')
epoch: 99 test_true_pfm: 6171.0903395907735 sim_pfm: 639.900382349772
episode: 396 training return: tensor(474.0420, device='cuda:0')
episode: 397 training return: tensor(469.2545, device='cuda:0')
episode: 398 training return: tensor(546.6086, device='cuda:0')
episode: 399 training return: tensor(576.6130, device='cuda:0')
epoch: 100 test_true_pfm: 6193.936128436388 sim_pfm: 674.3705521308196
episode: 400 training return: tensor(527.0024, device='cuda:0')
episode: 401 training return: tensor(486.6182, device='cuda:0')
episode: 402 training return: tensor(510.3040, device='cuda:0')
episode: 403 training return: tensor(519.3206, device='cuda:0')
epoch: 101 test_true_pfm: 6181.6089758883145 sim_pfm: 683.0147233958123
episode: 404 training return: tensor(502.5768, device='cuda:0')
episode: 405 training return: tensor(519.3486, device='cuda:0')
episode: 406 training return: tensor(397.9371, device='cuda:0')
episode: 407 training return: tensor(579.2949, device='cuda:0')
epoch: 102 test_true_pfm: 6164.658174719166 sim_pfm: 675.2925067524193
episode: 408 training return: tensor(521.4778, device='cuda:0')
episode: 409 training return: tensor(487.7090, device='cuda:0')
episode: 410 training return: tensor(527.3768, device='cuda:0')
episode: 411 training return: tensor(542.3424, device='cuda:0')
epoch: 103 test_true_pfm: 6067.954900246691 sim_pfm: 650.4268212565318
episode: 412 training return: tensor(546.2778, device='cuda:0')
episode: 413 training return: tensor(523.8242, device='cuda:0')
episode: 414 training return: tensor(519.9728, device='cuda:0')
episode: 415 training return: tensor(568.1794, device='cuda:0')
epoch: 104 test_true_pfm: 6162.280575230291 sim_pfm: 651.5225290996023
episode: 416 training return: tensor(487.5973, device='cuda:0')
episode: 417 training return: tensor(467.9022, device='cuda:0')
episode: 418 training return: tensor(494.2189, device='cuda:0')
episode: 419 training return: tensor(496.4892, device='cuda:0')
epoch: 105 test_true_pfm: 6213.607976674511 sim_pfm: 663.5823361896328
episode: 420 training return: tensor(450.3109, device='cuda:0')
episode: 421 training return: tensor(549.0370, device='cuda:0')
episode: 422 training return: tensor(522.0830, device='cuda:0')
episode: 423 training return: tensor(582.3404, device='cuda:0')
epoch: 106 test_true_pfm: 6199.237968196452 sim_pfm: 685.5975765109566
episode: 424 training return: tensor(471.2765, device='cuda:0')
episode: 425 training return: tensor(547.3524, device='cuda:0')
episode: 426 training return: tensor(530.2567, device='cuda:0')
episode: 427 training return: tensor(558.5293, device='cuda:0')
epoch: 107 test_true_pfm: 6226.404594207299 sim_pfm: 695.8138531188791
episode: 428 training return: tensor(571.7711, device='cuda:0')
episode: 429 training return: tensor(422.8997, device='cuda:0')
episode: 430 training return: tensor(541.4200, device='cuda:0')
episode: 431 training return: tensor(489.9298, device='cuda:0')
epoch: 108 test_true_pfm: 6088.48202283451 sim_pfm: 671.7504701598858
episode: 432 training return: tensor(588.2675, device='cuda:0')
episode: 433 training return: tensor(529.1401, device='cuda:0')
episode: 434 training return: tensor(528.7842, device='cuda:0')
episode: 435 training return: tensor(538.7739, device='cuda:0')
epoch: 109 test_true_pfm: 6179.452684874304 sim_pfm: 715.7932363040551
episode: 436 training return: tensor(567.6497, device='cuda:0')
episode: 437 training return: tensor(478.9857, device='cuda:0')
episode: 438 training return: tensor(525.2030, device='cuda:0')
episode: 439 training return: tensor(622.2650, device='cuda:0')
epoch: 110 test_true_pfm: 6190.684253025675 sim_pfm: 656.9111654421237
episode: 440 training return: tensor(501.2939, device='cuda:0')
episode: 441 training return: tensor(383.6958, device='cuda:0')
episode: 442 training return: tensor(567.1085, device='cuda:0')
episode: 443 training return: tensor(521.9617, device='cuda:0')
epoch: 111 test_true_pfm: 6178.306748780262 sim_pfm: 696.7978699710608
episode: 444 training return: tensor(511.5746, device='cuda:0')
episode: 445 training return: tensor(496.9407, device='cuda:0')
episode: 446 training return: tensor(579.1385, device='cuda:0')
episode: 447 training return: tensor(511.6558, device='cuda:0')
epoch: 112 test_true_pfm: 6169.585652924915 sim_pfm: 676.5810889545828
episode: 448 training return: tensor(503.0279, device='cuda:0')
episode: 449 training return: tensor(530.2101, device='cuda:0')
episode: 450 training return: tensor(504.5345, device='cuda:0')
episode: 451 training return: tensor(557.9700, device='cuda:0')
epoch: 113 test_true_pfm: 6181.325578840307 sim_pfm: 695.8392203611633
episode: 452 training return: tensor(514.8943, device='cuda:0')
episode: 453 training return: tensor(545.8989, device='cuda:0')
episode: 454 training return: tensor(533.5856, device='cuda:0')
episode: 455 training return: tensor(567.8101, device='cuda:0')
epoch: 114 test_true_pfm: 6224.581995138339 sim_pfm: 703.9091903618149
episode: 456 training return: tensor(516.1467, device='cuda:0')
episode: 457 training return: tensor(601.4340, device='cuda:0')
episode: 458 training return: tensor(497.5988, device='cuda:0')
episode: 459 training return: tensor(580.2092, device='cuda:0')
epoch: 115 test_true_pfm: 6175.965653904673 sim_pfm: 670.3441128963992
episode: 460 training return: tensor(530.0677, device='cuda:0')
episode: 461 training return: tensor(566.2131, device='cuda:0')
episode: 462 training return: tensor(545.5485, device='cuda:0')
episode: 463 training return: tensor(516.3176, device='cuda:0')
epoch: 116 test_true_pfm: 6224.381330616452 sim_pfm: 698.650978362925
episode: 464 training return: tensor(564.8879, device='cuda:0')
episode: 465 training return: tensor(489.7867, device='cuda:0')
episode: 466 training return: tensor(552.2836, device='cuda:0')
episode: 467 training return: tensor(505.4333, device='cuda:0')
epoch: 117 test_true_pfm: 6185.2413937845295 sim_pfm: 676.4739449937673
episode: 468 training return: tensor(523.4556, device='cuda:0')
episode: 469 training return: tensor(496.3012, device='cuda:0')
episode: 470 training return: tensor(490.2805, device='cuda:0')
episode: 471 training return: tensor(530.9050, device='cuda:0')
epoch: 118 test_true_pfm: 6195.532697375605 sim_pfm: 691.1342183407396
episode: 472 training return: tensor(578.2966, device='cuda:0')
episode: 473 training return: tensor(593.2482, device='cuda:0')
episode: 474 training return: tensor(559.9714, device='cuda:0')
episode: 475 training return: tensor(582.1774, device='cuda:0')
epoch: 119 test_true_pfm: 6202.326187926067 sim_pfm: 671.0802426524557
episode: 476 training return: tensor(592.8714, device='cuda:0')
episode: 477 training return: tensor(501.7624, device='cuda:0')
episode: 478 training return: tensor(541.4408, device='cuda:0')
episode: 479 training return: tensor(604.3621, device='cuda:0')
epoch: 120 test_true_pfm: 6208.811196214527 sim_pfm: 694.957949239877
episode: 480 training return: tensor(557.8061, device='cuda:0')
episode: 481 training return: tensor(494.3253, device='cuda:0')
episode: 482 training return: tensor(540.1343, device='cuda:0')
episode: 483 training return: tensor(534.9277, device='cuda:0')
epoch: 121 test_true_pfm: 6203.839792051013 sim_pfm: 706.117768496663
episode: 484 training return: tensor(573.4420, device='cuda:0')
episode: 485 training return: tensor(574.6425, device='cuda:0')
episode: 486 training return: tensor(500.1884, device='cuda:0')
episode: 487 training return: tensor(562.5245, device='cuda:0')
epoch: 122 test_true_pfm: 6243.423732081962 sim_pfm: 742.0018777595833
episode: 488 training return: tensor(564.5085, device='cuda:0')
episode: 489 training return: tensor(575.4976, device='cuda:0')
episode: 490 training return: tensor(508.5213, device='cuda:0')
episode: 491 training return: tensor(607.0925, device='cuda:0')
epoch: 123 test_true_pfm: 6229.005779884655 sim_pfm: 653.551018640787
episode: 492 training return: tensor(555.1310, device='cuda:0')
episode: 493 training return: tensor(500.4165, device='cuda:0')
episode: 494 training return: tensor(503.1100, device='cuda:0')
episode: 495 training return: tensor(517.8492, device='cuda:0')
epoch: 124 test_true_pfm: 6176.151356695479 sim_pfm: 710.6521606344031
episode: 496 training return: tensor(555.9149, device='cuda:0')
episode: 497 training return: tensor(595.7492, device='cuda:0')
episode: 498 training return: tensor(527.5674, device='cuda:0')
episode: 499 training return: tensor(556.5051, device='cuda:0')
epoch: 125 test_true_pfm: 6242.393744421449 sim_pfm: 697.3691531061195
episode: 500 training return: tensor(544.7935, device='cuda:0')
episode: 501 training return: tensor(560.7916, device='cuda:0')
episode: 502 training return: tensor(538.7610, device='cuda:0')
episode: 503 training return: tensor(600.7673, device='cuda:0')
epoch: 126 test_true_pfm: 6195.968402651866 sim_pfm: 702.6257505918232
episode: 504 training return: tensor(576.7866, device='cuda:0')
episode: 505 training return: tensor(511.5475, device='cuda:0')
episode: 506 training return: tensor(538.0594, device='cuda:0')
episode: 507 training return: tensor(561.0320, device='cuda:0')
epoch: 127 test_true_pfm: 6201.468480583499 sim_pfm: 707.8248393148533
episode: 508 training return: tensor(476.3116, device='cuda:0')
episode: 509 training return: tensor(551.0005, device='cuda:0')
episode: 510 training return: tensor(596.2131, device='cuda:0')
episode: 511 training return: tensor(516.0975, device='cuda:0')
epoch: 128 test_true_pfm: 6138.757909775835 sim_pfm: 717.3058868550773
episode: 512 training return: tensor(611.5958, device='cuda:0')
episode: 513 training return: tensor(550.6166, device='cuda:0')
episode: 514 training return: tensor(561.3143, device='cuda:0')
episode: 515 training return: tensor(575.5413, device='cuda:0')
epoch: 129 test_true_pfm: 6174.363102769545 sim_pfm: 702.820358168489
episode: 516 training return: tensor(578.5326, device='cuda:0')
episode: 517 training return: tensor(567.6881, device='cuda:0')
episode: 518 training return: tensor(598.1860, device='cuda:0')
episode: 519 training return: tensor(564.6449, device='cuda:0')
epoch: 130 test_true_pfm: 6132.576428403121 sim_pfm: 676.497565810491
episode: 520 training return: tensor(571.0670, device='cuda:0')
episode: 521 training return: tensor(531.4925, device='cuda:0')
episode: 522 training return: tensor(587.3525, device='cuda:0')
episode: 523 training return: tensor(550.8107, device='cuda:0')
epoch: 131 test_true_pfm: 6177.099179652784 sim_pfm: 731.5826976307047
episode: 524 training return: tensor(543.2729, device='cuda:0')
episode: 525 training return: tensor(554.7568, device='cuda:0')
episode: 526 training return: tensor(552.4703, device='cuda:0')
episode: 527 training return: tensor(564.1600, device='cuda:0')
epoch: 132 test_true_pfm: 6270.439978910971 sim_pfm: 713.0976334163375
episode: 528 training return: tensor(577.6775, device='cuda:0')
episode: 529 training return: tensor(545.3160, device='cuda:0')
episode: 530 training return: tensor(521.7818, device='cuda:0')
episode: 531 training return: tensor(492.2031, device='cuda:0')
epoch: 133 test_true_pfm: 6269.1790821162895 sim_pfm: 709.2803664247234
episode: 532 training return: tensor(535.3481, device='cuda:0')
episode: 533 training return: tensor(551.2061, device='cuda:0')
episode: 534 training return: tensor(587.6169, device='cuda:0')
episode: 535 training return: tensor(511.6132, device='cuda:0')
epoch: 134 test_true_pfm: 6230.440600315268 sim_pfm: 700.547863454558
episode: 536 training return: tensor(566.3450, device='cuda:0')
episode: 537 training return: tensor(511.7134, device='cuda:0')
episode: 538 training return: tensor(559.9963, device='cuda:0')
episode: 539 training return: tensor(529.5703, device='cuda:0')
epoch: 135 test_true_pfm: 6224.795835434485 sim_pfm: 663.9263885728238
episode: 540 training return: tensor(532.4086, device='cuda:0')
episode: 541 training return: tensor(530.6648, device='cuda:0')
episode: 542 training return: tensor(537.4755, device='cuda:0')
episode: 543 training return: tensor(516.2881, device='cuda:0')
epoch: 136 test_true_pfm: 6236.791854817132 sim_pfm: 675.8771904986352
episode: 544 training return: tensor(613.5284, device='cuda:0')
episode: 545 training return: tensor(584.6588, device='cuda:0')
episode: 546 training return: tensor(580.5012, device='cuda:0')
episode: 547 training return: tensor(483.0508, device='cuda:0')
epoch: 137 test_true_pfm: 6276.768095311738 sim_pfm: 701.0454763427066
episode: 548 training return: tensor(557.8577, device='cuda:0')
episode: 549 training return: tensor(600.5195, device='cuda:0')
episode: 550 training return: tensor(552.5552, device='cuda:0')
episode: 551 training return: tensor(528.1199, device='cuda:0')
epoch: 138 test_true_pfm: 6263.335994813188 sim_pfm: 744.8226183360675
episode: 552 training return: tensor(561.0514, device='cuda:0')
episode: 553 training return: tensor(518.6943, device='cuda:0')
episode: 554 training return: tensor(509.6734, device='cuda:0')
episode: 555 training return: tensor(579.6218, device='cuda:0')
epoch: 139 test_true_pfm: 6253.138534966222 sim_pfm: 668.3327394488733
episode: 556 training return: tensor(591.0092, device='cuda:0')
episode: 557 training return: tensor(633.2845, device='cuda:0')
episode: 558 training return: tensor(569.1614, device='cuda:0')
episode: 559 training return: tensor(515.1949, device='cuda:0')
epoch: 140 test_true_pfm: 6241.884861928247 sim_pfm: 681.1089839848379
episode: 560 training return: tensor(541.5751, device='cuda:0')
episode: 561 training return: tensor(509.4206, device='cuda:0')
episode: 562 training return: tensor(564.1899, device='cuda:0')
episode: 563 training return: tensor(587.1779, device='cuda:0')
epoch: 141 test_true_pfm: 6199.728838842905 sim_pfm: 715.6564054247768
episode: 564 training return: tensor(606.9295, device='cuda:0')
episode: 565 training return: tensor(605.2836, device='cuda:0')
episode: 566 training return: tensor(528.5934, device='cuda:0')
episode: 567 training return: tensor(538.6359, device='cuda:0')
epoch: 142 test_true_pfm: 6335.212843097198 sim_pfm: 731.6108295147618
episode: 568 training return: tensor(532.9505, device='cuda:0')
episode: 569 training return: tensor(583.2070, device='cuda:0')
episode: 570 training return: tensor(548.3532, device='cuda:0')
episode: 571 training return: tensor(541.4877, device='cuda:0')
epoch: 143 test_true_pfm: 6253.159668663419 sim_pfm: 743.0285675611036
episode: 572 training return: tensor(581.2788, device='cuda:0')
episode: 573 training return: tensor(558.3266, device='cuda:0')
episode: 574 training return: tensor(498.9260, device='cuda:0')
episode: 575 training return: tensor(586.5781, device='cuda:0')
epoch: 144 test_true_pfm: 6314.856092703777 sim_pfm: 681.6630314250942
episode: 576 training return: tensor(601.5643, device='cuda:0')
episode: 577 training return: tensor(515.0400, device='cuda:0')
episode: 578 training return: tensor(618.4513, device='cuda:0')
episode: 579 training return: tensor(565.7821, device='cuda:0')
epoch: 145 test_true_pfm: 6248.7714514457775 sim_pfm: 715.5372212733297
episode: 580 training return: tensor(512.4125, device='cuda:0')
episode: 581 training return: tensor(586.6929, device='cuda:0')
episode: 582 training return: tensor(568.7130, device='cuda:0')
episode: 583 training return: tensor(498.1886, device='cuda:0')
epoch: 146 test_true_pfm: 6290.6528366832135 sim_pfm: 732.9370867732214
episode: 584 training return: tensor(505.6189, device='cuda:0')
episode: 585 training return: tensor(561.2954, device='cuda:0')
episode: 586 training return: tensor(628.8603, device='cuda:0')
episode: 587 training return: tensor(552.4490, device='cuda:0')
epoch: 147 test_true_pfm: 6239.947166640144 sim_pfm: 688.9840879831463
episode: 588 training return: tensor(607.0253, device='cuda:0')
episode: 589 training return: tensor(543.9342, device='cuda:0')
episode: 590 training return: tensor(544.3026, device='cuda:0')
episode: 591 training return: tensor(552.4801, device='cuda:0')
epoch: 148 test_true_pfm: 6249.118357274855 sim_pfm: 721.0949142025784
episode: 592 training return: tensor(574.6089, device='cuda:0')
episode: 593 training return: tensor(603.0275, device='cuda:0')
episode: 594 training return: tensor(556.5366, device='cuda:0')
episode: 595 training return: tensor(584.6265, device='cuda:0')
epoch: 149 test_true_pfm: 6267.4058305984945 sim_pfm: 735.9727408839777
episode: 596 training return: tensor(499.5105, device='cuda:0')
episode: 597 training return: tensor(579.4856, device='cuda:0')
episode: 598 training return: tensor(567.8248, device='cuda:0')
episode: 599 training return: tensor(606.8175, device='cuda:0')
epoch: 150 test_true_pfm: 6272.21001557316 sim_pfm: 744.5016506412843
