['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'medium', '--seed', '3', '--data', '100000', '--regu', '0.3']
5101.254421714855
episode: 0 training return: tensor(-280.6603, device='cuda:0')
episode: 1 training return: tensor(-371.0206, device='cuda:0')
episode: 2 training return: tensor(-376.8325, device='cuda:0')
episode: 3 training return: tensor(-380.0704, device='cuda:0')
epoch: 1 test_true_pfm: 4876.1107052488105 sim_pfm: -201.08291145278295
episode: 4 training return: tensor(-357.0624, device='cuda:0')
episode: 5 training return: tensor(-409.2397, device='cuda:0')
episode: 6 training return: tensor(-380.1613, device='cuda:0')
episode: 7 training return: tensor(-426.6439, device='cuda:0')
epoch: 2 test_true_pfm: 3451.9688967249654 sim_pfm: -224.1099643416528
episode: 8 training return: tensor(-330.4494, device='cuda:0')
episode: 9 training return: tensor(-374.5388, device='cuda:0')
episode: 10 training return: tensor(-361.4961, device='cuda:0')
episode: 11 training return: tensor(-149.0621, device='cuda:0')
epoch: 3 test_true_pfm: 4852.930870080512 sim_pfm: -298.72291446090094
episode: 12 training return: tensor(-420.9755, device='cuda:0')
episode: 13 training return: tensor(-426.5476, device='cuda:0')
episode: 14 training return: tensor(-329.1945, device='cuda:0')
episode: 15 training return: tensor(-239.2493, device='cuda:0')
epoch: 4 test_true_pfm: 5196.107007799921 sim_pfm: -51.392113030112036
episode: 16 training return: tensor(-209.3007, device='cuda:0')
episode: 17 training return: tensor(-677.9570, device='cuda:0')
episode: 18 training return: tensor(-169.5644, device='cuda:0')
episode: 19 training return: tensor(-163.0264, device='cuda:0')
epoch: 5 test_true_pfm: 5055.465440670584 sim_pfm: 114.39838544012552
episode: 20 training return: tensor(-239.7057, device='cuda:0')
episode: 21 training return: tensor(-339.4283, device='cuda:0')
episode: 22 training return: tensor(-266.4569, device='cuda:0')
episode: 23 training return: tensor(-215.9074, device='cuda:0')
epoch: 6 test_true_pfm: 5077.598948340238 sim_pfm: -244.5696988683582
episode: 24 training return: tensor(-74.3140, device='cuda:0')
episode: 25 training return: tensor(-269.6976, device='cuda:0')
episode: 26 training return: tensor(-175.9025, device='cuda:0')
episode: 27 training return: tensor(-136.1353, device='cuda:0')
epoch: 7 test_true_pfm: 5073.696352863815 sim_pfm: 83.68850972727523
episode: 28 training return: tensor(-271.4890, device='cuda:0')
episode: 29 training return: tensor(-190.1626, device='cuda:0')
episode: 30 training return: tensor(-117.1777, device='cuda:0')
episode: 31 training return: tensor(-116.6431, device='cuda:0')
epoch: 8 test_true_pfm: 5148.027649759076 sim_pfm: 107.50368391170439
episode: 32 training return: tensor(-194.3839, device='cuda:0')
episode: 33 training return: tensor(-150.6557, device='cuda:0')
episode: 34 training return: tensor(-30.2535, device='cuda:0')
episode: 35 training return: tensor(-228.4126, device='cuda:0')
epoch: 9 test_true_pfm: 5227.737063579617 sim_pfm: 119.2807820282275
episode: 36 training return: tensor(46.8303, device='cuda:0')
episode: 37 training return: tensor(-150.2561, device='cuda:0')
episode: 38 training return: tensor(-83.6731, device='cuda:0')
episode: 39 training return: tensor(-194.2988, device='cuda:0')
epoch: 10 test_true_pfm: 5234.080365626148 sim_pfm: 158.67280399703304
episode: 40 training return: tensor(-27.2982, device='cuda:0')
episode: 41 training return: tensor(-29.4759, device='cuda:0')
episode: 42 training return: tensor(-4.0867, device='cuda:0')
episode: 43 training return: tensor(-22.5010, device='cuda:0')
epoch: 11 test_true_pfm: 5286.875890124252 sim_pfm: 203.12218634732804
episode: 44 training return: tensor(-54.4412, device='cuda:0')
episode: 45 training return: tensor(-67.8755, device='cuda:0')
episode: 46 training return: tensor(-2.3993, device='cuda:0')
episode: 47 training return: tensor(-7.3810, device='cuda:0')
epoch: 12 test_true_pfm: 5323.965413891955 sim_pfm: 185.4507715451182
episode: 48 training return: tensor(86.9532, device='cuda:0')
episode: 49 training return: tensor(-35.5229, device='cuda:0')
episode: 50 training return: tensor(8.9601, device='cuda:0')
episode: 51 training return: tensor(21.8049, device='cuda:0')
epoch: 13 test_true_pfm: 5419.116514524528 sim_pfm: 231.80716057827036
episode: 52 training return: tensor(125.4889, device='cuda:0')
episode: 53 training return: tensor(60.5224, device='cuda:0')
episode: 54 training return: tensor(82.5698, device='cuda:0')
episode: 55 training return: tensor(15.3129, device='cuda:0')
epoch: 14 test_true_pfm: 5389.310888285112 sim_pfm: 245.31209205419873
episode: 56 training return: tensor(60.7800, device='cuda:0')
episode: 57 training return: tensor(109.0456, device='cuda:0')
episode: 58 training return: tensor(32.2426, device='cuda:0')
episode: 59 training return: tensor(182.0444, device='cuda:0')
epoch: 15 test_true_pfm: 5460.870928448956 sim_pfm: 365.03701493663056
episode: 60 training return: tensor(-25.4819, device='cuda:0')
episode: 61 training return: tensor(-18.1774, device='cuda:0')
episode: 62 training return: tensor(118.1167, device='cuda:0')
episode: 63 training return: tensor(117.4304, device='cuda:0')
epoch: 16 test_true_pfm: 5530.370426531258 sim_pfm: 275.70641665376996
episode: 64 training return: tensor(113.9663, device='cuda:0')
episode: 65 training return: tensor(12.6297, device='cuda:0')
episode: 66 training return: tensor(55.3911, device='cuda:0')
episode: 67 training return: tensor(73.1185, device='cuda:0')
epoch: 17 test_true_pfm: 5602.346094727338 sim_pfm: 266.025065495099
episode: 68 training return: tensor(154.4428, device='cuda:0')
episode: 69 training return: tensor(148.1384, device='cuda:0')
episode: 70 training return: tensor(9.6408, device='cuda:0')
episode: 71 training return: tensor(134.4860, device='cuda:0')
epoch: 18 test_true_pfm: 5614.060383052031 sim_pfm: 336.52603475452634
episode: 72 training return: tensor(175.1267, device='cuda:0')
episode: 73 training return: tensor(98.8300, device='cuda:0')
episode: 74 training return: tensor(65.4352, device='cuda:0')
episode: 75 training return: tensor(118.6666, device='cuda:0')
epoch: 19 test_true_pfm: 5639.536725309387 sim_pfm: 387.4810803831012
episode: 76 training return: tensor(172.6398, device='cuda:0')
episode: 77 training return: tensor(272.3271, device='cuda:0')
episode: 78 training return: tensor(262.5947, device='cuda:0')
episode: 79 training return: tensor(66.2609, device='cuda:0')
epoch: 20 test_true_pfm: 5747.385857949922 sim_pfm: 342.38937861911836
episode: 80 training return: tensor(167.0759, device='cuda:0')
episode: 81 training return: tensor(255.4048, device='cuda:0')
episode: 82 training return: tensor(194.3148, device='cuda:0')
episode: 83 training return: tensor(172.6014, device='cuda:0')
epoch: 21 test_true_pfm: 5598.313059940866 sim_pfm: 347.96237991877325
episode: 84 training return: tensor(195.6873, device='cuda:0')
episode: 85 training return: tensor(35.9484, device='cuda:0')
episode: 86 training return: tensor(216.8853, device='cuda:0')
episode: 87 training return: tensor(129.2990, device='cuda:0')
epoch: 22 test_true_pfm: 5586.090925981926 sim_pfm: 408.3851126898856
episode: 88 training return: tensor(215.6607, device='cuda:0')
episode: 89 training return: tensor(214.7208, device='cuda:0')
episode: 90 training return: tensor(110.8224, device='cuda:0')
episode: 91 training return: tensor(187.4013, device='cuda:0')
epoch: 23 test_true_pfm: 5713.001302389669 sim_pfm: 417.84008410825237
episode: 92 training return: tensor(194.0855, device='cuda:0')
episode: 93 training return: tensor(196.5229, device='cuda:0')
episode: 94 training return: tensor(129.9305, device='cuda:0')
episode: 95 training return: tensor(139.6735, device='cuda:0')
epoch: 24 test_true_pfm: 5792.617554311778 sim_pfm: 399.46037744047743
episode: 96 training return: tensor(158.9538, device='cuda:0')
episode: 97 training return: tensor(150.8274, device='cuda:0')
episode: 98 training return: tensor(289.1387, device='cuda:0')
episode: 99 training return: tensor(231.9534, device='cuda:0')
epoch: 25 test_true_pfm: 5772.4106514456425 sim_pfm: 459.47067234160687
episode: 100 training return: tensor(347.3208, device='cuda:0')
episode: 101 training return: tensor(294.9790, device='cuda:0')
episode: 102 training return: tensor(192.9943, device='cuda:0')
episode: 103 training return: tensor(42.4494, device='cuda:0')
epoch: 26 test_true_pfm: 5705.952310619701 sim_pfm: 454.8403625977323
episode: 104 training return: tensor(249.1182, device='cuda:0')
episode: 105 training return: tensor(304.8030, device='cuda:0')
episode: 106 training return: tensor(263.7638, device='cuda:0')
episode: 107 training return: tensor(287.0960, device='cuda:0')
epoch: 27 test_true_pfm: 5727.848412791315 sim_pfm: 496.56769948800985
episode: 108 training return: tensor(198.5514, device='cuda:0')
episode: 109 training return: tensor(262.1641, device='cuda:0')
episode: 110 training return: tensor(269.9187, device='cuda:0')
episode: 111 training return: tensor(314.6897, device='cuda:0')
epoch: 28 test_true_pfm: 5787.52953359896 sim_pfm: 459.660161400175
episode: 112 training return: tensor(336.1586, device='cuda:0')
episode: 113 training return: tensor(354.9716, device='cuda:0')
episode: 114 training return: tensor(244.7351, device='cuda:0')
episode: 115 training return: tensor(279.6584, device='cuda:0')
epoch: 29 test_true_pfm: 5738.077229503899 sim_pfm: 441.547547123783
episode: 116 training return: tensor(258.0781, device='cuda:0')
episode: 117 training return: tensor(298.8222, device='cuda:0')
episode: 118 training return: tensor(295.2855, device='cuda:0')
episode: 119 training return: tensor(305.0059, device='cuda:0')
epoch: 30 test_true_pfm: 4353.615459177753 sim_pfm: 483.43437191391905
episode: 120 training return: tensor(288.2806, device='cuda:0')
episode: 121 training return: tensor(250.4516, device='cuda:0')
episode: 122 training return: tensor(324.6696, device='cuda:0')
episode: 123 training return: tensor(266.9088, device='cuda:0')
epoch: 31 test_true_pfm: 5890.043898688234 sim_pfm: 411.77658980937366
episode: 124 training return: tensor(363.8743, device='cuda:0')
episode: 125 training return: tensor(343.2452, device='cuda:0')
episode: 126 training return: tensor(310.8976, device='cuda:0')
episode: 127 training return: tensor(326.2217, device='cuda:0')
epoch: 32 test_true_pfm: 5894.286547387739 sim_pfm: 475.30076284925843
episode: 128 training return: tensor(313.3897, device='cuda:0')
episode: 129 training return: tensor(366.7502, device='cuda:0')
episode: 130 training return: tensor(409.6633, device='cuda:0')
episode: 131 training return: tensor(368.7006, device='cuda:0')
epoch: 33 test_true_pfm: 5708.345252473094 sim_pfm: 441.51265838721883
episode: 132 training return: tensor(392.3625, device='cuda:0')
episode: 133 training return: tensor(309.7496, device='cuda:0')
episode: 134 training return: tensor(355.9570, device='cuda:0')
episode: 135 training return: tensor(367.1256, device='cuda:0')
epoch: 34 test_true_pfm: 5873.263597966686 sim_pfm: 491.95006064761156
episode: 136 training return: tensor(253.7222, device='cuda:0')
episode: 137 training return: tensor(291.8331, device='cuda:0')
episode: 138 training return: tensor(413.4256, device='cuda:0')
episode: 139 training return: tensor(339.6498, device='cuda:0')
epoch: 35 test_true_pfm: 5857.030696106381 sim_pfm: 565.2886447566367
episode: 140 training return: tensor(372.7824, device='cuda:0')
episode: 141 training return: tensor(212.5099, device='cuda:0')
episode: 142 training return: tensor(341.9933, device='cuda:0')
episode: 143 training return: tensor(346.2550, device='cuda:0')
epoch: 36 test_true_pfm: 5861.912444278311 sim_pfm: 522.0402196421637
episode: 144 training return: tensor(297.2711, device='cuda:0')
episode: 145 training return: tensor(351.8828, device='cuda:0')
episode: 146 training return: tensor(348.7672, device='cuda:0')
episode: 147 training return: tensor(362.9457, device='cuda:0')
epoch: 37 test_true_pfm: 5978.916622870306 sim_pfm: 534.1964027229891
episode: 148 training return: tensor(383.4724, device='cuda:0')
episode: 149 training return: tensor(364.4547, device='cuda:0')
episode: 150 training return: tensor(382.9911, device='cuda:0')
episode: 151 training return: tensor(352.5828, device='cuda:0')
epoch: 38 test_true_pfm: 6019.31927509201 sim_pfm: 609.0648373772079
episode: 152 training return: tensor(350.6619, device='cuda:0')
episode: 153 training return: tensor(304.2520, device='cuda:0')
episode: 154 training return: tensor(383.5356, device='cuda:0')
episode: 155 training return: tensor(363.4829, device='cuda:0')
epoch: 39 test_true_pfm: 5896.370516296886 sim_pfm: 506.88192790459533
episode: 156 training return: tensor(306.6869, device='cuda:0')
episode: 157 training return: tensor(451.8665, device='cuda:0')
episode: 158 training return: tensor(373.4085, device='cuda:0')
episode: 159 training return: tensor(356.9978, device='cuda:0')
epoch: 40 test_true_pfm: 5945.3598690321705 sim_pfm: 574.6962548790034
episode: 160 training return: tensor(376.9959, device='cuda:0')
episode: 161 training return: tensor(399.2908, device='cuda:0')
episode: 162 training return: tensor(285.4748, device='cuda:0')
episode: 163 training return: tensor(347.0063, device='cuda:0')
epoch: 41 test_true_pfm: 5950.206871651302 sim_pfm: 564.5474898805842
episode: 164 training return: tensor(443.0594, device='cuda:0')
episode: 165 training return: tensor(388.3617, device='cuda:0')
episode: 166 training return: tensor(395.0804, device='cuda:0')
episode: 167 training return: tensor(400.0081, device='cuda:0')
epoch: 42 test_true_pfm: 5982.34053844625 sim_pfm: 581.5730979624786
episode: 168 training return: tensor(441.9668, device='cuda:0')
episode: 169 training return: tensor(349.2601, device='cuda:0')
episode: 170 training return: tensor(464.1461, device='cuda:0')
episode: 171 training return: tensor(421.5832, device='cuda:0')
epoch: 43 test_true_pfm: 5942.548251300806 sim_pfm: 574.1680374334004
episode: 172 training return: tensor(440.3037, device='cuda:0')
episode: 173 training return: tensor(404.3718, device='cuda:0')
episode: 174 training return: tensor(368.5832, device='cuda:0')
episode: 175 training return: tensor(347.9854, device='cuda:0')
epoch: 44 test_true_pfm: 6008.661232663383 sim_pfm: 600.7121760011263
episode: 176 training return: tensor(431.8447, device='cuda:0')
episode: 177 training return: tensor(438.8080, device='cuda:0')
episode: 178 training return: tensor(456.1548, device='cuda:0')
episode: 179 training return: tensor(388.0153, device='cuda:0')
epoch: 45 test_true_pfm: 5968.783231219569 sim_pfm: 551.5751544317076
episode: 180 training return: tensor(314.6201, device='cuda:0')
episode: 181 training return: tensor(443.0894, device='cuda:0')
episode: 182 training return: tensor(472.2676, device='cuda:0')
episode: 183 training return: tensor(412.0588, device='cuda:0')
epoch: 46 test_true_pfm: 6064.663020206654 sim_pfm: 625.3913846299014
episode: 184 training return: tensor(396.1883, device='cuda:0')
episode: 185 training return: tensor(360.9792, device='cuda:0')
episode: 186 training return: tensor(404.3483, device='cuda:0')
episode: 187 training return: tensor(461.0320, device='cuda:0')
epoch: 47 test_true_pfm: 6072.027193103783 sim_pfm: 644.9922005554157
episode: 188 training return: tensor(420.6263, device='cuda:0')
episode: 189 training return: tensor(404.8720, device='cuda:0')
episode: 190 training return: tensor(415.9785, device='cuda:0')
episode: 191 training return: tensor(438.3180, device='cuda:0')
epoch: 48 test_true_pfm: 5954.959197928533 sim_pfm: 584.0842693867744
episode: 192 training return: tensor(440.6954, device='cuda:0')
episode: 193 training return: tensor(451.7625, device='cuda:0')
episode: 194 training return: tensor(452.2020, device='cuda:0')
episode: 195 training return: tensor(417.2334, device='cuda:0')
epoch: 49 test_true_pfm: 6096.2694704196065 sim_pfm: 613.4092453979732
episode: 196 training return: tensor(454.9509, device='cuda:0')
episode: 197 training return: tensor(482.1724, device='cuda:0')
episode: 198 training return: tensor(461.6015, device='cuda:0')
episode: 199 training return: tensor(401.3308, device='cuda:0')
epoch: 50 test_true_pfm: 5988.024734007186 sim_pfm: 551.7116793038634
episode: 200 training return: tensor(411.0156, device='cuda:0')
episode: 201 training return: tensor(458.7325, device='cuda:0')
episode: 202 training return: tensor(418.2616, device='cuda:0')
episode: 203 training return: tensor(531.4666, device='cuda:0')
epoch: 51 test_true_pfm: 6003.573973671769 sim_pfm: 615.0486397042793
episode: 204 training return: tensor(549.0325, device='cuda:0')
episode: 205 training return: tensor(502.5588, device='cuda:0')
episode: 206 training return: tensor(501.2157, device='cuda:0')
episode: 207 training return: tensor(460.1897, device='cuda:0')
epoch: 52 test_true_pfm: 5979.959951529997 sim_pfm: 628.3946353306916
episode: 208 training return: tensor(461.4904, device='cuda:0')
episode: 209 training return: tensor(425.5371, device='cuda:0')
episode: 210 training return: tensor(482.5521, device='cuda:0')
episode: 211 training return: tensor(433.0984, device='cuda:0')
epoch: 53 test_true_pfm: 6075.410252499733 sim_pfm: 622.0031605450591
episode: 212 training return: tensor(420.6017, device='cuda:0')
episode: 213 training return: tensor(528.5510, device='cuda:0')
episode: 214 training return: tensor(433.3214, device='cuda:0')
episode: 215 training return: tensor(504.9521, device='cuda:0')
epoch: 54 test_true_pfm: 6083.3006443677 sim_pfm: 632.7725202063448
episode: 216 training return: tensor(400.5703, device='cuda:0')
episode: 217 training return: tensor(404.8248, device='cuda:0')
episode: 218 training return: tensor(438.6155, device='cuda:0')
episode: 219 training return: tensor(510.2833, device='cuda:0')
epoch: 55 test_true_pfm: 6087.685856677302 sim_pfm: 627.8576991018296
episode: 220 training return: tensor(407.2480, device='cuda:0')
episode: 221 training return: tensor(472.6111, device='cuda:0')
episode: 222 training return: tensor(476.3244, device='cuda:0')
episode: 223 training return: tensor(416.6674, device='cuda:0')
epoch: 56 test_true_pfm: 6024.085038974547 sim_pfm: 635.2728578953538
episode: 224 training return: tensor(404.6697, device='cuda:0')
episode: 225 training return: tensor(470.4327, device='cuda:0')
episode: 226 training return: tensor(519.1541, device='cuda:0')
episode: 227 training return: tensor(440.8764, device='cuda:0')
epoch: 57 test_true_pfm: 6117.907868842952 sim_pfm: 637.5749180092243
episode: 228 training return: tensor(552.9803, device='cuda:0')
episode: 229 training return: tensor(433.5530, device='cuda:0')
episode: 230 training return: tensor(501.6614, device='cuda:0')
episode: 231 training return: tensor(533.0619, device='cuda:0')
epoch: 58 test_true_pfm: 5898.048454968014 sim_pfm: 552.8553417727817
episode: 232 training return: tensor(432.2202, device='cuda:0')
episode: 233 training return: tensor(407.4862, device='cuda:0')
episode: 234 training return: tensor(530.0696, device='cuda:0')
episode: 235 training return: tensor(451.5242, device='cuda:0')
epoch: 59 test_true_pfm: 5984.735152750135 sim_pfm: 625.0651653820338
episode: 236 training return: tensor(514.0324, device='cuda:0')
episode: 237 training return: tensor(425.4934, device='cuda:0')
episode: 238 training return: tensor(505.8311, device='cuda:0')
episode: 239 training return: tensor(408.9873, device='cuda:0')
epoch: 60 test_true_pfm: 6156.654644944858 sim_pfm: 639.0265351237418
episode: 240 training return: tensor(462.5941, device='cuda:0')
episode: 241 training return: tensor(508.0934, device='cuda:0')
episode: 242 training return: tensor(475.8518, device='cuda:0')
episode: 243 training return: tensor(549.4865, device='cuda:0')
epoch: 61 test_true_pfm: 6101.876540004766 sim_pfm: 638.3971275178288
episode: 244 training return: tensor(532.3557, device='cuda:0')
episode: 245 training return: tensor(461.2130, device='cuda:0')
episode: 246 training return: tensor(470.2141, device='cuda:0')
episode: 247 training return: tensor(512.9116, device='cuda:0')
epoch: 62 test_true_pfm: 6068.36646364602 sim_pfm: 613.3223844476355
episode: 248 training return: tensor(445.7274, device='cuda:0')
episode: 249 training return: tensor(507.2267, device='cuda:0')
episode: 250 training return: tensor(503.4615, device='cuda:0')
episode: 251 training return: tensor(393.3724, device='cuda:0')
epoch: 63 test_true_pfm: 6142.475402233719 sim_pfm: 659.0837205393667
episode: 252 training return: tensor(465.6016, device='cuda:0')
episode: 253 training return: tensor(511.0618, device='cuda:0')
episode: 254 training return: tensor(445.7817, device='cuda:0')
episode: 255 training return: tensor(524.1155, device='cuda:0')
epoch: 64 test_true_pfm: 6082.5121818189555 sim_pfm: 631.7695679356888
episode: 256 training return: tensor(574.8812, device='cuda:0')
episode: 257 training return: tensor(531.5697, device='cuda:0')
episode: 258 training return: tensor(478.3528, device='cuda:0')
episode: 259 training return: tensor(513.6907, device='cuda:0')
epoch: 65 test_true_pfm: 6134.547917660156 sim_pfm: 634.4258589219147
episode: 260 training return: tensor(507.2827, device='cuda:0')
episode: 261 training return: tensor(523.5383, device='cuda:0')
episode: 262 training return: tensor(536.2319, device='cuda:0')
episode: 263 training return: tensor(500.0374, device='cuda:0')
epoch: 66 test_true_pfm: 6155.6624824615 sim_pfm: 656.5861582750609
episode: 264 training return: tensor(516.1066, device='cuda:0')
episode: 265 training return: tensor(476.9139, device='cuda:0')
episode: 266 training return: tensor(484.9704, device='cuda:0')
episode: 267 training return: tensor(501.9972, device='cuda:0')
epoch: 67 test_true_pfm: 6177.844182452835 sim_pfm: 699.4019958617477
episode: 268 training return: tensor(485.0290, device='cuda:0')
episode: 269 training return: tensor(417.2542, device='cuda:0')
episode: 270 training return: tensor(477.8490, device='cuda:0')
episode: 271 training return: tensor(432.2683, device='cuda:0')
epoch: 68 test_true_pfm: 6238.9576644486215 sim_pfm: 710.3677349536835
episode: 272 training return: tensor(507.7228, device='cuda:0')
episode: 273 training return: tensor(552.6636, device='cuda:0')
episode: 274 training return: tensor(434.0221, device='cuda:0')
episode: 275 training return: tensor(534.4196, device='cuda:0')
epoch: 69 test_true_pfm: 6150.755485905916 sim_pfm: 651.1992897324575
episode: 276 training return: tensor(538.5861, device='cuda:0')
episode: 277 training return: tensor(478.2147, device='cuda:0')
episode: 278 training return: tensor(465.7307, device='cuda:0')
episode: 279 training return: tensor(504.2774, device='cuda:0')
epoch: 70 test_true_pfm: 6178.074129375095 sim_pfm: 671.3189296758113
episode: 280 training return: tensor(475.2325, device='cuda:0')
episode: 281 training return: tensor(434.8952, device='cuda:0')
episode: 282 training return: tensor(523.0234, device='cuda:0')
episode: 283 training return: tensor(548.3516, device='cuda:0')
epoch: 71 test_true_pfm: 6168.658686282472 sim_pfm: 685.2267448451215
episode: 284 training return: tensor(465.5682, device='cuda:0')
episode: 285 training return: tensor(464.1484, device='cuda:0')
episode: 286 training return: tensor(474.5907, device='cuda:0')
episode: 287 training return: tensor(515.9114, device='cuda:0')
epoch: 72 test_true_pfm: 6240.652572924681 sim_pfm: 685.2384051554836
episode: 288 training return: tensor(550.5218, device='cuda:0')
episode: 289 training return: tensor(509.0980, device='cuda:0')
episode: 290 training return: tensor(561.6151, device='cuda:0')
episode: 291 training return: tensor(535.3002, device='cuda:0')
epoch: 73 test_true_pfm: 6180.877741287575 sim_pfm: 718.0361416654972
episode: 292 training return: tensor(521.3441, device='cuda:0')
episode: 293 training return: tensor(463.4471, device='cuda:0')
episode: 294 training return: tensor(378.8391, device='cuda:0')
episode: 295 training return: tensor(473.9030, device='cuda:0')
epoch: 74 test_true_pfm: 6151.1722551940065 sim_pfm: 671.7955161334636
episode: 296 training return: tensor(525.5945, device='cuda:0')
episode: 297 training return: tensor(525.4091, device='cuda:0')
episode: 298 training return: tensor(509.1196, device='cuda:0')
episode: 299 training return: tensor(518.1407, device='cuda:0')
epoch: 75 test_true_pfm: 6195.628577560725 sim_pfm: 685.6585023715161
episode: 300 training return: tensor(507.7629, device='cuda:0')
episode: 301 training return: tensor(520.5795, device='cuda:0')
episode: 302 training return: tensor(425.5583, device='cuda:0')
episode: 303 training return: tensor(523.0486, device='cuda:0')
epoch: 76 test_true_pfm: 6144.601794552808 sim_pfm: 653.6923286409583
episode: 304 training return: tensor(507.9583, device='cuda:0')
episode: 305 training return: tensor(524.4641, device='cuda:0')
episode: 306 training return: tensor(481.0466, device='cuda:0')
episode: 307 training return: tensor(486.9826, device='cuda:0')
epoch: 77 test_true_pfm: 6207.362603285826 sim_pfm: 700.8363666294221
episode: 308 training return: tensor(535.2287, device='cuda:0')
episode: 309 training return: tensor(503.5874, device='cuda:0')
episode: 310 training return: tensor(461.3070, device='cuda:0')
episode: 311 training return: tensor(482.4160, device='cuda:0')
epoch: 78 test_true_pfm: 6128.396402152506 sim_pfm: 596.7165082655459
episode: 312 training return: tensor(536.9874, device='cuda:0')
episode: 313 training return: tensor(441.2659, device='cuda:0')
episode: 314 training return: tensor(546.6747, device='cuda:0')
episode: 315 training return: tensor(517.2192, device='cuda:0')
epoch: 79 test_true_pfm: 6144.970657078851 sim_pfm: 690.1075861834688
episode: 316 training return: tensor(516.1411, device='cuda:0')
episode: 317 training return: tensor(578.2302, device='cuda:0')
episode: 318 training return: tensor(558.1738, device='cuda:0')
episode: 319 training return: tensor(532.5732, device='cuda:0')
epoch: 80 test_true_pfm: 6185.261003320971 sim_pfm: 658.6133103198372
episode: 320 training return: tensor(439.6461, device='cuda:0')
episode: 321 training return: tensor(532.5104, device='cuda:0')
episode: 322 training return: tensor(519.8110, device='cuda:0')
episode: 323 training return: tensor(594.0516, device='cuda:0')
epoch: 81 test_true_pfm: 6302.802313614441 sim_pfm: 719.5965341347037
episode: 324 training return: tensor(496.4096, device='cuda:0')
episode: 325 training return: tensor(520.5790, device='cuda:0')
episode: 326 training return: tensor(505.1204, device='cuda:0')
episode: 327 training return: tensor(555.1769, device='cuda:0')
epoch: 82 test_true_pfm: 6245.657096647698 sim_pfm: 689.8520373286059
episode: 328 training return: tensor(513.1190, device='cuda:0')
episode: 329 training return: tensor(558.5625, device='cuda:0')
episode: 330 training return: tensor(536.3815, device='cuda:0')
episode: 331 training return: tensor(578.5773, device='cuda:0')
epoch: 83 test_true_pfm: 6246.579145751445 sim_pfm: 692.5676355598649
episode: 332 training return: tensor(490.7000, device='cuda:0')
episode: 333 training return: tensor(577.3752, device='cuda:0')
episode: 334 training return: tensor(577.9149, device='cuda:0')
episode: 335 training return: tensor(558.4103, device='cuda:0')
epoch: 84 test_true_pfm: 6103.703962513307 sim_pfm: 659.4401395228537
episode: 336 training return: tensor(501.7501, device='cuda:0')
episode: 337 training return: tensor(581.4804, device='cuda:0')
episode: 338 training return: tensor(502.0733, device='cuda:0')
episode: 339 training return: tensor(523.0534, device='cuda:0')
epoch: 85 test_true_pfm: 6195.967355385827 sim_pfm: 683.2698757566395
episode: 340 training return: tensor(544.0694, device='cuda:0')
episode: 341 training return: tensor(562.4055, device='cuda:0')
episode: 342 training return: tensor(504.3961, device='cuda:0')
episode: 343 training return: tensor(498.6249, device='cuda:0')
epoch: 86 test_true_pfm: 6160.16560495428 sim_pfm: 671.3559600429726
episode: 344 training return: tensor(451.6661, device='cuda:0')
episode: 345 training return: tensor(587.0887, device='cuda:0')
episode: 346 training return: tensor(506.5522, device='cuda:0')
episode: 347 training return: tensor(553.6074, device='cuda:0')
epoch: 87 test_true_pfm: 6174.918885403188 sim_pfm: 697.5365434866011
episode: 348 training return: tensor(452.5432, device='cuda:0')
episode: 349 training return: tensor(586.1179, device='cuda:0')
episode: 350 training return: tensor(544.6406, device='cuda:0')
episode: 351 training return: tensor(540.8585, device='cuda:0')
epoch: 88 test_true_pfm: 6273.480865555998 sim_pfm: 684.7712636667615
episode: 352 training return: tensor(518.0559, device='cuda:0')
episode: 353 training return: tensor(571.5392, device='cuda:0')
episode: 354 training return: tensor(541.1744, device='cuda:0')
episode: 355 training return: tensor(520.6608, device='cuda:0')
epoch: 89 test_true_pfm: 6098.031589376586 sim_pfm: 649.1533349584012
episode: 356 training return: tensor(492.4729, device='cuda:0')
episode: 357 training return: tensor(602.3721, device='cuda:0')
episode: 358 training return: tensor(487.2393, device='cuda:0')
episode: 359 training return: tensor(538.1248, device='cuda:0')
epoch: 90 test_true_pfm: 6271.013155534404 sim_pfm: 706.7534215949321
episode: 360 training return: tensor(592.0851, device='cuda:0')
episode: 361 training return: tensor(550.3121, device='cuda:0')
episode: 362 training return: tensor(569.4023, device='cuda:0')
episode: 363 training return: tensor(557.6340, device='cuda:0')
epoch: 91 test_true_pfm: 6188.350902855684 sim_pfm: 729.1926346717519
episode: 364 training return: tensor(474.2180, device='cuda:0')
episode: 365 training return: tensor(527.4783, device='cuda:0')
episode: 366 training return: tensor(534.7193, device='cuda:0')
episode: 367 training return: tensor(537.7778, device='cuda:0')
epoch: 92 test_true_pfm: 6266.423017214763 sim_pfm: 704.8012805415007
episode: 368 training return: tensor(528.3657, device='cuda:0')
episode: 369 training return: tensor(575.5095, device='cuda:0')
episode: 370 training return: tensor(480.9890, device='cuda:0')
episode: 371 training return: tensor(534.0553, device='cuda:0')
epoch: 93 test_true_pfm: 6228.969920320745 sim_pfm: 707.3245652197123
episode: 372 training return: tensor(526.1732, device='cuda:0')
episode: 373 training return: tensor(545.5020, device='cuda:0')
episode: 374 training return: tensor(555.5609, device='cuda:0')
episode: 375 training return: tensor(460.7920, device='cuda:0')
epoch: 94 test_true_pfm: 6314.575326806531 sim_pfm: 720.1956171328978
episode: 376 training return: tensor(553.9626, device='cuda:0')
episode: 377 training return: tensor(547.1431, device='cuda:0')
episode: 378 training return: tensor(577.3903, device='cuda:0')
episode: 379 training return: tensor(513.1057, device='cuda:0')
epoch: 95 test_true_pfm: 6281.533449609262 sim_pfm: 728.2016548071211
episode: 380 training return: tensor(619.4965, device='cuda:0')
episode: 381 training return: tensor(551.5580, device='cuda:0')
episode: 382 training return: tensor(481.8132, device='cuda:0')
episode: 383 training return: tensor(526.3309, device='cuda:0')
epoch: 96 test_true_pfm: 6253.786932404136 sim_pfm: 693.0199305479182
episode: 384 training return: tensor(562.7776, device='cuda:0')
episode: 385 training return: tensor(536.6006, device='cuda:0')
episode: 386 training return: tensor(501.5592, device='cuda:0')
episode: 387 training return: tensor(550.8484, device='cuda:0')
epoch: 97 test_true_pfm: 6244.204952575034 sim_pfm: 721.595087685079
episode: 388 training return: tensor(567.1862, device='cuda:0')
episode: 389 training return: tensor(590.4764, device='cuda:0')
episode: 390 training return: tensor(549.7396, device='cuda:0')
episode: 391 training return: tensor(555.2473, device='cuda:0')
epoch: 98 test_true_pfm: 6224.505511269388 sim_pfm: 705.4205434717975
episode: 392 training return: tensor(468.2709, device='cuda:0')
episode: 393 training return: tensor(574.4302, device='cuda:0')
episode: 394 training return: tensor(538.4832, device='cuda:0')
episode: 395 training return: tensor(545.3270, device='cuda:0')
epoch: 99 test_true_pfm: 6275.504170548673 sim_pfm: 709.8915693815798
episode: 396 training return: tensor(546.4688, device='cuda:0')
episode: 397 training return: tensor(572.0214, device='cuda:0')
episode: 398 training return: tensor(615.6079, device='cuda:0')
episode: 399 training return: tensor(592.2946, device='cuda:0')
epoch: 100 test_true_pfm: 6211.128131254755 sim_pfm: 704.3240652619861
episode: 400 training return: tensor(481.8343, device='cuda:0')
episode: 401 training return: tensor(529.8002, device='cuda:0')
episode: 402 training return: tensor(574.7661, device='cuda:0')
episode: 403 training return: tensor(467.4010, device='cuda:0')
epoch: 101 test_true_pfm: 6274.565158139325 sim_pfm: 732.2448920821771
episode: 404 training return: tensor(533.3812, device='cuda:0')
episode: 405 training return: tensor(581.7373, device='cuda:0')
episode: 406 training return: tensor(603.0868, device='cuda:0')
episode: 407 training return: tensor(544.7928, device='cuda:0')
epoch: 102 test_true_pfm: 6293.458909113929 sim_pfm: 721.4994823144904
episode: 408 training return: tensor(552.6893, device='cuda:0')
episode: 409 training return: tensor(575.8196, device='cuda:0')
episode: 410 training return: tensor(647.2807, device='cuda:0')
episode: 411 training return: tensor(603.5355, device='cuda:0')
epoch: 103 test_true_pfm: 6227.481881936926 sim_pfm: 700.744667985787
episode: 412 training return: tensor(535.6137, device='cuda:0')
episode: 413 training return: tensor(536.2735, device='cuda:0')
episode: 414 training return: tensor(574.8958, device='cuda:0')
episode: 415 training return: tensor(624.2418, device='cuda:0')
epoch: 104 test_true_pfm: 6171.031864642219 sim_pfm: 703.2775150438343
episode: 416 training return: tensor(546.6652, device='cuda:0')
episode: 417 training return: tensor(602.5588, device='cuda:0')
episode: 418 training return: tensor(641.9358, device='cuda:0')
episode: 419 training return: tensor(516.5137, device='cuda:0')
epoch: 105 test_true_pfm: 6301.105869090284 sim_pfm: 736.5665802943209
episode: 420 training return: tensor(527.9481, device='cuda:0')
episode: 421 training return: tensor(617.3892, device='cuda:0')
episode: 422 training return: tensor(520.7975, device='cuda:0')
episode: 423 training return: tensor(512.1569, device='cuda:0')
epoch: 106 test_true_pfm: 6300.603809011372 sim_pfm: 729.9669326387035
episode: 424 training return: tensor(558.3480, device='cuda:0')
episode: 425 training return: tensor(460.3653, device='cuda:0')
episode: 426 training return: tensor(603.6379, device='cuda:0')
episode: 427 training return: tensor(576.9763, device='cuda:0')
epoch: 107 test_true_pfm: 6298.070603678255 sim_pfm: 729.0380004585217
episode: 428 training return: tensor(628.2967, device='cuda:0')
episode: 429 training return: tensor(587.7919, device='cuda:0')
episode: 430 training return: tensor(603.9778, device='cuda:0')
episode: 431 training return: tensor(586.7809, device='cuda:0')
epoch: 108 test_true_pfm: 6341.443475033725 sim_pfm: 731.6678579523383
episode: 432 training return: tensor(571.1594, device='cuda:0')
episode: 433 training return: tensor(548.7350, device='cuda:0')
episode: 434 training return: tensor(629.7449, device='cuda:0')
episode: 435 training return: tensor(546.4835, device='cuda:0')
epoch: 109 test_true_pfm: 6251.557029078933 sim_pfm: 639.420246412997
episode: 436 training return: tensor(568.5301, device='cuda:0')
episode: 437 training return: tensor(600.7122, device='cuda:0')
episode: 438 training return: tensor(547.2092, device='cuda:0')
episode: 439 training return: tensor(615.5548, device='cuda:0')
epoch: 110 test_true_pfm: 6269.582252634294 sim_pfm: 690.8709385114101
episode: 440 training return: tensor(569.1365, device='cuda:0')
episode: 441 training return: tensor(469.8056, device='cuda:0')
episode: 442 training return: tensor(568.7488, device='cuda:0')
episode: 443 training return: tensor(538.2233, device='cuda:0')
epoch: 111 test_true_pfm: 6090.6377693505665 sim_pfm: 665.6771327677028
episode: 444 training return: tensor(585.0315, device='cuda:0')
episode: 445 training return: tensor(559.7717, device='cuda:0')
episode: 446 training return: tensor(571.9362, device='cuda:0')
episode: 447 training return: tensor(580.8734, device='cuda:0')
epoch: 112 test_true_pfm: 6263.019612200033 sim_pfm: 744.4170792323226
episode: 448 training return: tensor(592.0637, device='cuda:0')
episode: 449 training return: tensor(567.9951, device='cuda:0')
episode: 450 training return: tensor(594.8725, device='cuda:0')
episode: 451 training return: tensor(563.3307, device='cuda:0')
epoch: 113 test_true_pfm: 6280.053441455261 sim_pfm: 726.2134502805226
episode: 452 training return: tensor(603.5403, device='cuda:0')
episode: 453 training return: tensor(539.3544, device='cuda:0')
episode: 454 training return: tensor(604.9207, device='cuda:0')
episode: 455 training return: tensor(612.7186, device='cuda:0')
epoch: 114 test_true_pfm: 6266.214566426607 sim_pfm: 713.1684904107824
episode: 456 training return: tensor(622.9218, device='cuda:0')
episode: 457 training return: tensor(528.2094, device='cuda:0')
episode: 458 training return: tensor(585.2628, device='cuda:0')
episode: 459 training return: tensor(543.1567, device='cuda:0')
epoch: 115 test_true_pfm: 6282.97505364808 sim_pfm: 720.4262607904772
episode: 460 training return: tensor(530.0633, device='cuda:0')
episode: 461 training return: tensor(546.9749, device='cuda:0')
episode: 462 training return: tensor(576.3646, device='cuda:0')
episode: 463 training return: tensor(519.3500, device='cuda:0')
epoch: 116 test_true_pfm: 6265.381368509595 sim_pfm: 736.9242852882211
episode: 464 training return: tensor(542.1993, device='cuda:0')
episode: 465 training return: tensor(616.1053, device='cuda:0')
episode: 466 training return: tensor(626.3919, device='cuda:0')
episode: 467 training return: tensor(577.3994, device='cuda:0')
epoch: 117 test_true_pfm: 6280.720535112635 sim_pfm: 738.6329693525719
episode: 468 training return: tensor(628.8568, device='cuda:0')
episode: 469 training return: tensor(571.4531, device='cuda:0')
episode: 470 training return: tensor(621.0538, device='cuda:0')
episode: 471 training return: tensor(562.9235, device='cuda:0')
epoch: 118 test_true_pfm: 6245.021343070213 sim_pfm: 707.0589087116532
episode: 472 training return: tensor(614.1330, device='cuda:0')
episode: 473 training return: tensor(610.9006, device='cuda:0')
episode: 474 training return: tensor(618.8640, device='cuda:0')
episode: 475 training return: tensor(641.6414, device='cuda:0')
epoch: 119 test_true_pfm: 6249.705061184694 sim_pfm: 728.6452284637295
episode: 476 training return: tensor(590.2173, device='cuda:0')
episode: 477 training return: tensor(608.3980, device='cuda:0')
episode: 478 training return: tensor(573.2557, device='cuda:0')
episode: 479 training return: tensor(630.1824, device='cuda:0')
epoch: 120 test_true_pfm: 6324.740332440367 sim_pfm: 710.588997714532
episode: 480 training return: tensor(527.1920, device='cuda:0')
episode: 481 training return: tensor(634.3557, device='cuda:0')
episode: 482 training return: tensor(548.8528, device='cuda:0')
episode: 483 training return: tensor(569.9481, device='cuda:0')
epoch: 121 test_true_pfm: 6244.3161312212105 sim_pfm: 712.5617111947698
episode: 484 training return: tensor(592.9876, device='cuda:0')
episode: 485 training return: tensor(616.6495, device='cuda:0')
episode: 486 training return: tensor(568.7940, device='cuda:0')
episode: 487 training return: tensor(586.7717, device='cuda:0')
epoch: 122 test_true_pfm: 6273.15055345961 sim_pfm: 734.4220522215086
episode: 488 training return: tensor(585.0755, device='cuda:0')
episode: 489 training return: tensor(563.3788, device='cuda:0')
episode: 490 training return: tensor(590.2772, device='cuda:0')
episode: 491 training return: tensor(571.5652, device='cuda:0')
epoch: 123 test_true_pfm: 6376.00205728528 sim_pfm: 720.6522296856856
episode: 492 training return: tensor(528.1295, device='cuda:0')
episode: 493 training return: tensor(622.2441, device='cuda:0')
episode: 494 training return: tensor(627.2542, device='cuda:0')
episode: 495 training return: tensor(579.5345, device='cuda:0')
epoch: 124 test_true_pfm: 6248.175449441948 sim_pfm: 699.2691290442308
episode: 496 training return: tensor(590.1888, device='cuda:0')
episode: 497 training return: tensor(588.3837, device='cuda:0')
episode: 498 training return: tensor(549.2198, device='cuda:0')
episode: 499 training return: tensor(597.5360, device='cuda:0')
epoch: 125 test_true_pfm: 6351.53251851384 sim_pfm: 731.0393230090849
episode: 500 training return: tensor(616.2261, device='cuda:0')
episode: 501 training return: tensor(587.3637, device='cuda:0')
episode: 502 training return: tensor(605.9293, device='cuda:0')
episode: 503 training return: tensor(557.6384, device='cuda:0')
epoch: 126 test_true_pfm: 6291.321577557496 sim_pfm: 709.2506612407742
episode: 504 training return: tensor(572.4823, device='cuda:0')
episode: 505 training return: tensor(586.4796, device='cuda:0')
episode: 506 training return: tensor(567.2306, device='cuda:0')
episode: 507 training return: tensor(585.2273, device='cuda:0')
epoch: 127 test_true_pfm: 6384.322219945745 sim_pfm: 750.4344160274292
episode: 508 training return: tensor(614.0973, device='cuda:0')
episode: 509 training return: tensor(594.0807, device='cuda:0')
episode: 510 training return: tensor(601.3055, device='cuda:0')
episode: 511 training return: tensor(570.3897, device='cuda:0')
epoch: 128 test_true_pfm: 6294.264999537362 sim_pfm: 728.1598947806439
episode: 512 training return: tensor(618.8643, device='cuda:0')
episode: 513 training return: tensor(544.5844, device='cuda:0')
episode: 514 training return: tensor(622.7200, device='cuda:0')
episode: 515 training return: tensor(541.0501, device='cuda:0')
epoch: 129 test_true_pfm: 6280.10661920837 sim_pfm: 729.574911888844
episode: 516 training return: tensor(600.3093, device='cuda:0')
episode: 517 training return: tensor(570.0123, device='cuda:0')
episode: 518 training return: tensor(640.4435, device='cuda:0')
episode: 519 training return: tensor(653.2902, device='cuda:0')
epoch: 130 test_true_pfm: 6250.932087513367 sim_pfm: 714.0180671571288
episode: 520 training return: tensor(577.9584, device='cuda:0')
episode: 521 training return: tensor(560.1093, device='cuda:0')
episode: 522 training return: tensor(556.1895, device='cuda:0')
episode: 523 training return: tensor(627.3636, device='cuda:0')
epoch: 131 test_true_pfm: 6198.783431168296 sim_pfm: 706.3381861347394
episode: 524 training return: tensor(589.7609, device='cuda:0')
episode: 525 training return: tensor(574.2562, device='cuda:0')
episode: 526 training return: tensor(569.2546, device='cuda:0')
episode: 527 training return: tensor(625.8318, device='cuda:0')
epoch: 132 test_true_pfm: 6295.257943221083 sim_pfm: 740.6403759381113
episode: 528 training return: tensor(558.7782, device='cuda:0')
episode: 529 training return: tensor(618.7430, device='cuda:0')
episode: 530 training return: tensor(532.8243, device='cuda:0')
episode: 531 training return: tensor(605.8761, device='cuda:0')
epoch: 133 test_true_pfm: 6304.912575697963 sim_pfm: 764.170258321877
episode: 532 training return: tensor(538.1315, device='cuda:0')
episode: 533 training return: tensor(609.6276, device='cuda:0')
episode: 534 training return: tensor(573.7512, device='cuda:0')
episode: 535 training return: tensor(657.6349, device='cuda:0')
epoch: 134 test_true_pfm: 6226.193126732353 sim_pfm: 719.2251052858774
episode: 536 training return: tensor(578.9592, device='cuda:0')
episode: 537 training return: tensor(557.5475, device='cuda:0')
episode: 538 training return: tensor(554.9153, device='cuda:0')
episode: 539 training return: tensor(537.8839, device='cuda:0')
epoch: 135 test_true_pfm: 6314.406791301452 sim_pfm: 756.6082337458307
episode: 540 training return: tensor(570.8827, device='cuda:0')
episode: 541 training return: tensor(595.0812, device='cuda:0')
episode: 542 training return: tensor(550.5676, device='cuda:0')
episode: 543 training return: tensor(628.3488, device='cuda:0')
epoch: 136 test_true_pfm: 6323.539683589498 sim_pfm: 741.0496808321526
episode: 544 training return: tensor(609.6263, device='cuda:0')
episode: 545 training return: tensor(576.3915, device='cuda:0')
episode: 546 training return: tensor(608.2047, device='cuda:0')
episode: 547 training return: tensor(560.1663, device='cuda:0')
epoch: 137 test_true_pfm: 6324.339333036474 sim_pfm: 736.0691097558787
episode: 548 training return: tensor(622.4243, device='cuda:0')
episode: 549 training return: tensor(620.0928, device='cuda:0')
episode: 550 training return: tensor(511.5244, device='cuda:0')
episode: 551 training return: tensor(628.1592, device='cuda:0')
epoch: 138 test_true_pfm: 6300.192942918719 sim_pfm: 753.4520993450811
episode: 552 training return: tensor(607.3342, device='cuda:0')
episode: 553 training return: tensor(572.2452, device='cuda:0')
episode: 554 training return: tensor(587.8683, device='cuda:0')
episode: 555 training return: tensor(607.9395, device='cuda:0')
epoch: 139 test_true_pfm: 6372.916316005075 sim_pfm: 753.4624516076098
episode: 556 training return: tensor(611.9814, device='cuda:0')
episode: 557 training return: tensor(600.3767, device='cuda:0')
episode: 558 training return: tensor(621.2745, device='cuda:0')
episode: 559 training return: tensor(582.2800, device='cuda:0')
epoch: 140 test_true_pfm: 6226.039576292544 sim_pfm: 727.5961015867069
episode: 560 training return: tensor(613.3884, device='cuda:0')
episode: 561 training return: tensor(594.3134, device='cuda:0')
episode: 562 training return: tensor(554.1667, device='cuda:0')
episode: 563 training return: tensor(530.9900, device='cuda:0')
epoch: 141 test_true_pfm: 6347.024803345218 sim_pfm: 740.9664414189756
episode: 564 training return: tensor(631.6997, device='cuda:0')
episode: 565 training return: tensor(597.9477, device='cuda:0')
episode: 566 training return: tensor(644.0040, device='cuda:0')
episode: 567 training return: tensor(596.8605, device='cuda:0')
epoch: 142 test_true_pfm: 6331.231906721504 sim_pfm: 750.0167934239531
episode: 568 training return: tensor(551.0292, device='cuda:0')
episode: 569 training return: tensor(628.3926, device='cuda:0')
episode: 570 training return: tensor(605.9630, device='cuda:0')
episode: 571 training return: tensor(579.4283, device='cuda:0')
epoch: 143 test_true_pfm: 6278.449752823246 sim_pfm: 737.6441715609593
episode: 572 training return: tensor(562.4697, device='cuda:0')
episode: 573 training return: tensor(591.7667, device='cuda:0')
episode: 574 training return: tensor(556.9053, device='cuda:0')
episode: 575 training return: tensor(602.3578, device='cuda:0')
epoch: 144 test_true_pfm: 6348.28295555081 sim_pfm: 770.543113539461
episode: 576 training return: tensor(594.6937, device='cuda:0')
episode: 577 training return: tensor(596.1829, device='cuda:0')
episode: 578 training return: tensor(597.2784, device='cuda:0')
episode: 579 training return: tensor(586.2083, device='cuda:0')
epoch: 145 test_true_pfm: 6300.7341104677425 sim_pfm: 747.7347248618802
episode: 580 training return: tensor(604.8093, device='cuda:0')
episode: 581 training return: tensor(594.6067, device='cuda:0')
episode: 582 training return: tensor(574.4935, device='cuda:0')
episode: 583 training return: tensor(621.9694, device='cuda:0')
epoch: 146 test_true_pfm: 6345.818006216436 sim_pfm: 744.3383216354996
episode: 584 training return: tensor(633.5206, device='cuda:0')
episode: 585 training return: tensor(581.2523, device='cuda:0')
episode: 586 training return: tensor(591.4380, device='cuda:0')
episode: 587 training return: tensor(603.9465, device='cuda:0')
epoch: 147 test_true_pfm: 6350.035024578886 sim_pfm: 761.1584835356722
episode: 588 training return: tensor(564.9006, device='cuda:0')
episode: 589 training return: tensor(587.6108, device='cuda:0')
episode: 590 training return: tensor(590.2233, device='cuda:0')
episode: 591 training return: tensor(613.9773, device='cuda:0')
epoch: 148 test_true_pfm: 6301.676575497993 sim_pfm: 760.4088035166884
episode: 592 training return: tensor(559.7323, device='cuda:0')
episode: 593 training return: tensor(640.8884, device='cuda:0')
episode: 594 training return: tensor(612.8654, device='cuda:0')
episode: 595 training return: tensor(633.8589, device='cuda:0')
epoch: 149 test_true_pfm: 6371.556065447439 sim_pfm: 734.7492559878156
episode: 596 training return: tensor(586.7131, device='cuda:0')
episode: 597 training return: tensor(629.3937, device='cuda:0')
episode: 598 training return: tensor(612.5417, device='cuda:0')
episode: 599 training return: tensor(585.2421, device='cuda:0')
epoch: 150 test_true_pfm: 6330.866316125116 sim_pfm: 741.1951657601943
