['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '0', '--data', '100000', '--regu', '0.2']
2143.5141708791352
episode: 0 training return: tensor(320.4107, device='cuda:0')
episode: 1 training return: tensor(-9.2359, device='cuda:0')
episode: 2 training return: tensor(-88.0323, device='cuda:0')
episode: 3 training return: tensor(301.3168, device='cuda:0')
epoch: 1 test_true_pfm: 3015.3391365665643 sim_pfm: -58.22149313984361
episode: 4 training return: tensor(416.6185, device='cuda:0')
episode: 5 training return: tensor(375.5811, device='cuda:0')
episode: 6 training return: tensor(339.1158, device='cuda:0')
episode: 7 training return: tensor(85.8387, device='cuda:0')
epoch: 2 test_true_pfm: 3023.550056602889 sim_pfm: 245.29083513476266
episode: 8 training return: tensor(277.5783, device='cuda:0')
episode: 9 training return: tensor(393.8487, device='cuda:0')
episode: 10 training return: tensor(368.3998, device='cuda:0')
episode: 11 training return: tensor(394.2826, device='cuda:0')
epoch: 3 test_true_pfm: 3161.386712550527 sim_pfm: 299.67047941548907
episode: 12 training return: tensor(302.2020, device='cuda:0')
episode: 13 training return: tensor(-83.7879, device='cuda:0')
episode: 14 training return: tensor(243.6423, device='cuda:0')
episode: 15 training return: tensor(380.3914, device='cuda:0')
epoch: 4 test_true_pfm: 3429.2782430496277 sim_pfm: 416.6451074466556
episode: 16 training return: tensor(374.2752, device='cuda:0')
episode: 17 training return: tensor(323.8995, device='cuda:0')
episode: 18 training return: tensor(383.8987, device='cuda:0')
episode: 19 training return: tensor(417.4144, device='cuda:0')
epoch: 5 test_true_pfm: 3112.7752872083697 sim_pfm: 326.625868046113
episode: 20 training return: tensor(338.4957, device='cuda:0')
episode: 21 training return: tensor(-118.2610, device='cuda:0')
episode: 22 training return: tensor(128.6429, device='cuda:0')
episode: 23 training return: tensor(206.6217, device='cuda:0')
epoch: 6 test_true_pfm: 1965.8669647011914 sim_pfm: 31.25749932755328
episode: 24 training return: tensor(363.5877, device='cuda:0')
episode: 25 training return: tensor(329.9224, device='cuda:0')
episode: 26 training return: tensor(359.5780, device='cuda:0')
episode: 27 training return: tensor(-280.8985, device='cuda:0')
epoch: 7 test_true_pfm: 3351.3596522722564 sim_pfm: 211.53824638191145
episode: 28 training return: tensor(376.5231, device='cuda:0')
episode: 29 training return: tensor(386.6500, device='cuda:0')
episode: 30 training return: tensor(345.2250, device='cuda:0')
episode: 31 training return: tensor(154.4294, device='cuda:0')
epoch: 8 test_true_pfm: 3076.2259479359914 sim_pfm: 360.9435429193545
episode: 32 training return: tensor(398.2527, device='cuda:0')
episode: 33 training return: tensor(82.8256, device='cuda:0')
episode: 34 training return: tensor(-23.5741, device='cuda:0')
episode: 35 training return: tensor(299.9257, device='cuda:0')
epoch: 9 test_true_pfm: 3378.7203978280686 sim_pfm: 363.77968116791453
episode: 36 training return: tensor(384.3776, device='cuda:0')
episode: 37 training return: tensor(-48.5740, device='cuda:0')
episode: 38 training return: tensor(356.1803, device='cuda:0')
episode: 39 training return: tensor(-61.0205, device='cuda:0')
epoch: 10 test_true_pfm: 2901.1051069908995 sim_pfm: 410.515787809185
episode: 40 training return: tensor(-13.7557, device='cuda:0')
episode: 41 training return: tensor(341.2889, device='cuda:0')
episode: 42 training return: tensor(-71.4681, device='cuda:0')
episode: 43 training return: tensor(148.1653, device='cuda:0')
epoch: 11 test_true_pfm: 2874.1224429932513 sim_pfm: 136.90837166570904
episode: 44 training return: tensor(353.5978, device='cuda:0')
episode: 45 training return: tensor(-291.4178, device='cuda:0')
episode: 46 training return: tensor(384.2006, device='cuda:0')
episode: 47 training return: tensor(365.2451, device='cuda:0')
epoch: 12 test_true_pfm: 2835.5729490550293 sim_pfm: 294.87884483469924
episode: 48 training return: tensor(-122.0023, device='cuda:0')
episode: 49 training return: tensor(448.9770, device='cuda:0')
episode: 50 training return: tensor(376.6936, device='cuda:0')
episode: 51 training return: tensor(-324.7425, device='cuda:0')
epoch: 13 test_true_pfm: 3190.3867985361953 sim_pfm: 286.8544943629725
episode: 52 training return: tensor(325.2679, device='cuda:0')
episode: 53 training return: tensor(-293.5796, device='cuda:0')
episode: 54 training return: tensor(323.5947, device='cuda:0')
episode: 55 training return: tensor(341.5569, device='cuda:0')
epoch: 14 test_true_pfm: 3274.3650183803743 sim_pfm: 381.0868687703235
episode: 56 training return: tensor(-29.7566, device='cuda:0')
episode: 57 training return: tensor(-231.9210, device='cuda:0')
episode: 58 training return: tensor(-121.7377, device='cuda:0')
episode: 59 training return: tensor(77.9351, device='cuda:0')
epoch: 15 test_true_pfm: 3378.515505271894 sim_pfm: 242.13828035923265
episode: 60 training return: tensor(301.7990, device='cuda:0')
episode: 61 training return: tensor(390.4468, device='cuda:0')
episode: 62 training return: tensor(343.3916, device='cuda:0')
episode: 63 training return: tensor(358.9134, device='cuda:0')
epoch: 16 test_true_pfm: 3356.6520140036996 sim_pfm: 299.7295215096577
episode: 64 training return: tensor(-170.1811, device='cuda:0')
episode: 65 training return: tensor(441.9393, device='cuda:0')
episode: 66 training return: tensor(275.6026, device='cuda:0')
episode: 67 training return: tensor(297.3282, device='cuda:0')
epoch: 17 test_true_pfm: 3349.098136678369 sim_pfm: 245.476892674497
episode: 68 training return: tensor(360.5140, device='cuda:0')
episode: 69 training return: tensor(328.1214, device='cuda:0')
episode: 70 training return: tensor(332.6696, device='cuda:0')
episode: 71 training return: tensor(345.0559, device='cuda:0')
epoch: 18 test_true_pfm: 3221.520383355877 sim_pfm: 295.01072750081465
episode: 72 training return: tensor(290.2178, device='cuda:0')
episode: 73 training return: tensor(347.5741, device='cuda:0')
episode: 74 training return: tensor(264.2207, device='cuda:0')
episode: 75 training return: tensor(375.1387, device='cuda:0')
epoch: 19 test_true_pfm: 3368.1854658858597 sim_pfm: 368.90748014910304
episode: 76 training return: tensor(379.6854, device='cuda:0')
episode: 77 training return: tensor(24.7509, device='cuda:0')
episode: 78 training return: tensor(-305.3157, device='cuda:0')
episode: 79 training return: tensor(230.7987, device='cuda:0')
epoch: 20 test_true_pfm: 2408.1429007355687 sim_pfm: 168.24642617610516
episode: 80 training return: tensor(331.2690, device='cuda:0')
episode: 81 training return: tensor(366.3484, device='cuda:0')
episode: 82 training return: tensor(322.9745, device='cuda:0')
episode: 83 training return: tensor(356.2082, device='cuda:0')
epoch: 21 test_true_pfm: 3430.885823580035 sim_pfm: 383.9239624202892
episode: 84 training return: tensor(362.7505, device='cuda:0')
episode: 85 training return: tensor(364.4284, device='cuda:0')
episode: 86 training return: tensor(349.0180, device='cuda:0')
episode: 87 training return: tensor(389.3240, device='cuda:0')
epoch: 22 test_true_pfm: 3473.6096858765654 sim_pfm: 397.7060821131454
episode: 88 training return: tensor(355.3096, device='cuda:0')
episode: 89 training return: tensor(319.4816, device='cuda:0')
episode: 90 training return: tensor(339.1024, device='cuda:0')
episode: 91 training return: tensor(309.9620, device='cuda:0')
epoch: 23 test_true_pfm: 3506.0728638708515 sim_pfm: 387.8712220688273
episode: 92 training return: tensor(400.1457, device='cuda:0')
episode: 93 training return: tensor(355.5556, device='cuda:0')
episode: 94 training return: tensor(340.5775, device='cuda:0')
episode: 95 training return: tensor(262.7831, device='cuda:0')
epoch: 24 test_true_pfm: 3340.1385856625748 sim_pfm: 285.4185072423813
episode: 96 training return: tensor(408.9056, device='cuda:0')
episode: 97 training return: tensor(409.1173, device='cuda:0')
episode: 98 training return: tensor(285.7831, device='cuda:0')
episode: 99 training return: tensor(357.2765, device='cuda:0')
epoch: 25 test_true_pfm: 3435.931861357167 sim_pfm: 270.5657128657428
episode: 100 training return: tensor(313.8245, device='cuda:0')
episode: 101 training return: tensor(357.4448, device='cuda:0')
episode: 102 training return: tensor(363.6329, device='cuda:0')
episode: 103 training return: tensor(353.2207, device='cuda:0')
epoch: 26 test_true_pfm: 3398.3013297056327 sim_pfm: 312.7520711036341
episode: 104 training return: tensor(332.5138, device='cuda:0')
episode: 105 training return: tensor(377.9269, device='cuda:0')
episode: 106 training return: tensor(396.8393, device='cuda:0')
episode: 107 training return: tensor(373.9999, device='cuda:0')
epoch: 27 test_true_pfm: 3412.2505028850737 sim_pfm: 381.4135530760589
episode: 108 training return: tensor(424.2674, device='cuda:0')
episode: 109 training return: tensor(324.7891, device='cuda:0')
episode: 110 training return: tensor(377.4988, device='cuda:0')
episode: 111 training return: tensor(419.3244, device='cuda:0')
epoch: 28 test_true_pfm: 3468.961140193716 sim_pfm: 367.4011721976373
episode: 112 training return: tensor(368.7912, device='cuda:0')
episode: 113 training return: tensor(357.0660, device='cuda:0')
episode: 114 training return: tensor(323.4211, device='cuda:0')
episode: 115 training return: tensor(400.8765, device='cuda:0')
epoch: 29 test_true_pfm: 3388.5452890919073 sim_pfm: 375.709370783666
episode: 116 training return: tensor(379.7532, device='cuda:0')
episode: 117 training return: tensor(370.6898, device='cuda:0')
episode: 118 training return: tensor(314.5675, device='cuda:0')
episode: 119 training return: tensor(343.4576, device='cuda:0')
epoch: 30 test_true_pfm: 3513.423895903715 sim_pfm: 428.7519851269317
episode: 120 training return: tensor(379.1941, device='cuda:0')
episode: 121 training return: tensor(393.5663, device='cuda:0')
episode: 122 training return: tensor(352.8434, device='cuda:0')
episode: 123 training return: tensor(318.1597, device='cuda:0')
epoch: 31 test_true_pfm: 3379.3598278564154 sim_pfm: 394.3592976902146
episode: 124 training return: tensor(314.6920, device='cuda:0')
episode: 125 training return: tensor(432.6212, device='cuda:0')
episode: 126 training return: tensor(322.5298, device='cuda:0')
episode: 127 training return: tensor(402.8642, device='cuda:0')
epoch: 32 test_true_pfm: 3390.396679061594 sim_pfm: 367.2963895359814
episode: 128 training return: tensor(342.8008, device='cuda:0')
episode: 129 training return: tensor(325.4951, device='cuda:0')
episode: 130 training return: tensor(329.3628, device='cuda:0')
episode: 131 training return: tensor(296.5910, device='cuda:0')
epoch: 33 test_true_pfm: 3476.8044262055773 sim_pfm: 324.5359782918143
episode: 132 training return: tensor(369.3626, device='cuda:0')
episode: 133 training return: tensor(340.2596, device='cuda:0')
episode: 134 training return: tensor(443.3830, device='cuda:0')
episode: 135 training return: tensor(-149.5774, device='cuda:0')
epoch: 34 test_true_pfm: 3260.5751093502713 sim_pfm: 413.80994659600157
episode: 136 training return: tensor(379.0053, device='cuda:0')
episode: 137 training return: tensor(237.5551, device='cuda:0')
episode: 138 training return: tensor(431.7676, device='cuda:0')
episode: 139 training return: tensor(390.6891, device='cuda:0')
epoch: 35 test_true_pfm: 3412.392320626234 sim_pfm: 385.38929660084733
episode: 140 training return: tensor(352.7949, device='cuda:0')
episode: 141 training return: tensor(354.2997, device='cuda:0')
episode: 142 training return: tensor(414.2620, device='cuda:0')
episode: 143 training return: tensor(408.5249, device='cuda:0')
epoch: 36 test_true_pfm: 3431.6739415057286 sim_pfm: 405.610967654308
episode: 144 training return: tensor(414.1262, device='cuda:0')
episode: 145 training return: tensor(334.0276, device='cuda:0')
episode: 146 training return: tensor(480.7234, device='cuda:0')
episode: 147 training return: tensor(360.0368, device='cuda:0')
epoch: 37 test_true_pfm: 3448.981131780205 sim_pfm: 401.273552599722
episode: 148 training return: tensor(385.0429, device='cuda:0')
episode: 149 training return: tensor(283.7433, device='cuda:0')
episode: 150 training return: tensor(376.8330, device='cuda:0')
episode: 151 training return: tensor(330.4822, device='cuda:0')
epoch: 38 test_true_pfm: 3486.12140618957 sim_pfm: 420.5797953052679
episode: 152 training return: tensor(356.1041, device='cuda:0')
episode: 153 training return: tensor(405.9531, device='cuda:0')
episode: 154 training return: tensor(418.3372, device='cuda:0')
episode: 155 training return: tensor(356.4501, device='cuda:0')
epoch: 39 test_true_pfm: 3422.7160715354353 sim_pfm: 388.6072197677374
episode: 156 training return: tensor(381.0185, device='cuda:0')
episode: 157 training return: tensor(420.2141, device='cuda:0')
episode: 158 training return: tensor(390.4616, device='cuda:0')
episode: 159 training return: tensor(397.2683, device='cuda:0')
epoch: 40 test_true_pfm: 3466.625805851292 sim_pfm: 391.2203062746169
episode: 160 training return: tensor(382.5888, device='cuda:0')
episode: 161 training return: tensor(376.9070, device='cuda:0')
episode: 162 training return: tensor(354.2767, device='cuda:0')
episode: 163 training return: tensor(380.8279, device='cuda:0')
epoch: 41 test_true_pfm: 3459.400809481152 sim_pfm: 381.03728177126806
episode: 164 training return: tensor(332.5607, device='cuda:0')
episode: 165 training return: tensor(327.4852, device='cuda:0')
episode: 166 training return: tensor(347.4993, device='cuda:0')
episode: 167 training return: tensor(386.8315, device='cuda:0')
epoch: 42 test_true_pfm: 3467.3876262915724 sim_pfm: 426.0809146030903
episode: 168 training return: tensor(387.6704, device='cuda:0')
episode: 169 training return: tensor(405.9073, device='cuda:0')
episode: 170 training return: tensor(375.5060, device='cuda:0')
episode: 171 training return: tensor(361.4480, device='cuda:0')
epoch: 43 test_true_pfm: 3490.773036519829 sim_pfm: 443.0377796633402
episode: 172 training return: tensor(446.0935, device='cuda:0')
episode: 173 training return: tensor(387.3083, device='cuda:0')
episode: 174 training return: tensor(375.2514, device='cuda:0')
episode: 175 training return: tensor(400.4564, device='cuda:0')
epoch: 44 test_true_pfm: 3459.93983717741 sim_pfm: 400.97103703801986
episode: 176 training return: tensor(375.8997, device='cuda:0')
episode: 177 training return: tensor(289.0793, device='cuda:0')
episode: 178 training return: tensor(325.0594, device='cuda:0')
episode: 179 training return: tensor(401.5559, device='cuda:0')
epoch: 45 test_true_pfm: 3449.5848931783025 sim_pfm: 415.19457969267387
episode: 180 training return: tensor(420.5587, device='cuda:0')
episode: 181 training return: tensor(319.1513, device='cuda:0')
episode: 182 training return: tensor(433.3231, device='cuda:0')
episode: 183 training return: tensor(452.7219, device='cuda:0')
epoch: 46 test_true_pfm: 3423.8116652286913 sim_pfm: 374.3203169279732
episode: 184 training return: tensor(418.1596, device='cuda:0')
episode: 185 training return: tensor(434.0342, device='cuda:0')
episode: 186 training return: tensor(349.5653, device='cuda:0')
episode: 187 training return: tensor(363.9037, device='cuda:0')
epoch: 47 test_true_pfm: 3453.7282340725083 sim_pfm: 427.8940346172797
episode: 188 training return: tensor(299.6480, device='cuda:0')
episode: 189 training return: tensor(437.4086, device='cuda:0')
episode: 190 training return: tensor(422.2043, device='cuda:0')
episode: 191 training return: tensor(356.2076, device='cuda:0')
epoch: 48 test_true_pfm: 3512.0731515019847 sim_pfm: 447.16560495776747
episode: 192 training return: tensor(419.7950, device='cuda:0')
episode: 193 training return: tensor(255.3969, device='cuda:0')
episode: 194 training return: tensor(312.1364, device='cuda:0')
episode: 195 training return: tensor(411.6261, device='cuda:0')
epoch: 49 test_true_pfm: 3501.647405261197 sim_pfm: 413.8568269336829
episode: 196 training return: tensor(374.8335, device='cuda:0')
episode: 197 training return: tensor(18.7440, device='cuda:0')
episode: 198 training return: tensor(377.5616, device='cuda:0')
episode: 199 training return: tensor(430.9959, device='cuda:0')
epoch: 50 test_true_pfm: 3449.3899931651686 sim_pfm: 406.81842978138593
episode: 200 training return: tensor(430.1143, device='cuda:0')
episode: 201 training return: tensor(436.1387, device='cuda:0')
episode: 202 training return: tensor(-152.6071, device='cuda:0')
episode: 203 training return: tensor(493.3664, device='cuda:0')
epoch: 51 test_true_pfm: 3427.887413101849 sim_pfm: 384.6621012285468
episode: 204 training return: tensor(424.8243, device='cuda:0')
episode: 205 training return: tensor(395.9000, device='cuda:0')
episode: 206 training return: tensor(260.2033, device='cuda:0')
episode: 207 training return: tensor(401.9916, device='cuda:0')
epoch: 52 test_true_pfm: 3499.6597614599614 sim_pfm: 451.68806331562035
episode: 208 training return: tensor(347.5882, device='cuda:0')
episode: 209 training return: tensor(296.7872, device='cuda:0')
episode: 210 training return: tensor(436.7624, device='cuda:0')
episode: 211 training return: tensor(409.1098, device='cuda:0')
epoch: 53 test_true_pfm: 3423.7507866582255 sim_pfm: 379.8665474333684
episode: 212 training return: tensor(379.3382, device='cuda:0')
episode: 213 training return: tensor(377.5815, device='cuda:0')
episode: 214 training return: tensor(364.3557, device='cuda:0')
episode: 215 training return: tensor(432.1228, device='cuda:0')
epoch: 54 test_true_pfm: 3443.057043161402 sim_pfm: 378.12768463532365
episode: 216 training return: tensor(429.2290, device='cuda:0')
episode: 217 training return: tensor(383.7969, device='cuda:0')
episode: 218 training return: tensor(375.1718, device='cuda:0')
episode: 219 training return: tensor(385.0258, device='cuda:0')
epoch: 55 test_true_pfm: 3451.1012099192926 sim_pfm: 409.1581463731127
episode: 220 training return: tensor(-81.4486, device='cuda:0')
episode: 221 training return: tensor(459.5615, device='cuda:0')
episode: 222 training return: tensor(452.0477, device='cuda:0')
episode: 223 training return: tensor(83.1551, device='cuda:0')
epoch: 56 test_true_pfm: 3471.3617142900343 sim_pfm: 332.77000779584824
episode: 224 training return: tensor(435.1774, device='cuda:0')
episode: 225 training return: tensor(349.6123, device='cuda:0')
episode: 226 training return: tensor(341.7447, device='cuda:0')
episode: 227 training return: tensor(404.7664, device='cuda:0')
epoch: 57 test_true_pfm: 3472.7047893803287 sim_pfm: 409.15870704854996
episode: 228 training return: tensor(469.6510, device='cuda:0')
episode: 229 training return: tensor(391.8963, device='cuda:0')
episode: 230 training return: tensor(444.3437, device='cuda:0')
episode: 231 training return: tensor(375.1062, device='cuda:0')
epoch: 58 test_true_pfm: 3412.0592311173004 sim_pfm: 368.2582477386847
episode: 232 training return: tensor(428.0796, device='cuda:0')
episode: 233 training return: tensor(470.1171, device='cuda:0')
episode: 234 training return: tensor(412.1191, device='cuda:0')
episode: 235 training return: tensor(389.4391, device='cuda:0')
epoch: 59 test_true_pfm: 3416.345646675099 sim_pfm: 422.59658212617313
episode: 236 training return: tensor(269.7040, device='cuda:0')
episode: 237 training return: tensor(103.8666, device='cuda:0')
episode: 238 training return: tensor(48.3382, device='cuda:0')
episode: 239 training return: tensor(407.2544, device='cuda:0')
epoch: 60 test_true_pfm: 3395.2854065801816 sim_pfm: 359.94281546464964
episode: 240 training return: tensor(356.5323, device='cuda:0')
episode: 241 training return: tensor(104.3326, device='cuda:0')
episode: 242 training return: tensor(419.3277, device='cuda:0')
episode: 243 training return: tensor(52.6931, device='cuda:0')
epoch: 61 test_true_pfm: 3445.071065067709 sim_pfm: 431.8171910113888
episode: 244 training return: tensor(412.1749, device='cuda:0')
episode: 245 training return: tensor(476.4409, device='cuda:0')
episode: 246 training return: tensor(348.7737, device='cuda:0')
episode: 247 training return: tensor(433.8954, device='cuda:0')
epoch: 62 test_true_pfm: 3407.1885238058803 sim_pfm: 382.5871981112966
episode: 248 training return: tensor(398.9072, device='cuda:0')
episode: 249 training return: tensor(403.7296, device='cuda:0')
episode: 250 training return: tensor(358.5273, device='cuda:0')
episode: 251 training return: tensor(433.1221, device='cuda:0')
epoch: 63 test_true_pfm: 3446.6051214579506 sim_pfm: 406.34139678201365
episode: 252 training return: tensor(2.8298, device='cuda:0')
episode: 253 training return: tensor(465.5009, device='cuda:0')
episode: 254 training return: tensor(404.0209, device='cuda:0')
episode: 255 training return: tensor(344.4430, device='cuda:0')
epoch: 64 test_true_pfm: 3462.4922464892697 sim_pfm: 419.16253782094765
episode: 256 training return: tensor(423.0872, device='cuda:0')
episode: 257 training return: tensor(434.1959, device='cuda:0')
episode: 258 training return: tensor(428.5262, device='cuda:0')
episode: 259 training return: tensor(402.3022, device='cuda:0')
epoch: 65 test_true_pfm: 3460.1100846667928 sim_pfm: 348.6359946130542
episode: 260 training return: tensor(390.1699, device='cuda:0')
episode: 261 training return: tensor(414.6171, device='cuda:0')
episode: 262 training return: tensor(398.7494, device='cuda:0')
episode: 263 training return: tensor(374.7675, device='cuda:0')
epoch: 66 test_true_pfm: 3420.8391467374363 sim_pfm: 386.86756892785587
episode: 264 training return: tensor(365.6931, device='cuda:0')
episode: 265 training return: tensor(429.3686, device='cuda:0')
episode: 266 training return: tensor(385.4332, device='cuda:0')
episode: 267 training return: tensor(413.6789, device='cuda:0')
epoch: 67 test_true_pfm: 3474.4502304180714 sim_pfm: 430.0848811358058
episode: 268 training return: tensor(363.8471, device='cuda:0')
episode: 269 training return: tensor(365.2191, device='cuda:0')
episode: 270 training return: tensor(437.2850, device='cuda:0')
episode: 271 training return: tensor(457.8639, device='cuda:0')
epoch: 68 test_true_pfm: 3503.8413531367737 sim_pfm: 485.0283477089445
episode: 272 training return: tensor(433.3052, device='cuda:0')
episode: 273 training return: tensor(401.9990, device='cuda:0')
episode: 274 training return: tensor(416.6841, device='cuda:0')
episode: 275 training return: tensor(414.4514, device='cuda:0')
epoch: 69 test_true_pfm: 3515.1183679553833 sim_pfm: 448.43213241636596
episode: 276 training return: tensor(441.1418, device='cuda:0')
episode: 277 training return: tensor(382.3009, device='cuda:0')
episode: 278 training return: tensor(405.4179, device='cuda:0')
episode: 279 training return: tensor(393.7534, device='cuda:0')
epoch: 70 test_true_pfm: 3480.534745465755 sim_pfm: 423.03738614242565
episode: 280 training return: tensor(416.1890, device='cuda:0')
episode: 281 training return: tensor(404.1778, device='cuda:0')
episode: 282 training return: tensor(426.5330, device='cuda:0')
episode: 283 training return: tensor(434.0776, device='cuda:0')
epoch: 71 test_true_pfm: 3526.4205007043365 sim_pfm: 452.66561275487766
episode: 284 training return: tensor(-44.5454, device='cuda:0')
episode: 285 training return: tensor(392.3907, device='cuda:0')
episode: 286 training return: tensor(409.8036, device='cuda:0')
episode: 287 training return: tensor(425.7540, device='cuda:0')
epoch: 72 test_true_pfm: 3437.7154734250166 sim_pfm: 398.8405929536869
episode: 288 training return: tensor(486.0071, device='cuda:0')
episode: 289 training return: tensor(407.0693, device='cuda:0')
episode: 290 training return: tensor(397.1295, device='cuda:0')
episode: 291 training return: tensor(427.8321, device='cuda:0')
epoch: 73 test_true_pfm: 3499.860728309708 sim_pfm: 442.1189051847905
episode: 292 training return: tensor(399.3723, device='cuda:0')
episode: 293 training return: tensor(394.1739, device='cuda:0')
episode: 294 training return: tensor(378.9683, device='cuda:0')
episode: 295 training return: tensor(403.5429, device='cuda:0')
epoch: 74 test_true_pfm: 3473.749098359926 sim_pfm: 410.89054335688706
episode: 296 training return: tensor(404.9194, device='cuda:0')
episode: 297 training return: tensor(464.9059, device='cuda:0')
episode: 298 training return: tensor(435.7686, device='cuda:0')
episode: 299 training return: tensor(411.7018, device='cuda:0')
epoch: 75 test_true_pfm: 3457.5243011959087 sim_pfm: 420.17636614397634
episode: 300 training return: tensor(443.7059, device='cuda:0')
episode: 301 training return: tensor(411.6016, device='cuda:0')
episode: 302 training return: tensor(380.0204, device='cuda:0')
episode: 303 training return: tensor(436.2396, device='cuda:0')
epoch: 76 test_true_pfm: 3440.434073524446 sim_pfm: 412.24922392799635
episode: 304 training return: tensor(399.6692, device='cuda:0')
episode: 305 training return: tensor(377.5688, device='cuda:0')
episode: 306 training return: tensor(437.0349, device='cuda:0')
episode: 307 training return: tensor(469.1141, device='cuda:0')
epoch: 77 test_true_pfm: 3421.8121490123667 sim_pfm: 378.56575409845874
episode: 308 training return: tensor(408.5151, device='cuda:0')
episode: 309 training return: tensor(404.2916, device='cuda:0')
episode: 310 training return: tensor(397.3575, device='cuda:0')
episode: 311 training return: tensor(418.1653, device='cuda:0')
epoch: 78 test_true_pfm: 3451.595815401192 sim_pfm: 472.41905337473145
episode: 312 training return: tensor(409.0987, device='cuda:0')
episode: 313 training return: tensor(408.1215, device='cuda:0')
episode: 314 training return: tensor(405.8555, device='cuda:0')
episode: 315 training return: tensor(392.8960, device='cuda:0')
epoch: 79 test_true_pfm: 3459.6643777534723 sim_pfm: 434.6254560142018
episode: 316 training return: tensor(403.3835, device='cuda:0')
episode: 317 training return: tensor(409.6165, device='cuda:0')
episode: 318 training return: tensor(451.5180, device='cuda:0')
episode: 319 training return: tensor(424.5255, device='cuda:0')
epoch: 80 test_true_pfm: 3409.0385694538595 sim_pfm: 388.08276937235496
episode: 320 training return: tensor(400.6367, device='cuda:0')
episode: 321 training return: tensor(392.0755, device='cuda:0')
episode: 322 training return: tensor(397.6361, device='cuda:0')
episode: 323 training return: tensor(425.5908, device='cuda:0')
epoch: 81 test_true_pfm: 3438.166789729472 sim_pfm: 400.4203160564066
episode: 324 training return: tensor(440.0952, device='cuda:0')
episode: 325 training return: tensor(355.5363, device='cuda:0')
episode: 326 training return: tensor(412.1613, device='cuda:0')
episode: 327 training return: tensor(437.1879, device='cuda:0')
epoch: 82 test_true_pfm: 3489.264548744311 sim_pfm: 431.8226398832242
episode: 328 training return: tensor(414.3239, device='cuda:0')
episode: 329 training return: tensor(358.5064, device='cuda:0')
episode: 330 training return: tensor(435.6713, device='cuda:0')
episode: 331 training return: tensor(398.6105, device='cuda:0')
epoch: 83 test_true_pfm: 3480.987249290281 sim_pfm: 412.8128216525559
episode: 332 training return: tensor(407.9167, device='cuda:0')
episode: 333 training return: tensor(416.2555, device='cuda:0')
episode: 334 training return: tensor(406.8631, device='cuda:0')
episode: 335 training return: tensor(413.0864, device='cuda:0')
epoch: 84 test_true_pfm: 3453.0491342525056 sim_pfm: 411.486339096562
episode: 336 training return: tensor(391.6972, device='cuda:0')
episode: 337 training return: tensor(430.4492, device='cuda:0')
episode: 338 training return: tensor(425.8874, device='cuda:0')
episode: 339 training return: tensor(402.3355, device='cuda:0')
epoch: 85 test_true_pfm: 3478.228789289859 sim_pfm: 429.46322873970104
episode: 340 training return: tensor(389.1244, device='cuda:0')
episode: 341 training return: tensor(423.3413, device='cuda:0')
episode: 342 training return: tensor(481.5139, device='cuda:0')
episode: 343 training return: tensor(411.5505, device='cuda:0')
epoch: 86 test_true_pfm: 3470.8320335905387 sim_pfm: 409.6536354922379
episode: 344 training return: tensor(436.6455, device='cuda:0')
episode: 345 training return: tensor(415.5645, device='cuda:0')
episode: 346 training return: tensor(389.0835, device='cuda:0')
episode: 347 training return: tensor(369.2174, device='cuda:0')
epoch: 87 test_true_pfm: 3513.8162692689925 sim_pfm: 442.6042304271541
episode: 348 training return: tensor(348.5439, device='cuda:0')
episode: 349 training return: tensor(429.4522, device='cuda:0')
episode: 350 training return: tensor(393.1585, device='cuda:0')
episode: 351 training return: tensor(-657.7756, device='cuda:0')
epoch: 88 test_true_pfm: 3494.7826152668604 sim_pfm: 449.2592379413739
episode: 352 training return: tensor(386.2021, device='cuda:0')
episode: 353 training return: tensor(414.2950, device='cuda:0')
episode: 354 training return: tensor(-8.8322, device='cuda:0')
episode: 355 training return: tensor(404.5901, device='cuda:0')
epoch: 89 test_true_pfm: 3506.834227757184 sim_pfm: 454.6748978782368
episode: 356 training return: tensor(435.4306, device='cuda:0')
episode: 357 training return: tensor(425.4438, device='cuda:0')
episode: 358 training return: tensor(409.3185, device='cuda:0')
episode: 359 training return: tensor(389.8356, device='cuda:0')
epoch: 90 test_true_pfm: 3471.3049436488764 sim_pfm: 419.1667308898177
episode: 360 training return: tensor(433.2184, device='cuda:0')
episode: 361 training return: tensor(416.8795, device='cuda:0')
episode: 362 training return: tensor(424.1581, device='cuda:0')
episode: 363 training return: tensor(417.1494, device='cuda:0')
epoch: 91 test_true_pfm: 3379.7430679473814 sim_pfm: 364.2739461061622
episode: 364 training return: tensor(422.9909, device='cuda:0')
episode: 365 training return: tensor(464.7959, device='cuda:0')
episode: 366 training return: tensor(445.5366, device='cuda:0')
episode: 367 training return: tensor(454.6318, device='cuda:0')
epoch: 92 test_true_pfm: 3509.5240817971335 sim_pfm: 438.8290451002346
episode: 368 training return: tensor(408.3452, device='cuda:0')
episode: 369 training return: tensor(408.4348, device='cuda:0')
episode: 370 training return: tensor(390.4816, device='cuda:0')
episode: 371 training return: tensor(429.2103, device='cuda:0')
epoch: 93 test_true_pfm: 3522.1329965758573 sim_pfm: 454.12245391497464
episode: 372 training return: tensor(420.8731, device='cuda:0')
episode: 373 training return: tensor(433.4940, device='cuda:0')
episode: 374 training return: tensor(424.8391, device='cuda:0')
episode: 375 training return: tensor(419.1807, device='cuda:0')
epoch: 94 test_true_pfm: 3387.271071189058 sim_pfm: 374.3334306032145
episode: 376 training return: tensor(439.7470, device='cuda:0')
episode: 377 training return: tensor(415.5919, device='cuda:0')
episode: 378 training return: tensor(414.7782, device='cuda:0')
episode: 379 training return: tensor(383.7546, device='cuda:0')
epoch: 95 test_true_pfm: 3467.493209743188 sim_pfm: 418.7038639494276
episode: 380 training return: tensor(450.9174, device='cuda:0')
episode: 381 training return: tensor(405.8716, device='cuda:0')
episode: 382 training return: tensor(441.2211, device='cuda:0')
episode: 383 training return: tensor(421.0315, device='cuda:0')
epoch: 96 test_true_pfm: 3540.9581883905157 sim_pfm: 463.49584324046737
episode: 384 training return: tensor(365.4571, device='cuda:0')
episode: 385 training return: tensor(417.1208, device='cuda:0')
episode: 386 training return: tensor(428.5028, device='cuda:0')
episode: 387 training return: tensor(409.2238, device='cuda:0')
epoch: 97 test_true_pfm: 3497.2712394979876 sim_pfm: 442.3682270650946
episode: 388 training return: tensor(390.0123, device='cuda:0')
episode: 389 training return: tensor(403.8709, device='cuda:0')
episode: 390 training return: tensor(419.5722, device='cuda:0')
episode: 391 training return: tensor(408.0908, device='cuda:0')
epoch: 98 test_true_pfm: 3529.215048490359 sim_pfm: 429.6719363000011
episode: 392 training return: tensor(390.2097, device='cuda:0')
episode: 393 training return: tensor(430.4931, device='cuda:0')
episode: 394 training return: tensor(382.0543, device='cuda:0')
episode: 395 training return: tensor(384.7518, device='cuda:0')
epoch: 99 test_true_pfm: 3453.871319241524 sim_pfm: 420.55723756278167
episode: 396 training return: tensor(415.3124, device='cuda:0')
episode: 397 training return: tensor(417.3614, device='cuda:0')
episode: 398 training return: tensor(362.8420, device='cuda:0')
episode: 399 training return: tensor(389.9755, device='cuda:0')
epoch: 100 test_true_pfm: 3375.142584463423 sim_pfm: 366.9661888195939
episode: 400 training return: tensor(416.1485, device='cuda:0')
episode: 401 training return: tensor(416.7690, device='cuda:0')
episode: 402 training return: tensor(453.9021, device='cuda:0')
episode: 403 training return: tensor(407.8918, device='cuda:0')
epoch: 101 test_true_pfm: 3393.7381077394043 sim_pfm: 380.92239557390957
episode: 404 training return: tensor(401.2094, device='cuda:0')
episode: 405 training return: tensor(396.4624, device='cuda:0')
episode: 406 training return: tensor(414.8116, device='cuda:0')
episode: 407 training return: tensor(412.3084, device='cuda:0')
epoch: 102 test_true_pfm: 3445.8675253082206 sim_pfm: 411.71371365577215
episode: 408 training return: tensor(381.8171, device='cuda:0')
episode: 409 training return: tensor(474.1872, device='cuda:0')
episode: 410 training return: tensor(384.6012, device='cuda:0')
episode: 411 training return: tensor(411.6130, device='cuda:0')
epoch: 103 test_true_pfm: 3461.59492985968 sim_pfm: 410.5762365077583
episode: 412 training return: tensor(328.3842, device='cuda:0')
episode: 413 training return: tensor(408.2693, device='cuda:0')
episode: 414 training return: tensor(406.2629, device='cuda:0')
episode: 415 training return: tensor(404.8019, device='cuda:0')
epoch: 104 test_true_pfm: 3435.2898003098157 sim_pfm: 403.4284463687024
episode: 416 training return: tensor(431.4250, device='cuda:0')
episode: 417 training return: tensor(407.5007, device='cuda:0')
episode: 418 training return: tensor(430.4040, device='cuda:0')
episode: 419 training return: tensor(417.1375, device='cuda:0')
epoch: 105 test_true_pfm: 3442.4680556205894 sim_pfm: 401.226582326189
episode: 420 training return: tensor(416.3235, device='cuda:0')
episode: 421 training return: tensor(365.8878, device='cuda:0')
episode: 422 training return: tensor(414.3886, device='cuda:0')
episode: 423 training return: tensor(400.4479, device='cuda:0')
epoch: 106 test_true_pfm: 3411.3876085477973 sim_pfm: 383.56552698898787
episode: 424 training return: tensor(408.8885, device='cuda:0')
episode: 425 training return: tensor(413.2393, device='cuda:0')
episode: 426 training return: tensor(472.7542, device='cuda:0')
episode: 427 training return: tensor(414.3607, device='cuda:0')
epoch: 107 test_true_pfm: 3511.996488766796 sim_pfm: 442.5470105095883
episode: 428 training return: tensor(415.9445, device='cuda:0')
episode: 429 training return: tensor(438.1360, device='cuda:0')
episode: 430 training return: tensor(399.9069, device='cuda:0')
episode: 431 training return: tensor(433.4059, device='cuda:0')
epoch: 108 test_true_pfm: 3487.242921950688 sim_pfm: 431.91900697234087
episode: 432 training return: tensor(440.6307, device='cuda:0')
episode: 433 training return: tensor(418.3319, device='cuda:0')
episode: 434 training return: tensor(431.9396, device='cuda:0')
episode: 435 training return: tensor(412.8736, device='cuda:0')
epoch: 109 test_true_pfm: 3401.188894048703 sim_pfm: 372.8954996133689
episode: 436 training return: tensor(443.4548, device='cuda:0')
episode: 437 training return: tensor(413.0874, device='cuda:0')
episode: 438 training return: tensor(466.2386, device='cuda:0')
episode: 439 training return: tensor(407.5569, device='cuda:0')
epoch: 110 test_true_pfm: 3432.644045796393 sim_pfm: 397.12126739078667
episode: 440 training return: tensor(410.4481, device='cuda:0')
episode: 441 training return: tensor(424.7709, device='cuda:0')
episode: 442 training return: tensor(373.8757, device='cuda:0')
episode: 443 training return: tensor(398.1345, device='cuda:0')
epoch: 111 test_true_pfm: 3458.7046346855627 sim_pfm: 421.16359328789014
episode: 444 training return: tensor(450.4132, device='cuda:0')
episode: 445 training return: tensor(415.0665, device='cuda:0')
episode: 446 training return: tensor(453.5181, device='cuda:0')
episode: 447 training return: tensor(420.5574, device='cuda:0')
epoch: 112 test_true_pfm: 3422.7272151980537 sim_pfm: 392.3733104720013
episode: 448 training return: tensor(407.1668, device='cuda:0')
episode: 449 training return: tensor(387.1995, device='cuda:0')
episode: 450 training return: tensor(437.7213, device='cuda:0')
episode: 451 training return: tensor(461.9871, device='cuda:0')
epoch: 113 test_true_pfm: 3428.4307986999106 sim_pfm: 393.72455587925896
episode: 452 training return: tensor(421.4403, device='cuda:0')
episode: 453 training return: tensor(473.7262, device='cuda:0')
episode: 454 training return: tensor(431.9310, device='cuda:0')
episode: 455 training return: tensor(401.0815, device='cuda:0')
epoch: 114 test_true_pfm: 3472.081598308328 sim_pfm: 446.1117065460033
episode: 456 training return: tensor(329.4606, device='cuda:0')
episode: 457 training return: tensor(418.7557, device='cuda:0')
episode: 458 training return: tensor(401.4703, device='cuda:0')
episode: 459 training return: tensor(497.1354, device='cuda:0')
epoch: 115 test_true_pfm: 3457.498363873027 sim_pfm: 410.28383947883657
episode: 460 training return: tensor(383.1662, device='cuda:0')
episode: 461 training return: tensor(407.1512, device='cuda:0')
episode: 462 training return: tensor(417.7532, device='cuda:0')
episode: 463 training return: tensor(405.4702, device='cuda:0')
epoch: 116 test_true_pfm: 3486.7325432192724 sim_pfm: 442.1126663002651
episode: 464 training return: tensor(415.4329, device='cuda:0')
episode: 465 training return: tensor(407.2174, device='cuda:0')
episode: 466 training return: tensor(415.8764, device='cuda:0')
episode: 467 training return: tensor(397.2230, device='cuda:0')
epoch: 117 test_true_pfm: 3385.2412625805678 sim_pfm: 362.3905732699204
episode: 468 training return: tensor(447.3601, device='cuda:0')
episode: 469 training return: tensor(420.5548, device='cuda:0')
episode: 470 training return: tensor(439.9749, device='cuda:0')
episode: 471 training return: tensor(396.2155, device='cuda:0')
epoch: 118 test_true_pfm: 3479.2765018888226 sim_pfm: 423.28469655954785
episode: 472 training return: tensor(382.9949, device='cuda:0')
episode: 473 training return: tensor(460.7468, device='cuda:0')
episode: 474 training return: tensor(379.0354, device='cuda:0')
episode: 475 training return: tensor(418.5586, device='cuda:0')
epoch: 119 test_true_pfm: 3419.1425093889543 sim_pfm: 385.13474206017173
episode: 476 training return: tensor(437.7615, device='cuda:0')
episode: 477 training return: tensor(435.2113, device='cuda:0')
episode: 478 training return: tensor(414.4279, device='cuda:0')
episode: 479 training return: tensor(422.5624, device='cuda:0')
epoch: 120 test_true_pfm: 3488.333156967303 sim_pfm: 431.8970516316185
episode: 480 training return: tensor(428.3671, device='cuda:0')
episode: 481 training return: tensor(433.9131, device='cuda:0')
episode: 482 training return: tensor(363.3055, device='cuda:0')
episode: 483 training return: tensor(413.7714, device='cuda:0')
epoch: 121 test_true_pfm: 3446.7750731558604 sim_pfm: 405.7010505433136
episode: 484 training return: tensor(365.3680, device='cuda:0')
episode: 485 training return: tensor(368.5506, device='cuda:0')
episode: 486 training return: tensor(413.9678, device='cuda:0')
episode: 487 training return: tensor(398.2577, device='cuda:0')
epoch: 122 test_true_pfm: 3505.050563820934 sim_pfm: 445.79592534337036
episode: 488 training return: tensor(433.5374, device='cuda:0')
episode: 489 training return: tensor(430.8738, device='cuda:0')
episode: 490 training return: tensor(416.0930, device='cuda:0')
episode: 491 training return: tensor(475.9607, device='cuda:0')
epoch: 123 test_true_pfm: 3481.870509750888 sim_pfm: 428.1379795083776
episode: 492 training return: tensor(391.0706, device='cuda:0')
episode: 493 training return: tensor(445.9732, device='cuda:0')
episode: 494 training return: tensor(407.0513, device='cuda:0')
episode: 495 training return: tensor(390.0331, device='cuda:0')
epoch: 124 test_true_pfm: 3429.8810196916916 sim_pfm: 399.0240938609738
episode: 496 training return: tensor(438.5165, device='cuda:0')
episode: 497 training return: tensor(411.6468, device='cuda:0')
episode: 498 training return: tensor(415.6665, device='cuda:0')
episode: 499 training return: tensor(420.9077, device='cuda:0')
epoch: 125 test_true_pfm: 3456.3448589560844 sim_pfm: 414.1007395187141
episode: 500 training return: tensor(418.5944, device='cuda:0')
episode: 501 training return: tensor(387.9270, device='cuda:0')
episode: 502 training return: tensor(410.9289, device='cuda:0')
episode: 503 training return: tensor(424.0255, device='cuda:0')
epoch: 126 test_true_pfm: 3450.8017433554287 sim_pfm: 403.6790730251232
episode: 504 training return: tensor(145.1514, device='cuda:0')
episode: 505 training return: tensor(442.8643, device='cuda:0')
episode: 506 training return: tensor(450.1650, device='cuda:0')
episode: 507 training return: tensor(406.6841, device='cuda:0')
epoch: 127 test_true_pfm: 3449.648289693243 sim_pfm: 419.27444033677847
episode: 508 training return: tensor(413.9654, device='cuda:0')
episode: 509 training return: tensor(433.3271, device='cuda:0')
episode: 510 training return: tensor(413.1615, device='cuda:0')
episode: 511 training return: tensor(411.7018, device='cuda:0')
epoch: 128 test_true_pfm: 3474.49819977816 sim_pfm: 423.7156891061459
episode: 512 training return: tensor(485.5690, device='cuda:0')
episode: 513 training return: tensor(407.8262, device='cuda:0')
episode: 514 training return: tensor(409.0120, device='cuda:0')
episode: 515 training return: tensor(404.9982, device='cuda:0')
epoch: 129 test_true_pfm: 3466.715528031987 sim_pfm: 422.56096807743114
episode: 516 training return: tensor(440.5710, device='cuda:0')
episode: 517 training return: tensor(421.5929, device='cuda:0')
episode: 518 training return: tensor(443.8911, device='cuda:0')
episode: 519 training return: tensor(447.1304, device='cuda:0')
epoch: 130 test_true_pfm: 3467.342140364059 sim_pfm: 426.28973300456227
episode: 520 training return: tensor(408.4000, device='cuda:0')
episode: 521 training return: tensor(394.8304, device='cuda:0')
episode: 522 training return: tensor(411.5322, device='cuda:0')
episode: 523 training return: tensor(403.3029, device='cuda:0')
epoch: 131 test_true_pfm: 3423.213797688865 sim_pfm: 387.0253144573653
episode: 524 training return: tensor(431.4604, device='cuda:0')
episode: 525 training return: tensor(437.9199, device='cuda:0')
episode: 526 training return: tensor(396.0103, device='cuda:0')
episode: 527 training return: tensor(435.5882, device='cuda:0')
epoch: 132 test_true_pfm: 3505.12114632416 sim_pfm: 447.6602099568311
episode: 528 training return: tensor(451.3218, device='cuda:0')
episode: 529 training return: tensor(418.0848, device='cuda:0')
episode: 530 training return: tensor(-614.2585, device='cuda:0')
episode: 531 training return: tensor(456.7261, device='cuda:0')
epoch: 133 test_true_pfm: 3517.5452628561866 sim_pfm: 448.55810983871925
episode: 532 training return: tensor(458.9951, device='cuda:0')
episode: 533 training return: tensor(450.7461, device='cuda:0')
episode: 534 training return: tensor(400.0187, device='cuda:0')
episode: 535 training return: tensor(438.8906, device='cuda:0')
epoch: 134 test_true_pfm: 3441.5883485861145 sim_pfm: 405.4059254825115
episode: 536 training return: tensor(442.7292, device='cuda:0')
episode: 537 training return: tensor(423.9648, device='cuda:0')
episode: 538 training return: tensor(412.8540, device='cuda:0')
episode: 539 training return: tensor(363.5741, device='cuda:0')
epoch: 135 test_true_pfm: 3516.297173761794 sim_pfm: 453.1526783778293
episode: 540 training return: tensor(466.9688, device='cuda:0')
episode: 541 training return: tensor(437.2256, device='cuda:0')
episode: 542 training return: tensor(404.9235, device='cuda:0')
episode: 543 training return: tensor(416.9730, device='cuda:0')
epoch: 136 test_true_pfm: 3480.0566821825983 sim_pfm: 429.20246598592104
episode: 544 training return: tensor(471.0775, device='cuda:0')
episode: 545 training return: tensor(425.3660, device='cuda:0')
episode: 546 training return: tensor(395.7074, device='cuda:0')
episode: 547 training return: tensor(399.8757, device='cuda:0')
epoch: 137 test_true_pfm: 3433.109027693265 sim_pfm: 396.5980385270086
episode: 548 training return: tensor(402.7896, device='cuda:0')
episode: 549 training return: tensor(441.0983, device='cuda:0')
episode: 550 training return: tensor(430.1307, device='cuda:0')
episode: 551 training return: tensor(370.1900, device='cuda:0')
epoch: 138 test_true_pfm: 3479.2536660920855 sim_pfm: 426.5152391987115
episode: 552 training return: tensor(429.0625, device='cuda:0')
episode: 553 training return: tensor(429.8696, device='cuda:0')
episode: 554 training return: tensor(416.1386, device='cuda:0')
episode: 555 training return: tensor(428.5484, device='cuda:0')
epoch: 139 test_true_pfm: 3471.149068496874 sim_pfm: 430.01935154663323
episode: 556 training return: tensor(459.9916, device='cuda:0')
episode: 557 training return: tensor(442.9850, device='cuda:0')
episode: 558 training return: tensor(494.3427, device='cuda:0')
episode: 559 training return: tensor(442.3959, device='cuda:0')
epoch: 140 test_true_pfm: 3501.5860657705052 sim_pfm: 441.80074413873564
episode: 560 training return: tensor(414.7298, device='cuda:0')
episode: 561 training return: tensor(392.1808, device='cuda:0')
episode: 562 training return: tensor(480.2722, device='cuda:0')
episode: 563 training return: tensor(396.4581, device='cuda:0')
epoch: 141 test_true_pfm: 3468.5086134287917 sim_pfm: 422.2144006860811
episode: 564 training return: tensor(395.3873, device='cuda:0')
episode: 565 training return: tensor(427.1443, device='cuda:0')
episode: 566 training return: tensor(388.0385, device='cuda:0')
episode: 567 training return: tensor(435.2757, device='cuda:0')
epoch: 142 test_true_pfm: 3482.9251315844595 sim_pfm: 435.42648633313365
episode: 568 training return: tensor(452.1026, device='cuda:0')
episode: 569 training return: tensor(431.5296, device='cuda:0')
episode: 570 training return: tensor(423.4187, device='cuda:0')
episode: 571 training return: tensor(407.6736, device='cuda:0')
epoch: 143 test_true_pfm: 3522.1614574886057 sim_pfm: 454.11814690438524
episode: 572 training return: tensor(433.6662, device='cuda:0')
episode: 573 training return: tensor(417.7919, device='cuda:0')
episode: 574 training return: tensor(446.9311, device='cuda:0')
episode: 575 training return: tensor(429.6339, device='cuda:0')
epoch: 144 test_true_pfm: 3512.9262558832434 sim_pfm: 455.6957618182253
episode: 576 training return: tensor(420.8599, device='cuda:0')
episode: 577 training return: tensor(401.7537, device='cuda:0')
episode: 578 training return: tensor(444.1075, device='cuda:0')
episode: 579 training return: tensor(416.1738, device='cuda:0')
epoch: 145 test_true_pfm: 3451.0385127017544 sim_pfm: 417.73507518778206
episode: 580 training return: tensor(420.1920, device='cuda:0')
episode: 581 training return: tensor(424.5855, device='cuda:0')
episode: 582 training return: tensor(456.7931, device='cuda:0')
episode: 583 training return: tensor(422.6845, device='cuda:0')
epoch: 146 test_true_pfm: 3516.8061260636737 sim_pfm: 446.0612871434617
episode: 584 training return: tensor(385.6454, device='cuda:0')
episode: 585 training return: tensor(427.2216, device='cuda:0')
episode: 586 training return: tensor(419.3014, device='cuda:0')
episode: 587 training return: tensor(432.3988, device='cuda:0')
epoch: 147 test_true_pfm: 3480.86766892252 sim_pfm: 427.27053352212533
episode: 588 training return: tensor(433.8537, device='cuda:0')
episode: 589 training return: tensor(479.2274, device='cuda:0')
episode: 590 training return: tensor(409.9544, device='cuda:0')
episode: 591 training return: tensor(396.2403, device='cuda:0')
epoch: 148 test_true_pfm: 3435.2633980942105 sim_pfm: 394.013077892984
episode: 592 training return: tensor(398.4263, device='cuda:0')
episode: 593 training return: tensor(441.7854, device='cuda:0')
episode: 594 training return: tensor(394.0880, device='cuda:0')
episode: 595 training return: tensor(414.0988, device='cuda:0')
epoch: 149 test_true_pfm: 3424.954567867497 sim_pfm: 392.64014830535353
episode: 596 training return: tensor(435.2466, device='cuda:0')
episode: 597 training return: tensor(412.5623, device='cuda:0')
episode: 598 training return: tensor(448.5671, device='cuda:0')
episode: 599 training return: tensor(428.9150, device='cuda:0')
epoch: 150 test_true_pfm: 3494.2506982937525 sim_pfm: 441.6228537320664
