['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '0', '--data', '100000', '--regu', '0.05']
2279.9794748540344
episode: 0 training return: tensor(3.5923, device='cuda:0')
episode: 1 training return: tensor(336.2053, device='cuda:0')
episode: 2 training return: tensor(-130.9380, device='cuda:0')
episode: 3 training return: tensor(-149.0627, device='cuda:0')
epoch: 1 test_true_pfm: 2313.2806877950648 sim_pfm: -60.52484033202442
episode: 4 training return: tensor(-46.1831, device='cuda:0')
episode: 5 training return: tensor(78.3559, device='cuda:0')
episode: 6 training return: tensor(300.6882, device='cuda:0')
episode: 7 training return: tensor(385.8887, device='cuda:0')
epoch: 2 test_true_pfm: 2411.637409842943 sim_pfm: 55.048313587156976
episode: 8 training return: tensor(329.7911, device='cuda:0')
episode: 9 training return: tensor(245.9207, device='cuda:0')
episode: 10 training return: tensor(357.4200, device='cuda:0')
episode: 11 training return: tensor(402.5277, device='cuda:0')
epoch: 3 test_true_pfm: 3242.2657738677713 sim_pfm: -66.19621593187912
episode: 12 training return: tensor(-51.5545, device='cuda:0')
episode: 13 training return: tensor(-82.9587, device='cuda:0')
episode: 14 training return: tensor(92.7642, device='cuda:0')
episode: 15 training return: tensor(335.2437, device='cuda:0')
epoch: 4 test_true_pfm: 1841.6746847761322 sim_pfm: 28.50289104753756
episode: 16 training return: tensor(-45.3838, device='cuda:0')
episode: 17 training return: tensor(338.3245, device='cuda:0')
episode: 18 training return: tensor(365.6001, device='cuda:0')
episode: 19 training return: tensor(341.8907, device='cuda:0')
epoch: 5 test_true_pfm: 1775.6932535871144 sim_pfm: 136.32988698460395
episode: 20 training return: tensor(368.4194, device='cuda:0')
episode: 21 training return: tensor(30.0877, device='cuda:0')
episode: 22 training return: tensor(-87.4179, device='cuda:0')
episode: 23 training return: tensor(361.3894, device='cuda:0')
epoch: 6 test_true_pfm: 2367.4330145241643 sim_pfm: 109.49452158965869
episode: 24 training return: tensor(282.3098, device='cuda:0')
episode: 25 training return: tensor(314.1862, device='cuda:0')
episode: 26 training return: tensor(321.5752, device='cuda:0')
episode: 27 training return: tensor(-145.8585, device='cuda:0')
epoch: 7 test_true_pfm: 2803.100721015116 sim_pfm: 38.09624673989796
episode: 28 training return: tensor(-18.6946, device='cuda:0')
episode: 29 training return: tensor(314.2746, device='cuda:0')
episode: 30 training return: tensor(154.3166, device='cuda:0')
episode: 31 training return: tensor(-53.5178, device='cuda:0')
epoch: 8 test_true_pfm: 2277.695836768871 sim_pfm: 46.43406417572987
episode: 32 training return: tensor(312.9752, device='cuda:0')
episode: 33 training return: tensor(304.3089, device='cuda:0')
episode: 34 training return: tensor(340.9377, device='cuda:0')
episode: 35 training return: tensor(-168.7229, device='cuda:0')
epoch: 9 test_true_pfm: 3106.755614356642 sim_pfm: 80.74933398528567
episode: 36 training return: tensor(-231.7851, device='cuda:0')
episode: 37 training return: tensor(-105.4267, device='cuda:0')
episode: 38 training return: tensor(315.4664, device='cuda:0')
episode: 39 training return: tensor(-196.4616, device='cuda:0')
epoch: 10 test_true_pfm: 2179.399396958827 sim_pfm: -1.529214038241965
episode: 40 training return: tensor(-135.8501, device='cuda:0')
episode: 41 training return: tensor(-141.6348, device='cuda:0')
episode: 42 training return: tensor(86.4651, device='cuda:0')
episode: 43 training return: tensor(-99.1233, device='cuda:0')
epoch: 11 test_true_pfm: 2952.1505547275538 sim_pfm: 208.3359758182584
episode: 44 training return: tensor(343.9334, device='cuda:0')
episode: 45 training return: tensor(180.7884, device='cuda:0')
episode: 46 training return: tensor(383.9401, device='cuda:0')
episode: 47 training return: tensor(-83.8709, device='cuda:0')
epoch: 12 test_true_pfm: 3153.713125443103 sim_pfm: 337.0465456119758
episode: 48 training return: tensor(339.4098, device='cuda:0')
episode: 49 training return: tensor(-59.0288, device='cuda:0')
episode: 50 training return: tensor(369.0575, device='cuda:0')
episode: 51 training return: tensor(227.0433, device='cuda:0')
epoch: 13 test_true_pfm: 2934.811759124839 sim_pfm: 47.510908378414264
episode: 52 training return: tensor(284.2327, device='cuda:0')
episode: 53 training return: tensor(-113.6449, device='cuda:0')
episode: 54 training return: tensor(342.9952, device='cuda:0')
episode: 55 training return: tensor(305.8082, device='cuda:0')
epoch: 14 test_true_pfm: 2809.741636643595 sim_pfm: 181.60427192105757
episode: 56 training return: tensor(-352.8342, device='cuda:0')
episode: 57 training return: tensor(263.9200, device='cuda:0')
episode: 58 training return: tensor(-20.2890, device='cuda:0')
episode: 59 training return: tensor(347.3089, device='cuda:0')
epoch: 15 test_true_pfm: 3318.293557930761 sim_pfm: 318.0837353307191
episode: 60 training return: tensor(317.7286, device='cuda:0')
episode: 61 training return: tensor(-329.5072, device='cuda:0')
episode: 62 training return: tensor(3.7502, device='cuda:0')
episode: 63 training return: tensor(3.1277, device='cuda:0')
epoch: 16 test_true_pfm: 2910.369988349616 sim_pfm: 121.38837119934033
episode: 64 training return: tensor(-183.8156, device='cuda:0')
episode: 65 training return: tensor(350.2187, device='cuda:0')
episode: 66 training return: tensor(386.8258, device='cuda:0')
episode: 67 training return: tensor(326.3275, device='cuda:0')
epoch: 17 test_true_pfm: 2508.5125081833 sim_pfm: 67.81220112843828
episode: 68 training return: tensor(146.1131, device='cuda:0')
episode: 69 training return: tensor(-144.3535, device='cuda:0')
episode: 70 training return: tensor(328.7479, device='cuda:0')
episode: 71 training return: tensor(242.4301, device='cuda:0')
epoch: 18 test_true_pfm: 2703.182107017177 sim_pfm: 282.85758227756014
episode: 72 training return: tensor(307.2625, device='cuda:0')
episode: 73 training return: tensor(249.2113, device='cuda:0')
episode: 74 training return: tensor(-294.9550, device='cuda:0')
episode: 75 training return: tensor(5.2996, device='cuda:0')
epoch: 19 test_true_pfm: 3268.484154220132 sim_pfm: 212.7419187159297
episode: 76 training return: tensor(257.5901, device='cuda:0')
episode: 77 training return: tensor(350.6636, device='cuda:0')
episode: 78 training return: tensor(369.0864, device='cuda:0')
episode: 79 training return: tensor(356.6164, device='cuda:0')
epoch: 20 test_true_pfm: 2477.3096201197636 sim_pfm: 315.01533233125036
episode: 80 training return: tensor(219.5323, device='cuda:0')
episode: 81 training return: tensor(-100.2958, device='cuda:0')
episode: 82 training return: tensor(181.3658, device='cuda:0')
episode: 83 training return: tensor(409.4883, device='cuda:0')
epoch: 21 test_true_pfm: 3002.5786017865194 sim_pfm: 294.7847120221995
episode: 84 training return: tensor(327.1889, device='cuda:0')
episode: 85 training return: tensor(-135.0835, device='cuda:0')
episode: 86 training return: tensor(359.7145, device='cuda:0')
episode: 87 training return: tensor(313.1368, device='cuda:0')
epoch: 22 test_true_pfm: 3368.1569904816693 sim_pfm: 178.41645845890162
episode: 88 training return: tensor(-54.1188, device='cuda:0')
episode: 89 training return: tensor(349.6804, device='cuda:0')
episode: 90 training return: tensor(-3.4343, device='cuda:0')
episode: 91 training return: tensor(311.5324, device='cuda:0')
epoch: 23 test_true_pfm: 2993.668229504132 sim_pfm: -48.929028726619435
episode: 92 training return: tensor(338.6971, device='cuda:0')
episode: 93 training return: tensor(-155.8517, device='cuda:0')
episode: 94 training return: tensor(-131.4940, device='cuda:0')
episode: 95 training return: tensor(-113.4358, device='cuda:0')
epoch: 24 test_true_pfm: 2422.776739044862 sim_pfm: 108.21787802925489
episode: 96 training return: tensor(291.6685, device='cuda:0')
episode: 97 training return: tensor(297.3272, device='cuda:0')
episode: 98 training return: tensor(-177.3935, device='cuda:0')
episode: 99 training return: tensor(358.3735, device='cuda:0')
epoch: 25 test_true_pfm: 2993.4858411228256 sim_pfm: -19.741165491344873
episode: 100 training return: tensor(-111.0236, device='cuda:0')
episode: 101 training return: tensor(-22.3389, device='cuda:0')
episode: 102 training return: tensor(280.2749, device='cuda:0')
episode: 103 training return: tensor(-179.2411, device='cuda:0')
epoch: 26 test_true_pfm: 3368.877869605752 sim_pfm: 185.98565831480664
episode: 104 training return: tensor(-399.9736, device='cuda:0')
episode: 105 training return: tensor(332.2429, device='cuda:0')
episode: 106 training return: tensor(200.2453, device='cuda:0')
episode: 107 training return: tensor(112.1697, device='cuda:0')
epoch: 27 test_true_pfm: 2185.3468058968047 sim_pfm: 116.584717275536
episode: 108 training return: tensor(335.1353, device='cuda:0')
episode: 109 training return: tensor(308.8515, device='cuda:0')
episode: 110 training return: tensor(-130.3939, device='cuda:0')
episode: 111 training return: tensor(355.2811, device='cuda:0')
epoch: 28 test_true_pfm: 2960.1012575636832 sim_pfm: 184.89191410843827
episode: 112 training return: tensor(-3.1769, device='cuda:0')
episode: 113 training return: tensor(315.1244, device='cuda:0')
episode: 114 training return: tensor(350.3568, device='cuda:0')
episode: 115 training return: tensor(317.7866, device='cuda:0')
epoch: 29 test_true_pfm: 3011.237802540331 sim_pfm: 341.4674059171036
episode: 116 training return: tensor(-123.6405, device='cuda:0')
episode: 117 training return: tensor(-53.4694, device='cuda:0')
episode: 118 training return: tensor(286.1241, device='cuda:0')
episode: 119 training return: tensor(235.2002, device='cuda:0')
epoch: 30 test_true_pfm: 2758.76149383522 sim_pfm: 195.5717910141684
episode: 120 training return: tensor(295.5733, device='cuda:0')
episode: 121 training return: tensor(394.2043, device='cuda:0')
episode: 122 training return: tensor(-91.2652, device='cuda:0')
episode: 123 training return: tensor(294.7432, device='cuda:0')
epoch: 31 test_true_pfm: 2788.529611129556 sim_pfm: 320.8197767502279
episode: 124 training return: tensor(-133.3701, device='cuda:0')
episode: 125 training return: tensor(-125.1582, device='cuda:0')
episode: 126 training return: tensor(356.9776, device='cuda:0')
episode: 127 training return: tensor(179.9864, device='cuda:0')
epoch: 32 test_true_pfm: 3292.3558230909844 sim_pfm: -12.208456561958883
episode: 128 training return: tensor(29.8406, device='cuda:0')
episode: 129 training return: tensor(192.4203, device='cuda:0')
episode: 130 training return: tensor(343.9260, device='cuda:0')
episode: 131 training return: tensor(-106.2693, device='cuda:0')
epoch: 33 test_true_pfm: 2633.9142381078464 sim_pfm: 268.6176063963988
episode: 132 training return: tensor(-123.0270, device='cuda:0')
episode: 133 training return: tensor(314.8796, device='cuda:0')
episode: 134 training return: tensor(387.9435, device='cuda:0')
episode: 135 training return: tensor(-143.1158, device='cuda:0')
epoch: 34 test_true_pfm: 3096.9498800850633 sim_pfm: 132.01434681348232
episode: 136 training return: tensor(-2.5969, device='cuda:0')
episode: 137 training return: tensor(37.9799, device='cuda:0')
episode: 138 training return: tensor(-118.0598, device='cuda:0')
episode: 139 training return: tensor(394.9986, device='cuda:0')
epoch: 35 test_true_pfm: 3339.4196971322726 sim_pfm: 191.1562171393695
episode: 140 training return: tensor(-195.2174, device='cuda:0')
episode: 141 training return: tensor(339.9619, device='cuda:0')
episode: 142 training return: tensor(368.3115, device='cuda:0')
episode: 143 training return: tensor(346.0276, device='cuda:0')
epoch: 36 test_true_pfm: 2881.607637423198 sim_pfm: 356.47680331847124
episode: 144 training return: tensor(341.4951, device='cuda:0')
episode: 145 training return: tensor(315.2993, device='cuda:0')
episode: 146 training return: tensor(-199.3432, device='cuda:0')
episode: 147 training return: tensor(334.7450, device='cuda:0')
epoch: 37 test_true_pfm: 3045.5073204984583 sim_pfm: 154.20091073127696
episode: 148 training return: tensor(324.8357, device='cuda:0')
episode: 149 training return: tensor(380.3616, device='cuda:0')
episode: 150 training return: tensor(342.9228, device='cuda:0')
episode: 151 training return: tensor(363.0714, device='cuda:0')
epoch: 38 test_true_pfm: 3342.990982547684 sim_pfm: 182.13919966403046
episode: 152 training return: tensor(375.9902, device='cuda:0')
episode: 153 training return: tensor(-122.5612, device='cuda:0')
episode: 154 training return: tensor(295.3134, device='cuda:0')
episode: 155 training return: tensor(312.7445, device='cuda:0')
epoch: 39 test_true_pfm: 3283.334456893693 sim_pfm: 329.9136312294286
episode: 156 training return: tensor(323.9312, device='cuda:0')
episode: 157 training return: tensor(35.6195, device='cuda:0')
episode: 158 training return: tensor(-147.5429, device='cuda:0')
episode: 159 training return: tensor(236.5677, device='cuda:0')
epoch: 40 test_true_pfm: 3343.2395872663405 sim_pfm: 201.97811764324433
episode: 160 training return: tensor(218.8328, device='cuda:0')
episode: 161 training return: tensor(-153.1114, device='cuda:0')
episode: 162 training return: tensor(325.5916, device='cuda:0')
episode: 163 training return: tensor(158.2110, device='cuda:0')
epoch: 41 test_true_pfm: 3401.613131177612 sim_pfm: 261.7578202221387
episode: 164 training return: tensor(351.4949, device='cuda:0')
episode: 165 training return: tensor(-124.2518, device='cuda:0')
episode: 166 training return: tensor(95.7615, device='cuda:0')
episode: 167 training return: tensor(367.3139, device='cuda:0')
epoch: 42 test_true_pfm: 3331.544045505074 sim_pfm: 323.06224344534957
episode: 168 training return: tensor(-62.4348, device='cuda:0')
episode: 169 training return: tensor(338.4180, device='cuda:0')
episode: 170 training return: tensor(323.2957, device='cuda:0')
episode: 171 training return: tensor(-89.3795, device='cuda:0')
epoch: 43 test_true_pfm: 3371.572064333848 sim_pfm: 332.09148361854994
episode: 172 training return: tensor(-177.5049, device='cuda:0')
episode: 173 training return: tensor(-80.7501, device='cuda:0')
episode: 174 training return: tensor(-154.5546, device='cuda:0')
episode: 175 training return: tensor(315.4221, device='cuda:0')
epoch: 44 test_true_pfm: 3328.4160547072574 sim_pfm: 185.76803509741634
episode: 176 training return: tensor(347.6929, device='cuda:0')
episode: 177 training return: tensor(-165.6904, device='cuda:0')
episode: 178 training return: tensor(327.3668, device='cuda:0')
episode: 179 training return: tensor(308.2204, device='cuda:0')
epoch: 45 test_true_pfm: 2869.0187324102462 sim_pfm: 14.82052361399595
episode: 180 training return: tensor(296.0168, device='cuda:0')
episode: 181 training return: tensor(360.0010, device='cuda:0')
episode: 182 training return: tensor(307.0870, device='cuda:0')
episode: 183 training return: tensor(-131.0850, device='cuda:0')
epoch: 46 test_true_pfm: 2728.1787223010624 sim_pfm: 136.21713362659406
episode: 184 training return: tensor(-104.5687, device='cuda:0')
episode: 185 training return: tensor(-60.6214, device='cuda:0')
episode: 186 training return: tensor(387.0875, device='cuda:0')
episode: 187 training return: tensor(320.4920, device='cuda:0')
epoch: 47 test_true_pfm: 2978.9054733309763 sim_pfm: 156.31489775167816
episode: 188 training return: tensor(300.5184, device='cuda:0')
episode: 189 training return: tensor(325.3156, device='cuda:0')
episode: 190 training return: tensor(287.4896, device='cuda:0')
episode: 191 training return: tensor(-81.9087, device='cuda:0')
epoch: 48 test_true_pfm: 2948.797809659555 sim_pfm: 267.0182238749306
episode: 192 training return: tensor(272.5030, device='cuda:0')
episode: 193 training return: tensor(405.5058, device='cuda:0')
episode: 194 training return: tensor(-121.7453, device='cuda:0')
episode: 195 training return: tensor(-85.8481, device='cuda:0')
epoch: 49 test_true_pfm: 3341.0728923707015 sim_pfm: 109.88117241926375
episode: 196 training return: tensor(-213.0442, device='cuda:0')
episode: 197 training return: tensor(352.1343, device='cuda:0')
episode: 198 training return: tensor(251.3648, device='cuda:0')
episode: 199 training return: tensor(314.1165, device='cuda:0')
epoch: 50 test_true_pfm: 3362.0741545809838 sim_pfm: 374.05644979630597
episode: 200 training return: tensor(315.2063, device='cuda:0')
episode: 201 training return: tensor(-197.8451, device='cuda:0')
episode: 202 training return: tensor(170.6149, device='cuda:0')
episode: 203 training return: tensor(53.9070, device='cuda:0')
epoch: 51 test_true_pfm: 3264.146632918217 sim_pfm: 343.69703030320426
episode: 204 training return: tensor(337.9625, device='cuda:0')
episode: 205 training return: tensor(12.2151, device='cuda:0')
episode: 206 training return: tensor(308.0024, device='cuda:0')
episode: 207 training return: tensor(94.3158, device='cuda:0')
epoch: 52 test_true_pfm: 3007.3059320215384 sim_pfm: 317.30606287315214
episode: 208 training return: tensor(63.0552, device='cuda:0')
episode: 209 training return: tensor(237.8261, device='cuda:0')
episode: 210 training return: tensor(-218.5065, device='cuda:0')
episode: 211 training return: tensor(348.8022, device='cuda:0')
epoch: 53 test_true_pfm: 3365.4226042575174 sim_pfm: 30.65157158408935
episode: 212 training return: tensor(332.9616, device='cuda:0')
episode: 213 training return: tensor(-134.9563, device='cuda:0')
episode: 214 training return: tensor(328.9439, device='cuda:0')
episode: 215 training return: tensor(76.0897, device='cuda:0')
epoch: 54 test_true_pfm: 3333.6781341784767 sim_pfm: 191.36680681679476
episode: 216 training return: tensor(303.6717, device='cuda:0')
episode: 217 training return: tensor(-111.9296, device='cuda:0')
episode: 218 training return: tensor(-49.1301, device='cuda:0')
episode: 219 training return: tensor(316.6269, device='cuda:0')
epoch: 55 test_true_pfm: 2718.2411389455833 sim_pfm: 318.6907897269314
episode: 220 training return: tensor(322.3796, device='cuda:0')
episode: 221 training return: tensor(306.1788, device='cuda:0')
episode: 222 training return: tensor(353.4562, device='cuda:0')
episode: 223 training return: tensor(225.4926, device='cuda:0')
epoch: 56 test_true_pfm: 2940.206117502355 sim_pfm: 212.63345943791987
episode: 224 training return: tensor(333.3189, device='cuda:0')
episode: 225 training return: tensor(331.8497, device='cuda:0')
episode: 226 training return: tensor(-147.8819, device='cuda:0')
episode: 227 training return: tensor(292.7785, device='cuda:0')
epoch: 57 test_true_pfm: 3350.4902890122817 sim_pfm: 347.8421666463255
episode: 228 training return: tensor(337.4426, device='cuda:0')
episode: 229 training return: tensor(296.6001, device='cuda:0')
episode: 230 training return: tensor(277.7670, device='cuda:0')
episode: 231 training return: tensor(320.1269, device='cuda:0')
epoch: 58 test_true_pfm: 3321.602603477632 sim_pfm: 205.2439655836497
episode: 232 training return: tensor(320.9379, device='cuda:0')
episode: 233 training return: tensor(-93.3822, device='cuda:0')
episode: 234 training return: tensor(-90.4079, device='cuda:0')
episode: 235 training return: tensor(-182.1690, device='cuda:0')
epoch: 59 test_true_pfm: 3365.330237818717 sim_pfm: 333.9930394476396
episode: 236 training return: tensor(381.5287, device='cuda:0')
episode: 237 training return: tensor(-184.4294, device='cuda:0')
episode: 238 training return: tensor(294.4414, device='cuda:0')
episode: 239 training return: tensor(340.7910, device='cuda:0')
epoch: 60 test_true_pfm: 2840.8832188749893 sim_pfm: 34.84559489658568
episode: 240 training return: tensor(420.0192, device='cuda:0')
episode: 241 training return: tensor(337.0036, device='cuda:0')
episode: 242 training return: tensor(338.0186, device='cuda:0')
episode: 243 training return: tensor(378.2194, device='cuda:0')
epoch: 61 test_true_pfm: 3295.2984696057024 sim_pfm: 292.1048780894295
episode: 244 training return: tensor(304.6716, device='cuda:0')
episode: 245 training return: tensor(311.6092, device='cuda:0')
episode: 246 training return: tensor(-108.5296, device='cuda:0')
episode: 247 training return: tensor(398.7556, device='cuda:0')
epoch: 62 test_true_pfm: 3284.505725715712 sim_pfm: 293.0130950379923
episode: 248 training return: tensor(333.9052, device='cuda:0')
episode: 249 training return: tensor(103.0162, device='cuda:0')
episode: 250 training return: tensor(331.0908, device='cuda:0')
episode: 251 training return: tensor(349.6846, device='cuda:0')
epoch: 63 test_true_pfm: 3397.997299496639 sim_pfm: 339.7839156706953
episode: 252 training return: tensor(342.9639, device='cuda:0')
episode: 253 training return: tensor(349.1565, device='cuda:0')
episode: 254 training return: tensor(316.1429, device='cuda:0')
episode: 255 training return: tensor(165.5352, device='cuda:0')
epoch: 64 test_true_pfm: 3390.841481495959 sim_pfm: 237.5951216223572
episode: 256 training return: tensor(-56.0398, device='cuda:0')
episode: 257 training return: tensor(399.5533, device='cuda:0')
episode: 258 training return: tensor(199.7928, device='cuda:0')
episode: 259 training return: tensor(386.7059, device='cuda:0')
epoch: 65 test_true_pfm: 3352.6401378230644 sim_pfm: 333.0753750888107
episode: 260 training return: tensor(3.1642, device='cuda:0')
episode: 261 training return: tensor(356.3942, device='cuda:0')
episode: 262 training return: tensor(284.9001, device='cuda:0')
episode: 263 training return: tensor(349.4483, device='cuda:0')
epoch: 66 test_true_pfm: 3389.5622992787394 sim_pfm: 341.33154732153827
episode: 264 training return: tensor(289.1621, device='cuda:0')
episode: 265 training return: tensor(360.5557, device='cuda:0')
episode: 266 training return: tensor(305.2665, device='cuda:0')
episode: 267 training return: tensor(340.9727, device='cuda:0')
epoch: 67 test_true_pfm: 3038.019234239449 sim_pfm: 328.1560411012906
episode: 268 training return: tensor(333.6516, device='cuda:0')
episode: 269 training return: tensor(-26.3839, device='cuda:0')
episode: 270 training return: tensor(335.1663, device='cuda:0')
episode: 271 training return: tensor(275.5157, device='cuda:0')
epoch: 68 test_true_pfm: 3309.1832980903855 sim_pfm: 259.7837657590474
episode: 272 training return: tensor(419.8307, device='cuda:0')
episode: 273 training return: tensor(347.7868, device='cuda:0')
episode: 274 training return: tensor(290.0724, device='cuda:0')
episode: 275 training return: tensor(263.1281, device='cuda:0')
epoch: 69 test_true_pfm: 3393.7023118129946 sim_pfm: 297.53528500164003
episode: 276 training return: tensor(339.4928, device='cuda:0')
episode: 277 training return: tensor(316.4934, device='cuda:0')
episode: 278 training return: tensor(34.1342, device='cuda:0')
episode: 279 training return: tensor(447.3151, device='cuda:0')
epoch: 70 test_true_pfm: 2951.4181846058855 sim_pfm: 335.12601517823833
episode: 280 training return: tensor(308.2635, device='cuda:0')
episode: 281 training return: tensor(413.8610, device='cuda:0')
episode: 282 training return: tensor(372.8693, device='cuda:0')
episode: 283 training return: tensor(317.6271, device='cuda:0')
epoch: 71 test_true_pfm: 3325.7124112868914 sim_pfm: 179.24798682584273
episode: 284 training return: tensor(345.8426, device='cuda:0')
episode: 285 training return: tensor(355.0651, device='cuda:0')
episode: 286 training return: tensor(328.4079, device='cuda:0')
episode: 287 training return: tensor(-148.1463, device='cuda:0')
epoch: 72 test_true_pfm: 2915.7443001983847 sim_pfm: 356.48124718026764
episode: 288 training return: tensor(356.5370, device='cuda:0')
episode: 289 training return: tensor(-16.0920, device='cuda:0')
episode: 290 training return: tensor(-31.1508, device='cuda:0')
episode: 291 training return: tensor(351.8120, device='cuda:0')
epoch: 73 test_true_pfm: 2933.3218303192466 sim_pfm: 341.11088398634456
episode: 292 training return: tensor(303.4946, device='cuda:0')
episode: 293 training return: tensor(69.7450, device='cuda:0')
episode: 294 training return: tensor(330.7068, device='cuda:0')
episode: 295 training return: tensor(313.0387, device='cuda:0')
epoch: 74 test_true_pfm: 2845.5971393596515 sim_pfm: 343.8685571931225
episode: 296 training return: tensor(298.2411, device='cuda:0')
episode: 297 training return: tensor(-138.6468, device='cuda:0')
episode: 298 training return: tensor(14.4542, device='cuda:0')
episode: 299 training return: tensor(363.6102, device='cuda:0')
epoch: 75 test_true_pfm: 2789.868912056568 sim_pfm: 345.6923968096962
episode: 300 training return: tensor(333.5257, device='cuda:0')
episode: 301 training return: tensor(374.1784, device='cuda:0')
episode: 302 training return: tensor(344.0138, device='cuda:0')
episode: 303 training return: tensor(334.9926, device='cuda:0')
epoch: 76 test_true_pfm: 3377.0847751021697 sim_pfm: 185.96170570364725
episode: 304 training return: tensor(-162.9370, device='cuda:0')
episode: 305 training return: tensor(294.2627, device='cuda:0')
episode: 306 training return: tensor(348.9740, device='cuda:0')
episode: 307 training return: tensor(348.1307, device='cuda:0')
epoch: 77 test_true_pfm: 2800.958916339168 sim_pfm: 340.5560238361165
episode: 308 training return: tensor(319.9583, device='cuda:0')
episode: 309 training return: tensor(340.3285, device='cuda:0')
episode: 310 training return: tensor(307.7490, device='cuda:0')
episode: 311 training return: tensor(-132.5947, device='cuda:0')
epoch: 78 test_true_pfm: 3004.417738408112 sim_pfm: 355.4440996714014
episode: 312 training return: tensor(328.7950, device='cuda:0')
episode: 313 training return: tensor(-93.0257, device='cuda:0')
episode: 314 training return: tensor(286.2547, device='cuda:0')
episode: 315 training return: tensor(299.1112, device='cuda:0')
epoch: 79 test_true_pfm: 3357.3606108851986 sim_pfm: 191.07990196476263
episode: 316 training return: tensor(359.7393, device='cuda:0')
episode: 317 training return: tensor(-118.7068, device='cuda:0')
episode: 318 training return: tensor(388.0867, device='cuda:0')
episode: 319 training return: tensor(371.3406, device='cuda:0')
epoch: 80 test_true_pfm: 3327.564409944124 sim_pfm: 179.60952673222832
episode: 320 training return: tensor(293.1180, device='cuda:0')
episode: 321 training return: tensor(311.5771, device='cuda:0')
episode: 322 training return: tensor(160.7286, device='cuda:0')
episode: 323 training return: tensor(290.4103, device='cuda:0')
epoch: 81 test_true_pfm: 3346.9731299732243 sim_pfm: 173.92500582851548
episode: 324 training return: tensor(311.5019, device='cuda:0')
episode: 325 training return: tensor(297.6533, device='cuda:0')
episode: 326 training return: tensor(-192.5502, device='cuda:0')
episode: 327 training return: tensor(262.1419, device='cuda:0')
epoch: 82 test_true_pfm: 2931.1941880473337 sim_pfm: 359.67871791898506
episode: 328 training return: tensor(346.8222, device='cuda:0')
episode: 329 training return: tensor(-115.7720, device='cuda:0')
episode: 330 training return: tensor(-112.5978, device='cuda:0')
episode: 331 training return: tensor(392.3413, device='cuda:0')
epoch: 83 test_true_pfm: 2887.259615807887 sim_pfm: 148.15230908664913
episode: 332 training return: tensor(290.0124, device='cuda:0')
episode: 333 training return: tensor(351.1064, device='cuda:0')
episode: 334 training return: tensor(336.7870, device='cuda:0')
episode: 335 training return: tensor(342.1837, device='cuda:0')
epoch: 84 test_true_pfm: 3356.883785083876 sim_pfm: 316.92223855889944
episode: 336 training return: tensor(306.1958, device='cuda:0')
episode: 337 training return: tensor(-100.2209, device='cuda:0')
episode: 338 training return: tensor(315.1794, device='cuda:0')
episode: 339 training return: tensor(328.0889, device='cuda:0')
epoch: 85 test_true_pfm: 3343.702507596508 sim_pfm: 317.19457303125336
episode: 340 training return: tensor(349.1823, device='cuda:0')
episode: 341 training return: tensor(359.5802, device='cuda:0')
episode: 342 training return: tensor(375.2855, device='cuda:0')
episode: 343 training return: tensor(300.3841, device='cuda:0')
epoch: 86 test_true_pfm: 3176.328215982256 sim_pfm: 276.9183239650253
episode: 344 training return: tensor(321.5382, device='cuda:0')
episode: 345 training return: tensor(298.4302, device='cuda:0')
episode: 346 training return: tensor(345.7789, device='cuda:0')
episode: 347 training return: tensor(287.7189, device='cuda:0')
epoch: 87 test_true_pfm: 2677.0376382934314 sim_pfm: 135.0859174457922
episode: 348 training return: tensor(312.5166, device='cuda:0')
episode: 349 training return: tensor(-126.7684, device='cuda:0')
episode: 350 training return: tensor(323.4615, device='cuda:0')
episode: 351 training return: tensor(298.7876, device='cuda:0')
epoch: 88 test_true_pfm: 2525.36545828236 sim_pfm: 340.7179940836698
episode: 352 training return: tensor(358.7863, device='cuda:0')
episode: 353 training return: tensor(338.6431, device='cuda:0')
episode: 354 training return: tensor(344.9421, device='cuda:0')
episode: 355 training return: tensor(323.5709, device='cuda:0')
epoch: 89 test_true_pfm: 3146.799817958103 sim_pfm: 342.7840527140361
episode: 356 training return: tensor(-362.0419, device='cuda:0')
episode: 357 training return: tensor(363.7586, device='cuda:0')
episode: 358 training return: tensor(344.2938, device='cuda:0')
episode: 359 training return: tensor(285.6069, device='cuda:0')
epoch: 90 test_true_pfm: 3010.8232039365143 sim_pfm: 174.46127652489426
episode: 360 training return: tensor(304.2079, device='cuda:0')
episode: 361 training return: tensor(305.0426, device='cuda:0')
episode: 362 training return: tensor(325.5124, device='cuda:0')
episode: 363 training return: tensor(-83.5884, device='cuda:0')
epoch: 91 test_true_pfm: 3373.0458985523073 sim_pfm: 294.1674849841511
episode: 364 training return: tensor(289.5474, device='cuda:0')
episode: 365 training return: tensor(160.2202, device='cuda:0')
episode: 366 training return: tensor(-11.8422, device='cuda:0')
episode: 367 training return: tensor(356.5943, device='cuda:0')
epoch: 92 test_true_pfm: 3345.225652671892 sim_pfm: 184.47300339478534
episode: 368 training return: tensor(316.6977, device='cuda:0')
episode: 369 training return: tensor(349.4604, device='cuda:0')
episode: 370 training return: tensor(-141.0000, device='cuda:0')
episode: 371 training return: tensor(261.1375, device='cuda:0')
epoch: 93 test_true_pfm: 3231.966560387289 sim_pfm: 350.2750885976323
episode: 372 training return: tensor(303.9805, device='cuda:0')
episode: 373 training return: tensor(294.5179, device='cuda:0')
episode: 374 training return: tensor(360.7235, device='cuda:0')
episode: 375 training return: tensor(315.6969, device='cuda:0')
epoch: 94 test_true_pfm: 3329.1652137876204 sim_pfm: 232.35416982977767
episode: 376 training return: tensor(273.2581, device='cuda:0')
episode: 377 training return: tensor(364.0707, device='cuda:0')
episode: 378 training return: tensor(290.1325, device='cuda:0')
episode: 379 training return: tensor(400.7559, device='cuda:0')
epoch: 95 test_true_pfm: 3316.145008204101 sim_pfm: 177.11325980263064
episode: 380 training return: tensor(-64.0350, device='cuda:0')
episode: 381 training return: tensor(-115.0951, device='cuda:0')
episode: 382 training return: tensor(324.3320, device='cuda:0')
episode: 383 training return: tensor(-146.2911, device='cuda:0')
epoch: 96 test_true_pfm: 3341.6409416866777 sim_pfm: 334.96815037399455
episode: 384 training return: tensor(358.7485, device='cuda:0')
episode: 385 training return: tensor(297.7189, device='cuda:0')
episode: 386 training return: tensor(295.6005, device='cuda:0')
episode: 387 training return: tensor(358.6140, device='cuda:0')
epoch: 97 test_true_pfm: 3019.2453065126333 sim_pfm: 318.6445430742945
episode: 388 training return: tensor(301.8042, device='cuda:0')
episode: 389 training return: tensor(324.8174, device='cuda:0')
episode: 390 training return: tensor(325.3314, device='cuda:0')
episode: 391 training return: tensor(294.5823, device='cuda:0')
epoch: 98 test_true_pfm: 2709.5259763366607 sim_pfm: 346.4674007924041
episode: 392 training return: tensor(319.7024, device='cuda:0')
episode: 393 training return: tensor(430.7305, device='cuda:0')
episode: 394 training return: tensor(-23.3767, device='cuda:0')
episode: 395 training return: tensor(369.1349, device='cuda:0')
epoch: 99 test_true_pfm: 3281.5025713984783 sim_pfm: 340.8364955163367
episode: 396 training return: tensor(-203.0814, device='cuda:0')
episode: 397 training return: tensor(344.0925, device='cuda:0')
episode: 398 training return: tensor(342.4073, device='cuda:0')
episode: 399 training return: tensor(285.0108, device='cuda:0')
epoch: 100 test_true_pfm: 2912.5437506509056 sim_pfm: 163.61139238103837
episode: 400 training return: tensor(288.8271, device='cuda:0')
episode: 401 training return: tensor(338.7916, device='cuda:0')
episode: 402 training return: tensor(341.1994, device='cuda:0')
episode: 403 training return: tensor(186.7911, device='cuda:0')
epoch: 101 test_true_pfm: 2744.240145718903 sim_pfm: 351.68094222955796
episode: 404 training return: tensor(310.7822, device='cuda:0')
episode: 405 training return: tensor(424.0814, device='cuda:0')
episode: 406 training return: tensor(294.9650, device='cuda:0')
episode: 407 training return: tensor(325.6650, device='cuda:0')
epoch: 102 test_true_pfm: 3324.2252846302217 sim_pfm: 362.508540516011
episode: 408 training return: tensor(337.8891, device='cuda:0')
episode: 409 training return: tensor(270.3712, device='cuda:0')
episode: 410 training return: tensor(348.6711, device='cuda:0')
episode: 411 training return: tensor(399.9924, device='cuda:0')
epoch: 103 test_true_pfm: 3329.1029394966495 sim_pfm: 274.52549138039467
episode: 412 training return: tensor(293.2208, device='cuda:0')
episode: 413 training return: tensor(362.5049, device='cuda:0')
episode: 414 training return: tensor(363.3380, device='cuda:0')
episode: 415 training return: tensor(384.2530, device='cuda:0')
epoch: 104 test_true_pfm: 3327.9416551920526 sim_pfm: 171.19193545748325
episode: 416 training return: tensor(-170.0447, device='cuda:0')
episode: 417 training return: tensor(303.1246, device='cuda:0')
episode: 418 training return: tensor(263.4774, device='cuda:0')
episode: 419 training return: tensor(345.9363, device='cuda:0')
epoch: 105 test_true_pfm: 3339.6173866699924 sim_pfm: 258.12200309944456
episode: 420 training return: tensor(-268.3896, device='cuda:0')
episode: 421 training return: tensor(329.2841, device='cuda:0')
episode: 422 training return: tensor(-102.2403, device='cuda:0')
episode: 423 training return: tensor(-81.3745, device='cuda:0')
epoch: 106 test_true_pfm: 3337.3981615166936 sim_pfm: 317.67033607341
episode: 424 training return: tensor(322.7683, device='cuda:0')
episode: 425 training return: tensor(377.4784, device='cuda:0')
episode: 426 training return: tensor(365.6777, device='cuda:0')
episode: 427 training return: tensor(332.6115, device='cuda:0')
epoch: 107 test_true_pfm: 3302.7237449139193 sim_pfm: 288.5880952018779
episode: 428 training return: tensor(315.8674, device='cuda:0')
episode: 429 training return: tensor(318.9948, device='cuda:0')
episode: 430 training return: tensor(261.2205, device='cuda:0')
episode: 431 training return: tensor(267.8340, device='cuda:0')
epoch: 108 test_true_pfm: 3352.730934002028 sim_pfm: 137.71620943760112
episode: 432 training return: tensor(248.0561, device='cuda:0')
episode: 433 training return: tensor(314.1565, device='cuda:0')
episode: 434 training return: tensor(381.5005, device='cuda:0')
episode: 435 training return: tensor(335.3282, device='cuda:0')
epoch: 109 test_true_pfm: 2798.322406502865 sim_pfm: 294.0975886779682
episode: 436 training return: tensor(283.4406, device='cuda:0')
episode: 437 training return: tensor(385.7236, device='cuda:0')
episode: 438 training return: tensor(-166.8205, device='cuda:0')
episode: 439 training return: tensor(288.4127, device='cuda:0')
epoch: 110 test_true_pfm: 3330.553961136499 sim_pfm: 338.9556166443993
episode: 440 training return: tensor(357.9595, device='cuda:0')
episode: 441 training return: tensor(-56.6247, device='cuda:0')
episode: 442 training return: tensor(-114.1777, device='cuda:0')
episode: 443 training return: tensor(-115.8209, device='cuda:0')
epoch: 111 test_true_pfm: 2836.3105622048383 sim_pfm: 315.40646403101465
episode: 444 training return: tensor(-128.0722, device='cuda:0')
episode: 445 training return: tensor(311.9019, device='cuda:0')
episode: 446 training return: tensor(323.2596, device='cuda:0')
episode: 447 training return: tensor(326.8522, device='cuda:0')
epoch: 112 test_true_pfm: 3353.1945714766193 sim_pfm: 349.0037869879549
episode: 448 training return: tensor(-60.6350, device='cuda:0')
episode: 449 training return: tensor(333.3735, device='cuda:0')
episode: 450 training return: tensor(326.0653, device='cuda:0')
episode: 451 training return: tensor(326.9454, device='cuda:0')
epoch: 113 test_true_pfm: 3343.3894213998733 sim_pfm: 332.3558831609359
episode: 452 training return: tensor(-158.7318, device='cuda:0')
episode: 453 training return: tensor(36.0872, device='cuda:0')
episode: 454 training return: tensor(355.8944, device='cuda:0')
episode: 455 training return: tensor(224.8020, device='cuda:0')
epoch: 114 test_true_pfm: 3405.462288907603 sim_pfm: 182.68156851904737
episode: 456 training return: tensor(-93.6594, device='cuda:0')
episode: 457 training return: tensor(311.2681, device='cuda:0')
episode: 458 training return: tensor(366.0548, device='cuda:0')
episode: 459 training return: tensor(351.3853, device='cuda:0')
epoch: 115 test_true_pfm: 3218.728060435156 sim_pfm: 318.29908255990205
episode: 460 training return: tensor(326.3632, device='cuda:0')
episode: 461 training return: tensor(276.1084, device='cuda:0')
episode: 462 training return: tensor(-62.6019, device='cuda:0')
episode: 463 training return: tensor(324.8480, device='cuda:0')
epoch: 116 test_true_pfm: 3341.420489246892 sim_pfm: 292.0630427063443
episode: 464 training return: tensor(302.2520, device='cuda:0')
episode: 465 training return: tensor(395.2398, device='cuda:0')
episode: 466 training return: tensor(-123.1871, device='cuda:0')
episode: 467 training return: tensor(335.5115, device='cuda:0')
epoch: 117 test_true_pfm: 2954.036117203594 sim_pfm: 324.92664232320385
episode: 468 training return: tensor(284.6535, device='cuda:0')
episode: 469 training return: tensor(372.0576, device='cuda:0')
episode: 470 training return: tensor(367.4374, device='cuda:0')
episode: 471 training return: tensor(115.4074, device='cuda:0')
epoch: 118 test_true_pfm: 3256.209490423693 sim_pfm: 290.75303287321003
episode: 472 training return: tensor(259.8770, device='cuda:0')
episode: 473 training return: tensor(298.6219, device='cuda:0')
episode: 474 training return: tensor(-141.2211, device='cuda:0')
episode: 475 training return: tensor(349.3230, device='cuda:0')
epoch: 119 test_true_pfm: 3348.25756386269 sim_pfm: 318.1968155570794
episode: 476 training return: tensor(73.2244, device='cuda:0')
episode: 477 training return: tensor(-148.1573, device='cuda:0')
episode: 478 training return: tensor(309.0674, device='cuda:0')
episode: 479 training return: tensor(364.2519, device='cuda:0')
epoch: 120 test_true_pfm: 3339.4741749393984 sim_pfm: 325.4323710041742
episode: 480 training return: tensor(363.8486, device='cuda:0')
episode: 481 training return: tensor(-190.1530, device='cuda:0')
episode: 482 training return: tensor(286.8406, device='cuda:0')
episode: 483 training return: tensor(344.7554, device='cuda:0')
epoch: 121 test_true_pfm: 3162.1597957463564 sim_pfm: 283.94830053666374
episode: 484 training return: tensor(403.7095, device='cuda:0')
episode: 485 training return: tensor(345.5397, device='cuda:0')
episode: 486 training return: tensor(296.2915, device='cuda:0')
episode: 487 training return: tensor(332.0627, device='cuda:0')
epoch: 122 test_true_pfm: 3354.389024139298 sim_pfm: 333.3705970836066
episode: 488 training return: tensor(339.9412, device='cuda:0')
episode: 489 training return: tensor(189.3433, device='cuda:0')
episode: 490 training return: tensor(397.0982, device='cuda:0')
episode: 491 training return: tensor(336.1628, device='cuda:0')
epoch: 123 test_true_pfm: 3403.9007555031853 sim_pfm: 333.05110380155384
episode: 492 training return: tensor(310.8980, device='cuda:0')
episode: 493 training return: tensor(345.5811, device='cuda:0')
episode: 494 training return: tensor(-209.4663, device='cuda:0')
episode: 495 training return: tensor(314.2445, device='cuda:0')
epoch: 124 test_true_pfm: 2962.501726550105 sim_pfm: 343.61319793188403
episode: 496 training return: tensor(321.0525, device='cuda:0')
episode: 497 training return: tensor(337.9612, device='cuda:0')
episode: 498 training return: tensor(343.7223, device='cuda:0')
episode: 499 training return: tensor(334.6251, device='cuda:0')
epoch: 125 test_true_pfm: 3102.193275837122 sim_pfm: 329.0510963055228
episode: 500 training return: tensor(-46.8283, device='cuda:0')
episode: 501 training return: tensor(270.2105, device='cuda:0')
episode: 502 training return: tensor(335.9915, device='cuda:0')
episode: 503 training return: tensor(362.1719, device='cuda:0')
epoch: 126 test_true_pfm: 3023.212081646919 sim_pfm: 343.96717444741324
episode: 504 training return: tensor(373.0209, device='cuda:0')
episode: 505 training return: tensor(359.5563, device='cuda:0')
episode: 506 training return: tensor(268.3005, device='cuda:0')
episode: 507 training return: tensor(-92.1194, device='cuda:0')
epoch: 127 test_true_pfm: 2471.83095073991 sim_pfm: 143.65760906685804
episode: 508 training return: tensor(383.3747, device='cuda:0')
episode: 509 training return: tensor(364.5717, device='cuda:0')
episode: 510 training return: tensor(356.0652, device='cuda:0')
episode: 511 training return: tensor(-151.1394, device='cuda:0')
epoch: 128 test_true_pfm: 2912.053905389032 sim_pfm: 278.23786904115696
episode: 512 training return: tensor(-42.6519, device='cuda:0')
episode: 513 training return: tensor(-65.2307, device='cuda:0')
episode: 514 training return: tensor(-190.3768, device='cuda:0')
episode: 515 training return: tensor(287.9756, device='cuda:0')
epoch: 129 test_true_pfm: 3364.9557905905135 sim_pfm: 166.39702394862738
episode: 516 training return: tensor(346.5096, device='cuda:0')
episode: 517 training return: tensor(316.6798, device='cuda:0')
episode: 518 training return: tensor(263.5106, device='cuda:0')
episode: 519 training return: tensor(303.5146, device='cuda:0')
epoch: 130 test_true_pfm: 3345.6418003251806 sim_pfm: 176.56185670766476
episode: 520 training return: tensor(341.4841, device='cuda:0')
episode: 521 training return: tensor(-96.5945, device='cuda:0')
episode: 522 training return: tensor(306.7773, device='cuda:0')
episode: 523 training return: tensor(11.7686, device='cuda:0')
epoch: 131 test_true_pfm: 3370.0750386279233 sim_pfm: 312.0068219439515
episode: 524 training return: tensor(358.0826, device='cuda:0')
episode: 525 training return: tensor(153.5182, device='cuda:0')
episode: 526 training return: tensor(235.9553, device='cuda:0')
episode: 527 training return: tensor(-171.5595, device='cuda:0')
epoch: 132 test_true_pfm: 3367.0206872106614 sim_pfm: 355.2914445878899
episode: 528 training return: tensor(311.0147, device='cuda:0')
episode: 529 training return: tensor(-78.5571, device='cuda:0')
episode: 530 training return: tensor(389.6036, device='cuda:0')
episode: 531 training return: tensor(393.5719, device='cuda:0')
epoch: 133 test_true_pfm: 3332.2086292847257 sim_pfm: 340.62755197690177
episode: 532 training return: tensor(334.6629, device='cuda:0')
episode: 533 training return: tensor(327.1382, device='cuda:0')
episode: 534 training return: tensor(283.6307, device='cuda:0')
episode: 535 training return: tensor(337.5554, device='cuda:0')
epoch: 134 test_true_pfm: 3370.6048167435074 sim_pfm: 242.57584348286036
episode: 536 training return: tensor(397.7247, device='cuda:0')
episode: 537 training return: tensor(372.8699, device='cuda:0')
episode: 538 training return: tensor(364.5193, device='cuda:0')
episode: 539 training return: tensor(365.3569, device='cuda:0')
epoch: 135 test_true_pfm: 2483.5150635854657 sim_pfm: 372.64578404428903
episode: 540 training return: tensor(341.1909, device='cuda:0')
episode: 541 training return: tensor(316.1440, device='cuda:0')
episode: 542 training return: tensor(349.4281, device='cuda:0')
episode: 543 training return: tensor(306.3896, device='cuda:0')
epoch: 136 test_true_pfm: 3339.3461875095886 sim_pfm: 342.67068674085505
episode: 544 training return: tensor(332.6646, device='cuda:0')
episode: 545 training return: tensor(328.1942, device='cuda:0')
episode: 546 training return: tensor(371.9501, device='cuda:0')
episode: 547 training return: tensor(393.9852, device='cuda:0')
epoch: 137 test_true_pfm: 3347.3266833030593 sim_pfm: 333.18317498720717
episode: 548 training return: tensor(347.0797, device='cuda:0')
episode: 549 training return: tensor(322.6601, device='cuda:0')
episode: 550 training return: tensor(-163.0696, device='cuda:0')
episode: 551 training return: tensor(170.3040, device='cuda:0')
epoch: 138 test_true_pfm: 3370.7188402285774 sim_pfm: 223.16510205938053
episode: 552 training return: tensor(340.0023, device='cuda:0')
episode: 553 training return: tensor(365.8055, device='cuda:0')
episode: 554 training return: tensor(355.6020, device='cuda:0')
episode: 555 training return: tensor(254.8653, device='cuda:0')
epoch: 139 test_true_pfm: 3291.349204137981 sim_pfm: 310.7084866004686
episode: 556 training return: tensor(311.7701, device='cuda:0')
episode: 557 training return: tensor(369.1335, device='cuda:0')
episode: 558 training return: tensor(342.7897, device='cuda:0')
episode: 559 training return: tensor(277.3204, device='cuda:0')
epoch: 140 test_true_pfm: 3286.1877377813835 sim_pfm: 183.99786432577335
episode: 560 training return: tensor(348.3627, device='cuda:0')
episode: 561 training return: tensor(347.4243, device='cuda:0')
episode: 562 training return: tensor(307.4485, device='cuda:0')
episode: 563 training return: tensor(349.9323, device='cuda:0')
epoch: 141 test_true_pfm: 2905.7822989560923 sim_pfm: 210.0501737492838
episode: 564 training return: tensor(326.2177, device='cuda:0')
episode: 565 training return: tensor(380.1270, device='cuda:0')
episode: 566 training return: tensor(-87.7248, device='cuda:0')
episode: 567 training return: tensor(-179.9183, device='cuda:0')
epoch: 142 test_true_pfm: 3350.196936105703 sim_pfm: 316.44779749331065
episode: 568 training return: tensor(118.6989, device='cuda:0')
episode: 569 training return: tensor(325.4124, device='cuda:0')
episode: 570 training return: tensor(388.0540, device='cuda:0')
episode: 571 training return: tensor(349.4375, device='cuda:0')
epoch: 143 test_true_pfm: 3264.941848081278 sim_pfm: 328.1616714009627
episode: 572 training return: tensor(302.7412, device='cuda:0')
episode: 573 training return: tensor(334.5491, device='cuda:0')
episode: 574 training return: tensor(335.5668, device='cuda:0')
episode: 575 training return: tensor(294.9635, device='cuda:0')
epoch: 144 test_true_pfm: 3378.061864795893 sim_pfm: 331.72426546205924
episode: 576 training return: tensor(320.6039, device='cuda:0')
episode: 577 training return: tensor(322.8900, device='cuda:0')
episode: 578 training return: tensor(-143.3799, device='cuda:0')
episode: 579 training return: tensor(133.2095, device='cuda:0')
epoch: 145 test_true_pfm: 3006.7183753117365 sim_pfm: 323.18819718431524
episode: 580 training return: tensor(297.1755, device='cuda:0')
episode: 581 training return: tensor(-37.5764, device='cuda:0')
episode: 582 training return: tensor(348.0155, device='cuda:0')
episode: 583 training return: tensor(272.6015, device='cuda:0')
epoch: 146 test_true_pfm: 3309.9928650737656 sim_pfm: 169.85132112535453
episode: 584 training return: tensor(-85.8738, device='cuda:0')
episode: 585 training return: tensor(-115.8225, device='cuda:0')
episode: 586 training return: tensor(411.2951, device='cuda:0')
episode: 587 training return: tensor(395.0164, device='cuda:0')
epoch: 147 test_true_pfm: 3377.877286930689 sim_pfm: 133.66590846017547
episode: 588 training return: tensor(302.8807, device='cuda:0')
episode: 589 training return: tensor(249.1177, device='cuda:0')
episode: 590 training return: tensor(339.6310, device='cuda:0')
episode: 591 training return: tensor(299.6333, device='cuda:0')
epoch: 148 test_true_pfm: 2656.979490506725 sim_pfm: 336.97911898454186
episode: 592 training return: tensor(364.9843, device='cuda:0')
episode: 593 training return: tensor(1.8960, device='cuda:0')
episode: 594 training return: tensor(262.1873, device='cuda:0')
episode: 595 training return: tensor(307.5332, device='cuda:0')
epoch: 149 test_true_pfm: 3328.916283225941 sim_pfm: 163.01669083644325
episode: 596 training return: tensor(366.7527, device='cuda:0')
episode: 597 training return: tensor(353.6890, device='cuda:0')
episode: 598 training return: tensor(370.1966, device='cuda:0')
episode: 599 training return: tensor(-269.7481, device='cuda:0')
epoch: 150 test_true_pfm: 3362.7755404254476 sim_pfm: 111.41279583532985
