['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'baseline', '--traj', 'expert', '--seed', '1']
episode: 0 training return: tensor(198.1868, device='cuda:0')
episode: 1 training return: tensor(243.0744, device='cuda:0')
episode: 2 training return: tensor(41.1833, device='cuda:0')
episode: 3 training return: tensor(53.8175, device='cuda:0')
epoch: 1 test_true_pfm: 10.519023378996566 sim_pfm: 149.46410691277705
episode: 4 training return: tensor(289.3562, device='cuda:0')
episode: 5 training return: tensor(199.4621, device='cuda:0')
episode: 6 training return: tensor(160.5215, device='cuda:0')
episode: 7 training return: tensor(184.2429, device='cuda:0')
epoch: 2 test_true_pfm: 22.89782781351129 sim_pfm: 141.74339323937892
episode: 8 training return: tensor(200.4198, device='cuda:0')
episode: 9 training return: tensor(182.5431, device='cuda:0')
episode: 10 training return: tensor(145.1057, device='cuda:0')
episode: 11 training return: tensor(130.4647, device='cuda:0')
epoch: 3 test_true_pfm: -2.4770967954352145 sim_pfm: 149.46311671212896
episode: 12 training return: tensor(144.5926, device='cuda:0')
episode: 13 training return: tensor(91.3025, device='cuda:0')
episode: 14 training return: tensor(121.6427, device='cuda:0')
episode: 15 training return: tensor(226.1826, device='cuda:0')
epoch: 4 test_true_pfm: 36.448754073299824 sim_pfm: 225.0329811938107
episode: 16 training return: tensor(213.4124, device='cuda:0')
episode: 17 training return: tensor(97.8348, device='cuda:0')
episode: 18 training return: tensor(237.9047, device='cuda:0')
episode: 19 training return: tensor(250.6632, device='cuda:0')
epoch: 5 test_true_pfm: 38.975093389878325 sim_pfm: 245.371587899531
episode: 20 training return: tensor(185.1417, device='cuda:0')
episode: 21 training return: tensor(229.8115, device='cuda:0')
episode: 22 training return: tensor(257.6593, device='cuda:0')
episode: 23 training return: tensor(227.9440, device='cuda:0')
epoch: 6 test_true_pfm: 35.52336938925382 sim_pfm: 317.0899681299925
episode: 24 training return: tensor(191.4976, device='cuda:0')
episode: 25 training return: tensor(192.8607, device='cuda:0')
episode: 26 training return: tensor(170.8472, device='cuda:0')
episode: 27 training return: tensor(210.4720, device='cuda:0')
epoch: 7 test_true_pfm: 39.567200628239185 sim_pfm: 262.14433914572
episode: 28 training return: tensor(225.7091, device='cuda:0')
episode: 29 training return: tensor(286.2615, device='cuda:0')
episode: 30 training return: tensor(219.0100, device='cuda:0')
episode: 31 training return: tensor(286.1497, device='cuda:0')
epoch: 8 test_true_pfm: 42.24025737234043 sim_pfm: 257.1364375308156
episode: 32 training return: tensor(147.6502, device='cuda:0')
episode: 33 training return: tensor(273.9667, device='cuda:0')
episode: 34 training return: tensor(216.2954, device='cuda:0')
episode: 35 training return: tensor(240.6268, device='cuda:0')
epoch: 9 test_true_pfm: 39.45687402562308 sim_pfm: 235.48778401985766
episode: 36 training return: tensor(209.8880, device='cuda:0')
episode: 37 training return: tensor(236.5955, device='cuda:0')
episode: 38 training return: tensor(261.7061, device='cuda:0')
episode: 39 training return: tensor(219.5649, device='cuda:0')
epoch: 10 test_true_pfm: 23.966255543477967 sim_pfm: 224.81264119388652
episode: 40 training return: tensor(235.6646, device='cuda:0')
episode: 41 training return: tensor(277.2747, device='cuda:0')
episode: 42 training return: tensor(259.0376, device='cuda:0')
episode: 43 training return: tensor(290.4585, device='cuda:0')
epoch: 11 test_true_pfm: -0.384936190632028 sim_pfm: 304.86730814790354
episode: 44 training return: tensor(317.5360, device='cuda:0')
episode: 45 training return: tensor(289.2553, device='cuda:0')
episode: 46 training return: tensor(260.8417, device='cuda:0')
episode: 47 training return: tensor(276.9043, device='cuda:0')
epoch: 12 test_true_pfm: 38.633160181175995 sim_pfm: 210.40337071623654
episode: 48 training return: tensor(239.0901, device='cuda:0')
episode: 49 training return: tensor(-27.1811, device='cuda:0')
episode: 50 training return: tensor(-552.3728, device='cuda:0')
episode: 51 training return: tensor(49.3970, device='cuda:0')
epoch: 13 test_true_pfm: 18.711496905222344 sim_pfm: -822.479666454345
episode: 52 training return: tensor(71.9812, device='cuda:0')
episode: 53 training return: tensor(304.3337, device='cuda:0')
episode: 54 training return: tensor(237.4705, device='cuda:0')
episode: 55 training return: tensor(198.1212, device='cuda:0')
epoch: 14 test_true_pfm: 10.013420942586384 sim_pfm: 254.5835486748023
episode: 56 training return: tensor(278.5319, device='cuda:0')
episode: 57 training return: tensor(494.0129, device='cuda:0')
episode: 58 training return: tensor(263.8143, device='cuda:0')
episode: 59 training return: tensor(549.7540, device='cuda:0')
epoch: 15 test_true_pfm: 19.23993618362485 sim_pfm: 508.9437874181196
episode: 60 training return: tensor(587.9338, device='cuda:0')
episode: 61 training return: tensor(542.4877, device='cuda:0')
episode: 62 training return: tensor(435.3366, device='cuda:0')
episode: 63 training return: tensor(457.7571, device='cuda:0')
epoch: 16 test_true_pfm: -31.932861674220412 sim_pfm: 415.0337990162778
episode: 64 training return: tensor(503.8747, device='cuda:0')
episode: 65 training return: tensor(555.2084, device='cuda:0')
episode: 66 training return: tensor(576.0107, device='cuda:0')
episode: 67 training return: tensor(592.9035, device='cuda:0')
epoch: 17 test_true_pfm: 16.563633790482804 sim_pfm: 587.6475486746058
episode: 68 training return: tensor(623.9484, device='cuda:0')
episode: 69 training return: tensor(579.1161, device='cuda:0')
episode: 70 training return: tensor(616.9179, device='cuda:0')
episode: 71 training return: tensor(558.9825, device='cuda:0')
epoch: 18 test_true_pfm: 50.52698755428745 sim_pfm: 563.5933031904976
episode: 72 training return: tensor(546.8118, device='cuda:0')
episode: 73 training return: tensor(474.8848, device='cuda:0')
episode: 74 training return: tensor(100.2955, device='cuda:0')
episode: 75 training return: tensor(126.7420, device='cuda:0')
epoch: 19 test_true_pfm: 16.09219292096594 sim_pfm: 156.6653496898187
episode: 76 training return: tensor(159.3401, device='cuda:0')
episode: 77 training return: tensor(22.9130, device='cuda:0')
episode: 78 training return: tensor(180.1734, device='cuda:0')
episode: 79 training return: tensor(231.4599, device='cuda:0')
epoch: 20 test_true_pfm: 20.778715563507784 sim_pfm: 217.64830574095248
episode: 80 training return: tensor(199.4532, device='cuda:0')
episode: 81 training return: tensor(239.6429, device='cuda:0')
episode: 82 training return: tensor(202.6275, device='cuda:0')
episode: 83 training return: tensor(262.3459, device='cuda:0')
epoch: 21 test_true_pfm: 25.584240260864856 sim_pfm: 367.2729012561962
episode: 84 training return: tensor(311.8940, device='cuda:0')
episode: 85 training return: tensor(288.6278, device='cuda:0')
episode: 86 training return: tensor(291.0181, device='cuda:0')
episode: 87 training return: tensor(321.9381, device='cuda:0')
epoch: 22 test_true_pfm: 29.717493720513563 sim_pfm: 569.8901673589135
episode: 88 training return: tensor(578.8507, device='cuda:0')
episode: 89 training return: tensor(474.2055, device='cuda:0')
episode: 90 training return: tensor(534.2426, device='cuda:0')
episode: 91 training return: tensor(535.3572, device='cuda:0')
epoch: 23 test_true_pfm: 47.19129558610798 sim_pfm: 568.6565192508976
episode: 92 training return: tensor(566.3620, device='cuda:0')
episode: 93 training return: tensor(502.7222, device='cuda:0')
episode: 94 training return: tensor(476.7642, device='cuda:0')
episode: 95 training return: tensor(106.7062, device='cuda:0')
epoch: 24 test_true_pfm: 13.739545162235672 sim_pfm: 115.29658919237554
episode: 96 training return: tensor(304.0862, device='cuda:0')
episode: 97 training return: tensor(713.5568, device='cuda:0')
episode: 98 training return: tensor(565.8578, device='cuda:0')
episode: 99 training return: tensor(565.2695, device='cuda:0')
epoch: 25 test_true_pfm: 4.42564825459273 sim_pfm: 471.2627951734932
episode: 100 training return: tensor(611.5844, device='cuda:0')
episode: 101 training return: tensor(565.0277, device='cuda:0')
episode: 102 training return: tensor(609.1613, device='cuda:0')
episode: 103 training return: tensor(585.0405, device='cuda:0')
epoch: 26 test_true_pfm: -2.029032099505949 sim_pfm: 553.4608464351855
episode: 104 training return: tensor(688.7512, device='cuda:0')
episode: 105 training return: tensor(587.5919, device='cuda:0')
episode: 106 training return: tensor(613.5784, device='cuda:0')
episode: 107 training return: tensor(614.1302, device='cuda:0')
epoch: 27 test_true_pfm: 24.42916762859721 sim_pfm: 277.21979313464834
episode: 108 training return: tensor(647.4073, device='cuda:0')
episode: 109 training return: tensor(144.6556, device='cuda:0')
episode: 110 training return: tensor(301.1023, device='cuda:0')
episode: 111 training return: tensor(289.8934, device='cuda:0')
epoch: 28 test_true_pfm: 41.208256771692064 sim_pfm: 661.9554747670888
episode: 112 training return: tensor(660.6926, device='cuda:0')
episode: 113 training return: tensor(326.1813, device='cuda:0')
episode: 114 training return: tensor(385.3180, device='cuda:0')
episode: 115 training return: tensor(600.4791, device='cuda:0')
epoch: 29 test_true_pfm: 30.038307401519557 sim_pfm: 585.0646700506331
episode: 116 training return: tensor(583.9545, device='cuda:0')
episode: 117 training return: tensor(595.7263, device='cuda:0')
episode: 118 training return: tensor(603.6277, device='cuda:0')
episode: 119 training return: tensor(613.4946, device='cuda:0')
epoch: 30 test_true_pfm: 36.826709540074084 sim_pfm: 631.1141259905417
episode: 120 training return: tensor(593.8825, device='cuda:0')
episode: 121 training return: tensor(637.2563, device='cuda:0')
episode: 122 training return: tensor(586.3607, device='cuda:0')
episode: 123 training return: tensor(575.9872, device='cuda:0')
epoch: 31 test_true_pfm: 44.91379526838766 sim_pfm: 610.8598535604775
episode: 124 training return: tensor(609.8827, device='cuda:0')
episode: 125 training return: tensor(582.9404, device='cuda:0')
episode: 126 training return: tensor(598.3422, device='cuda:0')
episode: 127 training return: tensor(605.9639, device='cuda:0')
epoch: 32 test_true_pfm: 40.49763297052843 sim_pfm: 639.5039822241291
episode: 128 training return: tensor(408.7646, device='cuda:0')
episode: 129 training return: tensor(293.3757, device='cuda:0')
episode: 130 training return: tensor(275.8876, device='cuda:0')
episode: 131 training return: tensor(223.1157, device='cuda:0')
epoch: 33 test_true_pfm: 32.19294051658297 sim_pfm: 422.4797823699191
episode: 132 training return: tensor(281.7864, device='cuda:0')
episode: 133 training return: tensor(308.5030, device='cuda:0')
episode: 134 training return: tensor(324.9632, device='cuda:0')
episode: 135 training return: tensor(544.8357, device='cuda:0')
epoch: 34 test_true_pfm: 32.92805682839969 sim_pfm: 677.7377459494746
episode: 136 training return: tensor(616.9130, device='cuda:0')
episode: 137 training return: tensor(406.3010, device='cuda:0')
episode: 138 training return: tensor(658.2838, device='cuda:0')
episode: 139 training return: tensor(608.5442, device='cuda:0')
epoch: 35 test_true_pfm: 34.584201424654715 sim_pfm: 609.4919713733718
episode: 140 training return: tensor(772.1701, device='cuda:0')
episode: 141 training return: tensor(662.0981, device='cuda:0')
episode: 142 training return: tensor(626.4141, device='cuda:0')
episode: 143 training return: tensor(664.0978, device='cuda:0')
epoch: 36 test_true_pfm: 38.69717536883988 sim_pfm: 627.5787050371407
episode: 144 training return: tensor(581.6746, device='cuda:0')
episode: 145 training return: tensor(594.6144, device='cuda:0')
episode: 146 training return: tensor(627.2374, device='cuda:0')
episode: 147 training return: tensor(671.5425, device='cuda:0')
epoch: 37 test_true_pfm: 26.62289455846501 sim_pfm: 344.9923667645082
episode: 148 training return: tensor(219.1894, device='cuda:0')
episode: 149 training return: tensor(615.9493, device='cuda:0')
episode: 150 training return: tensor(449.7090, device='cuda:0')
episode: 151 training return: tensor(501.5476, device='cuda:0')
epoch: 38 test_true_pfm: -29.273911305747554 sim_pfm: 489.33569871990477
episode: 152 training return: tensor(326.3281, device='cuda:0')
episode: 153 training return: tensor(242.6444, device='cuda:0')
episode: 154 training return: tensor(439.9633, device='cuda:0')
episode: 155 training return: tensor(337.2732, device='cuda:0')
epoch: 39 test_true_pfm: 33.87541671460981 sim_pfm: 509.7600277639925
episode: 156 training return: tensor(640.9698, device='cuda:0')
episode: 157 training return: tensor(625.8790, device='cuda:0')
episode: 158 training return: tensor(426.9764, device='cuda:0')
episode: 159 training return: tensor(694.9320, device='cuda:0')
epoch: 40 test_true_pfm: 32.55390439141788 sim_pfm: 705.5887213176117
episode: 160 training return: tensor(692.8973, device='cuda:0')
episode: 161 training return: tensor(360.1054, device='cuda:0')
episode: 162 training return: tensor(637.5709, device='cuda:0')
episode: 163 training return: tensor(634.2341, device='cuda:0')
epoch: 41 test_true_pfm: 21.733874729690267 sim_pfm: 676.6264380723238
episode: 164 training return: tensor(669.4197, device='cuda:0')
episode: 165 training return: tensor(607.8287, device='cuda:0')
episode: 166 training return: tensor(734.3586, device='cuda:0')
episode: 167 training return: tensor(451.0942, device='cuda:0')
epoch: 42 test_true_pfm: 32.10569280616592 sim_pfm: 663.4589686617022
episode: 168 training return: tensor(619.7859, device='cuda:0')
episode: 169 training return: tensor(614.6111, device='cuda:0')
episode: 170 training return: tensor(611.6741, device='cuda:0')
episode: 171 training return: tensor(651.6072, device='cuda:0')
epoch: 43 test_true_pfm: 23.144659805507683 sim_pfm: 642.7052822956582
episode: 172 training return: tensor(654.3043, device='cuda:0')
episode: 173 training return: tensor(576.3121, device='cuda:0')
episode: 174 training return: tensor(597.9033, device='cuda:0')
episode: 175 training return: tensor(611.5741, device='cuda:0')
epoch: 44 test_true_pfm: 2.6448646169609566 sim_pfm: 774.0963287958875
episode: 176 training return: tensor(589.8536, device='cuda:0')
episode: 177 training return: tensor(361.4816, device='cuda:0')
episode: 178 training return: tensor(237.6506, device='cuda:0')
episode: 179 training return: tensor(209.3701, device='cuda:0')
epoch: 45 test_true_pfm: 32.600244597027725 sim_pfm: 336.685920548439
episode: 180 training return: tensor(246.1324, device='cuda:0')
episode: 181 training return: tensor(274.6387, device='cuda:0')
episode: 182 training return: tensor(293.4279, device='cuda:0')
episode: 183 training return: tensor(303.6980, device='cuda:0')
epoch: 46 test_true_pfm: 34.43507251781235 sim_pfm: 386.476411126554
episode: 184 training return: tensor(171.8270, device='cuda:0')
episode: 185 training return: tensor(146.9023, device='cuda:0')
episode: 186 training return: tensor(360.8233, device='cuda:0')
episode: 187 training return: tensor(617.7256, device='cuda:0')
epoch: 47 test_true_pfm: 39.88536746933761 sim_pfm: 601.6104455219581
episode: 188 training return: tensor(632.3677, device='cuda:0')
episode: 189 training return: tensor(543.8098, device='cuda:0')
episode: 190 training return: tensor(655.2274, device='cuda:0')
episode: 191 training return: tensor(624.5532, device='cuda:0')
epoch: 48 test_true_pfm: 41.733012736328234 sim_pfm: 646.9586418545805
episode: 192 training return: tensor(630.2957, device='cuda:0')
episode: 193 training return: tensor(609.3419, device='cuda:0')
episode: 194 training return: tensor(632.1361, device='cuda:0')
episode: 195 training return: tensor(622.0487, device='cuda:0')
epoch: 49 test_true_pfm: 24.027688861009494 sim_pfm: 630.5050719032995
episode: 196 training return: tensor(478.4352, device='cuda:0')
episode: 197 training return: tensor(680.9349, device='cuda:0')
episode: 198 training return: tensor(653.3671, device='cuda:0')
episode: 199 training return: tensor(625.9647, device='cuda:0')
epoch: 50 test_true_pfm: 40.092745704613264 sim_pfm: 622.4619177183137
episode: 200 training return: tensor(638.1194, device='cuda:0')
episode: 201 training return: tensor(607.8060, device='cuda:0')
episode: 202 training return: tensor(612.5581, device='cuda:0')
episode: 203 training return: tensor(703.0791, device='cuda:0')
epoch: 51 test_true_pfm: 25.565254746015448 sim_pfm: 683.8384270602837
episode: 204 training return: tensor(656.9025, device='cuda:0')
episode: 205 training return: tensor(646.1151, device='cuda:0')
episode: 206 training return: tensor(610.8527, device='cuda:0')
episode: 207 training return: tensor(679.1806, device='cuda:0')
epoch: 52 test_true_pfm: 42.932173517316286 sim_pfm: 595.2856537908316
episode: 208 training return: tensor(593.7505, device='cuda:0')
episode: 209 training return: tensor(752.3409, device='cuda:0')
episode: 210 training return: tensor(766.7771, device='cuda:0')
episode: 211 training return: tensor(657.9609, device='cuda:0')
epoch: 53 test_true_pfm: 35.7145321406781 sim_pfm: 620.5860304288566
episode: 212 training return: tensor(708.0035, device='cuda:0')
episode: 213 training return: tensor(366.9389, device='cuda:0')
episode: 214 training return: tensor(749.5001, device='cuda:0')
episode: 215 training return: tensor(761.0824, device='cuda:0')
epoch: 54 test_true_pfm: 20.16334802047617 sim_pfm: 796.7255756415427
episode: 216 training return: tensor(527.0309, device='cuda:0')
episode: 217 training return: tensor(749.7779, device='cuda:0')
episode: 218 training return: tensor(741.3857, device='cuda:0')
episode: 219 training return: tensor(767.1396, device='cuda:0')
epoch: 55 test_true_pfm: 21.050071926297147 sim_pfm: 716.2042300679489
episode: 220 training return: tensor(753.3282, device='cuda:0')
episode: 221 training return: tensor(728.9625, device='cuda:0')
episode: 222 training return: tensor(704.5108, device='cuda:0')
episode: 223 training return: tensor(603.3999, device='cuda:0')
epoch: 56 test_true_pfm: 7.12844362405796 sim_pfm: 784.3810605988838
episode: 224 training return: tensor(688.2036, device='cuda:0')
episode: 225 training return: tensor(703.7047, device='cuda:0')
episode: 226 training return: tensor(746.7563, device='cuda:0')
episode: 227 training return: tensor(767.4376, device='cuda:0')
epoch: 57 test_true_pfm: 23.67072258447761 sim_pfm: 296.80110287048154
episode: 228 training return: tensor(525.8668, device='cuda:0')
episode: 229 training return: tensor(384.7758, device='cuda:0')
episode: 230 training return: tensor(441.9483, device='cuda:0')
episode: 231 training return: tensor(746.2859, device='cuda:0')
epoch: 58 test_true_pfm: 13.653626540441334 sim_pfm: 615.0791893170215
episode: 232 training return: tensor(675.9664, device='cuda:0')
episode: 233 training return: tensor(669.1065, device='cuda:0')
episode: 234 training return: tensor(763.5908, device='cuda:0')
episode: 235 training return: tensor(732.7630, device='cuda:0')
epoch: 59 test_true_pfm: 12.256117505901415 sim_pfm: 763.3267731696367
episode: 236 training return: tensor(782.2190, device='cuda:0')
episode: 237 training return: tensor(785.5413, device='cuda:0')
episode: 238 training return: tensor(768.8835, device='cuda:0')
episode: 239 training return: tensor(780.3093, device='cuda:0')
epoch: 60 test_true_pfm: 27.19580982173543 sim_pfm: 806.3204507278278
episode: 240 training return: tensor(774.3185, device='cuda:0')
episode: 241 training return: tensor(764.3699, device='cuda:0')
episode: 242 training return: tensor(810.8763, device='cuda:0')
episode: 243 training return: tensor(793.5329, device='cuda:0')
epoch: 61 test_true_pfm: 25.299254833047975 sim_pfm: 794.3306061863899
episode: 244 training return: tensor(807.6935, device='cuda:0')
episode: 245 training return: tensor(753.0854, device='cuda:0')
episode: 246 training return: tensor(761.9679, device='cuda:0')
episode: 247 training return: tensor(762.5422, device='cuda:0')
epoch: 62 test_true_pfm: 10.273298432903665 sim_pfm: 748.6712776184082
episode: 248 training return: tensor(744.1821, device='cuda:0')
episode: 249 training return: tensor(789.5032, device='cuda:0')
episode: 250 training return: tensor(770.8835, device='cuda:0')
episode: 251 training return: tensor(749.5328, device='cuda:0')
epoch: 63 test_true_pfm: 12.640698299907742 sim_pfm: 783.1573149168864
episode: 252 training return: tensor(730.1628, device='cuda:0')
episode: 253 training return: tensor(808.8815, device='cuda:0')
episode: 254 training return: tensor(775.2786, device='cuda:0')
episode: 255 training return: tensor(751.1670, device='cuda:0')
epoch: 64 test_true_pfm: 8.544015058513532 sim_pfm: 785.9579814910888
episode: 256 training return: tensor(783.2867, device='cuda:0')
episode: 257 training return: tensor(792.6860, device='cuda:0')
episode: 258 training return: tensor(784.5269, device='cuda:0')
episode: 259 training return: tensor(694.5660, device='cuda:0')
epoch: 65 test_true_pfm: 21.099171956308318 sim_pfm: 788.9408515773714
episode: 260 training return: tensor(786.0140, device='cuda:0')
episode: 261 training return: tensor(767.7214, device='cuda:0')
episode: 262 training return: tensor(745.7358, device='cuda:0')
episode: 263 training return: tensor(746.7089, device='cuda:0')
epoch: 66 test_true_pfm: 19.162424174543123 sim_pfm: 795.0281689244323
episode: 264 training return: tensor(789.0837, device='cuda:0')
episode: 265 training return: tensor(774.7957, device='cuda:0')
episode: 266 training return: tensor(755.7223, device='cuda:0')
episode: 267 training return: tensor(765.5344, device='cuda:0')
epoch: 67 test_true_pfm: 21.344187734012017 sim_pfm: 772.3730539031327
episode: 268 training return: tensor(795.6820, device='cuda:0')
episode: 269 training return: tensor(788.7662, device='cuda:0')
episode: 270 training return: tensor(767.6577, device='cuda:0')
episode: 271 training return: tensor(769.9680, device='cuda:0')
epoch: 68 test_true_pfm: 23.105159467679698 sim_pfm: 788.7706391364336
episode: 272 training return: tensor(752.3951, device='cuda:0')
episode: 273 training return: tensor(779.6821, device='cuda:0')
episode: 274 training return: tensor(736.0388, device='cuda:0')
episode: 275 training return: tensor(785.2304, device='cuda:0')
epoch: 69 test_true_pfm: 18.253468916161914 sim_pfm: 782.1996746949851
episode: 276 training return: tensor(791.5826, device='cuda:0')
episode: 277 training return: tensor(764.8160, device='cuda:0')
episode: 278 training return: tensor(800.1253, device='cuda:0')
episode: 279 training return: tensor(777.8903, device='cuda:0')
epoch: 70 test_true_pfm: 17.442221927024857 sim_pfm: 796.2877565373201
episode: 280 training return: tensor(767.4814, device='cuda:0')
episode: 281 training return: tensor(765.2721, device='cuda:0')
episode: 282 training return: tensor(802.0438, device='cuda:0')
episode: 283 training return: tensor(801.9979, device='cuda:0')
epoch: 71 test_true_pfm: 20.828153669973293 sim_pfm: 786.8101038679481
episode: 284 training return: tensor(765.2853, device='cuda:0')
episode: 285 training return: tensor(684.7478, device='cuda:0')
episode: 286 training return: tensor(800.8212, device='cuda:0')
episode: 287 training return: tensor(761.8987, device='cuda:0')
epoch: 72 test_true_pfm: 22.05403638163969 sim_pfm: 784.1380752908066
episode: 288 training return: tensor(791.8483, device='cuda:0')
episode: 289 training return: tensor(782.1618, device='cuda:0')
episode: 290 training return: tensor(789.6832, device='cuda:0')
episode: 291 training return: tensor(790.6920, device='cuda:0')
epoch: 73 test_true_pfm: 24.622337576526945 sim_pfm: 785.6682285441086
episode: 292 training return: tensor(780.1513, device='cuda:0')
episode: 293 training return: tensor(759.1369, device='cuda:0')
episode: 294 training return: tensor(744.6811, device='cuda:0')
episode: 295 training return: tensor(737.2676, device='cuda:0')
epoch: 74 test_true_pfm: 16.35334054818572 sim_pfm: 795.0827789781615
episode: 296 training return: tensor(771.1455, device='cuda:0')
episode: 297 training return: tensor(790.6202, device='cuda:0')
episode: 298 training return: tensor(761.3454, device='cuda:0')
episode: 299 training return: tensor(765.2746, device='cuda:0')
epoch: 75 test_true_pfm: 27.174338000599665 sim_pfm: 823.2569533914327
episode: 300 training return: tensor(768.8428, device='cuda:0')
episode: 301 training return: tensor(816.9941, device='cuda:0')
episode: 302 training return: tensor(800.9631, device='cuda:0')
episode: 303 training return: tensor(784.0878, device='cuda:0')
epoch: 76 test_true_pfm: 15.46745329560873 sim_pfm: 772.0309682516381
episode: 304 training return: tensor(797.8903, device='cuda:0')
episode: 305 training return: tensor(771.6300, device='cuda:0')
episode: 306 training return: tensor(712.1288, device='cuda:0')
episode: 307 training return: tensor(715.7423, device='cuda:0')
epoch: 77 test_true_pfm: 9.306526908227287 sim_pfm: 786.18169952631
episode: 308 training return: tensor(753.8786, device='cuda:0')
episode: 309 training return: tensor(753.0660, device='cuda:0')
episode: 310 training return: tensor(802.8558, device='cuda:0')
episode: 311 training return: tensor(807.0474, device='cuda:0')
epoch: 78 test_true_pfm: 27.13996641768723 sim_pfm: 813.6947277897968
episode: 312 training return: tensor(776.2894, device='cuda:0')
episode: 313 training return: tensor(786.9073, device='cuda:0')
episode: 314 training return: tensor(782.4731, device='cuda:0')
episode: 315 training return: tensor(802.5486, device='cuda:0')
epoch: 79 test_true_pfm: 17.3827170909966 sim_pfm: 783.2580179844051
episode: 316 training return: tensor(800.6500, device='cuda:0')
episode: 317 training return: tensor(758.4045, device='cuda:0')
episode: 318 training return: tensor(791.9670, device='cuda:0')
episode: 319 training return: tensor(808.9638, device='cuda:0')
epoch: 80 test_true_pfm: 17.840499085503886 sim_pfm: 789.1119052059948
episode: 320 training return: tensor(807.7593, device='cuda:0')
episode: 321 training return: tensor(801.0880, device='cuda:0')
episode: 322 training return: tensor(780.3842, device='cuda:0')
episode: 323 training return: tensor(816.3651, device='cuda:0')
epoch: 81 test_true_pfm: 21.994933244837714 sim_pfm: 802.5035651870072
episode: 324 training return: tensor(786.9637, device='cuda:0')
episode: 325 training return: tensor(795.6357, device='cuda:0')
episode: 326 training return: tensor(794.7696, device='cuda:0')
episode: 327 training return: tensor(805.1219, device='cuda:0')
epoch: 82 test_true_pfm: 22.99462431199339 sim_pfm: 785.0147089954465
episode: 328 training return: tensor(808.0150, device='cuda:0')
episode: 329 training return: tensor(792.9881, device='cuda:0')
episode: 330 training return: tensor(791.2078, device='cuda:0')
episode: 331 training return: tensor(795.3644, device='cuda:0')
epoch: 83 test_true_pfm: 17.87841304964698 sim_pfm: 788.2650269806385
episode: 332 training return: tensor(725.6983, device='cuda:0')
episode: 333 training return: tensor(739.0388, device='cuda:0')
episode: 334 training return: tensor(626.7606, device='cuda:0')
episode: 335 training return: tensor(676.6119, device='cuda:0')
epoch: 84 test_true_pfm: 24.013363059906155 sim_pfm: 694.4674275538885
episode: 336 training return: tensor(623.4295, device='cuda:0')
episode: 337 training return: tensor(657.0905, device='cuda:0')
episode: 338 training return: tensor(787.9529, device='cuda:0')
episode: 339 training return: tensor(811.9836, device='cuda:0')
epoch: 85 test_true_pfm: 12.24447256647987 sim_pfm: 812.7440900616348
episode: 340 training return: tensor(779.1084, device='cuda:0')
episode: 341 training return: tensor(824.3508, device='cuda:0')
episode: 342 training return: tensor(795.2733, device='cuda:0')
episode: 343 training return: tensor(791.5972, device='cuda:0')
epoch: 86 test_true_pfm: 16.38217387343364 sim_pfm: 826.6405042994768
episode: 344 training return: tensor(812.6448, device='cuda:0')
episode: 345 training return: tensor(747.5359, device='cuda:0')
episode: 346 training return: tensor(809.3672, device='cuda:0')
episode: 347 training return: tensor(752.5986, device='cuda:0')
epoch: 87 test_true_pfm: 13.87151153520935 sim_pfm: 824.0243092335761
episode: 348 training return: tensor(772.4321, device='cuda:0')
episode: 349 training return: tensor(799.2581, device='cuda:0')
episode: 350 training return: tensor(690.3445, device='cuda:0')
episode: 351 training return: tensor(782.7169, device='cuda:0')
epoch: 88 test_true_pfm: 14.13495021609828 sim_pfm: 759.330860941857
episode: 352 training return: tensor(787.9658, device='cuda:0')
episode: 353 training return: tensor(800.2250, device='cuda:0')
episode: 354 training return: tensor(775.9506, device='cuda:0')
episode: 355 training return: tensor(773.2400, device='cuda:0')
epoch: 89 test_true_pfm: 22.823649763528113 sim_pfm: 820.9643797529745
episode: 356 training return: tensor(826.5267, device='cuda:0')
episode: 357 training return: tensor(816.4779, device='cuda:0')
episode: 358 training return: tensor(804.4462, device='cuda:0')
episode: 359 training return: tensor(801.1552, device='cuda:0')
epoch: 90 test_true_pfm: 40.526067508895856 sim_pfm: 827.749604247883
episode: 360 training return: tensor(827.8862, device='cuda:0')
episode: 361 training return: tensor(833.5135, device='cuda:0')
episode: 362 training return: tensor(798.1000, device='cuda:0')
episode: 363 training return: tensor(791.6689, device='cuda:0')
epoch: 91 test_true_pfm: 35.26692913547572 sim_pfm: 838.1392266079783
episode: 364 training return: tensor(789.5504, device='cuda:0')
episode: 365 training return: tensor(795.6344, device='cuda:0')
episode: 366 training return: tensor(782.6028, device='cuda:0')
episode: 367 training return: tensor(776.7313, device='cuda:0')
epoch: 92 test_true_pfm: 4.0714468680489 sim_pfm: 813.1086557418108
episode: 368 training return: tensor(799.1761, device='cuda:0')
episode: 369 training return: tensor(810.3193, device='cuda:0')
episode: 370 training return: tensor(791.8806, device='cuda:0')
episode: 371 training return: tensor(809.1340, device='cuda:0')
epoch: 93 test_true_pfm: -2.329184945135086 sim_pfm: 811.5337295701262
episode: 372 training return: tensor(767.9478, device='cuda:0')
episode: 373 training return: tensor(787.6346, device='cuda:0')
episode: 374 training return: tensor(735.8397, device='cuda:0')
episode: 375 training return: tensor(560.2207, device='cuda:0')
epoch: 94 test_true_pfm: 0.2958915520525549 sim_pfm: 564.6034726971178
episode: 376 training return: tensor(601.8746, device='cuda:0')
episode: 377 training return: tensor(640.8825, device='cuda:0')
episode: 378 training return: tensor(631.2599, device='cuda:0')
episode: 379 training return: tensor(616.1862, device='cuda:0')
epoch: 95 test_true_pfm: 1.8204284488403033 sim_pfm: 636.7235106121167
episode: 380 training return: tensor(620.2485, device='cuda:0')
episode: 381 training return: tensor(450.0880, device='cuda:0')
episode: 382 training return: tensor(453.6634, device='cuda:0')
episode: 383 training return: tensor(613.4327, device='cuda:0')
epoch: 96 test_true_pfm: -21.755189005976824 sim_pfm: 671.778123277449
episode: 384 training return: tensor(699.0313, device='cuda:0')
episode: 385 training return: tensor(693.8756, device='cuda:0')
episode: 386 training return: tensor(719.6293, device='cuda:0')
episode: 387 training return: tensor(705.7365, device='cuda:0')
epoch: 97 test_true_pfm: 3.139102193829598 sim_pfm: 738.3356291285716
episode: 388 training return: tensor(727.4495, device='cuda:0')
episode: 389 training return: tensor(762.2716, device='cuda:0')
episode: 390 training return: tensor(793.6345, device='cuda:0')
episode: 391 training return: tensor(821.2291, device='cuda:0')
epoch: 98 test_true_pfm: -3.8830455113273574 sim_pfm: 834.9774482473731
episode: 392 training return: tensor(785.2325, device='cuda:0')
episode: 393 training return: tensor(774.9656, device='cuda:0')
episode: 394 training return: tensor(773.7130, device='cuda:0')
episode: 395 training return: tensor(270.9389, device='cuda:0')
epoch: 99 test_true_pfm: 18.989293624322528 sim_pfm: 479.3215375066851
episode: 396 training return: tensor(626.4495, device='cuda:0')
episode: 397 training return: tensor(607.8837, device='cuda:0')
episode: 398 training return: tensor(637.6930, device='cuda:0')
episode: 399 training return: tensor(589.0241, device='cuda:0')
epoch: 100 test_true_pfm: 17.4580006707755 sim_pfm: 631.0850895568728
episode: 400 training return: tensor(616.3913, device='cuda:0')
episode: 401 training return: tensor(628.7746, device='cuda:0')
episode: 402 training return: tensor(632.3757, device='cuda:0')
episode: 403 training return: tensor(641.6610, device='cuda:0')
epoch: 101 test_true_pfm: -62.3945096891661 sim_pfm: 623.8120931826066
episode: 404 training return: tensor(654.7933, device='cuda:0')
episode: 405 training return: tensor(630.8630, device='cuda:0')
episode: 406 training return: tensor(617.9082, device='cuda:0')
episode: 407 training return: tensor(610.0002, device='cuda:0')
epoch: 102 test_true_pfm: 0.34372717661478874 sim_pfm: 642.8592500881757
episode: 408 training return: tensor(648.1357, device='cuda:0')
episode: 409 training return: tensor(592.3961, device='cuda:0')
episode: 410 training return: tensor(789.4992, device='cuda:0')
episode: 411 training return: tensor(809.6453, device='cuda:0')
epoch: 103 test_true_pfm: -1.8180883942253885 sim_pfm: 793.02061988418
episode: 412 training return: tensor(790.0677, device='cuda:0')
episode: 413 training return: tensor(782.5335, device='cuda:0')
episode: 414 training return: tensor(789.6434, device='cuda:0')
episode: 415 training return: tensor(802.2432, device='cuda:0')
epoch: 104 test_true_pfm: -5.26384393829571 sim_pfm: 779.6678959291428
episode: 416 training return: tensor(792.9613, device='cuda:0')
episode: 417 training return: tensor(791.8347, device='cuda:0')
episode: 418 training return: tensor(686.9291, device='cuda:0')
episode: 419 training return: tensor(766.3311, device='cuda:0')
epoch: 105 test_true_pfm: -0.6584359268143825 sim_pfm: 789.5703786115162
episode: 420 training return: tensor(740.5604, device='cuda:0')
episode: 421 training return: tensor(784.7365, device='cuda:0')
episode: 422 training return: tensor(789.0831, device='cuda:0')
episode: 423 training return: tensor(770.0869, device='cuda:0')
epoch: 106 test_true_pfm: -0.3869055772823597 sim_pfm: 786.7131868183612
episode: 424 training return: tensor(807.1957, device='cuda:0')
episode: 425 training return: tensor(817.0170, device='cuda:0')
episode: 426 training return: tensor(818.4022, device='cuda:0')
episode: 427 training return: tensor(815.1393, device='cuda:0')
epoch: 107 test_true_pfm: 1.8372134017960964 sim_pfm: 799.5318586412817
episode: 428 training return: tensor(802.5024, device='cuda:0')
episode: 429 training return: tensor(809.5618, device='cuda:0')
episode: 430 training return: tensor(799.5342, device='cuda:0')
episode: 431 training return: tensor(791.4515, device='cuda:0')
epoch: 108 test_true_pfm: 10.993387947274874 sim_pfm: 805.9025769546628
episode: 432 training return: tensor(810.9942, device='cuda:0')
episode: 433 training return: tensor(828.9610, device='cuda:0')
episode: 434 training return: tensor(822.0394, device='cuda:0')
episode: 435 training return: tensor(819.7108, device='cuda:0')
epoch: 109 test_true_pfm: 1.5720698005240663 sim_pfm: 794.4573810607195
episode: 436 training return: tensor(806.1046, device='cuda:0')
episode: 437 training return: tensor(802.2071, device='cuda:0')
episode: 438 training return: tensor(763.5438, device='cuda:0')
episode: 439 training return: tensor(833.0277, device='cuda:0')
epoch: 110 test_true_pfm: -7.582766301927288 sim_pfm: 803.6331325262785
episode: 440 training return: tensor(765.4320, device='cuda:0')
episode: 441 training return: tensor(786.0724, device='cuda:0')
episode: 442 training return: tensor(809.7885, device='cuda:0')
episode: 443 training return: tensor(796.1998, device='cuda:0')
epoch: 111 test_true_pfm: 24.929482164198213 sim_pfm: 800.319625807181
episode: 444 training return: tensor(802.2172, device='cuda:0')
episode: 445 training return: tensor(827.7632, device='cuda:0')
episode: 446 training return: tensor(804.9156, device='cuda:0')
episode: 447 training return: tensor(816.2193, device='cuda:0')
epoch: 112 test_true_pfm: 14.14354364304658 sim_pfm: 848.3533199727535
episode: 448 training return: tensor(797.8030, device='cuda:0')
episode: 449 training return: tensor(826.7784, device='cuda:0')
episode: 450 training return: tensor(765.8351, device='cuda:0')
episode: 451 training return: tensor(782.2112, device='cuda:0')
epoch: 113 test_true_pfm: -3.6567596327002967 sim_pfm: 814.3155169174075
episode: 452 training return: tensor(826.4747, device='cuda:0')
episode: 453 training return: tensor(843.9934, device='cuda:0')
episode: 454 training return: tensor(839.5844, device='cuda:0')
episode: 455 training return: tensor(849.4857, device='cuda:0')
epoch: 114 test_true_pfm: 32.122956186732765 sim_pfm: 847.5365498459898
episode: 456 training return: tensor(793.5747, device='cuda:0')
episode: 457 training return: tensor(788.2585, device='cuda:0')
episode: 458 training return: tensor(835.3707, device='cuda:0')
episode: 459 training return: tensor(816.9514, device='cuda:0')
epoch: 115 test_true_pfm: 25.11111557061891 sim_pfm: 783.553214842081
episode: 460 training return: tensor(794.7283, device='cuda:0')
episode: 461 training return: tensor(805.0980, device='cuda:0')
episode: 462 training return: tensor(797.4904, device='cuda:0')
episode: 463 training return: tensor(813.8303, device='cuda:0')
epoch: 116 test_true_pfm: -4.597634904985755 sim_pfm: 808.6775146909058
episode: 464 training return: tensor(804.9543, device='cuda:0')
episode: 465 training return: tensor(792.8333, device='cuda:0')
episode: 466 training return: tensor(784.7983, device='cuda:0')
episode: 467 training return: tensor(673.1194, device='cuda:0')
epoch: 117 test_true_pfm: 8.168380818615613 sim_pfm: 751.3990772001446
episode: 468 training return: tensor(681.0984, device='cuda:0')
episode: 469 training return: tensor(652.5679, device='cuda:0')
episode: 470 training return: tensor(800.6069, device='cuda:0')
episode: 471 training return: tensor(780.3468, device='cuda:0')
epoch: 118 test_true_pfm: 15.148455317647702 sim_pfm: 824.3734440576285
episode: 472 training return: tensor(836.2291, device='cuda:0')
episode: 473 training return: tensor(665.4714, device='cuda:0')
episode: 474 training return: tensor(809.7214, device='cuda:0')
episode: 475 training return: tensor(838.6260, device='cuda:0')
epoch: 119 test_true_pfm: 49.83934427037287 sim_pfm: 820.4914342920238
episode: 476 training return: tensor(807.0986, device='cuda:0')
episode: 477 training return: tensor(823.0297, device='cuda:0')
episode: 478 training return: tensor(712.7510, device='cuda:0')
episode: 479 training return: tensor(768.9594, device='cuda:0')
epoch: 120 test_true_pfm: -5.1831268230846375 sim_pfm: 759.4614391911775
episode: 480 training return: tensor(783.9482, device='cuda:0')
episode: 481 training return: tensor(826.5624, device='cuda:0')
episode: 482 training return: tensor(856.4858, device='cuda:0')
episode: 483 training return: tensor(858.6577, device='cuda:0')
epoch: 121 test_true_pfm: 87.37610779822697 sim_pfm: 850.7102574370801
episode: 484 training return: tensor(849.0737, device='cuda:0')
episode: 485 training return: tensor(840.9169, device='cuda:0')
episode: 486 training return: tensor(834.9048, device='cuda:0')
episode: 487 training return: tensor(859.9250, device='cuda:0')
epoch: 122 test_true_pfm: 51.36450964583855 sim_pfm: 785.9867252483964
episode: 488 training return: tensor(858.6027, device='cuda:0')
episode: 489 training return: tensor(825.3870, device='cuda:0')
episode: 490 training return: tensor(827.9309, device='cuda:0')
episode: 491 training return: tensor(850.0823, device='cuda:0')
epoch: 123 test_true_pfm: 3.5237446621194666 sim_pfm: 818.4358042221517
episode: 492 training return: tensor(851.5275, device='cuda:0')
episode: 493 training return: tensor(837.2109, device='cuda:0')
episode: 494 training return: tensor(854.8561, device='cuda:0')
episode: 495 training return: tensor(855.2338, device='cuda:0')
epoch: 124 test_true_pfm: 54.66854319738403 sim_pfm: 840.462752109766
episode: 496 training return: tensor(866.4443, device='cuda:0')
episode: 497 training return: tensor(873.9180, device='cuda:0')
episode: 498 training return: tensor(857.6605, device='cuda:0')
episode: 499 training return: tensor(860.3221, device='cuda:0')
epoch: 125 test_true_pfm: 41.32207669879879 sim_pfm: 856.4987110719085
episode: 500 training return: tensor(849.1372, device='cuda:0')
episode: 501 training return: tensor(859.8547, device='cuda:0')
episode: 502 training return: tensor(851.6193, device='cuda:0')
episode: 503 training return: tensor(866.0853, device='cuda:0')
epoch: 126 test_true_pfm: 64.25288861705926 sim_pfm: 841.4062104812823
episode: 504 training return: tensor(872.3447, device='cuda:0')
episode: 505 training return: tensor(857.1626, device='cuda:0')
episode: 506 training return: tensor(855.8792, device='cuda:0')
episode: 507 training return: tensor(847.4858, device='cuda:0')
epoch: 127 test_true_pfm: 72.73771350351156 sim_pfm: 865.0090783050284
episode: 508 training return: tensor(853.7252, device='cuda:0')
episode: 509 training return: tensor(856.6807, device='cuda:0')
episode: 510 training return: tensor(851.9893, device='cuda:0')
episode: 511 training return: tensor(843.7357, device='cuda:0')
epoch: 128 test_true_pfm: 49.984207053129765 sim_pfm: 825.5664289625362
episode: 512 training return: tensor(863.6157, device='cuda:0')
episode: 513 training return: tensor(856.1584, device='cuda:0')
episode: 514 training return: tensor(856.9280, device='cuda:0')
episode: 515 training return: tensor(862.1265, device='cuda:0')
epoch: 129 test_true_pfm: 76.94292371271203 sim_pfm: 855.9052100185305
episode: 516 training return: tensor(873.8928, device='cuda:0')
episode: 517 training return: tensor(871.5706, device='cuda:0')
episode: 518 training return: tensor(886.6891, device='cuda:0')
episode: 519 training return: tensor(883.6608, device='cuda:0')
epoch: 130 test_true_pfm: 140.49844616022057 sim_pfm: 872.0102812674828
episode: 520 training return: tensor(896.3516, device='cuda:0')
episode: 521 training return: tensor(880.2448, device='cuda:0')
episode: 522 training return: tensor(880.2559, device='cuda:0')
episode: 523 training return: tensor(859.6441, device='cuda:0')
epoch: 131 test_true_pfm: 114.01137760115196 sim_pfm: 889.6185231328011
episode: 524 training return: tensor(862.0233, device='cuda:0')
episode: 525 training return: tensor(870.3911, device='cuda:0')
episode: 526 training return: tensor(880.8626, device='cuda:0')
episode: 527 training return: tensor(880.1110, device='cuda:0')
epoch: 132 test_true_pfm: 76.35566237230867 sim_pfm: 877.3374300785363
episode: 528 training return: tensor(896.5184, device='cuda:0')
episode: 529 training return: tensor(874.7445, device='cuda:0')
episode: 530 training return: tensor(866.1370, device='cuda:0')
episode: 531 training return: tensor(887.3159, device='cuda:0')
epoch: 133 test_true_pfm: 98.88283868008007 sim_pfm: 874.0992513443343
episode: 532 training return: tensor(881.2465, device='cuda:0')
episode: 533 training return: tensor(869.7787, device='cuda:0')
episode: 534 training return: tensor(894.7024, device='cuda:0')
episode: 535 training return: tensor(883.5157, device='cuda:0')
epoch: 134 test_true_pfm: 114.18648407390306 sim_pfm: 890.3294514998794
episode: 536 training return: tensor(882.2040, device='cuda:0')
episode: 537 training return: tensor(890.5104, device='cuda:0')
episode: 538 training return: tensor(890.0745, device='cuda:0')
episode: 539 training return: tensor(896.4386, device='cuda:0')
epoch: 135 test_true_pfm: 169.29049722108635 sim_pfm: 895.5285309938714
episode: 540 training return: tensor(881.4987, device='cuda:0')
episode: 541 training return: tensor(894.7178, device='cuda:0')
episode: 542 training return: tensor(895.8823, device='cuda:0')
episode: 543 training return: tensor(881.4322, device='cuda:0')
epoch: 136 test_true_pfm: 156.64381550074162 sim_pfm: 880.3049788778648
episode: 544 training return: tensor(862.2064, device='cuda:0')
episode: 545 training return: tensor(873.9814, device='cuda:0')
episode: 546 training return: tensor(849.0315, device='cuda:0')
episode: 547 training return: tensor(846.4080, device='cuda:0')
epoch: 137 test_true_pfm: 151.74502069215976 sim_pfm: 878.1966354799457
episode: 548 training return: tensor(864.7557, device='cuda:0')
episode: 549 training return: tensor(881.5840, device='cuda:0')
episode: 550 training return: tensor(839.4644, device='cuda:0')
episode: 551 training return: tensor(881.6040, device='cuda:0')
epoch: 138 test_true_pfm: 169.48068120344016 sim_pfm: 900.5900825906544
episode: 552 training return: tensor(878.3089, device='cuda:0')
episode: 553 training return: tensor(868.9369, device='cuda:0')
episode: 554 training return: tensor(898.4849, device='cuda:0')
episode: 555 training return: tensor(900.5777, device='cuda:0')
epoch: 139 test_true_pfm: 179.9228398458115 sim_pfm: 909.4879152655601
episode: 556 training return: tensor(899.9926, device='cuda:0')
episode: 557 training return: tensor(886.9959, device='cuda:0')
episode: 558 training return: tensor(832.6782, device='cuda:0')
episode: 559 training return: tensor(754.9108, device='cuda:0')
epoch: 140 test_true_pfm: 146.507415064642 sim_pfm: 849.1938507443759
episode: 560 training return: tensor(857.5818, device='cuda:0')
episode: 561 training return: tensor(871.3239, device='cuda:0')
episode: 562 training return: tensor(856.6442, device='cuda:0')
episode: 563 training return: tensor(839.6823, device='cuda:0')
epoch: 141 test_true_pfm: 88.89226629883511 sim_pfm: 832.0880330434069
episode: 564 training return: tensor(843.5286, device='cuda:0')
episode: 565 training return: tensor(850.8630, device='cuda:0')
episode: 566 training return: tensor(859.7661, device='cuda:0')
episode: 567 training return: tensor(858.1699, device='cuda:0')
epoch: 142 test_true_pfm: 87.53330119293705 sim_pfm: 860.6156397520565
episode: 568 training return: tensor(871.9380, device='cuda:0')
episode: 569 training return: tensor(874.4796, device='cuda:0')
episode: 570 training return: tensor(893.3929, device='cuda:0')
episode: 571 training return: tensor(862.6869, device='cuda:0')
epoch: 143 test_true_pfm: 171.02258486707228 sim_pfm: 885.4703000120819
episode: 572 training return: tensor(854.2855, device='cuda:0')
episode: 573 training return: tensor(855.4679, device='cuda:0')
episode: 574 training return: tensor(858.6778, device='cuda:0')
episode: 575 training return: tensor(836.9562, device='cuda:0')
epoch: 144 test_true_pfm: 109.47698329216205 sim_pfm: 791.6629128396744
episode: 576 training return: tensor(817.5578, device='cuda:0')
episode: 577 training return: tensor(882.5247, device='cuda:0')
episode: 578 training return: tensor(883.7481, device='cuda:0')
episode: 579 training return: tensor(866.7904, device='cuda:0')
epoch: 145 test_true_pfm: 188.42405822520217 sim_pfm: 839.3043752093799
episode: 580 training return: tensor(849.5975, device='cuda:0')
episode: 581 training return: tensor(888.8876, device='cuda:0')
episode: 582 training return: tensor(907.9614, device='cuda:0')
episode: 583 training return: tensor(886.1932, device='cuda:0')
epoch: 146 test_true_pfm: 190.24398101868397 sim_pfm: 920.500204096362
episode: 584 training return: tensor(905.5827, device='cuda:0')
episode: 585 training return: tensor(915.8817, device='cuda:0')
episode: 586 training return: tensor(888.3333, device='cuda:0')
episode: 587 training return: tensor(889.4154, device='cuda:0')
epoch: 147 test_true_pfm: 219.69440850715736 sim_pfm: 888.3643926367164
episode: 588 training return: tensor(876.4678, device='cuda:0')
episode: 589 training return: tensor(899.3156, device='cuda:0')
episode: 590 training return: tensor(905.1815, device='cuda:0')
episode: 591 training return: tensor(912.7641, device='cuda:0')
epoch: 148 test_true_pfm: 114.69803468502649 sim_pfm: 900.0882985785604
episode: 592 training return: tensor(912.3228, device='cuda:0')
episode: 593 training return: tensor(899.4177, device='cuda:0')
episode: 594 training return: tensor(898.5045, device='cuda:0')
episode: 595 training return: tensor(916.8415, device='cuda:0')
epoch: 149 test_true_pfm: 185.4378883367131 sim_pfm: 921.7640031857416
episode: 596 training return: tensor(915.5859, device='cuda:0')
episode: 597 training return: tensor(914.6382, device='cuda:0')
episode: 598 training return: tensor(916.8923, device='cuda:0')
episode: 599 training return: tensor(917.3251, device='cuda:0')
epoch: 150 test_true_pfm: 116.6821256387475 sim_pfm: 900.7502925336361
