['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'baseline', '--traj', 'medium', '--seed', '4']
episode: 0 training return: tensor(-749.1620, device='cuda:0')
episode: 1 training return: tensor(-314.3844, device='cuda:0')
episode: 2 training return: tensor(-321.2352, device='cuda:0')
episode: 3 training return: tensor(-327.5023, device='cuda:0')
epoch: 1 test_true_pfm: 256.9372601200467 sim_pfm: -850.1247500389194
episode: 4 training return: tensor(-229.7190, device='cuda:0')
episode: 5 training return: tensor(-777.3631, device='cuda:0')
episode: 6 training return: tensor(116.0677, device='cuda:0')
episode: 7 training return: tensor(-590.9968, device='cuda:0')
epoch: 2 test_true_pfm: 192.97942007384097 sim_pfm: -748.2483885758169
episode: 8 training return: tensor(-99.4263, device='cuda:0')
episode: 9 training return: tensor(-95.1364, device='cuda:0')
episode: 10 training return: tensor(-606.5738, device='cuda:0')
episode: 11 training return: tensor(-539.7759, device='cuda:0')
epoch: 3 test_true_pfm: 294.54742635140747 sim_pfm: -515.1248038061118
episode: 12 training return: tensor(-758.2702, device='cuda:0')
episode: 13 training return: tensor(-504.1412, device='cuda:0')
episode: 14 training return: tensor(-557.7103, device='cuda:0')
episode: 15 training return: tensor(-539.0563, device='cuda:0')
epoch: 4 test_true_pfm: 178.7002254852421 sim_pfm: -526.4537092742199
episode: 16 training return: tensor(-540.6387, device='cuda:0')
episode: 17 training return: tensor(-540.1330, device='cuda:0')
episode: 18 training return: tensor(419.2425, device='cuda:0')
episode: 19 training return: tensor(82.4222, device='cuda:0')
epoch: 5 test_true_pfm: 41.56252587384201 sim_pfm: 114.20336447865702
episode: 20 training return: tensor(67.6230, device='cuda:0')
episode: 21 training return: tensor(71.2208, device='cuda:0')
episode: 22 training return: tensor(34.0283, device='cuda:0')
episode: 23 training return: tensor(41.6153, device='cuda:0')
epoch: 6 test_true_pfm: 19.690891406764944 sim_pfm: 56.170826504705474
episode: 24 training return: tensor(47.6924, device='cuda:0')
episode: 25 training return: tensor(431.8118, device='cuda:0')
episode: 26 training return: tensor(514.5662, device='cuda:0')
episode: 27 training return: tensor(513.1216, device='cuda:0')
epoch: 7 test_true_pfm: 48.27607122267733 sim_pfm: 553.6056476659918
episode: 28 training return: tensor(528.0919, device='cuda:0')
episode: 29 training return: tensor(66.5936, device='cuda:0')
episode: 30 training return: tensor(84.0325, device='cuda:0')
episode: 31 training return: tensor(88.3572, device='cuda:0')
epoch: 8 test_true_pfm: 21.391368443361007 sim_pfm: 426.23007102683187
episode: 32 training return: tensor(-77.5249, device='cuda:0')
episode: 33 training return: tensor(448.8755, device='cuda:0')
episode: 34 training return: tensor(438.9715, device='cuda:0')
episode: 35 training return: tensor(486.0416, device='cuda:0')
epoch: 9 test_true_pfm: 96.92408352069337 sim_pfm: 286.4162287164169
episode: 36 training return: tensor(463.8582, device='cuda:0')
episode: 37 training return: tensor(170.8566, device='cuda:0')
episode: 38 training return: tensor(501.0911, device='cuda:0')
episode: 39 training return: tensor(235.9451, device='cuda:0')
epoch: 10 test_true_pfm: 53.30670681243638 sim_pfm: 406.1642346084118
episode: 40 training return: tensor(362.3033, device='cuda:0')
episode: 41 training return: tensor(454.1675, device='cuda:0')
episode: 42 training return: tensor(196.5059, device='cuda:0')
episode: 43 training return: tensor(492.2474, device='cuda:0')
epoch: 11 test_true_pfm: 100.97620917685909 sim_pfm: 480.41012637301657
episode: 44 training return: tensor(508.6660, device='cuda:0')
episode: 45 training return: tensor(500.3677, device='cuda:0')
episode: 46 training return: tensor(208.7296, device='cuda:0')
episode: 47 training return: tensor(540.9305, device='cuda:0')
epoch: 12 test_true_pfm: 90.22813429095574 sim_pfm: 496.6369812857981
episode: 48 training return: tensor(190.7821, device='cuda:0')
episode: 49 training return: tensor(530.5519, device='cuda:0')
episode: 50 training return: tensor(481.0929, device='cuda:0')
episode: 51 training return: tensor(479.1850, device='cuda:0')
epoch: 13 test_true_pfm: 110.95865887670999 sim_pfm: 524.0784997468969
episode: 52 training return: tensor(222.4743, device='cuda:0')
episode: 53 training return: tensor(230.0735, device='cuda:0')
episode: 54 training return: tensor(215.1560, device='cuda:0')
episode: 55 training return: tensor(203.8059, device='cuda:0')
epoch: 14 test_true_pfm: 87.97707835667389 sim_pfm: 520.517447820787
episode: 56 training return: tensor(506.9390, device='cuda:0')
episode: 57 training return: tensor(422.7728, device='cuda:0')
episode: 58 training return: tensor(469.1445, device='cuda:0')
episode: 59 training return: tensor(286.2355, device='cuda:0')
epoch: 15 test_true_pfm: 132.29863677370597 sim_pfm: 361.51218407104415
episode: 60 training return: tensor(492.5881, device='cuda:0')
episode: 61 training return: tensor(499.2113, device='cuda:0')
episode: 62 training return: tensor(547.5560, device='cuda:0')
episode: 63 training return: tensor(513.3132, device='cuda:0')
epoch: 16 test_true_pfm: 79.07507786931613 sim_pfm: 539.6268778011048
episode: 64 training return: tensor(226.9136, device='cuda:0')
episode: 65 training return: tensor(506.4146, device='cuda:0')
episode: 66 training return: tensor(527.7054, device='cuda:0')
episode: 67 training return: tensor(527.0162, device='cuda:0')
epoch: 17 test_true_pfm: 74.2614452924377 sim_pfm: 542.8504517736534
episode: 68 training return: tensor(222.9319, device='cuda:0')
episode: 69 training return: tensor(405.1292, device='cuda:0')
episode: 70 training return: tensor(489.8810, device='cuda:0')
episode: 71 training return: tensor(516.9521, device='cuda:0')
epoch: 18 test_true_pfm: 95.44699368257368 sim_pfm: 218.48609556168472
episode: 72 training return: tensor(220.0846, device='cuda:0')
episode: 73 training return: tensor(277.4144, device='cuda:0')
episode: 74 training return: tensor(509.8481, device='cuda:0')
episode: 75 training return: tensor(220.3106, device='cuda:0')
epoch: 19 test_true_pfm: 80.1241640267617 sim_pfm: 386.25843061242875
episode: 76 training return: tensor(217.9588, device='cuda:0')
episode: 77 training return: tensor(208.9228, device='cuda:0')
episode: 78 training return: tensor(216.4443, device='cuda:0')
episode: 79 training return: tensor(222.3468, device='cuda:0')
epoch: 20 test_true_pfm: 51.8792160409 sim_pfm: 519.8262539773326
episode: 80 training return: tensor(218.8356, device='cuda:0')
episode: 81 training return: tensor(213.0271, device='cuda:0')
episode: 82 training return: tensor(198.1843, device='cuda:0')
episode: 83 training return: tensor(208.3158, device='cuda:0')
epoch: 21 test_true_pfm: 72.51710677876675 sim_pfm: 387.40561017797637
episode: 84 training return: tensor(276.8349, device='cuda:0')
episode: 85 training return: tensor(206.4742, device='cuda:0')
episode: 86 training return: tensor(217.5992, device='cuda:0')
episode: 87 training return: tensor(195.7842, device='cuda:0')
epoch: 22 test_true_pfm: 88.56670188204465 sim_pfm: 375.8398377268265
episode: 88 training return: tensor(498.2410, device='cuda:0')
episode: 89 training return: tensor(224.3529, device='cuda:0')
episode: 90 training return: tensor(213.4736, device='cuda:0')
episode: 91 training return: tensor(201.7065, device='cuda:0')
epoch: 23 test_true_pfm: 88.88963143450518 sim_pfm: 367.1786592916275
episode: 92 training return: tensor(215.0252, device='cuda:0')
episode: 93 training return: tensor(489.7914, device='cuda:0')
episode: 94 training return: tensor(487.1060, device='cuda:0')
episode: 95 training return: tensor(292.6166, device='cuda:0')
epoch: 24 test_true_pfm: 67.17851352210413 sim_pfm: 520.9167283137213
episode: 96 training return: tensor(210.9997, device='cuda:0')
episode: 97 training return: tensor(204.9785, device='cuda:0')
episode: 98 training return: tensor(207.7055, device='cuda:0')
episode: 99 training return: tensor(211.7890, device='cuda:0')
epoch: 25 test_true_pfm: 107.79527036924542 sim_pfm: 366.101048575171
episode: 100 training return: tensor(218.3992, device='cuda:0')
episode: 101 training return: tensor(213.0055, device='cuda:0')
episode: 102 training return: tensor(481.2694, device='cuda:0')
episode: 103 training return: tensor(198.8711, device='cuda:0')
epoch: 26 test_true_pfm: 75.27827084250214 sim_pfm: 539.3468828471377
episode: 104 training return: tensor(216.7605, device='cuda:0')
episode: 105 training return: tensor(523.3259, device='cuda:0')
episode: 106 training return: tensor(483.8831, device='cuda:0')
episode: 107 training return: tensor(504.2979, device='cuda:0')
epoch: 27 test_true_pfm: 82.16500404791775 sim_pfm: 363.32810263335705
episode: 108 training return: tensor(227.7186, device='cuda:0')
episode: 109 training return: tensor(481.2095, device='cuda:0')
episode: 110 training return: tensor(223.9388, device='cuda:0')
episode: 111 training return: tensor(237.5861, device='cuda:0')
epoch: 28 test_true_pfm: 83.9999433775772 sim_pfm: 364.42937418135506
episode: 112 training return: tensor(498.6194, device='cuda:0')
episode: 113 training return: tensor(470.1078, device='cuda:0')
episode: 114 training return: tensor(259.4904, device='cuda:0')
episode: 115 training return: tensor(217.4254, device='cuda:0')
epoch: 29 test_true_pfm: 75.82925327337897 sim_pfm: 516.9115306263169
episode: 116 training return: tensor(512.9229, device='cuda:0')
episode: 117 training return: tensor(220.1510, device='cuda:0')
episode: 118 training return: tensor(507.0267, device='cuda:0')
episode: 119 training return: tensor(244.1251, device='cuda:0')
epoch: 30 test_true_pfm: 66.66343737110913 sim_pfm: 520.3864578236826
episode: 120 training return: tensor(539.4662, device='cuda:0')
episode: 121 training return: tensor(222.2091, device='cuda:0')
episode: 122 training return: tensor(219.9111, device='cuda:0')
episode: 123 training return: tensor(525.2343, device='cuda:0')
epoch: 31 test_true_pfm: 102.47595023897509 sim_pfm: 369.53818655751337
episode: 124 training return: tensor(217.4543, device='cuda:0')
episode: 125 training return: tensor(221.1664, device='cuda:0')
episode: 126 training return: tensor(517.6511, device='cuda:0')
episode: 127 training return: tensor(489.8978, device='cuda:0')
epoch: 32 test_true_pfm: 97.58679569657953 sim_pfm: 365.8201432006899
episode: 128 training return: tensor(228.7411, device='cuda:0')
episode: 129 training return: tensor(523.4349, device='cuda:0')
episode: 130 training return: tensor(221.5855, device='cuda:0')
episode: 131 training return: tensor(224.4147, device='cuda:0')
epoch: 33 test_true_pfm: 89.9818302935543 sim_pfm: 339.8784773225586
episode: 132 training return: tensor(211.3766, device='cuda:0')
episode: 133 training return: tensor(231.9158, device='cuda:0')
episode: 134 training return: tensor(519.6593, device='cuda:0')
episode: 135 training return: tensor(488.3170, device='cuda:0')
epoch: 34 test_true_pfm: 89.47062023456233 sim_pfm: 336.3141677627961
episode: 136 training return: tensor(224.2191, device='cuda:0')
episode: 137 training return: tensor(203.8756, device='cuda:0')
episode: 138 training return: tensor(524.7385, device='cuda:0')
episode: 139 training return: tensor(470.6577, device='cuda:0')
epoch: 35 test_true_pfm: 89.12125405359178 sim_pfm: 334.84927809634246
episode: 140 training return: tensor(207.3678, device='cuda:0')
episode: 141 training return: tensor(214.2872, device='cuda:0')
episode: 142 training return: tensor(219.4508, device='cuda:0')
episode: 143 training return: tensor(210.5968, device='cuda:0')
epoch: 36 test_true_pfm: 56.1971859901426 sim_pfm: 508.6661968221888
episode: 144 training return: tensor(508.2462, device='cuda:0')
episode: 145 training return: tensor(222.7885, device='cuda:0')
episode: 146 training return: tensor(208.0707, device='cuda:0')
episode: 147 training return: tensor(222.4231, device='cuda:0')
epoch: 37 test_true_pfm: 97.37292157945836 sim_pfm: 264.9529657748838
episode: 148 training return: tensor(220.2012, device='cuda:0')
episode: 149 training return: tensor(214.0755, device='cuda:0')
episode: 150 training return: tensor(233.8859, device='cuda:0')
episode: 151 training return: tensor(225.6649, device='cuda:0')
epoch: 38 test_true_pfm: 67.33686094327801 sim_pfm: 357.828861857454
episode: 152 training return: tensor(221.1630, device='cuda:0')
episode: 153 training return: tensor(245.1880, device='cuda:0')
episode: 154 training return: tensor(215.6441, device='cuda:0')
episode: 155 training return: tensor(482.7092, device='cuda:0')
epoch: 39 test_true_pfm: 92.29264427316976 sim_pfm: 351.57866268837824
episode: 156 training return: tensor(224.2921, device='cuda:0')
episode: 157 training return: tensor(228.4337, device='cuda:0')
episode: 158 training return: tensor(216.4831, device='cuda:0')
episode: 159 training return: tensor(235.3844, device='cuda:0')
epoch: 40 test_true_pfm: 91.94628113751848 sim_pfm: 260.9490557378158
episode: 160 training return: tensor(231.9121, device='cuda:0')
episode: 161 training return: tensor(215.3502, device='cuda:0')
episode: 162 training return: tensor(194.4626, device='cuda:0')
episode: 163 training return: tensor(231.9243, device='cuda:0')
epoch: 41 test_true_pfm: 81.66094068242455 sim_pfm: 260.26476774178445
episode: 164 training return: tensor(210.6831, device='cuda:0')
episode: 165 training return: tensor(144.4068, device='cuda:0')
episode: 166 training return: tensor(221.9891, device='cuda:0')
episode: 167 training return: tensor(224.0212, device='cuda:0')
epoch: 42 test_true_pfm: 88.1306828857646 sim_pfm: 266.9508719413231
episode: 168 training return: tensor(226.2274, device='cuda:0')
episode: 169 training return: tensor(233.5509, device='cuda:0')
episode: 170 training return: tensor(232.3086, device='cuda:0')
episode: 171 training return: tensor(237.7300, device='cuda:0')
epoch: 43 test_true_pfm: 84.4213077339622 sim_pfm: 356.6190963073556
episode: 172 training return: tensor(229.3369, device='cuda:0')
episode: 173 training return: tensor(247.9781, device='cuda:0')
episode: 174 training return: tensor(245.7083, device='cuda:0')
episode: 175 training return: tensor(223.5876, device='cuda:0')
epoch: 44 test_true_pfm: 87.53641100807164 sim_pfm: 273.98885159318644
episode: 176 training return: tensor(237.1047, device='cuda:0')
episode: 177 training return: tensor(235.5418, device='cuda:0')
episode: 178 training return: tensor(258.7266, device='cuda:0')
episode: 179 training return: tensor(242.5511, device='cuda:0')
epoch: 45 test_true_pfm: 81.5872901132061 sim_pfm: 370.31673295532045
episode: 180 training return: tensor(229.3793, device='cuda:0')
episode: 181 training return: tensor(243.9164, device='cuda:0')
episode: 182 training return: tensor(259.9102, device='cuda:0')
episode: 183 training return: tensor(223.9130, device='cuda:0')
epoch: 46 test_true_pfm: 123.5911104090477 sim_pfm: 290.8273888361388
episode: 184 training return: tensor(230.7169, device='cuda:0')
episode: 185 training return: tensor(227.5100, device='cuda:0')
episode: 186 training return: tensor(289.8910, device='cuda:0')
episode: 187 training return: tensor(231.4126, device='cuda:0')
epoch: 47 test_true_pfm: 114.53457115821293 sim_pfm: 355.2590101756311
episode: 188 training return: tensor(251.1343, device='cuda:0')
episode: 189 training return: tensor(242.6992, device='cuda:0')
episode: 190 training return: tensor(252.0685, device='cuda:0')
episode: 191 training return: tensor(251.4997, device='cuda:0')
epoch: 48 test_true_pfm: 113.7583778063727 sim_pfm: 300.66013481126475
episode: 192 training return: tensor(240.3408, device='cuda:0')
episode: 193 training return: tensor(244.7646, device='cuda:0')
episode: 194 training return: tensor(223.7381, device='cuda:0')
episode: 195 training return: tensor(256.9706, device='cuda:0')
epoch: 49 test_true_pfm: 125.10132616921807 sim_pfm: 304.450497219339
episode: 196 training return: tensor(259.5132, device='cuda:0')
episode: 197 training return: tensor(228.6983, device='cuda:0')
episode: 198 training return: tensor(237.1857, device='cuda:0')
episode: 199 training return: tensor(528.9625, device='cuda:0')
epoch: 50 test_true_pfm: 130.23453729274988 sim_pfm: 302.0959246235434
episode: 200 training return: tensor(253.9689, device='cuda:0')
episode: 201 training return: tensor(267.4231, device='cuda:0')
episode: 202 training return: tensor(235.5821, device='cuda:0')
episode: 203 training return: tensor(251.9277, device='cuda:0')
epoch: 51 test_true_pfm: 112.15097235853663 sim_pfm: 300.5693029289444
episode: 204 training return: tensor(224.4633, device='cuda:0')
episode: 205 training return: tensor(227.0382, device='cuda:0')
episode: 206 training return: tensor(240.5173, device='cuda:0')
episode: 207 training return: tensor(219.0533, device='cuda:0')
epoch: 52 test_true_pfm: 114.34455102061584 sim_pfm: 293.3386056819775
episode: 208 training return: tensor(237.5934, device='cuda:0')
episode: 209 training return: tensor(229.6188, device='cuda:0')
episode: 210 training return: tensor(238.8906, device='cuda:0')
episode: 211 training return: tensor(222.8614, device='cuda:0')
epoch: 53 test_true_pfm: 121.097607790101 sim_pfm: 279.60767906642286
episode: 212 training return: tensor(278.4508, device='cuda:0')
episode: 213 training return: tensor(202.3757, device='cuda:0')
episode: 214 training return: tensor(162.8979, device='cuda:0')
episode: 215 training return: tensor(174.2617, device='cuda:0')
epoch: 54 test_true_pfm: 109.62765111200856 sim_pfm: 209.08432814603051
episode: 216 training return: tensor(155.6336, device='cuda:0')
episode: 217 training return: tensor(341.4952, device='cuda:0')
episode: 218 training return: tensor(255.9501, device='cuda:0')
episode: 219 training return: tensor(233.3915, device='cuda:0')
epoch: 55 test_true_pfm: 138.57037027028716 sim_pfm: 293.07466832983
episode: 220 training return: tensor(248.6791, device='cuda:0')
episode: 221 training return: tensor(232.7592, device='cuda:0')
episode: 222 training return: tensor(238.6364, device='cuda:0')
episode: 223 training return: tensor(236.6693, device='cuda:0')
epoch: 56 test_true_pfm: 70.03675994753125 sim_pfm: 533.4916185811162
episode: 224 training return: tensor(514.9998, device='cuda:0')
episode: 225 training return: tensor(250.9353, device='cuda:0')
episode: 226 training return: tensor(243.5848, device='cuda:0')
episode: 227 training return: tensor(254.9686, device='cuda:0')
epoch: 57 test_true_pfm: 110.3490234620967 sim_pfm: 307.90593530074693
episode: 228 training return: tensor(262.8084, device='cuda:0')
episode: 229 training return: tensor(230.2917, device='cuda:0')
episode: 230 training return: tensor(265.2352, device='cuda:0')
episode: 231 training return: tensor(243.4431, device='cuda:0')
epoch: 58 test_true_pfm: 64.08119134638754 sim_pfm: 501.20800859481096
episode: 232 training return: tensor(505.2125, device='cuda:0')
episode: 233 training return: tensor(256.6443, device='cuda:0')
episode: 234 training return: tensor(269.3517, device='cuda:0')
episode: 235 training return: tensor(246.6440, device='cuda:0')
epoch: 59 test_true_pfm: 68.18967433946068 sim_pfm: 521.53116709933
episode: 236 training return: tensor(502.9821, device='cuda:0')
episode: 237 training return: tensor(256.2459, device='cuda:0')
episode: 238 training return: tensor(243.6467, device='cuda:0')
episode: 239 training return: tensor(245.2147, device='cuda:0')
epoch: 60 test_true_pfm: 112.86455246456528 sim_pfm: 307.797569339474
episode: 240 training return: tensor(255.2991, device='cuda:0')
episode: 241 training return: tensor(258.4280, device='cuda:0')
episode: 242 training return: tensor(260.2520, device='cuda:0')
episode: 243 training return: tensor(266.0661, device='cuda:0')
epoch: 61 test_true_pfm: 104.44586418033278 sim_pfm: 318.15414277960855
episode: 244 training return: tensor(263.8789, device='cuda:0')
episode: 245 training return: tensor(255.2794, device='cuda:0')
episode: 246 training return: tensor(246.5929, device='cuda:0')
episode: 247 training return: tensor(233.2104, device='cuda:0')
epoch: 62 test_true_pfm: 122.92698459889448 sim_pfm: 300.16660055230994
episode: 248 training return: tensor(254.6513, device='cuda:0')
episode: 249 training return: tensor(265.6909, device='cuda:0')
episode: 250 training return: tensor(251.1625, device='cuda:0')
episode: 251 training return: tensor(245.3631, device='cuda:0')
epoch: 63 test_true_pfm: 105.94009321271248 sim_pfm: 301.46854753408115
episode: 252 training return: tensor(262.8516, device='cuda:0')
episode: 253 training return: tensor(260.9662, device='cuda:0')
episode: 254 training return: tensor(258.8795, device='cuda:0')
episode: 255 training return: tensor(247.7379, device='cuda:0')
epoch: 64 test_true_pfm: 126.0685200556875 sim_pfm: 298.9281657201548
episode: 256 training return: tensor(254.3291, device='cuda:0')
episode: 257 training return: tensor(253.6600, device='cuda:0')
episode: 258 training return: tensor(250.1172, device='cuda:0')
episode: 259 training return: tensor(250.3215, device='cuda:0')
epoch: 65 test_true_pfm: 126.14797670819188 sim_pfm: 297.3889837364356
episode: 260 training return: tensor(249.9028, device='cuda:0')
episode: 261 training return: tensor(247.9843, device='cuda:0')
episode: 262 training return: tensor(257.7955, device='cuda:0')
episode: 263 training return: tensor(254.6499, device='cuda:0')
epoch: 66 test_true_pfm: 122.42009485955872 sim_pfm: 311.18706769123673
episode: 264 training return: tensor(281.5729, device='cuda:0')
episode: 265 training return: tensor(228.7906, device='cuda:0')
episode: 266 training return: tensor(242.2668, device='cuda:0')
episode: 267 training return: tensor(236.1665, device='cuda:0')
epoch: 67 test_true_pfm: 62.61426699747491 sim_pfm: 529.4274629776677
episode: 268 training return: tensor(254.7954, device='cuda:0')
episode: 269 training return: tensor(522.9578, device='cuda:0')
episode: 270 training return: tensor(526.3344, device='cuda:0')
episode: 271 training return: tensor(273.8714, device='cuda:0')
epoch: 68 test_true_pfm: 102.35139728451965 sim_pfm: 331.73717648008216
episode: 272 training return: tensor(256.9241, device='cuda:0')
episode: 273 training return: tensor(242.2822, device='cuda:0')
episode: 274 training return: tensor(279.1838, device='cuda:0')
episode: 275 training return: tensor(257.1534, device='cuda:0')
epoch: 69 test_true_pfm: 128.53388341651177 sim_pfm: 305.91444270312786
episode: 276 training return: tensor(271.3425, device='cuda:0')
episode: 277 training return: tensor(268.4628, device='cuda:0')
episode: 278 training return: tensor(256.1560, device='cuda:0')
episode: 279 training return: tensor(247.6360, device='cuda:0')
epoch: 70 test_true_pfm: 131.03380282382014 sim_pfm: 302.6371721486018
episode: 280 training return: tensor(524.9974, device='cuda:0')
episode: 281 training return: tensor(238.6407, device='cuda:0')
episode: 282 training return: tensor(238.4399, device='cuda:0')
episode: 283 training return: tensor(248.7108, device='cuda:0')
epoch: 71 test_true_pfm: 59.666658114557414 sim_pfm: 529.1026010513306
episode: 284 training return: tensor(273.6583, device='cuda:0')
episode: 285 training return: tensor(238.1754, device='cuda:0')
episode: 286 training return: tensor(258.2096, device='cuda:0')
episode: 287 training return: tensor(235.5850, device='cuda:0')
epoch: 72 test_true_pfm: 114.68582063370377 sim_pfm: 401.9598360334833
episode: 288 training return: tensor(250.8750, device='cuda:0')
episode: 289 training return: tensor(523.1287, device='cuda:0')
episode: 290 training return: tensor(547.8121, device='cuda:0')
episode: 291 training return: tensor(269.7374, device='cuda:0')
epoch: 73 test_true_pfm: 136.47524398067102 sim_pfm: 303.6377707930903
episode: 292 training return: tensor(531.6635, device='cuda:0')
episode: 293 training return: tensor(526.2628, device='cuda:0')
episode: 294 training return: tensor(251.8483, device='cuda:0')
episode: 295 training return: tensor(256.9829, device='cuda:0')
epoch: 74 test_true_pfm: 136.03372768586095 sim_pfm: 297.9442486045882
episode: 296 training return: tensor(254.3627, device='cuda:0')
episode: 297 training return: tensor(259.5830, device='cuda:0')
episode: 298 training return: tensor(252.7099, device='cuda:0')
episode: 299 training return: tensor(250.6797, device='cuda:0')
epoch: 75 test_true_pfm: 132.46249237686797 sim_pfm: 296.8487114260594
episode: 300 training return: tensor(256.8643, device='cuda:0')
episode: 301 training return: tensor(259.2618, device='cuda:0')
episode: 302 training return: tensor(522.2427, device='cuda:0')
episode: 303 training return: tensor(264.1772, device='cuda:0')
epoch: 76 test_true_pfm: 128.16107948680545 sim_pfm: 312.744323183472
episode: 304 training return: tensor(262.0511, device='cuda:0')
episode: 305 training return: tensor(253.7919, device='cuda:0')
episode: 306 training return: tensor(264.8897, device='cuda:0')
episode: 307 training return: tensor(269.1917, device='cuda:0')
epoch: 77 test_true_pfm: 132.90377020963106 sim_pfm: 308.4004502530831
episode: 308 training return: tensor(236.6305, device='cuda:0')
episode: 309 training return: tensor(254.8511, device='cuda:0')
episode: 310 training return: tensor(519.2949, device='cuda:0')
episode: 311 training return: tensor(550.3403, device='cuda:0')
epoch: 78 test_true_pfm: 132.56623535013128 sim_pfm: 307.6315358268718
episode: 312 training return: tensor(520.5972, device='cuda:0')
episode: 313 training return: tensor(253.1168, device='cuda:0')
episode: 314 training return: tensor(530.6105, device='cuda:0')
episode: 315 training return: tensor(518.4399, device='cuda:0')
epoch: 79 test_true_pfm: 131.46824518414778 sim_pfm: 301.71164410561323
episode: 316 training return: tensor(543.0806, device='cuda:0')
episode: 317 training return: tensor(260.8535, device='cuda:0')
episode: 318 training return: tensor(261.4230, device='cuda:0')
episode: 319 training return: tensor(267.2446, device='cuda:0')
epoch: 80 test_true_pfm: 140.2248444431556 sim_pfm: 303.5887865036105
episode: 320 training return: tensor(257.8014, device='cuda:0')
episode: 321 training return: tensor(269.7291, device='cuda:0')
episode: 322 training return: tensor(261.6355, device='cuda:0')
episode: 323 training return: tensor(245.4336, device='cuda:0')
epoch: 81 test_true_pfm: 134.63917954284986 sim_pfm: 301.78277834303054
episode: 324 training return: tensor(542.8578, device='cuda:0')
episode: 325 training return: tensor(529.3312, device='cuda:0')
episode: 326 training return: tensor(263.7017, device='cuda:0')
episode: 327 training return: tensor(258.7201, device='cuda:0')
epoch: 82 test_true_pfm: 133.01642244472333 sim_pfm: 301.9048647886763
episode: 328 training return: tensor(257.8972, device='cuda:0')
episode: 329 training return: tensor(261.2940, device='cuda:0')
episode: 330 training return: tensor(253.0017, device='cuda:0')
episode: 331 training return: tensor(268.8599, device='cuda:0')
epoch: 83 test_true_pfm: 147.1256360393002 sim_pfm: 304.6812138655029
episode: 332 training return: tensor(252.2444, device='cuda:0')
episode: 333 training return: tensor(547.6749, device='cuda:0')
episode: 334 training return: tensor(547.5396, device='cuda:0')
episode: 335 training return: tensor(263.4983, device='cuda:0')
epoch: 84 test_true_pfm: 135.48196570883115 sim_pfm: 296.6440600187828
episode: 336 training return: tensor(264.0228, device='cuda:0')
episode: 337 training return: tensor(268.4950, device='cuda:0')
episode: 338 training return: tensor(-448.5234, device='cuda:0')
episode: 339 training return: tensor(-462.7495, device='cuda:0')
epoch: 85 test_true_pfm: 151.33081019550738 sim_pfm: 91.58846258556393
episode: 340 training return: tensor(-418.3275, device='cuda:0')
episode: 341 training return: tensor(189.1586, device='cuda:0')
episode: 342 training return: tensor(93.1231, device='cuda:0')
episode: 343 training return: tensor(-385.5169, device='cuda:0')
epoch: 86 test_true_pfm: 133.61276888739476 sim_pfm: 310.32443462063867
episode: 344 training return: tensor(266.3741, device='cuda:0')
episode: 345 training return: tensor(262.8659, device='cuda:0')
episode: 346 training return: tensor(542.8587, device='cuda:0')
episode: 347 training return: tensor(246.5926, device='cuda:0')
epoch: 87 test_true_pfm: 125.33631558041255 sim_pfm: 311.83295237412676
episode: 348 training return: tensor(269.5801, device='cuda:0')
episode: 349 training return: tensor(512.0187, device='cuda:0')
episode: 350 training return: tensor(239.3694, device='cuda:0')
episode: 351 training return: tensor(260.6626, device='cuda:0')
epoch: 88 test_true_pfm: 127.59799425193383 sim_pfm: 310.9390009877582
episode: 352 training return: tensor(266.9834, device='cuda:0')
episode: 353 training return: tensor(264.1838, device='cuda:0')
episode: 354 training return: tensor(543.0692, device='cuda:0')
episode: 355 training return: tensor(246.0302, device='cuda:0')
epoch: 89 test_true_pfm: 136.72720753232355 sim_pfm: 312.05863463692367
episode: 356 training return: tensor(259.6270, device='cuda:0')
episode: 357 training return: tensor(543.4725, device='cuda:0')
episode: 358 training return: tensor(544.6199, device='cuda:0')
episode: 359 training return: tensor(533.3381, device='cuda:0')
epoch: 90 test_true_pfm: 136.64304030755207 sim_pfm: 320.3175991275736
episode: 360 training return: tensor(273.7287, device='cuda:0')
episode: 361 training return: tensor(263.2774, device='cuda:0')
episode: 362 training return: tensor(270.9613, device='cuda:0')
episode: 363 training return: tensor(266.3114, device='cuda:0')
epoch: 91 test_true_pfm: 124.58787194691284 sim_pfm: 472.0309250645029
episode: 364 training return: tensor(264.7724, device='cuda:0')
episode: 365 training return: tensor(267.8364, device='cuda:0')
episode: 366 training return: tensor(267.1434, device='cuda:0')
episode: 367 training return: tensor(264.2316, device='cuda:0')
epoch: 92 test_true_pfm: 139.44424031985167 sim_pfm: 321.77653372008353
episode: 368 training return: tensor(263.4180, device='cuda:0')
episode: 369 training return: tensor(269.8759, device='cuda:0')
episode: 370 training return: tensor(243.8755, device='cuda:0')
episode: 371 training return: tensor(264.4966, device='cuda:0')
epoch: 93 test_true_pfm: 127.88046779083173 sim_pfm: 314.58289722570527
episode: 372 training return: tensor(259.6278, device='cuda:0')
episode: 373 training return: tensor(238.7372, device='cuda:0')
episode: 374 training return: tensor(522.1871, device='cuda:0')
episode: 375 training return: tensor(268.0771, device='cuda:0')
epoch: 94 test_true_pfm: 135.60795662007874 sim_pfm: 311.8761004159848
episode: 376 training return: tensor(537.1096, device='cuda:0')
episode: 377 training return: tensor(249.7616, device='cuda:0')
episode: 378 training return: tensor(537.3981, device='cuda:0')
episode: 379 training return: tensor(264.4595, device='cuda:0')
epoch: 95 test_true_pfm: 134.88603420408728 sim_pfm: 312.0074291905427
episode: 380 training return: tensor(264.5398, device='cuda:0')
episode: 381 training return: tensor(249.4153, device='cuda:0')
episode: 382 training return: tensor(249.5668, device='cuda:0')
episode: 383 training return: tensor(259.2563, device='cuda:0')
epoch: 96 test_true_pfm: 130.6380943118498 sim_pfm: 317.4314761497856
episode: 384 training return: tensor(526.9800, device='cuda:0')
episode: 385 training return: tensor(257.1483, device='cuda:0')
episode: 386 training return: tensor(263.0923, device='cuda:0')
episode: 387 training return: tensor(268.7969, device='cuda:0')
epoch: 97 test_true_pfm: 122.30166692316432 sim_pfm: 311.0934029559915
episode: 388 training return: tensor(243.0464, device='cuda:0')
episode: 389 training return: tensor(243.1599, device='cuda:0')
episode: 390 training return: tensor(239.4786, device='cuda:0')
episode: 391 training return: tensor(269.0363, device='cuda:0')
epoch: 98 test_true_pfm: 119.94370938338432 sim_pfm: 307.29976673424244
episode: 392 training return: tensor(272.2118, device='cuda:0')
episode: 393 training return: tensor(239.3278, device='cuda:0')
episode: 394 training return: tensor(242.7427, device='cuda:0')
episode: 395 training return: tensor(519.2430, device='cuda:0')
epoch: 99 test_true_pfm: 137.37369679809618 sim_pfm: 313.4898164284726
episode: 396 training return: tensor(270.0427, device='cuda:0')
episode: 397 training return: tensor(270.3838, device='cuda:0')
episode: 398 training return: tensor(267.2960, device='cuda:0')
episode: 399 training return: tensor(523.9603, device='cuda:0')
epoch: 100 test_true_pfm: 137.50514062903986 sim_pfm: 314.0522342932721
episode: 400 training return: tensor(261.8783, device='cuda:0')
episode: 401 training return: tensor(249.3947, device='cuda:0')
episode: 402 training return: tensor(263.2038, device='cuda:0')
episode: 403 training return: tensor(248.4904, device='cuda:0')
epoch: 101 test_true_pfm: 138.6793801915671 sim_pfm: 314.54307890093577
episode: 404 training return: tensor(258.8487, device='cuda:0')
episode: 405 training return: tensor(268.5665, device='cuda:0')
episode: 406 training return: tensor(263.5447, device='cuda:0')
episode: 407 training return: tensor(273.0889, device='cuda:0')
epoch: 102 test_true_pfm: 132.1817839211234 sim_pfm: 319.5167571225514
episode: 408 training return: tensor(270.3137, device='cuda:0')
episode: 409 training return: tensor(276.6318, device='cuda:0')
episode: 410 training return: tensor(541.1496, device='cuda:0')
episode: 411 training return: tensor(265.0556, device='cuda:0')
epoch: 103 test_true_pfm: 132.8374461329346 sim_pfm: 311.5252599815528
episode: 412 training return: tensor(536.8511, device='cuda:0')
episode: 413 training return: tensor(257.3755, device='cuda:0')
episode: 414 training return: tensor(251.8790, device='cuda:0')
episode: 415 training return: tensor(527.5070, device='cuda:0')
epoch: 104 test_true_pfm: 136.4262314317275 sim_pfm: 316.993299709633
episode: 416 training return: tensor(516.8386, device='cuda:0')
episode: 417 training return: tensor(239.0352, device='cuda:0')
episode: 418 training return: tensor(265.8806, device='cuda:0')
episode: 419 training return: tensor(268.2674, device='cuda:0')
epoch: 105 test_true_pfm: 141.4796460936882 sim_pfm: 312.6014611395852
episode: 420 training return: tensor(261.3866, device='cuda:0')
episode: 421 training return: tensor(264.9819, device='cuda:0')
episode: 422 training return: tensor(242.0891, device='cuda:0')
episode: 423 training return: tensor(523.0258, device='cuda:0')
epoch: 106 test_true_pfm: 138.70304333832405 sim_pfm: 323.32743268770474
episode: 424 training return: tensor(276.8703, device='cuda:0')
episode: 425 training return: tensor(261.1407, device='cuda:0')
episode: 426 training return: tensor(257.9278, device='cuda:0')
episode: 427 training return: tensor(266.9671, device='cuda:0')
epoch: 107 test_true_pfm: 135.75633876114918 sim_pfm: 315.1306869561474
episode: 428 training return: tensor(260.8353, device='cuda:0')
episode: 429 training return: tensor(260.5839, device='cuda:0')
episode: 430 training return: tensor(272.3446, device='cuda:0')
episode: 431 training return: tensor(266.0903, device='cuda:0')
epoch: 108 test_true_pfm: 131.7307155657119 sim_pfm: 310.6315724725525
episode: 432 training return: tensor(262.5545, device='cuda:0')
episode: 433 training return: tensor(279.6923, device='cuda:0')
episode: 434 training return: tensor(267.2629, device='cuda:0')
episode: 435 training return: tensor(260.6419, device='cuda:0')
epoch: 109 test_true_pfm: 136.2647325115386 sim_pfm: 315.1951922494918
episode: 436 training return: tensor(271.5955, device='cuda:0')
episode: 437 training return: tensor(269.3864, device='cuda:0')
episode: 438 training return: tensor(241.9822, device='cuda:0')
episode: 439 training return: tensor(272.3887, device='cuda:0')
epoch: 110 test_true_pfm: 133.21254682248045 sim_pfm: 319.1862026353677
episode: 440 training return: tensor(525.5037, device='cuda:0')
episode: 441 training return: tensor(250.3787, device='cuda:0')
episode: 442 training return: tensor(278.6853, device='cuda:0')
episode: 443 training return: tensor(279.7796, device='cuda:0')
epoch: 111 test_true_pfm: 129.54851565010225 sim_pfm: 317.85070160714287
episode: 444 training return: tensor(257.5067, device='cuda:0')
episode: 445 training return: tensor(269.8033, device='cuda:0')
episode: 446 training return: tensor(262.4844, device='cuda:0')
episode: 447 training return: tensor(279.4450, device='cuda:0')
epoch: 112 test_true_pfm: 135.3060253962079 sim_pfm: 311.2645784939329
episode: 448 training return: tensor(263.3410, device='cuda:0')
episode: 449 training return: tensor(262.9997, device='cuda:0')
episode: 450 training return: tensor(257.0425, device='cuda:0')
episode: 451 training return: tensor(272.9513, device='cuda:0')
epoch: 113 test_true_pfm: 136.84490507838427 sim_pfm: 312.16351722351584
episode: 452 training return: tensor(264.5070, device='cuda:0')
episode: 453 training return: tensor(263.4112, device='cuda:0')
episode: 454 training return: tensor(251.3381, device='cuda:0')
episode: 455 training return: tensor(253.3386, device='cuda:0')
epoch: 114 test_true_pfm: 132.69187172719322 sim_pfm: 317.38869708403945
episode: 456 training return: tensor(270.1155, device='cuda:0')
episode: 457 training return: tensor(261.6916, device='cuda:0')
episode: 458 training return: tensor(272.7617, device='cuda:0')
episode: 459 training return: tensor(275.1716, device='cuda:0')
epoch: 115 test_true_pfm: 136.13654712956725 sim_pfm: 316.38393242657185
episode: 460 training return: tensor(270.5226, device='cuda:0')
episode: 461 training return: tensor(264.8637, device='cuda:0')
episode: 462 training return: tensor(269.3740, device='cuda:0')
episode: 463 training return: tensor(279.9122, device='cuda:0')
epoch: 116 test_true_pfm: 129.10922483322963 sim_pfm: 313.88399960721534
episode: 464 training return: tensor(258.8471, device='cuda:0')
episode: 465 training return: tensor(261.1694, device='cuda:0')
episode: 466 training return: tensor(259.2641, device='cuda:0')
episode: 467 training return: tensor(274.7584, device='cuda:0')
epoch: 117 test_true_pfm: 133.5782531402877 sim_pfm: 315.03044142201543
episode: 468 training return: tensor(252.1217, device='cuda:0')
episode: 469 training return: tensor(264.8519, device='cuda:0')
episode: 470 training return: tensor(253.3970, device='cuda:0')
episode: 471 training return: tensor(267.9256, device='cuda:0')
epoch: 118 test_true_pfm: 133.84366202453 sim_pfm: 308.70561720679206
episode: 472 training return: tensor(266.6479, device='cuda:0')
episode: 473 training return: tensor(263.7423, device='cuda:0')
episode: 474 training return: tensor(265.6253, device='cuda:0')
episode: 475 training return: tensor(253.4346, device='cuda:0')
epoch: 119 test_true_pfm: 134.16254044070203 sim_pfm: 316.04133652398986
episode: 476 training return: tensor(256.0906, device='cuda:0')
episode: 477 training return: tensor(267.5385, device='cuda:0')
episode: 478 training return: tensor(269.6633, device='cuda:0')
episode: 479 training return: tensor(262.3321, device='cuda:0')
epoch: 120 test_true_pfm: 136.61844857824255 sim_pfm: 322.260561781625
episode: 480 training return: tensor(262.5062, device='cuda:0')
episode: 481 training return: tensor(262.1770, device='cuda:0')
episode: 482 training return: tensor(271.5729, device='cuda:0')
episode: 483 training return: tensor(258.6357, device='cuda:0')
epoch: 121 test_true_pfm: 134.58154811376974 sim_pfm: 316.67845657964546
episode: 484 training return: tensor(261.9867, device='cuda:0')
episode: 485 training return: tensor(524.4442, device='cuda:0')
episode: 486 training return: tensor(255.8881, device='cuda:0')
episode: 487 training return: tensor(268.1473, device='cuda:0')
epoch: 122 test_true_pfm: 130.47000699011014 sim_pfm: 315.8320817587276
episode: 488 training return: tensor(268.5096, device='cuda:0')
episode: 489 training return: tensor(261.8924, device='cuda:0')
episode: 490 training return: tensor(264.4171, device='cuda:0')
episode: 491 training return: tensor(268.9221, device='cuda:0')
epoch: 123 test_true_pfm: 136.0995728930716 sim_pfm: 312.38989420980215
episode: 492 training return: tensor(271.0841, device='cuda:0')
episode: 493 training return: tensor(274.5876, device='cuda:0')
episode: 494 training return: tensor(271.0826, device='cuda:0')
episode: 495 training return: tensor(276.5425, device='cuda:0')
epoch: 124 test_true_pfm: 137.06402051347396 sim_pfm: 318.7333351696531
episode: 496 training return: tensor(261.6943, device='cuda:0')
episode: 497 training return: tensor(269.3270, device='cuda:0')
episode: 498 training return: tensor(275.8594, device='cuda:0')
episode: 499 training return: tensor(276.4376, device='cuda:0')
epoch: 125 test_true_pfm: 135.46213465328276 sim_pfm: 316.6270609373848
episode: 500 training return: tensor(266.8462, device='cuda:0')
episode: 501 training return: tensor(270.3276, device='cuda:0')
episode: 502 training return: tensor(527.4336, device='cuda:0')
episode: 503 training return: tensor(274.3763, device='cuda:0')
epoch: 126 test_true_pfm: 133.28875511858865 sim_pfm: 317.57536688198644
episode: 504 training return: tensor(264.1769, device='cuda:0')
episode: 505 training return: tensor(259.7702, device='cuda:0')
episode: 506 training return: tensor(261.5586, device='cuda:0')
episode: 507 training return: tensor(272.1024, device='cuda:0')
epoch: 127 test_true_pfm: 132.75846904245455 sim_pfm: 309.06225751092035
episode: 508 training return: tensor(273.6758, device='cuda:0')
episode: 509 training return: tensor(274.4985, device='cuda:0')
episode: 510 training return: tensor(271.0189, device='cuda:0')
episode: 511 training return: tensor(257.6826, device='cuda:0')
epoch: 128 test_true_pfm: 141.2825983106686 sim_pfm: 317.9949462879449
episode: 512 training return: tensor(267.1599, device='cuda:0')
episode: 513 training return: tensor(253.8070, device='cuda:0')
episode: 514 training return: tensor(276.8599, device='cuda:0')
episode: 515 training return: tensor(260.9034, device='cuda:0')
epoch: 129 test_true_pfm: 132.09555599819578 sim_pfm: 310.49183450142544
episode: 516 training return: tensor(257.8454, device='cuda:0')
episode: 517 training return: tensor(257.5111, device='cuda:0')
episode: 518 training return: tensor(263.5826, device='cuda:0')
episode: 519 training return: tensor(263.2043, device='cuda:0')
epoch: 130 test_true_pfm: 138.6505733676365 sim_pfm: 317.61884886647266
episode: 520 training return: tensor(260.9242, device='cuda:0')
episode: 521 training return: tensor(264.4088, device='cuda:0')
episode: 522 training return: tensor(269.7946, device='cuda:0')
episode: 523 training return: tensor(271.6378, device='cuda:0')
epoch: 131 test_true_pfm: 131.91293800973446 sim_pfm: 313.2944342543681
episode: 524 training return: tensor(268.0762, device='cuda:0')
episode: 525 training return: tensor(263.6165, device='cuda:0')
episode: 526 training return: tensor(270.2066, device='cuda:0')
episode: 527 training return: tensor(284.9149, device='cuda:0')
epoch: 132 test_true_pfm: 147.37439303637782 sim_pfm: 319.40929258901934
episode: 528 training return: tensor(256.6758, device='cuda:0')
episode: 529 training return: tensor(275.8945, device='cuda:0')
episode: 530 training return: tensor(266.1837, device='cuda:0')
episode: 531 training return: tensor(529.0101, device='cuda:0')
epoch: 133 test_true_pfm: 135.7284698002718 sim_pfm: 316.30813384382054
episode: 532 training return: tensor(267.3567, device='cuda:0')
episode: 533 training return: tensor(262.0343, device='cuda:0')
episode: 534 training return: tensor(269.2939, device='cuda:0')
episode: 535 training return: tensor(274.7668, device='cuda:0')
epoch: 134 test_true_pfm: 133.09101663617867 sim_pfm: 315.5903545692563
episode: 536 training return: tensor(262.0487, device='cuda:0')
episode: 537 training return: tensor(263.4679, device='cuda:0')
episode: 538 training return: tensor(263.9340, device='cuda:0')
episode: 539 training return: tensor(277.4083, device='cuda:0')
epoch: 135 test_true_pfm: 135.53868854539084 sim_pfm: 317.6063803260525
episode: 540 training return: tensor(267.6246, device='cuda:0')
episode: 541 training return: tensor(267.5782, device='cuda:0')
episode: 542 training return: tensor(263.6040, device='cuda:0')
episode: 543 training return: tensor(258.5275, device='cuda:0')
epoch: 136 test_true_pfm: 135.9415638065215 sim_pfm: 313.0992679471771
episode: 544 training return: tensor(282.0305, device='cuda:0')
episode: 545 training return: tensor(271.6759, device='cuda:0')
episode: 546 training return: tensor(269.0585, device='cuda:0')
episode: 547 training return: tensor(267.0471, device='cuda:0')
epoch: 137 test_true_pfm: 140.06169131916596 sim_pfm: 318.5652343244292
episode: 548 training return: tensor(268.7868, device='cuda:0')
episode: 549 training return: tensor(268.7615, device='cuda:0')
episode: 550 training return: tensor(274.7497, device='cuda:0')
episode: 551 training return: tensor(263.4678, device='cuda:0')
epoch: 138 test_true_pfm: 137.3683022206097 sim_pfm: 312.86334847845137
episode: 552 training return: tensor(276.7162, device='cuda:0')
episode: 553 training return: tensor(264.7050, device='cuda:0')
episode: 554 training return: tensor(272.3518, device='cuda:0')
episode: 555 training return: tensor(264.5005, device='cuda:0')
epoch: 139 test_true_pfm: 135.37401865172538 sim_pfm: 312.1984681338072
episode: 556 training return: tensor(267.6722, device='cuda:0')
episode: 557 training return: tensor(265.5019, device='cuda:0')
episode: 558 training return: tensor(269.2220, device='cuda:0')
episode: 559 training return: tensor(260.4305, device='cuda:0')
epoch: 140 test_true_pfm: 136.12660739537088 sim_pfm: 309.9155196497838
episode: 560 training return: tensor(274.0954, device='cuda:0')
episode: 561 training return: tensor(270.8233, device='cuda:0')
episode: 562 training return: tensor(268.9011, device='cuda:0')
episode: 563 training return: tensor(254.8240, device='cuda:0')
epoch: 141 test_true_pfm: 137.04918112486087 sim_pfm: 317.51345642159384
episode: 564 training return: tensor(246.8217, device='cuda:0')
episode: 565 training return: tensor(271.5593, device='cuda:0')
episode: 566 training return: tensor(259.1982, device='cuda:0')
episode: 567 training return: tensor(258.6224, device='cuda:0')
epoch: 142 test_true_pfm: 137.9050573200644 sim_pfm: 316.0714355111122
episode: 568 training return: tensor(275.5627, device='cuda:0')
episode: 569 training return: tensor(259.3600, device='cuda:0')
episode: 570 training return: tensor(278.0083, device='cuda:0')
episode: 571 training return: tensor(263.2559, device='cuda:0')
epoch: 143 test_true_pfm: 139.84197797433865 sim_pfm: 316.825527132644
episode: 572 training return: tensor(272.5313, device='cuda:0')
episode: 573 training return: tensor(280.9515, device='cuda:0')
episode: 574 training return: tensor(264.4251, device='cuda:0')
episode: 575 training return: tensor(280.9081, device='cuda:0')
epoch: 144 test_true_pfm: 139.67799979655325 sim_pfm: 314.43656324595213
episode: 576 training return: tensor(269.0519, device='cuda:0')
episode: 577 training return: tensor(276.6760, device='cuda:0')
episode: 578 training return: tensor(271.3763, device='cuda:0')
episode: 579 training return: tensor(272.5632, device='cuda:0')
epoch: 145 test_true_pfm: 138.1028644479177 sim_pfm: 319.90748193114996
episode: 580 training return: tensor(269.7973, device='cuda:0')
episode: 581 training return: tensor(276.7366, device='cuda:0')
episode: 582 training return: tensor(275.9103, device='cuda:0')
episode: 583 training return: tensor(253.2115, device='cuda:0')
epoch: 146 test_true_pfm: 138.8142575473421 sim_pfm: 307.93696925168234
episode: 584 training return: tensor(268.8798, device='cuda:0')
episode: 585 training return: tensor(268.0181, device='cuda:0')
episode: 586 training return: tensor(273.9702, device='cuda:0')
episode: 587 training return: tensor(269.9500, device='cuda:0')
epoch: 147 test_true_pfm: 145.36226852245883 sim_pfm: 321.3118906713401
episode: 588 training return: tensor(266.6031, device='cuda:0')
episode: 589 training return: tensor(262.8136, device='cuda:0')
episode: 590 training return: tensor(274.4136, device='cuda:0')
episode: 591 training return: tensor(274.2339, device='cuda:0')
epoch: 148 test_true_pfm: 135.44362529217892 sim_pfm: 318.4534461783866
episode: 592 training return: tensor(269.6939, device='cuda:0')
episode: 593 training return: tensor(267.8434, device='cuda:0')
episode: 594 training return: tensor(283.0348, device='cuda:0')
episode: 595 training return: tensor(271.0221, device='cuda:0')
epoch: 149 test_true_pfm: 137.46779075810517 sim_pfm: 320.27377357085544
episode: 596 training return: tensor(274.5362, device='cuda:0')
episode: 597 training return: tensor(274.1067, device='cuda:0')
episode: 598 training return: tensor(265.2663, device='cuda:0')
episode: 599 training return: tensor(271.9924, device='cuda:0')
epoch: 150 test_true_pfm: 134.28245789592907 sim_pfm: 313.1939933889856
