['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '1', '--data', '100000', '--regu', '0.2']
2293.092962749887
episode: 0 training return: tensor(-321.1087, device='cuda:0')
episode: 1 training return: tensor(306.3079, device='cuda:0')
episode: 2 training return: tensor(191.8135, device='cuda:0')
episode: 3 training return: tensor(214.4231, device='cuda:0')
epoch: 1 test_true_pfm: 2296.6687185638984 sim_pfm: 202.0474214312077
episode: 4 training return: tensor(402.6003, device='cuda:0')
episode: 5 training return: tensor(321.6700, device='cuda:0')
episode: 6 training return: tensor(-80.9490, device='cuda:0')
episode: 7 training return: tensor(273.5590, device='cuda:0')
epoch: 2 test_true_pfm: 2404.110129608652 sim_pfm: 130.29127139394404
episode: 8 training return: tensor(259.3257, device='cuda:0')
episode: 9 training return: tensor(139.3775, device='cuda:0')
episode: 10 training return: tensor(315.8275, device='cuda:0')
episode: 11 training return: tensor(304.4829, device='cuda:0')
epoch: 3 test_true_pfm: 2579.8621286376733 sim_pfm: -78.40186345600523
episode: 12 training return: tensor(336.2914, device='cuda:0')
episode: 13 training return: tensor(406.2024, device='cuda:0')
episode: 14 training return: tensor(393.7197, device='cuda:0')
episode: 15 training return: tensor(421.8797, device='cuda:0')
epoch: 4 test_true_pfm: 2644.218708020196 sim_pfm: 438.87039471582585
episode: 16 training return: tensor(393.1755, device='cuda:0')
episode: 17 training return: tensor(365.6875, device='cuda:0')
episode: 18 training return: tensor(386.5920, device='cuda:0')
episode: 19 training return: tensor(-164.3137, device='cuda:0')
epoch: 5 test_true_pfm: 3390.0616610946254 sim_pfm: 440.7701521308627
episode: 20 training return: tensor(389.6631, device='cuda:0')
episode: 21 training return: tensor(-287.1819, device='cuda:0')
episode: 22 training return: tensor(392.2424, device='cuda:0')
episode: 23 training return: tensor(414.0881, device='cuda:0')
epoch: 6 test_true_pfm: 2564.9083509364623 sim_pfm: 53.82450542851196
episode: 24 training return: tensor(452.5825, device='cuda:0')
episode: 25 training return: tensor(421.0101, device='cuda:0')
episode: 26 training return: tensor(258.8892, device='cuda:0')
episode: 27 training return: tensor(43.7571, device='cuda:0')
epoch: 7 test_true_pfm: 2677.4852011257194 sim_pfm: 126.52567957899494
episode: 28 training return: tensor(-19.4205, device='cuda:0')
episode: 29 training return: tensor(420.8583, device='cuda:0')
episode: 30 training return: tensor(21.3501, device='cuda:0')
episode: 31 training return: tensor(136.9071, device='cuda:0')
epoch: 8 test_true_pfm: 2597.7338018886685 sim_pfm: 150.36011993640568
episode: 32 training return: tensor(364.8008, device='cuda:0')
episode: 33 training return: tensor(64.1828, device='cuda:0')
episode: 34 training return: tensor(315.7213, device='cuda:0')
episode: 35 training return: tensor(-125.0969, device='cuda:0')
epoch: 9 test_true_pfm: 2370.8942351530527 sim_pfm: 243.37231052736752
episode: 36 training return: tensor(450.5308, device='cuda:0')
episode: 37 training return: tensor(404.2026, device='cuda:0')
episode: 38 training return: tensor(-156.9075, device='cuda:0')
episode: 39 training return: tensor(-287.1055, device='cuda:0')
epoch: 10 test_true_pfm: 2570.09227069463 sim_pfm: 42.96767284466963
episode: 40 training return: tensor(394.8122, device='cuda:0')
episode: 41 training return: tensor(34.2266, device='cuda:0')
episode: 42 training return: tensor(2.6649, device='cuda:0')
episode: 43 training return: tensor(257.5808, device='cuda:0')
epoch: 11 test_true_pfm: 3305.068784256553 sim_pfm: 260.56823383129085
episode: 44 training return: tensor(346.1704, device='cuda:0')
episode: 45 training return: tensor(124.7659, device='cuda:0')
episode: 46 training return: tensor(416.5783, device='cuda:0')
episode: 47 training return: tensor(-300.3770, device='cuda:0')
epoch: 12 test_true_pfm: 3214.430294633683 sim_pfm: 262.01412518209935
episode: 48 training return: tensor(397.2600, device='cuda:0')
episode: 49 training return: tensor(-282.1328, device='cuda:0')
episode: 50 training return: tensor(341.6214, device='cuda:0')
episode: 51 training return: tensor(342.1996, device='cuda:0')
epoch: 13 test_true_pfm: 3020.1984111824113 sim_pfm: 157.383116544234
episode: 52 training return: tensor(335.5941, device='cuda:0')
episode: 53 training return: tensor(155.7908, device='cuda:0')
episode: 54 training return: tensor(-341.7862, device='cuda:0')
episode: 55 training return: tensor(428.0914, device='cuda:0')
epoch: 14 test_true_pfm: 3174.674121332247 sim_pfm: 287.7672235821762
episode: 56 training return: tensor(349.0653, device='cuda:0')
episode: 57 training return: tensor(231.3170, device='cuda:0')
episode: 58 training return: tensor(187.6886, device='cuda:0')
episode: 59 training return: tensor(354.5257, device='cuda:0')
epoch: 15 test_true_pfm: 3216.609538131144 sim_pfm: 368.42908576555783
episode: 60 training return: tensor(367.4216, device='cuda:0')
episode: 61 training return: tensor(382.1682, device='cuda:0')
episode: 62 training return: tensor(384.2980, device='cuda:0')
episode: 63 training return: tensor(403.6553, device='cuda:0')
epoch: 16 test_true_pfm: 3252.8935498636524 sim_pfm: 382.9765109586394
episode: 64 training return: tensor(353.0616, device='cuda:0')
episode: 65 training return: tensor(-124.8236, device='cuda:0')
episode: 66 training return: tensor(121.1141, device='cuda:0')
episode: 67 training return: tensor(340.7308, device='cuda:0')
epoch: 17 test_true_pfm: 3374.02460523147 sim_pfm: 340.1388936742442
episode: 68 training return: tensor(333.4912, device='cuda:0')
episode: 69 training return: tensor(433.7558, device='cuda:0')
episode: 70 training return: tensor(346.4223, device='cuda:0')
episode: 71 training return: tensor(279.5904, device='cuda:0')
epoch: 18 test_true_pfm: 2832.36013647341 sim_pfm: 125.81740120086276
episode: 72 training return: tensor(334.5464, device='cuda:0')
episode: 73 training return: tensor(232.5409, device='cuda:0')
episode: 74 training return: tensor(177.6628, device='cuda:0')
episode: 75 training return: tensor(70.0674, device='cuda:0')
epoch: 19 test_true_pfm: 3338.770931845758 sim_pfm: 298.1857863457408
episode: 76 training return: tensor(377.4140, device='cuda:0')
episode: 77 training return: tensor(369.9190, device='cuda:0')
episode: 78 training return: tensor(277.0366, device='cuda:0')
episode: 79 training return: tensor(414.1132, device='cuda:0')
epoch: 20 test_true_pfm: 3431.3426117893578 sim_pfm: 416.18413320170174
episode: 80 training return: tensor(349.2888, device='cuda:0')
episode: 81 training return: tensor(376.6393, device='cuda:0')
episode: 82 training return: tensor(374.4133, device='cuda:0')
episode: 83 training return: tensor(419.8134, device='cuda:0')
epoch: 21 test_true_pfm: 3383.0412026031554 sim_pfm: 339.1632192292406
episode: 84 training return: tensor(420.4091, device='cuda:0')
episode: 85 training return: tensor(316.4559, device='cuda:0')
episode: 86 training return: tensor(343.2797, device='cuda:0')
episode: 87 training return: tensor(307.1018, device='cuda:0')
epoch: 22 test_true_pfm: 3455.381549811927 sim_pfm: 403.9734722729966
episode: 88 training return: tensor(-426.7029, device='cuda:0')
episode: 89 training return: tensor(359.6157, device='cuda:0')
episode: 90 training return: tensor(321.4095, device='cuda:0')
episode: 91 training return: tensor(316.4568, device='cuda:0')
epoch: 23 test_true_pfm: 3438.0304869091638 sim_pfm: 316.97827251045965
episode: 92 training return: tensor(195.3140, device='cuda:0')
episode: 93 training return: tensor(415.2449, device='cuda:0')
episode: 94 training return: tensor(407.3779, device='cuda:0')
episode: 95 training return: tensor(330.0253, device='cuda:0')
epoch: 24 test_true_pfm: 3445.7741440852537 sim_pfm: 385.6785915252015
episode: 96 training return: tensor(350.8858, device='cuda:0')
episode: 97 training return: tensor(350.0629, device='cuda:0')
episode: 98 training return: tensor(361.7169, device='cuda:0')
episode: 99 training return: tensor(351.8421, device='cuda:0')
epoch: 25 test_true_pfm: 3520.5185524788308 sim_pfm: 380.6009675381065
episode: 100 training return: tensor(452.5201, device='cuda:0')
episode: 101 training return: tensor(132.6983, device='cuda:0')
episode: 102 training return: tensor(354.7720, device='cuda:0')
episode: 103 training return: tensor(398.4339, device='cuda:0')
epoch: 26 test_true_pfm: 3446.3321739311323 sim_pfm: 422.62055505778216
episode: 104 training return: tensor(339.4984, device='cuda:0')
episode: 105 training return: tensor(363.0851, device='cuda:0')
episode: 106 training return: tensor(386.0865, device='cuda:0')
episode: 107 training return: tensor(405.9887, device='cuda:0')
epoch: 27 test_true_pfm: 3432.981758052329 sim_pfm: 282.47090675596456
episode: 108 training return: tensor(439.7147, device='cuda:0')
episode: 109 training return: tensor(336.2622, device='cuda:0')
episode: 110 training return: tensor(383.4048, device='cuda:0')
episode: 111 training return: tensor(371.4835, device='cuda:0')
epoch: 28 test_true_pfm: 3383.724316489633 sim_pfm: 395.82910481457174
episode: 112 training return: tensor(395.5300, device='cuda:0')
episode: 113 training return: tensor(341.8651, device='cuda:0')
episode: 114 training return: tensor(399.9134, device='cuda:0')
episode: 115 training return: tensor(420.0993, device='cuda:0')
epoch: 29 test_true_pfm: 2895.5348690581263 sim_pfm: 239.20550856034
episode: 116 training return: tensor(349.3488, device='cuda:0')
episode: 117 training return: tensor(375.4075, device='cuda:0')
episode: 118 training return: tensor(407.1576, device='cuda:0')
episode: 119 training return: tensor(384.1831, device='cuda:0')
epoch: 30 test_true_pfm: 3418.5477678554166 sim_pfm: 294.9252529553293
episode: 120 training return: tensor(332.8825, device='cuda:0')
episode: 121 training return: tensor(412.8783, device='cuda:0')
episode: 122 training return: tensor(417.3315, device='cuda:0')
episode: 123 training return: tensor(266.2422, device='cuda:0')
epoch: 31 test_true_pfm: 3322.8783619489113 sim_pfm: 338.0121345204631
episode: 124 training return: tensor(423.6543, device='cuda:0')
episode: 125 training return: tensor(449.9037, device='cuda:0')
episode: 126 training return: tensor(338.3680, device='cuda:0')
episode: 127 training return: tensor(443.4648, device='cuda:0')
epoch: 32 test_true_pfm: 3433.41197333648 sim_pfm: 397.17313393542037
episode: 128 training return: tensor(379.1185, device='cuda:0')
episode: 129 training return: tensor(369.4242, device='cuda:0')
episode: 130 training return: tensor(363.2823, device='cuda:0')
episode: 131 training return: tensor(-128.1569, device='cuda:0')
epoch: 33 test_true_pfm: 3176.6769430174204 sim_pfm: 370.1900748358651
episode: 132 training return: tensor(321.1803, device='cuda:0')
episode: 133 training return: tensor(388.1026, device='cuda:0')
episode: 134 training return: tensor(405.4346, device='cuda:0')
episode: 135 training return: tensor(340.2190, device='cuda:0')
epoch: 34 test_true_pfm: 3376.340404122117 sim_pfm: 312.4488393233041
episode: 136 training return: tensor(193.4916, device='cuda:0')
episode: 137 training return: tensor(401.0018, device='cuda:0')
episode: 138 training return: tensor(335.2065, device='cuda:0')
episode: 139 training return: tensor(383.1288, device='cuda:0')
epoch: 35 test_true_pfm: 3441.597619038989 sim_pfm: 421.4053642503956
episode: 140 training return: tensor(126.5042, device='cuda:0')
episode: 141 training return: tensor(352.4147, device='cuda:0')
episode: 142 training return: tensor(443.3598, device='cuda:0')
episode: 143 training return: tensor(385.0876, device='cuda:0')
epoch: 36 test_true_pfm: 3398.217002358084 sim_pfm: 397.51102104370756
episode: 144 training return: tensor(355.0347, device='cuda:0')
episode: 145 training return: tensor(323.6151, device='cuda:0')
episode: 146 training return: tensor(340.3090, device='cuda:0')
episode: 147 training return: tensor(419.1031, device='cuda:0')
epoch: 37 test_true_pfm: 3501.2105805502088 sim_pfm: 415.740873081629
episode: 148 training return: tensor(331.9062, device='cuda:0')
episode: 149 training return: tensor(406.4597, device='cuda:0')
episode: 150 training return: tensor(374.5124, device='cuda:0')
episode: 151 training return: tensor(367.1058, device='cuda:0')
epoch: 38 test_true_pfm: 2810.8152412185864 sim_pfm: 285.07189891651313
episode: 152 training return: tensor(396.7721, device='cuda:0')
episode: 153 training return: tensor(416.8245, device='cuda:0')
episode: 154 training return: tensor(424.8463, device='cuda:0')
episode: 155 training return: tensor(167.3777, device='cuda:0')
epoch: 39 test_true_pfm: 3504.3148680582162 sim_pfm: 424.9357860776751
episode: 156 training return: tensor(242.6745, device='cuda:0')
episode: 157 training return: tensor(347.3643, device='cuda:0')
episode: 158 training return: tensor(423.0264, device='cuda:0')
episode: 159 training return: tensor(149.1018, device='cuda:0')
epoch: 40 test_true_pfm: 3436.693408213912 sim_pfm: 384.9676491357386
episode: 160 training return: tensor(429.6694, device='cuda:0')
episode: 161 training return: tensor(415.2715, device='cuda:0')
episode: 162 training return: tensor(364.4417, device='cuda:0')
episode: 163 training return: tensor(355.0298, device='cuda:0')
epoch: 41 test_true_pfm: 3429.943697118623 sim_pfm: 393.6989486165985
episode: 164 training return: tensor(430.1797, device='cuda:0')
episode: 165 training return: tensor(453.3970, device='cuda:0')
episode: 166 training return: tensor(356.0027, device='cuda:0')
episode: 167 training return: tensor(388.0198, device='cuda:0')
epoch: 42 test_true_pfm: 3534.5954442976454 sim_pfm: 482.88293125032214
episode: 168 training return: tensor(368.7060, device='cuda:0')
episode: 169 training return: tensor(406.9509, device='cuda:0')
episode: 170 training return: tensor(365.3045, device='cuda:0')
episode: 171 training return: tensor(341.9634, device='cuda:0')
epoch: 43 test_true_pfm: 3326.5692130369134 sim_pfm: 329.56536167288624
episode: 172 training return: tensor(-293.5615, device='cuda:0')
episode: 173 training return: tensor(432.7154, device='cuda:0')
episode: 174 training return: tensor(400.7549, device='cuda:0')
episode: 175 training return: tensor(372.4348, device='cuda:0')
epoch: 44 test_true_pfm: 3508.3699351296277 sim_pfm: 434.1569986952236
episode: 176 training return: tensor(334.1561, device='cuda:0')
episode: 177 training return: tensor(433.9254, device='cuda:0')
episode: 178 training return: tensor(424.3569, device='cuda:0')
episode: 179 training return: tensor(413.8472, device='cuda:0')
epoch: 45 test_true_pfm: 3107.6697422547845 sim_pfm: 247.5793915410759
episode: 180 training return: tensor(390.9036, device='cuda:0')
episode: 181 training return: tensor(386.2087, device='cuda:0')
episode: 182 training return: tensor(444.9228, device='cuda:0')
episode: 183 training return: tensor(363.8688, device='cuda:0')
epoch: 46 test_true_pfm: 3214.551955044102 sim_pfm: 463.59860951108084
episode: 184 training return: tensor(372.3353, device='cuda:0')
episode: 185 training return: tensor(391.4897, device='cuda:0')
episode: 186 training return: tensor(272.3102, device='cuda:0')
episode: 187 training return: tensor(380.6714, device='cuda:0')
epoch: 47 test_true_pfm: 3479.441578716254 sim_pfm: 454.0121204206371
episode: 188 training return: tensor(330.6638, device='cuda:0')
episode: 189 training return: tensor(414.1879, device='cuda:0')
episode: 190 training return: tensor(405.4639, device='cuda:0')
episode: 191 training return: tensor(428.0109, device='cuda:0')
epoch: 48 test_true_pfm: 3487.717466450518 sim_pfm: 449.75497570377775
episode: 192 training return: tensor(309.1825, device='cuda:0')
episode: 193 training return: tensor(400.8415, device='cuda:0')
episode: 194 training return: tensor(264.0394, device='cuda:0')
episode: 195 training return: tensor(320.6406, device='cuda:0')
epoch: 49 test_true_pfm: 3421.552436767244 sim_pfm: 398.0487212887383
episode: 196 training return: tensor(420.0660, device='cuda:0')
episode: 197 training return: tensor(390.1299, device='cuda:0')
episode: 198 training return: tensor(390.3590, device='cuda:0')
episode: 199 training return: tensor(399.2949, device='cuda:0')
epoch: 50 test_true_pfm: 3426.0697550648183 sim_pfm: 450.0670299498209
episode: 200 training return: tensor(375.5212, device='cuda:0')
episode: 201 training return: tensor(391.3767, device='cuda:0')
episode: 202 training return: tensor(396.3576, device='cuda:0')
episode: 203 training return: tensor(345.5990, device='cuda:0')
epoch: 51 test_true_pfm: 3487.8771692424384 sim_pfm: 426.0954035114458
episode: 204 training return: tensor(420.8087, device='cuda:0')
episode: 205 training return: tensor(409.5480, device='cuda:0')
episode: 206 training return: tensor(417.0157, device='cuda:0')
episode: 207 training return: tensor(407.2815, device='cuda:0')
epoch: 52 test_true_pfm: 3451.634421055203 sim_pfm: 402.72718613629695
episode: 208 training return: tensor(437.6521, device='cuda:0')
episode: 209 training return: tensor(375.8267, device='cuda:0')
episode: 210 training return: tensor(434.8365, device='cuda:0')
episode: 211 training return: tensor(420.0815, device='cuda:0')
epoch: 53 test_true_pfm: 3445.601907925868 sim_pfm: 420.41793119335006
episode: 212 training return: tensor(417.3417, device='cuda:0')
episode: 213 training return: tensor(481.6756, device='cuda:0')
episode: 214 training return: tensor(335.0752, device='cuda:0')
episode: 215 training return: tensor(507.1786, device='cuda:0')
epoch: 54 test_true_pfm: 3397.789251099759 sim_pfm: 423.8143363551159
episode: 216 training return: tensor(372.2214, device='cuda:0')
episode: 217 training return: tensor(364.8952, device='cuda:0')
episode: 218 training return: tensor(414.7180, device='cuda:0')
episode: 219 training return: tensor(395.7169, device='cuda:0')
epoch: 55 test_true_pfm: 3469.0295775517825 sim_pfm: 383.50983300385997
episode: 220 training return: tensor(461.6314, device='cuda:0')
episode: 221 training return: tensor(426.3880, device='cuda:0')
episode: 222 training return: tensor(-7.5810, device='cuda:0')
episode: 223 training return: tensor(374.1770, device='cuda:0')
epoch: 56 test_true_pfm: 3500.6359561110203 sim_pfm: 472.5657051449137
episode: 224 training return: tensor(371.6016, device='cuda:0')
episode: 225 training return: tensor(422.6578, device='cuda:0')
episode: 226 training return: tensor(353.6227, device='cuda:0')
episode: 227 training return: tensor(425.5102, device='cuda:0')
epoch: 57 test_true_pfm: 3370.4127505488796 sim_pfm: 427.3817281266577
episode: 228 training return: tensor(429.4481, device='cuda:0')
episode: 229 training return: tensor(316.3670, device='cuda:0')
episode: 230 training return: tensor(393.1541, device='cuda:0')
episode: 231 training return: tensor(415.8466, device='cuda:0')
epoch: 58 test_true_pfm: 3497.0565724118965 sim_pfm: 449.35185010740923
episode: 232 training return: tensor(397.6007, device='cuda:0')
episode: 233 training return: tensor(454.7557, device='cuda:0')
episode: 234 training return: tensor(415.8151, device='cuda:0')
episode: 235 training return: tensor(463.9333, device='cuda:0')
epoch: 59 test_true_pfm: 3490.0544247656994 sim_pfm: 467.1322949586126
episode: 236 training return: tensor(428.6104, device='cuda:0')
episode: 237 training return: tensor(63.4188, device='cuda:0')
episode: 238 training return: tensor(423.2409, device='cuda:0')
episode: 239 training return: tensor(354.4822, device='cuda:0')
epoch: 60 test_true_pfm: 3396.0520214589956 sim_pfm: 381.9419782012119
episode: 240 training return: tensor(362.6789, device='cuda:0')
episode: 241 training return: tensor(6.6800, device='cuda:0')
episode: 242 training return: tensor(455.9056, device='cuda:0')
episode: 243 training return: tensor(262.4287, device='cuda:0')
epoch: 61 test_true_pfm: 3453.9818434325243 sim_pfm: 416.7666212044035
episode: 244 training return: tensor(407.2078, device='cuda:0')
episode: 245 training return: tensor(409.4162, device='cuda:0')
episode: 246 training return: tensor(445.4254, device='cuda:0')
episode: 247 training return: tensor(427.8996, device='cuda:0')
epoch: 62 test_true_pfm: 3426.5186920795127 sim_pfm: 376.65555485435954
episode: 248 training return: tensor(375.3904, device='cuda:0')
episode: 249 training return: tensor(379.1625, device='cuda:0')
episode: 250 training return: tensor(347.8213, device='cuda:0')
episode: 251 training return: tensor(358.5238, device='cuda:0')
epoch: 63 test_true_pfm: 3467.065925351897 sim_pfm: 437.3652158163022
episode: 252 training return: tensor(377.8779, device='cuda:0')
episode: 253 training return: tensor(419.1934, device='cuda:0')
episode: 254 training return: tensor(429.8641, device='cuda:0')
episode: 255 training return: tensor(426.1381, device='cuda:0')
epoch: 64 test_true_pfm: 3532.8534239200903 sim_pfm: 448.2381939459786
episode: 256 training return: tensor(431.8126, device='cuda:0')
episode: 257 training return: tensor(479.6049, device='cuda:0')
episode: 258 training return: tensor(426.2732, device='cuda:0')
episode: 259 training return: tensor(484.6679, device='cuda:0')
epoch: 65 test_true_pfm: 3384.672441614766 sim_pfm: 350.44691292366286
episode: 260 training return: tensor(421.4359, device='cuda:0')
episode: 261 training return: tensor(432.9322, device='cuda:0')
episode: 262 training return: tensor(371.7309, device='cuda:0')
episode: 263 training return: tensor(48.7384, device='cuda:0')
epoch: 66 test_true_pfm: 3525.4147913656784 sim_pfm: 454.74835886313423
episode: 264 training return: tensor(399.2032, device='cuda:0')
episode: 265 training return: tensor(482.2129, device='cuda:0')
episode: 266 training return: tensor(473.3509, device='cuda:0')
episode: 267 training return: tensor(428.4686, device='cuda:0')
epoch: 67 test_true_pfm: 3463.151975083816 sim_pfm: 427.0839129587791
episode: 268 training return: tensor(407.5441, device='cuda:0')
episode: 269 training return: tensor(356.4011, device='cuda:0')
episode: 270 training return: tensor(425.6061, device='cuda:0')
episode: 271 training return: tensor(424.0108, device='cuda:0')
epoch: 68 test_true_pfm: 3492.1454072553133 sim_pfm: 448.75599220352404
episode: 272 training return: tensor(401.9299, device='cuda:0')
episode: 273 training return: tensor(486.0421, device='cuda:0')
episode: 274 training return: tensor(381.0468, device='cuda:0')
episode: 275 training return: tensor(504.4918, device='cuda:0')
epoch: 69 test_true_pfm: 3450.32216724405 sim_pfm: 414.5209643850103
episode: 276 training return: tensor(368.8935, device='cuda:0')
episode: 277 training return: tensor(464.6607, device='cuda:0')
episode: 278 training return: tensor(415.3725, device='cuda:0')
episode: 279 training return: tensor(398.6525, device='cuda:0')
epoch: 70 test_true_pfm: 3508.0420542607294 sim_pfm: 471.2395074157491
episode: 280 training return: tensor(422.1692, device='cuda:0')
episode: 281 training return: tensor(401.3693, device='cuda:0')
episode: 282 training return: tensor(440.7999, device='cuda:0')
episode: 283 training return: tensor(458.4176, device='cuda:0')
epoch: 71 test_true_pfm: 3511.1475991109824 sim_pfm: 469.13174534713227
episode: 284 training return: tensor(460.8261, device='cuda:0')
episode: 285 training return: tensor(373.6027, device='cuda:0')
episode: 286 training return: tensor(392.7935, device='cuda:0')
episode: 287 training return: tensor(380.6851, device='cuda:0')
epoch: 72 test_true_pfm: 3429.4995261634517 sim_pfm: 410.34077971129835
episode: 288 training return: tensor(420.6256, device='cuda:0')
episode: 289 training return: tensor(391.7858, device='cuda:0')
episode: 290 training return: tensor(421.7169, device='cuda:0')
episode: 291 training return: tensor(393.2950, device='cuda:0')
epoch: 73 test_true_pfm: 3444.5822874412115 sim_pfm: 476.17459768243134
episode: 292 training return: tensor(395.6955, device='cuda:0')
episode: 293 training return: tensor(441.9720, device='cuda:0')
episode: 294 training return: tensor(410.4654, device='cuda:0')
episode: 295 training return: tensor(442.5297, device='cuda:0')
epoch: 74 test_true_pfm: 3484.4396961758307 sim_pfm: 446.65725098868523
episode: 296 training return: tensor(426.3799, device='cuda:0')
episode: 297 training return: tensor(445.5440, device='cuda:0')
episode: 298 training return: tensor(463.1024, device='cuda:0')
episode: 299 training return: tensor(442.1628, device='cuda:0')
epoch: 75 test_true_pfm: 3554.836319570419 sim_pfm: 455.9888978804229
episode: 300 training return: tensor(425.7350, device='cuda:0')
episode: 301 training return: tensor(385.5353, device='cuda:0')
episode: 302 training return: tensor(440.6967, device='cuda:0')
episode: 303 training return: tensor(466.2999, device='cuda:0')
epoch: 76 test_true_pfm: 3554.0240358024953 sim_pfm: 479.4646308910257
episode: 304 training return: tensor(442.2559, device='cuda:0')
episode: 305 training return: tensor(435.5980, device='cuda:0')
episode: 306 training return: tensor(399.5828, device='cuda:0')
episode: 307 training return: tensor(377.5411, device='cuda:0')
epoch: 77 test_true_pfm: 3479.1623896848905 sim_pfm: 449.45131420736044
episode: 308 training return: tensor(431.5827, device='cuda:0')
episode: 309 training return: tensor(465.3579, device='cuda:0')
episode: 310 training return: tensor(427.6266, device='cuda:0')
episode: 311 training return: tensor(434.0042, device='cuda:0')
epoch: 78 test_true_pfm: 3565.0238733820406 sim_pfm: 487.54097629047465
episode: 312 training return: tensor(433.1045, device='cuda:0')
episode: 313 training return: tensor(432.6263, device='cuda:0')
episode: 314 training return: tensor(457.3391, device='cuda:0')
episode: 315 training return: tensor(436.0832, device='cuda:0')
epoch: 79 test_true_pfm: 3503.6982946849844 sim_pfm: 462.7994484259786
episode: 316 training return: tensor(323.1334, device='cuda:0')
episode: 317 training return: tensor(386.3442, device='cuda:0')
episode: 318 training return: tensor(453.5582, device='cuda:0')
episode: 319 training return: tensor(92.4062, device='cuda:0')
epoch: 80 test_true_pfm: 3415.060441256317 sim_pfm: 379.9953087591469
episode: 320 training return: tensor(382.3208, device='cuda:0')
episode: 321 training return: tensor(434.4175, device='cuda:0')
episode: 322 training return: tensor(417.8109, device='cuda:0')
episode: 323 training return: tensor(443.8834, device='cuda:0')
epoch: 81 test_true_pfm: 3593.09520643079 sim_pfm: 496.79615025362
episode: 324 training return: tensor(388.5317, device='cuda:0')
episode: 325 training return: tensor(448.7870, device='cuda:0')
episode: 326 training return: tensor(438.1948, device='cuda:0')
episode: 327 training return: tensor(257.0315, device='cuda:0')
epoch: 82 test_true_pfm: 3540.616262490394 sim_pfm: 450.6145958999987
episode: 328 training return: tensor(425.3002, device='cuda:0')
episode: 329 training return: tensor(487.5637, device='cuda:0')
episode: 330 training return: tensor(461.7927, device='cuda:0')
episode: 331 training return: tensor(441.3376, device='cuda:0')
epoch: 83 test_true_pfm: 3470.990103205198 sim_pfm: 451.4955797153525
episode: 332 training return: tensor(439.3998, device='cuda:0')
episode: 333 training return: tensor(381.6437, device='cuda:0')
episode: 334 training return: tensor(428.9187, device='cuda:0')
episode: 335 training return: tensor(444.8184, device='cuda:0')
epoch: 84 test_true_pfm: 3503.160022373217 sim_pfm: 457.84736135648564
episode: 336 training return: tensor(473.9344, device='cuda:0')
episode: 337 training return: tensor(425.8509, device='cuda:0')
episode: 338 training return: tensor(432.8948, device='cuda:0')
episode: 339 training return: tensor(392.5714, device='cuda:0')
epoch: 85 test_true_pfm: 3465.105034883989 sim_pfm: 468.68747874492936
episode: 340 training return: tensor(416.5694, device='cuda:0')
episode: 341 training return: tensor(430.7169, device='cuda:0')
episode: 342 training return: tensor(440.3205, device='cuda:0')
episode: 343 training return: tensor(346.8953, device='cuda:0')
epoch: 86 test_true_pfm: 3591.4467979322467 sim_pfm: 475.08580948018545
episode: 344 training return: tensor(404.6111, device='cuda:0')
episode: 345 training return: tensor(395.4181, device='cuda:0')
episode: 346 training return: tensor(428.9579, device='cuda:0')
episode: 347 training return: tensor(437.1278, device='cuda:0')
epoch: 87 test_true_pfm: 3556.102680327904 sim_pfm: 475.6303830937929
episode: 348 training return: tensor(410.2515, device='cuda:0')
episode: 349 training return: tensor(418.7932, device='cuda:0')
episode: 350 training return: tensor(484.1663, device='cuda:0')
episode: 351 training return: tensor(396.5863, device='cuda:0')
epoch: 88 test_true_pfm: 3574.619852535652 sim_pfm: 484.7615968642446
episode: 352 training return: tensor(467.5324, device='cuda:0')
episode: 353 training return: tensor(516.9854, device='cuda:0')
episode: 354 training return: tensor(484.0421, device='cuda:0')
episode: 355 training return: tensor(420.6190, device='cuda:0')
epoch: 89 test_true_pfm: 3565.936986066636 sim_pfm: 483.2967590714882
episode: 356 training return: tensor(462.3946, device='cuda:0')
episode: 357 training return: tensor(438.5958, device='cuda:0')
episode: 358 training return: tensor(406.5600, device='cuda:0')
episode: 359 training return: tensor(433.3305, device='cuda:0')
epoch: 90 test_true_pfm: 3532.1606146072168 sim_pfm: 481.8781657165964
episode: 360 training return: tensor(382.2544, device='cuda:0')
episode: 361 training return: tensor(448.9162, device='cuda:0')
episode: 362 training return: tensor(472.4655, device='cuda:0')
episode: 363 training return: tensor(432.0432, device='cuda:0')
epoch: 91 test_true_pfm: 3531.319026061563 sim_pfm: 455.3458417507354
episode: 364 training return: tensor(409.7651, device='cuda:0')
episode: 365 training return: tensor(379.0082, device='cuda:0')
episode: 366 training return: tensor(405.2007, device='cuda:0')
episode: 367 training return: tensor(439.9329, device='cuda:0')
epoch: 92 test_true_pfm: 3536.134237543751 sim_pfm: 449.40346358685446
episode: 368 training return: tensor(422.5897, device='cuda:0')
episode: 369 training return: tensor(399.0257, device='cuda:0')
episode: 370 training return: tensor(-144.9113, device='cuda:0')
episode: 371 training return: tensor(424.3398, device='cuda:0')
epoch: 93 test_true_pfm: 3377.1454890264145 sim_pfm: 364.82447959459387
episode: 372 training return: tensor(390.5955, device='cuda:0')
episode: 373 training return: tensor(461.3506, device='cuda:0')
episode: 374 training return: tensor(374.5326, device='cuda:0')
episode: 375 training return: tensor(466.3344, device='cuda:0')
epoch: 94 test_true_pfm: 3491.5623542526027 sim_pfm: 447.2411864527967
episode: 376 training return: tensor(390.6642, device='cuda:0')
episode: 377 training return: tensor(476.5031, device='cuda:0')
episode: 378 training return: tensor(433.3125, device='cuda:0')
episode: 379 training return: tensor(469.3991, device='cuda:0')
epoch: 95 test_true_pfm: 3507.8642245364404 sim_pfm: 384.6221915540421
episode: 380 training return: tensor(430.4758, device='cuda:0')
episode: 381 training return: tensor(427.4870, device='cuda:0')
episode: 382 training return: tensor(368.1302, device='cuda:0')
episode: 383 training return: tensor(413.4250, device='cuda:0')
epoch: 96 test_true_pfm: 3395.4657018841804 sim_pfm: 371.860526440539
episode: 384 training return: tensor(440.4060, device='cuda:0')
episode: 385 training return: tensor(416.2059, device='cuda:0')
episode: 386 training return: tensor(424.0122, device='cuda:0')
episode: 387 training return: tensor(414.1946, device='cuda:0')
epoch: 97 test_true_pfm: 3496.8133731147404 sim_pfm: 399.35053609874257
episode: 388 training return: tensor(377.7932, device='cuda:0')
episode: 389 training return: tensor(385.1556, device='cuda:0')
episode: 390 training return: tensor(418.7372, device='cuda:0')
episode: 391 training return: tensor(408.8170, device='cuda:0')
epoch: 98 test_true_pfm: 3596.5309786571142 sim_pfm: 482.1718607268219
episode: 392 training return: tensor(494.7041, device='cuda:0')
episode: 393 training return: tensor(440.0525, device='cuda:0')
episode: 394 training return: tensor(420.1332, device='cuda:0')
episode: 395 training return: tensor(525.2029, device='cuda:0')
epoch: 99 test_true_pfm: 3480.435150631345 sim_pfm: 430.7938616439157
episode: 396 training return: tensor(370.0826, device='cuda:0')
episode: 397 training return: tensor(405.1006, device='cuda:0')
episode: 398 training return: tensor(369.8797, device='cuda:0')
episode: 399 training return: tensor(425.9249, device='cuda:0')
epoch: 100 test_true_pfm: 3522.7012276660403 sim_pfm: 447.5434914366536
episode: 400 training return: tensor(382.4200, device='cuda:0')
episode: 401 training return: tensor(458.3550, device='cuda:0')
episode: 402 training return: tensor(387.6032, device='cuda:0')
episode: 403 training return: tensor(410.3015, device='cuda:0')
epoch: 101 test_true_pfm: 3492.8715976883905 sim_pfm: 442.91979122540215
episode: 404 training return: tensor(405.0529, device='cuda:0')
episode: 405 training return: tensor(464.3957, device='cuda:0')
episode: 406 training return: tensor(484.2432, device='cuda:0')
episode: 407 training return: tensor(438.1733, device='cuda:0')
epoch: 102 test_true_pfm: 3506.2925385238523 sim_pfm: 478.6600022269704
episode: 408 training return: tensor(441.4487, device='cuda:0')
episode: 409 training return: tensor(426.3307, device='cuda:0')
episode: 410 training return: tensor(383.1210, device='cuda:0')
episode: 411 training return: tensor(443.5401, device='cuda:0')
epoch: 103 test_true_pfm: 3474.819823637868 sim_pfm: 426.0572864752806
episode: 412 training return: tensor(401.9776, device='cuda:0')
episode: 413 training return: tensor(472.0812, device='cuda:0')
episode: 414 training return: tensor(398.4393, device='cuda:0')
episode: 415 training return: tensor(453.6925, device='cuda:0')
epoch: 104 test_true_pfm: 3480.6334522220545 sim_pfm: 419.6377361566604
episode: 416 training return: tensor(388.7409, device='cuda:0')
episode: 417 training return: tensor(436.4128, device='cuda:0')
episode: 418 training return: tensor(502.1371, device='cuda:0')
episode: 419 training return: tensor(423.9701, device='cuda:0')
epoch: 105 test_true_pfm: 3448.3287659044163 sim_pfm: 447.8640596288412
episode: 420 training return: tensor(403.5106, device='cuda:0')
episode: 421 training return: tensor(429.0817, device='cuda:0')
episode: 422 training return: tensor(350.9108, device='cuda:0')
episode: 423 training return: tensor(504.8269, device='cuda:0')
epoch: 106 test_true_pfm: 3597.353198465144 sim_pfm: 527.9654817966124
episode: 424 training return: tensor(402.9826, device='cuda:0')
episode: 425 training return: tensor(400.7415, device='cuda:0')
episode: 426 training return: tensor(416.2109, device='cuda:0')
episode: 427 training return: tensor(436.9442, device='cuda:0')
epoch: 107 test_true_pfm: 3471.6303387047133 sim_pfm: 448.52616168246215
episode: 428 training return: tensor(444.3061, device='cuda:0')
episode: 429 training return: tensor(405.8404, device='cuda:0')
episode: 430 training return: tensor(493.6843, device='cuda:0')
episode: 431 training return: tensor(361.5056, device='cuda:0')
epoch: 108 test_true_pfm: 3490.980351705944 sim_pfm: 441.8772725169935
episode: 432 training return: tensor(413.9115, device='cuda:0')
episode: 433 training return: tensor(421.7433, device='cuda:0')
episode: 434 training return: tensor(480.8238, device='cuda:0')
episode: 435 training return: tensor(422.6110, device='cuda:0')
epoch: 109 test_true_pfm: 3514.2345887675874 sim_pfm: 477.233566064989
episode: 436 training return: tensor(461.0451, device='cuda:0')
episode: 437 training return: tensor(378.2285, device='cuda:0')
episode: 438 training return: tensor(461.6437, device='cuda:0')
episode: 439 training return: tensor(432.1913, device='cuda:0')
epoch: 110 test_true_pfm: 3463.214068668481 sim_pfm: 429.4400988854468
episode: 440 training return: tensor(440.3199, device='cuda:0')
episode: 441 training return: tensor(463.4408, device='cuda:0')
episode: 442 training return: tensor(386.9744, device='cuda:0')
episode: 443 training return: tensor(450.7225, device='cuda:0')
epoch: 111 test_true_pfm: 3340.038347221655 sim_pfm: 340.0562282470637
episode: 444 training return: tensor(340.4677, device='cuda:0')
episode: 445 training return: tensor(440.5464, device='cuda:0')
episode: 446 training return: tensor(475.5487, device='cuda:0')
episode: 447 training return: tensor(427.9891, device='cuda:0')
epoch: 112 test_true_pfm: 3556.554456713238 sim_pfm: 491.57408477932523
episode: 448 training return: tensor(456.6554, device='cuda:0')
episode: 449 training return: tensor(420.7632, device='cuda:0')
episode: 450 training return: tensor(439.7901, device='cuda:0')
episode: 451 training return: tensor(427.4964, device='cuda:0')
epoch: 113 test_true_pfm: 3627.775362747782 sim_pfm: 463.9901887300075
episode: 452 training return: tensor(388.2491, device='cuda:0')
episode: 453 training return: tensor(440.1913, device='cuda:0')
episode: 454 training return: tensor(496.5179, device='cuda:0')
episode: 455 training return: tensor(401.6810, device='cuda:0')
epoch: 114 test_true_pfm: 3490.86451207095 sim_pfm: 431.9113682308719
episode: 456 training return: tensor(430.1112, device='cuda:0')
episode: 457 training return: tensor(431.8586, device='cuda:0')
episode: 458 training return: tensor(429.2123, device='cuda:0')
episode: 459 training return: tensor(365.4752, device='cuda:0')
epoch: 115 test_true_pfm: 3591.16222629957 sim_pfm: 494.37170197287924
episode: 460 training return: tensor(405.5287, device='cuda:0')
episode: 461 training return: tensor(425.0638, device='cuda:0')
episode: 462 training return: tensor(421.2855, device='cuda:0')
episode: 463 training return: tensor(446.9434, device='cuda:0')
epoch: 116 test_true_pfm: 3355.9536100400437 sim_pfm: 348.37377494131215
episode: 464 training return: tensor(402.2459, device='cuda:0')
episode: 465 training return: tensor(434.2872, device='cuda:0')
episode: 466 training return: tensor(414.6374, device='cuda:0')
episode: 467 training return: tensor(394.4744, device='cuda:0')
epoch: 117 test_true_pfm: 3556.875958734428 sim_pfm: 480.8460657618416
episode: 468 training return: tensor(466.8518, device='cuda:0')
episode: 469 training return: tensor(408.2411, device='cuda:0')
episode: 470 training return: tensor(426.9729, device='cuda:0')
episode: 471 training return: tensor(395.8616, device='cuda:0')
epoch: 118 test_true_pfm: 3511.058213456088 sim_pfm: 431.5606656027376
episode: 472 training return: tensor(444.6480, device='cuda:0')
episode: 473 training return: tensor(397.6859, device='cuda:0')
episode: 474 training return: tensor(435.5192, device='cuda:0')
episode: 475 training return: tensor(412.4718, device='cuda:0')
epoch: 119 test_true_pfm: 3526.8678812855646 sim_pfm: 474.35309859971557
episode: 476 training return: tensor(413.1689, device='cuda:0')
episode: 477 training return: tensor(467.4140, device='cuda:0')
episode: 478 training return: tensor(373.0298, device='cuda:0')
episode: 479 training return: tensor(411.0114, device='cuda:0')
epoch: 120 test_true_pfm: 3427.6652382186353 sim_pfm: 474.27327661784756
episode: 480 training return: tensor(404.5139, device='cuda:0')
episode: 481 training return: tensor(367.2888, device='cuda:0')
episode: 482 training return: tensor(422.3445, device='cuda:0')
episode: 483 training return: tensor(397.7295, device='cuda:0')
epoch: 121 test_true_pfm: 3541.9618358519183 sim_pfm: 504.58466743910685
episode: 484 training return: tensor(444.7775, device='cuda:0')
episode: 485 training return: tensor(475.3023, device='cuda:0')
episode: 486 training return: tensor(410.8056, device='cuda:0')
episode: 487 training return: tensor(381.9446, device='cuda:0')
epoch: 122 test_true_pfm: 3379.754459278433 sim_pfm: 352.7022684082428
episode: 488 training return: tensor(380.8014, device='cuda:0')
episode: 489 training return: tensor(400.5562, device='cuda:0')
episode: 490 training return: tensor(441.5165, device='cuda:0')
episode: 491 training return: tensor(307.2667, device='cuda:0')
epoch: 123 test_true_pfm: 3491.2151130976163 sim_pfm: 459.9116807795751
episode: 492 training return: tensor(408.3807, device='cuda:0')
episode: 493 training return: tensor(344.3104, device='cuda:0')
episode: 494 training return: tensor(444.3774, device='cuda:0')
episode: 495 training return: tensor(417.2762, device='cuda:0')
epoch: 124 test_true_pfm: 3544.668320205646 sim_pfm: 456.70210970631643
episode: 496 training return: tensor(486.5668, device='cuda:0')
episode: 497 training return: tensor(424.4088, device='cuda:0')
episode: 498 training return: tensor(409.3188, device='cuda:0')
episode: 499 training return: tensor(390.6150, device='cuda:0')
epoch: 125 test_true_pfm: 3397.665361832527 sim_pfm: 388.14089065231383
episode: 500 training return: tensor(446.1700, device='cuda:0')
episode: 501 training return: tensor(396.9380, device='cuda:0')
episode: 502 training return: tensor(403.2821, device='cuda:0')
episode: 503 training return: tensor(455.5909, device='cuda:0')
epoch: 126 test_true_pfm: 3552.9967802525225 sim_pfm: 486.97496055035543
episode: 504 training return: tensor(365.3551, device='cuda:0')
episode: 505 training return: tensor(409.4367, device='cuda:0')
episode: 506 training return: tensor(412.0800, device='cuda:0')
episode: 507 training return: tensor(364.0229, device='cuda:0')
epoch: 127 test_true_pfm: 3466.752076962608 sim_pfm: 421.84605097175034
episode: 508 training return: tensor(454.1852, device='cuda:0')
episode: 509 training return: tensor(441.8620, device='cuda:0')
episode: 510 training return: tensor(320.7314, device='cuda:0')
episode: 511 training return: tensor(446.3761, device='cuda:0')
epoch: 128 test_true_pfm: 3510.126020314561 sim_pfm: 467.3167942657213
episode: 512 training return: tensor(411.8274, device='cuda:0')
episode: 513 training return: tensor(443.7697, device='cuda:0')
episode: 514 training return: tensor(424.3027, device='cuda:0')
episode: 515 training return: tensor(468.6455, device='cuda:0')
epoch: 129 test_true_pfm: 3489.010969495926 sim_pfm: 449.8961962708466
episode: 516 training return: tensor(422.4668, device='cuda:0')
episode: 517 training return: tensor(376.9763, device='cuda:0')
episode: 518 training return: tensor(395.4712, device='cuda:0')
episode: 519 training return: tensor(431.5051, device='cuda:0')
epoch: 130 test_true_pfm: 3530.1903323294296 sim_pfm: 510.46872951385257
episode: 520 training return: tensor(477.6310, device='cuda:0')
episode: 521 training return: tensor(452.0514, device='cuda:0')
episode: 522 training return: tensor(415.5122, device='cuda:0')
episode: 523 training return: tensor(407.1749, device='cuda:0')
epoch: 131 test_true_pfm: 3375.3708291848757 sim_pfm: 365.4333258362215
episode: 524 training return: tensor(423.2636, device='cuda:0')
episode: 525 training return: tensor(383.8581, device='cuda:0')
episode: 526 training return: tensor(477.0764, device='cuda:0')
episode: 527 training return: tensor(345.8615, device='cuda:0')
epoch: 132 test_true_pfm: 3520.4372182497223 sim_pfm: 458.57948509703664
episode: 528 training return: tensor(348.9982, device='cuda:0')
episode: 529 training return: tensor(431.4434, device='cuda:0')
episode: 530 training return: tensor(431.9781, device='cuda:0')
episode: 531 training return: tensor(412.3092, device='cuda:0')
epoch: 133 test_true_pfm: 3560.7863895746154 sim_pfm: 480.80924480148434
episode: 532 training return: tensor(398.6004, device='cuda:0')
episode: 533 training return: tensor(457.7218, device='cuda:0')
episode: 534 training return: tensor(437.2369, device='cuda:0')
episode: 535 training return: tensor(458.4681, device='cuda:0')
epoch: 134 test_true_pfm: 3330.643405436187 sim_pfm: 326.76482752837666
episode: 536 training return: tensor(407.9714, device='cuda:0')
episode: 537 training return: tensor(413.9831, device='cuda:0')
episode: 538 training return: tensor(440.6776, device='cuda:0')
episode: 539 training return: tensor(414.1640, device='cuda:0')
epoch: 135 test_true_pfm: 3492.5005250973695 sim_pfm: 450.3023003153503
episode: 540 training return: tensor(359.0643, device='cuda:0')
episode: 541 training return: tensor(451.2422, device='cuda:0')
episode: 542 training return: tensor(406.2213, device='cuda:0')
episode: 543 training return: tensor(353.2544, device='cuda:0')
epoch: 136 test_true_pfm: 3417.2122161437364 sim_pfm: 397.03705645740655
episode: 544 training return: tensor(410.0475, device='cuda:0')
episode: 545 training return: tensor(441.4360, device='cuda:0')
episode: 546 training return: tensor(478.3136, device='cuda:0')
episode: 547 training return: tensor(428.4295, device='cuda:0')
epoch: 137 test_true_pfm: 3403.4466114452757 sim_pfm: 393.30252907486283
episode: 548 training return: tensor(396.4087, device='cuda:0')
episode: 549 training return: tensor(473.1802, device='cuda:0')
episode: 550 training return: tensor(443.1432, device='cuda:0')
episode: 551 training return: tensor(449.1973, device='cuda:0')
epoch: 138 test_true_pfm: 3579.9335777165993 sim_pfm: 501.8030769934024
episode: 552 training return: tensor(421.0812, device='cuda:0')
episode: 553 training return: tensor(417.5615, device='cuda:0')
episode: 554 training return: tensor(420.3068, device='cuda:0')
episode: 555 training return: tensor(466.9122, device='cuda:0')
epoch: 139 test_true_pfm: 3373.3788863868144 sim_pfm: 362.61374248102464
episode: 556 training return: tensor(414.8726, device='cuda:0')
episode: 557 training return: tensor(387.7046, device='cuda:0')
episode: 558 training return: tensor(420.4839, device='cuda:0')
episode: 559 training return: tensor(407.7825, device='cuda:0')
epoch: 140 test_true_pfm: 3445.838218428615 sim_pfm: 415.3595679044568
episode: 560 training return: tensor(427.0426, device='cuda:0')
episode: 561 training return: tensor(430.8228, device='cuda:0')
episode: 562 training return: tensor(437.6621, device='cuda:0')
episode: 563 training return: tensor(428.4915, device='cuda:0')
epoch: 141 test_true_pfm: 3465.6040202701693 sim_pfm: 421.45352832538384
episode: 564 training return: tensor(477.7107, device='cuda:0')
episode: 565 training return: tensor(413.8200, device='cuda:0')
episode: 566 training return: tensor(417.8893, device='cuda:0')
episode: 567 training return: tensor(352.8646, device='cuda:0')
epoch: 142 test_true_pfm: 3445.458472653006 sim_pfm: 398.2470847905497
episode: 568 training return: tensor(492.4139, device='cuda:0')
episode: 569 training return: tensor(411.1382, device='cuda:0')
episode: 570 training return: tensor(417.8774, device='cuda:0')
episode: 571 training return: tensor(370.0375, device='cuda:0')
epoch: 143 test_true_pfm: 3513.4323377753026 sim_pfm: 416.40002405226306
episode: 572 training return: tensor(420.1838, device='cuda:0')
episode: 573 training return: tensor(499.1902, device='cuda:0')
episode: 574 training return: tensor(393.8651, device='cuda:0')
episode: 575 training return: tensor(402.2980, device='cuda:0')
epoch: 144 test_true_pfm: 3439.427307536765 sim_pfm: 415.57773832355934
episode: 576 training return: tensor(412.6369, device='cuda:0')
episode: 577 training return: tensor(421.7139, device='cuda:0')
episode: 578 training return: tensor(438.9415, device='cuda:0')
episode: 579 training return: tensor(405.9765, device='cuda:0')
epoch: 145 test_true_pfm: 3464.5210788653635 sim_pfm: 420.63708295704174
episode: 580 training return: tensor(380.7469, device='cuda:0')
episode: 581 training return: tensor(402.4203, device='cuda:0')
episode: 582 training return: tensor(366.7379, device='cuda:0')
episode: 583 training return: tensor(457.5961, device='cuda:0')
epoch: 146 test_true_pfm: 3548.181071451842 sim_pfm: 526.2061773312938
episode: 584 training return: tensor(421.3971, device='cuda:0')
episode: 585 training return: tensor(349.3748, device='cuda:0')
episode: 586 training return: tensor(392.4186, device='cuda:0')
episode: 587 training return: tensor(395.0904, device='cuda:0')
epoch: 147 test_true_pfm: 3473.4693273700746 sim_pfm: 446.51421381525387
episode: 588 training return: tensor(394.4277, device='cuda:0')
episode: 589 training return: tensor(403.9548, device='cuda:0')
episode: 590 training return: tensor(475.8370, device='cuda:0')
episode: 591 training return: tensor(452.4496, device='cuda:0')
epoch: 148 test_true_pfm: 3453.662608166955 sim_pfm: 408.2007883565966
episode: 592 training return: tensor(438.0704, device='cuda:0')
episode: 593 training return: tensor(376.4277, device='cuda:0')
episode: 594 training return: tensor(438.3550, device='cuda:0')
episode: 595 training return: tensor(403.4736, device='cuda:0')
epoch: 149 test_true_pfm: 3463.044855733377 sim_pfm: 433.84257059249404
episode: 596 training return: tensor(-162.1490, device='cuda:0')
episode: 597 training return: tensor(351.0922, device='cuda:0')
episode: 598 training return: tensor(416.3254, device='cuda:0')
episode: 599 training return: tensor(399.8304, device='cuda:0')
epoch: 150 test_true_pfm: 3344.482452533019 sim_pfm: 360.31541634684737
