['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '4', '--data', '100000', '--regu', '0.2']
2485.57761732423
episode: 0 training return: tensor(65.8040, device='cuda:0')
episode: 1 training return: tensor(218.5818, device='cuda:0')
episode: 2 training return: tensor(335.2766, device='cuda:0')
episode: 3 training return: tensor(242.4704, device='cuda:0')
epoch: 1 test_true_pfm: 2342.783782407309 sim_pfm: 44.402743339393055
episode: 4 training return: tensor(414.8042, device='cuda:0')
episode: 5 training return: tensor(300.4607, device='cuda:0')
episode: 6 training return: tensor(191.6723, device='cuda:0')
episode: 7 training return: tensor(240.6966, device='cuda:0')
epoch: 2 test_true_pfm: 2812.0963419571867 sim_pfm: 397.3872589074308
episode: 8 training return: tensor(333.3790, device='cuda:0')
episode: 9 training return: tensor(329.5809, device='cuda:0')
episode: 10 training return: tensor(274.5550, device='cuda:0')
episode: 11 training return: tensor(329.6855, device='cuda:0')
epoch: 3 test_true_pfm: 3396.768086715725 sim_pfm: 361.42645139518817
episode: 12 training return: tensor(296.1246, device='cuda:0')
episode: 13 training return: tensor(337.3823, device='cuda:0')
episode: 14 training return: tensor(370.4874, device='cuda:0')
episode: 15 training return: tensor(241.6133, device='cuda:0')
epoch: 4 test_true_pfm: 2865.132088894854 sim_pfm: 261.6217219644071
episode: 16 training return: tensor(376.6706, device='cuda:0')
episode: 17 training return: tensor(-314.7249, device='cuda:0')
episode: 18 training return: tensor(352.8072, device='cuda:0')
episode: 19 training return: tensor(314.0505, device='cuda:0')
epoch: 5 test_true_pfm: 3459.37995352564 sim_pfm: 362.5894622617828
episode: 20 training return: tensor(-186.5367, device='cuda:0')
episode: 21 training return: tensor(303.2640, device='cuda:0')
episode: 22 training return: tensor(351.0766, device='cuda:0')
episode: 23 training return: tensor(381.6108, device='cuda:0')
epoch: 6 test_true_pfm: 3483.840093945002 sim_pfm: 338.70650684895617
episode: 24 training return: tensor(378.5310, device='cuda:0')
episode: 25 training return: tensor(369.8628, device='cuda:0')
episode: 26 training return: tensor(242.6020, device='cuda:0')
episode: 27 training return: tensor(378.4485, device='cuda:0')
epoch: 7 test_true_pfm: 3361.4419559992343 sim_pfm: 274.777054223329
episode: 28 training return: tensor(332.1483, device='cuda:0')
episode: 29 training return: tensor(364.3606, device='cuda:0')
episode: 30 training return: tensor(335.1158, device='cuda:0')
episode: 31 training return: tensor(276.9819, device='cuda:0')
epoch: 8 test_true_pfm: 3459.9946724044307 sim_pfm: 165.30301938315583
episode: 32 training return: tensor(329.9483, device='cuda:0')
episode: 33 training return: tensor(436.0927, device='cuda:0')
episode: 34 training return: tensor(-242.4569, device='cuda:0')
episode: 35 training return: tensor(-104.2368, device='cuda:0')
epoch: 9 test_true_pfm: 2997.812987767042 sim_pfm: 392.7923204014078
episode: 36 training return: tensor(-321.6340, device='cuda:0')
episode: 37 training return: tensor(342.7485, device='cuda:0')
episode: 38 training return: tensor(321.1401, device='cuda:0')
episode: 39 training return: tensor(313.3766, device='cuda:0')
epoch: 10 test_true_pfm: 3016.4616378117207 sim_pfm: 121.14937835326418
episode: 40 training return: tensor(386.7687, device='cuda:0')
episode: 41 training return: tensor(-433.0205, device='cuda:0')
episode: 42 training return: tensor(306.9933, device='cuda:0')
episode: 43 training return: tensor(356.4796, device='cuda:0')
epoch: 11 test_true_pfm: 2958.8577241568746 sim_pfm: 91.34613377624191
episode: 44 training return: tensor(-143.8515, device='cuda:0')
episode: 45 training return: tensor(333.7805, device='cuda:0')
episode: 46 training return: tensor(-117.4824, device='cuda:0')
episode: 47 training return: tensor(393.3161, device='cuda:0')
epoch: 12 test_true_pfm: 3339.4834222929203 sim_pfm: 285.1424843455704
episode: 48 training return: tensor(290.5594, device='cuda:0')
episode: 49 training return: tensor(75.0248, device='cuda:0')
episode: 50 training return: tensor(373.0935, device='cuda:0')
episode: 51 training return: tensor(340.4033, device='cuda:0')
epoch: 13 test_true_pfm: 3499.169002607509 sim_pfm: 338.5976103191497
episode: 52 training return: tensor(263.6948, device='cuda:0')
episode: 53 training return: tensor(388.2645, device='cuda:0')
episode: 54 training return: tensor(375.3048, device='cuda:0')
episode: 55 training return: tensor(213.8939, device='cuda:0')
epoch: 14 test_true_pfm: 2552.403570454675 sim_pfm: -12.394962336324776
episode: 56 training return: tensor(416.3618, device='cuda:0')
episode: 57 training return: tensor(391.0416, device='cuda:0')
episode: 58 training return: tensor(386.7202, device='cuda:0')
episode: 59 training return: tensor(311.7635, device='cuda:0')
epoch: 15 test_true_pfm: 3527.704406541516 sim_pfm: 184.4902365099018
episode: 60 training return: tensor(263.0929, device='cuda:0')
episode: 61 training return: tensor(370.1792, device='cuda:0')
episode: 62 training return: tensor(374.6243, device='cuda:0')
episode: 63 training return: tensor(319.1519, device='cuda:0')
epoch: 16 test_true_pfm: 3452.6225633662975 sim_pfm: 394.2798368551691
episode: 64 training return: tensor(321.3262, device='cuda:0')
episode: 65 training return: tensor(-2.6591, device='cuda:0')
episode: 66 training return: tensor(310.5333, device='cuda:0')
episode: 67 training return: tensor(391.3103, device='cuda:0')
epoch: 17 test_true_pfm: 3001.8022764426114 sim_pfm: 197.26143862118866
episode: 68 training return: tensor(79.2914, device='cuda:0')
episode: 69 training return: tensor(-14.9948, device='cuda:0')
episode: 70 training return: tensor(327.0733, device='cuda:0')
episode: 71 training return: tensor(-246.5374, device='cuda:0')
epoch: 18 test_true_pfm: 2163.538479975481 sim_pfm: 352.45527819936007
episode: 72 training return: tensor(-168.9974, device='cuda:0')
episode: 73 training return: tensor(329.4601, device='cuda:0')
episode: 74 training return: tensor(390.2390, device='cuda:0')
episode: 75 training return: tensor(365.0454, device='cuda:0')
epoch: 19 test_true_pfm: 2402.1960033915125 sim_pfm: 82.83218176461135
episode: 76 training return: tensor(251.3293, device='cuda:0')
episode: 77 training return: tensor(82.5839, device='cuda:0')
episode: 78 training return: tensor(431.4850, device='cuda:0')
episode: 79 training return: tensor(360.9554, device='cuda:0')
epoch: 20 test_true_pfm: 3512.7130035785613 sim_pfm: 259.44756780929555
episode: 80 training return: tensor(-131.2348, device='cuda:0')
episode: 81 training return: tensor(381.6511, device='cuda:0')
episode: 82 training return: tensor(283.7020, device='cuda:0')
episode: 83 training return: tensor(301.0667, device='cuda:0')
epoch: 21 test_true_pfm: 3467.8461871080613 sim_pfm: 375.9965232872831
episode: 84 training return: tensor(94.2950, device='cuda:0')
episode: 85 training return: tensor(322.4947, device='cuda:0')
episode: 86 training return: tensor(353.3782, device='cuda:0')
episode: 87 training return: tensor(-159.9470, device='cuda:0')
epoch: 22 test_true_pfm: 2177.1388251739613 sim_pfm: 367.95096326526254
episode: 88 training return: tensor(384.8329, device='cuda:0')
episode: 89 training return: tensor(13.6043, device='cuda:0')
episode: 90 training return: tensor(362.3326, device='cuda:0')
episode: 91 training return: tensor(132.3513, device='cuda:0')
epoch: 23 test_true_pfm: 3074.3807879299966 sim_pfm: 358.26532939037617
episode: 92 training return: tensor(342.7671, device='cuda:0')
episode: 93 training return: tensor(-282.2309, device='cuda:0')
episode: 94 training return: tensor(286.3647, device='cuda:0')
episode: 95 training return: tensor(-369.3359, device='cuda:0')
epoch: 24 test_true_pfm: 3431.8764095549104 sim_pfm: 351.43240664628684
episode: 96 training return: tensor(333.0598, device='cuda:0')
episode: 97 training return: tensor(56.9604, device='cuda:0')
episode: 98 training return: tensor(293.6343, device='cuda:0')
episode: 99 training return: tensor(140.1794, device='cuda:0')
epoch: 25 test_true_pfm: 3466.6931935281245 sim_pfm: 337.5728435827089
episode: 100 training return: tensor(327.1340, device='cuda:0')
episode: 101 training return: tensor(318.5437, device='cuda:0')
episode: 102 training return: tensor(336.1266, device='cuda:0')
episode: 103 training return: tensor(35.6736, device='cuda:0')
epoch: 26 test_true_pfm: 3470.2652226681116 sim_pfm: 313.0632611174563
episode: 104 training return: tensor(371.0862, device='cuda:0')
episode: 105 training return: tensor(321.2892, device='cuda:0')
episode: 106 training return: tensor(-148.5654, device='cuda:0')
episode: 107 training return: tensor(324.2732, device='cuda:0')
epoch: 27 test_true_pfm: 3446.9996527317876 sim_pfm: 274.6142295509344
episode: 108 training return: tensor(266.6207, device='cuda:0')
episode: 109 training return: tensor(319.4700, device='cuda:0')
episode: 110 training return: tensor(316.2287, device='cuda:0')
episode: 111 training return: tensor(-63.4579, device='cuda:0')
epoch: 28 test_true_pfm: 3440.3287094311745 sim_pfm: 378.51310948979034
episode: 112 training return: tensor(295.8833, device='cuda:0')
episode: 113 training return: tensor(353.6351, device='cuda:0')
episode: 114 training return: tensor(395.5378, device='cuda:0')
episode: 115 training return: tensor(350.9857, device='cuda:0')
epoch: 29 test_true_pfm: 2958.4100405651175 sim_pfm: 271.2435762559374
episode: 116 training return: tensor(394.9523, device='cuda:0')
episode: 117 training return: tensor(399.9420, device='cuda:0')
episode: 118 training return: tensor(204.1950, device='cuda:0')
episode: 119 training return: tensor(-151.7717, device='cuda:0')
epoch: 30 test_true_pfm: 3543.9559153563573 sim_pfm: 358.884244482518
episode: 120 training return: tensor(307.9044, device='cuda:0')
episode: 121 training return: tensor(336.2666, device='cuda:0')
episode: 122 training return: tensor(319.8993, device='cuda:0')
episode: 123 training return: tensor(349.4283, device='cuda:0')
epoch: 31 test_true_pfm: 3260.109518284484 sim_pfm: 333.2319000720745
episode: 124 training return: tensor(299.3571, device='cuda:0')
episode: 125 training return: tensor(348.7771, device='cuda:0')
episode: 126 training return: tensor(332.4004, device='cuda:0')
episode: 127 training return: tensor(323.4973, device='cuda:0')
epoch: 32 test_true_pfm: 3091.2127048756624 sim_pfm: 334.33777793263044
episode: 128 training return: tensor(316.8531, device='cuda:0')
episode: 129 training return: tensor(376.4690, device='cuda:0')
episode: 130 training return: tensor(194.6668, device='cuda:0')
episode: 131 training return: tensor(374.1776, device='cuda:0')
epoch: 33 test_true_pfm: 3486.7153109128863 sim_pfm: 200.29394737881375
episode: 132 training return: tensor(362.2055, device='cuda:0')
episode: 133 training return: tensor(297.3998, device='cuda:0')
episode: 134 training return: tensor(327.7954, device='cuda:0')
episode: 135 training return: tensor(375.5874, device='cuda:0')
epoch: 34 test_true_pfm: 3278.0272000820746 sim_pfm: 340.74394712255645
episode: 136 training return: tensor(66.8271, device='cuda:0')
episode: 137 training return: tensor(351.4082, device='cuda:0')
episode: 138 training return: tensor(390.9122, device='cuda:0')
episode: 139 training return: tensor(-7.8813, device='cuda:0')
epoch: 35 test_true_pfm: 2832.599515864356 sim_pfm: 429.6213263932926
episode: 140 training return: tensor(354.0989, device='cuda:0')
episode: 141 training return: tensor(350.1004, device='cuda:0')
episode: 142 training return: tensor(312.6827, device='cuda:0')
episode: 143 training return: tensor(333.1162, device='cuda:0')
epoch: 36 test_true_pfm: 2757.4040014809725 sim_pfm: 410.50904939506046
episode: 144 training return: tensor(400.9417, device='cuda:0')
episode: 145 training return: tensor(365.2382, device='cuda:0')
episode: 146 training return: tensor(241.0584, device='cuda:0')
episode: 147 training return: tensor(332.8755, device='cuda:0')
epoch: 37 test_true_pfm: 3127.292838943608 sim_pfm: 322.2107360261725
episode: 148 training return: tensor(345.8405, device='cuda:0')
episode: 149 training return: tensor(403.7456, device='cuda:0')
episode: 150 training return: tensor(435.6015, device='cuda:0')
episode: 151 training return: tensor(368.7992, device='cuda:0')
epoch: 38 test_true_pfm: 3460.9717872645215 sim_pfm: 395.54529202387977
episode: 152 training return: tensor(379.6665, device='cuda:0')
episode: 153 training return: tensor(408.6244, device='cuda:0')
episode: 154 training return: tensor(290.9465, device='cuda:0')
episode: 155 training return: tensor(331.2799, device='cuda:0')
epoch: 39 test_true_pfm: 3580.9731737300667 sim_pfm: 463.12030822521774
episode: 156 training return: tensor(313.5647, device='cuda:0')
episode: 157 training return: tensor(331.9971, device='cuda:0')
episode: 158 training return: tensor(355.3081, device='cuda:0')
episode: 159 training return: tensor(342.2560, device='cuda:0')
epoch: 40 test_true_pfm: 3418.3479792902403 sim_pfm: 351.87499205882585
episode: 160 training return: tensor(279.7087, device='cuda:0')
episode: 161 training return: tensor(271.4542, device='cuda:0')
episode: 162 training return: tensor(288.4303, device='cuda:0')
episode: 163 training return: tensor(339.2741, device='cuda:0')
epoch: 41 test_true_pfm: 3634.2652913291054 sim_pfm: 373.28838372024865
episode: 164 training return: tensor(427.6520, device='cuda:0')
episode: 165 training return: tensor(397.2374, device='cuda:0')
episode: 166 training return: tensor(407.3290, device='cuda:0')
episode: 167 training return: tensor(342.5930, device='cuda:0')
epoch: 42 test_true_pfm: 2705.5109265891997 sim_pfm: 375.3662298705215
episode: 168 training return: tensor(404.2268, device='cuda:0')
episode: 169 training return: tensor(266.8138, device='cuda:0')
episode: 170 training return: tensor(361.7387, device='cuda:0')
episode: 171 training return: tensor(320.5953, device='cuda:0')
epoch: 43 test_true_pfm: 3480.0953338682652 sim_pfm: 373.3128543748074
episode: 172 training return: tensor(372.5395, device='cuda:0')
episode: 173 training return: tensor(258.6037, device='cuda:0')
episode: 174 training return: tensor(353.9653, device='cuda:0')
episode: 175 training return: tensor(419.7097, device='cuda:0')
epoch: 44 test_true_pfm: 3558.9067476763776 sim_pfm: 414.05935941970284
episode: 176 training return: tensor(344.9639, device='cuda:0')
episode: 177 training return: tensor(407.0412, device='cuda:0')
episode: 178 training return: tensor(291.5910, device='cuda:0')
episode: 179 training return: tensor(345.1884, device='cuda:0')
epoch: 45 test_true_pfm: 3362.9328241387398 sim_pfm: 357.87683819282876
episode: 180 training return: tensor(395.2968, device='cuda:0')
episode: 181 training return: tensor(336.0855, device='cuda:0')
episode: 182 training return: tensor(370.0292, device='cuda:0')
episode: 183 training return: tensor(343.3803, device='cuda:0')
epoch: 46 test_true_pfm: 3488.4470907428063 sim_pfm: 349.73233545709326
episode: 184 training return: tensor(19.4588, device='cuda:0')
episode: 185 training return: tensor(-214.0441, device='cuda:0')
episode: 186 training return: tensor(351.7109, device='cuda:0')
episode: 187 training return: tensor(312.5909, device='cuda:0')
epoch: 47 test_true_pfm: 3585.291681050014 sim_pfm: 404.3911395857576
episode: 188 training return: tensor(380.6000, device='cuda:0')
episode: 189 training return: tensor(411.2280, device='cuda:0')
episode: 190 training return: tensor(359.7543, device='cuda:0')
episode: 191 training return: tensor(438.4616, device='cuda:0')
epoch: 48 test_true_pfm: 3546.4379329611806 sim_pfm: 395.41063781240763
episode: 192 training return: tensor(388.1567, device='cuda:0')
episode: 193 training return: tensor(350.3125, device='cuda:0')
episode: 194 training return: tensor(379.2790, device='cuda:0')
episode: 195 training return: tensor(359.1370, device='cuda:0')
epoch: 49 test_true_pfm: 3536.043534388336 sim_pfm: 397.38721851768787
episode: 196 training return: tensor(349.8943, device='cuda:0')
episode: 197 training return: tensor(426.4477, device='cuda:0')
episode: 198 training return: tensor(336.8863, device='cuda:0')
episode: 199 training return: tensor(355.2806, device='cuda:0')
epoch: 50 test_true_pfm: 3201.889741672297 sim_pfm: 442.1922180795809
episode: 200 training return: tensor(477.3096, device='cuda:0')
episode: 201 training return: tensor(370.0870, device='cuda:0')
episode: 202 training return: tensor(395.3367, device='cuda:0')
episode: 203 training return: tensor(367.8402, device='cuda:0')
epoch: 51 test_true_pfm: 3503.100516597046 sim_pfm: 386.37307818299934
episode: 204 training return: tensor(324.1587, device='cuda:0')
episode: 205 training return: tensor(348.2145, device='cuda:0')
episode: 206 training return: tensor(310.9608, device='cuda:0')
episode: 207 training return: tensor(373.8257, device='cuda:0')
epoch: 52 test_true_pfm: 3526.279240881048 sim_pfm: 410.8715939555356
episode: 208 training return: tensor(260.4118, device='cuda:0')
episode: 209 training return: tensor(411.4203, device='cuda:0')
episode: 210 training return: tensor(404.4987, device='cuda:0')
episode: 211 training return: tensor(464.4676, device='cuda:0')
epoch: 53 test_true_pfm: 3577.1803531724568 sim_pfm: 395.65981215423864
episode: 212 training return: tensor(402.2732, device='cuda:0')
episode: 213 training return: tensor(413.7970, device='cuda:0')
episode: 214 training return: tensor(133.9396, device='cuda:0')
episode: 215 training return: tensor(417.1222, device='cuda:0')
epoch: 54 test_true_pfm: 3562.3068183196883 sim_pfm: 438.0958525483923
episode: 216 training return: tensor(378.2703, device='cuda:0')
episode: 217 training return: tensor(350.6481, device='cuda:0')
episode: 218 training return: tensor(346.9373, device='cuda:0')
episode: 219 training return: tensor(358.4279, device='cuda:0')
epoch: 55 test_true_pfm: 3451.2867403426985 sim_pfm: 430.40912727679824
episode: 220 training return: tensor(397.2226, device='cuda:0')
episode: 221 training return: tensor(391.2562, device='cuda:0')
episode: 222 training return: tensor(345.7065, device='cuda:0')
episode: 223 training return: tensor(295.0302, device='cuda:0')
epoch: 56 test_true_pfm: 3416.0291872994426 sim_pfm: 333.883896503937
episode: 224 training return: tensor(355.3062, device='cuda:0')
episode: 225 training return: tensor(414.4324, device='cuda:0')
episode: 226 training return: tensor(435.1622, device='cuda:0')
episode: 227 training return: tensor(350.9742, device='cuda:0')
epoch: 57 test_true_pfm: 3469.1758610139805 sim_pfm: 377.22195827213
episode: 228 training return: tensor(299.3127, device='cuda:0')
episode: 229 training return: tensor(-36.0981, device='cuda:0')
episode: 230 training return: tensor(339.7829, device='cuda:0')
episode: 231 training return: tensor(386.5590, device='cuda:0')
epoch: 58 test_true_pfm: 3486.686669631939 sim_pfm: 378.94796081640135
episode: 232 training return: tensor(-80.9434, device='cuda:0')
episode: 233 training return: tensor(390.4682, device='cuda:0')
episode: 234 training return: tensor(409.7429, device='cuda:0')
episode: 235 training return: tensor(224.4507, device='cuda:0')
epoch: 59 test_true_pfm: 3517.3591833911037 sim_pfm: 355.04410287597176
episode: 236 training return: tensor(398.6595, device='cuda:0')
episode: 237 training return: tensor(415.3974, device='cuda:0')
episode: 238 training return: tensor(370.4590, device='cuda:0')
episode: 239 training return: tensor(330.1581, device='cuda:0')
epoch: 60 test_true_pfm: 3536.283830359663 sim_pfm: 394.990176169551
episode: 240 training return: tensor(367.4165, device='cuda:0')
episode: 241 training return: tensor(373.2580, device='cuda:0')
episode: 242 training return: tensor(325.8709, device='cuda:0')
episode: 243 training return: tensor(298.0618, device='cuda:0')
epoch: 61 test_true_pfm: 3511.158385436888 sim_pfm: 397.05921760910616
episode: 244 training return: tensor(353.3566, device='cuda:0')
episode: 245 training return: tensor(337.3412, device='cuda:0')
episode: 246 training return: tensor(374.0112, device='cuda:0')
episode: 247 training return: tensor(339.6310, device='cuda:0')
epoch: 62 test_true_pfm: 3298.3722581615907 sim_pfm: 332.96418937963125
episode: 248 training return: tensor(368.3436, device='cuda:0')
episode: 249 training return: tensor(362.4055, device='cuda:0')
episode: 250 training return: tensor(316.1083, device='cuda:0')
episode: 251 training return: tensor(385.6604, device='cuda:0')
epoch: 63 test_true_pfm: 3484.656132828528 sim_pfm: 407.6416919476663
episode: 252 training return: tensor(378.8232, device='cuda:0')
episode: 253 training return: tensor(366.1278, device='cuda:0')
episode: 254 training return: tensor(398.1189, device='cuda:0')
episode: 255 training return: tensor(353.7024, device='cuda:0')
epoch: 64 test_true_pfm: 3476.521773908213 sim_pfm: 358.7092128019528
episode: 256 training return: tensor(346.9091, device='cuda:0')
episode: 257 training return: tensor(399.0963, device='cuda:0')
episode: 258 training return: tensor(471.8250, device='cuda:0')
episode: 259 training return: tensor(319.7455, device='cuda:0')
epoch: 65 test_true_pfm: 3545.477413089619 sim_pfm: 401.1857261384915
episode: 260 training return: tensor(330.7601, device='cuda:0')
episode: 261 training return: tensor(342.2715, device='cuda:0')
episode: 262 training return: tensor(354.1168, device='cuda:0')
episode: 263 training return: tensor(368.3761, device='cuda:0')
epoch: 66 test_true_pfm: 3506.614783601397 sim_pfm: 369.8689955074806
episode: 264 training return: tensor(374.9798, device='cuda:0')
episode: 265 training return: tensor(343.0081, device='cuda:0')
episode: 266 training return: tensor(329.2650, device='cuda:0')
episode: 267 training return: tensor(356.3675, device='cuda:0')
epoch: 67 test_true_pfm: 3506.0398331352403 sim_pfm: 335.6910098355341
episode: 268 training return: tensor(283.8307, device='cuda:0')
episode: 269 training return: tensor(414.0408, device='cuda:0')
episode: 270 training return: tensor(363.8633, device='cuda:0')
episode: 271 training return: tensor(433.3294, device='cuda:0')
epoch: 68 test_true_pfm: 3641.3873323470775 sim_pfm: 413.5106441855023
episode: 272 training return: tensor(410.2985, device='cuda:0')
episode: 273 training return: tensor(96.5695, device='cuda:0')
episode: 274 training return: tensor(388.8643, device='cuda:0')
episode: 275 training return: tensor(364.0660, device='cuda:0')
epoch: 69 test_true_pfm: 3506.2077200912136 sim_pfm: 429.05184102844214
episode: 276 training return: tensor(358.6791, device='cuda:0')
episode: 277 training return: tensor(393.7713, device='cuda:0')
episode: 278 training return: tensor(333.1862, device='cuda:0')
episode: 279 training return: tensor(348.6415, device='cuda:0')
epoch: 70 test_true_pfm: 3534.538161194083 sim_pfm: 432.0185739125106
episode: 280 training return: tensor(465.1395, device='cuda:0')
episode: 281 training return: tensor(401.0248, device='cuda:0')
episode: 282 training return: tensor(365.0673, device='cuda:0')
episode: 283 training return: tensor(352.9097, device='cuda:0')
epoch: 71 test_true_pfm: 3430.7068594632947 sim_pfm: 382.5974406403645
episode: 284 training return: tensor(383.0573, device='cuda:0')
episode: 285 training return: tensor(386.0531, device='cuda:0')
episode: 286 training return: tensor(392.6273, device='cuda:0')
episode: 287 training return: tensor(381.9587, device='cuda:0')
epoch: 72 test_true_pfm: 3463.106014483374 sim_pfm: 351.83903995649115
episode: 288 training return: tensor(-70.6675, device='cuda:0')
episode: 289 training return: tensor(316.6121, device='cuda:0')
episode: 290 training return: tensor(363.6087, device='cuda:0')
episode: 291 training return: tensor(350.6944, device='cuda:0')
epoch: 73 test_true_pfm: 3489.500529233503 sim_pfm: 387.2559087642779
episode: 292 training return: tensor(316.8697, device='cuda:0')
episode: 293 training return: tensor(431.1835, device='cuda:0')
episode: 294 training return: tensor(325.1518, device='cuda:0')
episode: 295 training return: tensor(371.4336, device='cuda:0')
epoch: 74 test_true_pfm: 3458.933384892653 sim_pfm: 415.3725362804641
episode: 296 training return: tensor(325.3283, device='cuda:0')
episode: 297 training return: tensor(355.8808, device='cuda:0')
episode: 298 training return: tensor(381.4393, device='cuda:0')
episode: 299 training return: tensor(280.9946, device='cuda:0')
epoch: 75 test_true_pfm: 3581.3173367639015 sim_pfm: 397.7456612419725
episode: 300 training return: tensor(357.7373, device='cuda:0')
episode: 301 training return: tensor(350.3349, device='cuda:0')
episode: 302 training return: tensor(392.9915, device='cuda:0')
episode: 303 training return: tensor(373.5230, device='cuda:0')
epoch: 76 test_true_pfm: 3491.2441209353406 sim_pfm: 376.72839915486594
episode: 304 training return: tensor(300.0466, device='cuda:0')
episode: 305 training return: tensor(390.2239, device='cuda:0')
episode: 306 training return: tensor(385.7800, device='cuda:0')
episode: 307 training return: tensor(316.3344, device='cuda:0')
epoch: 77 test_true_pfm: 3491.793962955035 sim_pfm: 363.74841085939744
episode: 308 training return: tensor(362.9869, device='cuda:0')
episode: 309 training return: tensor(327.6583, device='cuda:0')
episode: 310 training return: tensor(398.6227, device='cuda:0')
episode: 311 training return: tensor(397.2032, device='cuda:0')
epoch: 78 test_true_pfm: 3495.0952701605947 sim_pfm: 383.98572073954466
episode: 312 training return: tensor(379.8243, device='cuda:0')
episode: 313 training return: tensor(319.7011, device='cuda:0')
episode: 314 training return: tensor(386.7740, device='cuda:0')
episode: 315 training return: tensor(376.3296, device='cuda:0')
epoch: 79 test_true_pfm: 3452.3957207535054 sim_pfm: 385.45194244454615
episode: 316 training return: tensor(384.8370, device='cuda:0')
episode: 317 training return: tensor(409.9336, device='cuda:0')
episode: 318 training return: tensor(311.7834, device='cuda:0')
episode: 319 training return: tensor(389.5970, device='cuda:0')
epoch: 80 test_true_pfm: 3400.0121342692114 sim_pfm: 329.27933749975637
episode: 320 training return: tensor(414.6264, device='cuda:0')
episode: 321 training return: tensor(457.1932, device='cuda:0')
episode: 322 training return: tensor(335.3342, device='cuda:0')
episode: 323 training return: tensor(370.1117, device='cuda:0')
epoch: 81 test_true_pfm: 3503.7516463666884 sim_pfm: 395.4858272174412
episode: 324 training return: tensor(418.1160, device='cuda:0')
episode: 325 training return: tensor(333.8968, device='cuda:0')
episode: 326 training return: tensor(384.1349, device='cuda:0')
episode: 327 training return: tensor(398.6964, device='cuda:0')
epoch: 82 test_true_pfm: 3450.832938932603 sim_pfm: 391.23890869649284
episode: 328 training return: tensor(347.9777, device='cuda:0')
episode: 329 training return: tensor(-82.1456, device='cuda:0')
episode: 330 training return: tensor(432.1809, device='cuda:0')
episode: 331 training return: tensor(390.3396, device='cuda:0')
epoch: 83 test_true_pfm: 3540.9826204813908 sim_pfm: 388.2479178944486
episode: 332 training return: tensor(331.0686, device='cuda:0')
episode: 333 training return: tensor(419.7146, device='cuda:0')
episode: 334 training return: tensor(329.9908, device='cuda:0')
episode: 335 training return: tensor(412.5715, device='cuda:0')
epoch: 84 test_true_pfm: 3532.355631468692 sim_pfm: 426.8031589523501
episode: 336 training return: tensor(35.9153, device='cuda:0')
episode: 337 training return: tensor(349.3243, device='cuda:0')
episode: 338 training return: tensor(327.6304, device='cuda:0')
episode: 339 training return: tensor(261.3853, device='cuda:0')
epoch: 85 test_true_pfm: 3529.697176858465 sim_pfm: 393.80272874981165
episode: 340 training return: tensor(335.2274, device='cuda:0')
episode: 341 training return: tensor(365.6454, device='cuda:0')
episode: 342 training return: tensor(411.3066, device='cuda:0')
episode: 343 training return: tensor(375.7769, device='cuda:0')
epoch: 86 test_true_pfm: 3508.4100510773715 sim_pfm: 387.8526925589346
episode: 344 training return: tensor(368.1824, device='cuda:0')
episode: 345 training return: tensor(405.3126, device='cuda:0')
episode: 346 training return: tensor(361.0528, device='cuda:0')
episode: 347 training return: tensor(411.6879, device='cuda:0')
epoch: 87 test_true_pfm: 3578.552430500377 sim_pfm: 332.5080787134745
episode: 348 training return: tensor(417.3953, device='cuda:0')
episode: 349 training return: tensor(340.6811, device='cuda:0')
episode: 350 training return: tensor(368.0999, device='cuda:0')
episode: 351 training return: tensor(342.5505, device='cuda:0')
epoch: 88 test_true_pfm: 3566.427997507401 sim_pfm: 436.21937255817465
episode: 352 training return: tensor(401.2070, device='cuda:0')
episode: 353 training return: tensor(-42.2673, device='cuda:0')
episode: 354 training return: tensor(424.9473, device='cuda:0')
episode: 355 training return: tensor(423.5349, device='cuda:0')
epoch: 89 test_true_pfm: 3529.109573310143 sim_pfm: 408.9139737136429
episode: 356 training return: tensor(55.4588, device='cuda:0')
episode: 357 training return: tensor(359.3773, device='cuda:0')
episode: 358 training return: tensor(375.1765, device='cuda:0')
episode: 359 training return: tensor(379.8741, device='cuda:0')
epoch: 90 test_true_pfm: 3540.1498937500837 sim_pfm: 405.4430444598547
episode: 360 training return: tensor(371.4070, device='cuda:0')
episode: 361 training return: tensor(382.2012, device='cuda:0')
episode: 362 training return: tensor(415.3821, device='cuda:0')
episode: 363 training return: tensor(412.0604, device='cuda:0')
epoch: 91 test_true_pfm: 3528.0309904245423 sim_pfm: 389.8061031434433
episode: 364 training return: tensor(406.0271, device='cuda:0')
episode: 365 training return: tensor(369.7176, device='cuda:0')
episode: 366 training return: tensor(325.0109, device='cuda:0')
episode: 367 training return: tensor(387.4670, device='cuda:0')
epoch: 92 test_true_pfm: 3538.0173141705072 sim_pfm: 418.0554681816332
episode: 368 training return: tensor(364.4602, device='cuda:0')
episode: 369 training return: tensor(358.0484, device='cuda:0')
episode: 370 training return: tensor(357.5534, device='cuda:0')
episode: 371 training return: tensor(377.9356, device='cuda:0')
epoch: 93 test_true_pfm: 3570.2794879924236 sim_pfm: 421.1368011467469
episode: 372 training return: tensor(342.6636, device='cuda:0')
episode: 373 training return: tensor(358.7893, device='cuda:0')
episode: 374 training return: tensor(325.7416, device='cuda:0')
episode: 375 training return: tensor(357.4206, device='cuda:0')
epoch: 94 test_true_pfm: 3467.168158763625 sim_pfm: 443.40291282900336
episode: 376 training return: tensor(386.3682, device='cuda:0')
episode: 377 training return: tensor(387.6303, device='cuda:0')
episode: 378 training return: tensor(320.5004, device='cuda:0')
episode: 379 training return: tensor(394.3023, device='cuda:0')
epoch: 95 test_true_pfm: 3526.226425473382 sim_pfm: 396.1591312692035
episode: 380 training return: tensor(405.6624, device='cuda:0')
episode: 381 training return: tensor(391.6070, device='cuda:0')
episode: 382 training return: tensor(380.6330, device='cuda:0')
episode: 383 training return: tensor(383.2015, device='cuda:0')
epoch: 96 test_true_pfm: 3556.4986171314417 sim_pfm: 419.46113640642335
episode: 384 training return: tensor(425.2014, device='cuda:0')
episode: 385 training return: tensor(403.2710, device='cuda:0')
episode: 386 training return: tensor(428.5471, device='cuda:0')
episode: 387 training return: tensor(338.6104, device='cuda:0')
epoch: 97 test_true_pfm: 3488.202368816805 sim_pfm: 425.4608235393146
episode: 388 training return: tensor(370.8715, device='cuda:0')
episode: 389 training return: tensor(403.5986, device='cuda:0')
episode: 390 training return: tensor(383.9525, device='cuda:0')
episode: 391 training return: tensor(337.1319, device='cuda:0')
epoch: 98 test_true_pfm: 3487.8333592395024 sim_pfm: 379.68069251432706
episode: 392 training return: tensor(384.1193, device='cuda:0')
episode: 393 training return: tensor(381.2485, device='cuda:0')
episode: 394 training return: tensor(370.2259, device='cuda:0')
episode: 395 training return: tensor(407.7280, device='cuda:0')
epoch: 99 test_true_pfm: 3569.289130770201 sim_pfm: 437.0708679835564
episode: 396 training return: tensor(319.7468, device='cuda:0')
episode: 397 training return: tensor(393.7283, device='cuda:0')
episode: 398 training return: tensor(408.2694, device='cuda:0')
episode: 399 training return: tensor(405.2459, device='cuda:0')
epoch: 100 test_true_pfm: 3505.504927938558 sim_pfm: 398.5283912057251
episode: 400 training return: tensor(375.0949, device='cuda:0')
episode: 401 training return: tensor(377.5251, device='cuda:0')
episode: 402 training return: tensor(344.1615, device='cuda:0')
episode: 403 training return: tensor(391.6143, device='cuda:0')
epoch: 101 test_true_pfm: 3515.2320220713323 sim_pfm: 396.89352099746856
episode: 404 training return: tensor(421.0780, device='cuda:0')
episode: 405 training return: tensor(341.9369, device='cuda:0')
episode: 406 training return: tensor(362.2701, device='cuda:0')
episode: 407 training return: tensor(361.4573, device='cuda:0')
epoch: 102 test_true_pfm: 3576.0953196498995 sim_pfm: 417.67352242798853
episode: 408 training return: tensor(407.7782, device='cuda:0')
episode: 409 training return: tensor(375.6737, device='cuda:0')
episode: 410 training return: tensor(455.4236, device='cuda:0')
episode: 411 training return: tensor(209.4878, device='cuda:0')
epoch: 103 test_true_pfm: 3532.594635921097 sim_pfm: 411.1074086999579
episode: 412 training return: tensor(333.6153, device='cuda:0')
episode: 413 training return: tensor(398.8574, device='cuda:0')
episode: 414 training return: tensor(405.6835, device='cuda:0')
episode: 415 training return: tensor(349.2324, device='cuda:0')
epoch: 104 test_true_pfm: 3592.487708762205 sim_pfm: 425.70040771140094
episode: 416 training return: tensor(353.5663, device='cuda:0')
episode: 417 training return: tensor(383.7191, device='cuda:0')
episode: 418 training return: tensor(151.3268, device='cuda:0')
episode: 419 training return: tensor(406.5945, device='cuda:0')
epoch: 105 test_true_pfm: 3496.0816385064486 sim_pfm: 418.66787742470234
episode: 420 training return: tensor(326.3082, device='cuda:0')
episode: 421 training return: tensor(436.9530, device='cuda:0')
episode: 422 training return: tensor(399.2890, device='cuda:0')
episode: 423 training return: tensor(133.7862, device='cuda:0')
epoch: 106 test_true_pfm: 3546.3302549134446 sim_pfm: 415.13750931123894
episode: 424 training return: tensor(369.6952, device='cuda:0')
episode: 425 training return: tensor(358.5204, device='cuda:0')
episode: 426 training return: tensor(371.4850, device='cuda:0')
episode: 427 training return: tensor(392.9782, device='cuda:0')
epoch: 107 test_true_pfm: 3603.175834920867 sim_pfm: 434.0147677117881
episode: 428 training return: tensor(425.2871, device='cuda:0')
episode: 429 training return: tensor(399.9171, device='cuda:0')
episode: 430 training return: tensor(376.0646, device='cuda:0')
episode: 431 training return: tensor(384.6143, device='cuda:0')
epoch: 108 test_true_pfm: 3541.608392548546 sim_pfm: 402.8005001554654
episode: 432 training return: tensor(361.8735, device='cuda:0')
episode: 433 training return: tensor(373.1532, device='cuda:0')
episode: 434 training return: tensor(479.7791, device='cuda:0')
episode: 435 training return: tensor(354.0476, device='cuda:0')
epoch: 109 test_true_pfm: 3551.866633335421 sim_pfm: 405.45526224860805
episode: 436 training return: tensor(408.5211, device='cuda:0')
episode: 437 training return: tensor(192.1301, device='cuda:0')
episode: 438 training return: tensor(409.6701, device='cuda:0')
episode: 439 training return: tensor(347.4305, device='cuda:0')
epoch: 110 test_true_pfm: 3490.6235296201594 sim_pfm: 381.91352369308396
episode: 440 training return: tensor(382.8322, device='cuda:0')
episode: 441 training return: tensor(394.0592, device='cuda:0')
episode: 442 training return: tensor(372.1680, device='cuda:0')
episode: 443 training return: tensor(364.5065, device='cuda:0')
epoch: 111 test_true_pfm: 3530.558566628127 sim_pfm: 392.92146506899735
episode: 444 training return: tensor(360.2298, device='cuda:0')
episode: 445 training return: tensor(437.7748, device='cuda:0')
episode: 446 training return: tensor(351.8107, device='cuda:0')
episode: 447 training return: tensor(355.7307, device='cuda:0')
epoch: 112 test_true_pfm: 3569.804209781811 sim_pfm: 449.17746282800607
episode: 448 training return: tensor(429.0843, device='cuda:0')
episode: 449 training return: tensor(391.4186, device='cuda:0')
episode: 450 training return: tensor(425.2342, device='cuda:0')
episode: 451 training return: tensor(445.0970, device='cuda:0')
epoch: 113 test_true_pfm: 3534.764884133289 sim_pfm: 395.9852144669858
episode: 452 training return: tensor(405.3207, device='cuda:0')
episode: 453 training return: tensor(377.4566, device='cuda:0')
episode: 454 training return: tensor(397.4652, device='cuda:0')
episode: 455 training return: tensor(355.4548, device='cuda:0')
epoch: 114 test_true_pfm: 3578.4774946489383 sim_pfm: 423.3417877565371
episode: 456 training return: tensor(417.3750, device='cuda:0')
episode: 457 training return: tensor(386.7593, device='cuda:0')
episode: 458 training return: tensor(389.9445, device='cuda:0')
episode: 459 training return: tensor(406.4498, device='cuda:0')
epoch: 115 test_true_pfm: 3419.6211583412864 sim_pfm: 329.2310441545754
episode: 460 training return: tensor(156.9380, device='cuda:0')
episode: 461 training return: tensor(389.5910, device='cuda:0')
episode: 462 training return: tensor(394.5162, device='cuda:0')
episode: 463 training return: tensor(415.8380, device='cuda:0')
epoch: 116 test_true_pfm: 3524.543749562707 sim_pfm: 404.49741027427564
episode: 464 training return: tensor(364.3702, device='cuda:0')
episode: 465 training return: tensor(374.0440, device='cuda:0')
episode: 466 training return: tensor(417.0246, device='cuda:0')
episode: 467 training return: tensor(377.8998, device='cuda:0')
epoch: 117 test_true_pfm: 3478.850600728167 sim_pfm: 355.42671310439863
episode: 468 training return: tensor(369.0582, device='cuda:0')
episode: 469 training return: tensor(401.2121, device='cuda:0')
episode: 470 training return: tensor(407.7868, device='cuda:0')
episode: 471 training return: tensor(318.8815, device='cuda:0')
epoch: 118 test_true_pfm: 3540.218211021893 sim_pfm: 397.55533291201573
episode: 472 training return: tensor(392.5510, device='cuda:0')
episode: 473 training return: tensor(392.8614, device='cuda:0')
episode: 474 training return: tensor(370.4461, device='cuda:0')
episode: 475 training return: tensor(400.5468, device='cuda:0')
epoch: 119 test_true_pfm: 3516.0235394575793 sim_pfm: 401.8465855913625
episode: 476 training return: tensor(360.2624, device='cuda:0')
episode: 477 training return: tensor(380.1099, device='cuda:0')
episode: 478 training return: tensor(387.8437, device='cuda:0')
episode: 479 training return: tensor(414.4699, device='cuda:0')
epoch: 120 test_true_pfm: 3545.68742217279 sim_pfm: 400.8067203102789
episode: 480 training return: tensor(383.8870, device='cuda:0')
episode: 481 training return: tensor(364.8455, device='cuda:0')
episode: 482 training return: tensor(361.6767, device='cuda:0')
episode: 483 training return: tensor(418.3594, device='cuda:0')
epoch: 121 test_true_pfm: 3531.144695122295 sim_pfm: 401.8135281951788
episode: 484 training return: tensor(407.5689, device='cuda:0')
episode: 485 training return: tensor(349.8188, device='cuda:0')
episode: 486 training return: tensor(330.7627, device='cuda:0')
episode: 487 training return: tensor(369.0092, device='cuda:0')
epoch: 122 test_true_pfm: 3519.3411471430886 sim_pfm: 412.63585427301587
episode: 488 training return: tensor(351.2321, device='cuda:0')
episode: 489 training return: tensor(403.8697, device='cuda:0')
episode: 490 training return: tensor(425.7195, device='cuda:0')
episode: 491 training return: tensor(423.9658, device='cuda:0')
epoch: 123 test_true_pfm: 3492.732177986227 sim_pfm: 385.7712147360823
episode: 492 training return: tensor(391.6841, device='cuda:0')
episode: 493 training return: tensor(351.2801, device='cuda:0')
episode: 494 training return: tensor(424.6150, device='cuda:0')
episode: 495 training return: tensor(392.6369, device='cuda:0')
epoch: 124 test_true_pfm: 3568.0003551274017 sim_pfm: 416.56302843882196
episode: 496 training return: tensor(381.9421, device='cuda:0')
episode: 497 training return: tensor(368.4056, device='cuda:0')
episode: 498 training return: tensor(360.5034, device='cuda:0')
episode: 499 training return: tensor(395.1317, device='cuda:0')
epoch: 125 test_true_pfm: 3584.4927690914433 sim_pfm: 436.36555771369603
episode: 500 training return: tensor(353.8522, device='cuda:0')
episode: 501 training return: tensor(373.7405, device='cuda:0')
episode: 502 training return: tensor(454.6312, device='cuda:0')
episode: 503 training return: tensor(396.7890, device='cuda:0')
epoch: 126 test_true_pfm: 3573.5147585914924 sim_pfm: 427.3916836690332
episode: 504 training return: tensor(394.9875, device='cuda:0')
episode: 505 training return: tensor(343.5933, device='cuda:0')
episode: 506 training return: tensor(430.1095, device='cuda:0')
episode: 507 training return: tensor(380.9985, device='cuda:0')
epoch: 127 test_true_pfm: 3509.483058524082 sim_pfm: 374.719980711195
episode: 508 training return: tensor(369.0066, device='cuda:0')
episode: 509 training return: tensor(398.6470, device='cuda:0')
episode: 510 training return: tensor(422.4129, device='cuda:0')
episode: 511 training return: tensor(407.5191, device='cuda:0')
epoch: 128 test_true_pfm: 3532.4565846488913 sim_pfm: 413.9582149673176
episode: 512 training return: tensor(363.4886, device='cuda:0')
episode: 513 training return: tensor(374.9455, device='cuda:0')
episode: 514 training return: tensor(388.7794, device='cuda:0')
episode: 515 training return: tensor(391.0261, device='cuda:0')
epoch: 129 test_true_pfm: 3509.112393680022 sim_pfm: 402.03152260295855
episode: 516 training return: tensor(399.3469, device='cuda:0')
episode: 517 training return: tensor(409.8496, device='cuda:0')
episode: 518 training return: tensor(365.2032, device='cuda:0')
episode: 519 training return: tensor(346.5689, device='cuda:0')
epoch: 130 test_true_pfm: 3596.234722111751 sim_pfm: 440.2778747789562
episode: 520 training return: tensor(398.6225, device='cuda:0')
episode: 521 training return: tensor(411.3091, device='cuda:0')
episode: 522 training return: tensor(407.6133, device='cuda:0')
episode: 523 training return: tensor(396.9223, device='cuda:0')
epoch: 131 test_true_pfm: 3599.424951084497 sim_pfm: 420.7493232450215
episode: 524 training return: tensor(408.1991, device='cuda:0')
episode: 525 training return: tensor(320.5952, device='cuda:0')
episode: 526 training return: tensor(379.5284, device='cuda:0')
episode: 527 training return: tensor(334.6957, device='cuda:0')
epoch: 132 test_true_pfm: 3608.7587605696403 sim_pfm: 408.49488377191784
episode: 528 training return: tensor(420.2397, device='cuda:0')
episode: 529 training return: tensor(413.6936, device='cuda:0')
episode: 530 training return: tensor(339.8347, device='cuda:0')
episode: 531 training return: tensor(399.2653, device='cuda:0')
epoch: 133 test_true_pfm: 3552.3607663370153 sim_pfm: 416.2780124768615
episode: 532 training return: tensor(397.6069, device='cuda:0')
episode: 533 training return: tensor(395.8998, device='cuda:0')
episode: 534 training return: tensor(398.1397, device='cuda:0')
episode: 535 training return: tensor(384.3571, device='cuda:0')
epoch: 134 test_true_pfm: 3552.990784250623 sim_pfm: 420.92007473753375
episode: 536 training return: tensor(414.2460, device='cuda:0')
episode: 537 training return: tensor(413.9717, device='cuda:0')
episode: 538 training return: tensor(406.4216, device='cuda:0')
episode: 539 training return: tensor(360.1587, device='cuda:0')
epoch: 135 test_true_pfm: 3479.746169729686 sim_pfm: 381.5234326064431
episode: 540 training return: tensor(392.5343, device='cuda:0')
episode: 541 training return: tensor(379.6411, device='cuda:0')
episode: 542 training return: tensor(427.4720, device='cuda:0')
episode: 543 training return: tensor(384.8035, device='cuda:0')
epoch: 136 test_true_pfm: 3543.9104997488625 sim_pfm: 413.03927146604593
episode: 544 training return: tensor(389.2926, device='cuda:0')
episode: 545 training return: tensor(399.8089, device='cuda:0')
episode: 546 training return: tensor(411.9402, device='cuda:0')
episode: 547 training return: tensor(391.2065, device='cuda:0')
epoch: 137 test_true_pfm: 3608.4308208399943 sim_pfm: 392.0425941907063
episode: 548 training return: tensor(424.3292, device='cuda:0')
episode: 549 training return: tensor(422.7585, device='cuda:0')
episode: 550 training return: tensor(356.4584, device='cuda:0')
episode: 551 training return: tensor(324.0465, device='cuda:0')
epoch: 138 test_true_pfm: 3499.443606999185 sim_pfm: 382.53028925526695
episode: 552 training return: tensor(416.7555, device='cuda:0')
episode: 553 training return: tensor(409.2283, device='cuda:0')
episode: 554 training return: tensor(410.3473, device='cuda:0')
episode: 555 training return: tensor(391.3470, device='cuda:0')
epoch: 139 test_true_pfm: 3545.0630772476675 sim_pfm: 417.79952908619697
episode: 556 training return: tensor(382.5459, device='cuda:0')
episode: 557 training return: tensor(431.6295, device='cuda:0')
episode: 558 training return: tensor(407.6625, device='cuda:0')
episode: 559 training return: tensor(397.6111, device='cuda:0')
epoch: 140 test_true_pfm: 3563.9837297142 sim_pfm: 417.3540356333445
episode: 560 training return: tensor(387.6031, device='cuda:0')
episode: 561 training return: tensor(396.8786, device='cuda:0')
episode: 562 training return: tensor(395.2533, device='cuda:0')
episode: 563 training return: tensor(425.6453, device='cuda:0')
epoch: 141 test_true_pfm: 3531.1918781497793 sim_pfm: 396.8486002520658
episode: 564 training return: tensor(419.0533, device='cuda:0')
episode: 565 training return: tensor(431.3824, device='cuda:0')
episode: 566 training return: tensor(367.3315, device='cuda:0')
episode: 567 training return: tensor(453.5841, device='cuda:0')
epoch: 142 test_true_pfm: 3487.4311432477184 sim_pfm: 402.28041197856265
episode: 568 training return: tensor(367.4085, device='cuda:0')
episode: 569 training return: tensor(339.0972, device='cuda:0')
episode: 570 training return: tensor(376.1095, device='cuda:0')
episode: 571 training return: tensor(409.9824, device='cuda:0')
epoch: 143 test_true_pfm: 3537.3152516310897 sim_pfm: 405.8948310937073
episode: 572 training return: tensor(406.3654, device='cuda:0')
episode: 573 training return: tensor(407.8381, device='cuda:0')
episode: 574 training return: tensor(417.3672, device='cuda:0')
episode: 575 training return: tensor(355.2373, device='cuda:0')
epoch: 144 test_true_pfm: 3531.6708742114133 sim_pfm: 410.24615035535925
episode: 576 training return: tensor(415.4545, device='cuda:0')
episode: 577 training return: tensor(326.4104, device='cuda:0')
episode: 578 training return: tensor(419.6808, device='cuda:0')
episode: 579 training return: tensor(393.3292, device='cuda:0')
epoch: 145 test_true_pfm: 3520.724460607513 sim_pfm: 392.3173795554515
episode: 580 training return: tensor(363.7703, device='cuda:0')
episode: 581 training return: tensor(431.1424, device='cuda:0')
episode: 582 training return: tensor(374.5105, device='cuda:0')
episode: 583 training return: tensor(365.1493, device='cuda:0')
epoch: 146 test_true_pfm: 3520.7893003415397 sim_pfm: 395.8542443225936
episode: 584 training return: tensor(363.5784, device='cuda:0')
episode: 585 training return: tensor(377.8916, device='cuda:0')
episode: 586 training return: tensor(405.0526, device='cuda:0')
episode: 587 training return: tensor(462.5003, device='cuda:0')
epoch: 147 test_true_pfm: 3515.7356813551232 sim_pfm: 428.76263051505276
episode: 588 training return: tensor(413.1731, device='cuda:0')
episode: 589 training return: tensor(395.3744, device='cuda:0')
episode: 590 training return: tensor(369.6703, device='cuda:0')
episode: 591 training return: tensor(384.8244, device='cuda:0')
epoch: 148 test_true_pfm: 3543.964266846225 sim_pfm: 408.0589931504898
episode: 592 training return: tensor(388.4145, device='cuda:0')
episode: 593 training return: tensor(399.1536, device='cuda:0')
episode: 594 training return: tensor(425.4850, device='cuda:0')
episode: 595 training return: tensor(366.2480, device='cuda:0')
epoch: 149 test_true_pfm: 3544.272088762175 sim_pfm: 418.5637899684759
episode: 596 training return: tensor(251.0697, device='cuda:0')
episode: 597 training return: tensor(358.1725, device='cuda:0')
episode: 598 training return: tensor(408.4288, device='cuda:0')
episode: 599 training return: tensor(367.1682, device='cuda:0')
epoch: 150 test_true_pfm: 3484.8283733843186 sim_pfm: 377.3432738772438
