['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '1', '--data', '100000', '--regu', '0.05']
2480.461704760203
episode: 0 training return: tensor(359.5052, device='cuda:0')
episode: 1 training return: tensor(-141.9552, device='cuda:0')
episode: 2 training return: tensor(-168.9048, device='cuda:0')
episode: 3 training return: tensor(233.9868, device='cuda:0')
epoch: 1 test_true_pfm: 2194.783078305442 sim_pfm: -8.659180090995505
episode: 4 training return: tensor(313.0023, device='cuda:0')
episode: 5 training return: tensor(307.8090, device='cuda:0')
episode: 6 training return: tensor(310.4887, device='cuda:0')
episode: 7 training return: tensor(189.7920, device='cuda:0')
epoch: 2 test_true_pfm: 2810.335967995885 sim_pfm: 316.4117835364984
episode: 8 training return: tensor(-130.0436, device='cuda:0')
episode: 9 training return: tensor(-146.3985, device='cuda:0')
episode: 10 training return: tensor(94.5659, device='cuda:0')
episode: 11 training return: tensor(297.1141, device='cuda:0')
epoch: 3 test_true_pfm: 3268.2245155400065 sim_pfm: 76.58550228141637
episode: 12 training return: tensor(-41.0198, device='cuda:0')
episode: 13 training return: tensor(256.4727, device='cuda:0')
episode: 14 training return: tensor(287.9731, device='cuda:0')
episode: 15 training return: tensor(293.3730, device='cuda:0')
epoch: 4 test_true_pfm: 2067.8063198680784 sim_pfm: -68.32959309022408
episode: 16 training return: tensor(313.1421, device='cuda:0')
episode: 17 training return: tensor(-31.3693, device='cuda:0')
episode: 18 training return: tensor(217.9669, device='cuda:0')
episode: 19 training return: tensor(272.1015, device='cuda:0')
epoch: 5 test_true_pfm: 1779.6189979120027 sim_pfm: -9.677598029190753
episode: 20 training return: tensor(294.3032, device='cuda:0')
episode: 21 training return: tensor(317.8282, device='cuda:0')
episode: 22 training return: tensor(300.6229, device='cuda:0')
episode: 23 training return: tensor(-216.4046, device='cuda:0')
epoch: 6 test_true_pfm: 2105.2289011742514 sim_pfm: -48.42205588368233
episode: 24 training return: tensor(-266.3307, device='cuda:0')
episode: 25 training return: tensor(-101.5554, device='cuda:0')
episode: 26 training return: tensor(-214.6553, device='cuda:0')
episode: 27 training return: tensor(-17.9362, device='cuda:0')
epoch: 7 test_true_pfm: 1892.403727135488 sim_pfm: 5.997518019711909
episode: 28 training return: tensor(-232.2315, device='cuda:0')
episode: 29 training return: tensor(325.7717, device='cuda:0')
episode: 30 training return: tensor(-112.9948, device='cuda:0')
episode: 31 training return: tensor(-56.8278, device='cuda:0')
epoch: 8 test_true_pfm: 2578.751072807639 sim_pfm: 257.7551184032927
episode: 32 training return: tensor(292.7546, device='cuda:0')
episode: 33 training return: tensor(-125.4910, device='cuda:0')
episode: 34 training return: tensor(353.9448, device='cuda:0')
episode: 35 training return: tensor(258.5752, device='cuda:0')
epoch: 9 test_true_pfm: 2283.4334123341364 sim_pfm: 179.34929763684826
episode: 36 training return: tensor(-189.5665, device='cuda:0')
episode: 37 training return: tensor(346.5783, device='cuda:0')
episode: 38 training return: tensor(270.3051, device='cuda:0')
episode: 39 training return: tensor(-143.8383, device='cuda:0')
epoch: 10 test_true_pfm: 2839.374139280169 sim_pfm: 138.28854821218798
episode: 40 training return: tensor(311.6882, device='cuda:0')
episode: 41 training return: tensor(345.9037, device='cuda:0')
episode: 42 training return: tensor(-41.9906, device='cuda:0')
episode: 43 training return: tensor(-103.1806, device='cuda:0')
epoch: 11 test_true_pfm: 2258.0263280434433 sim_pfm: 11.671106117738722
episode: 44 training return: tensor(-128.2803, device='cuda:0')
episode: 45 training return: tensor(-103.7716, device='cuda:0')
episode: 46 training return: tensor(312.7868, device='cuda:0')
episode: 47 training return: tensor(168.4426, device='cuda:0')
epoch: 12 test_true_pfm: 2784.128669884192 sim_pfm: 188.01415700434396
episode: 48 training return: tensor(-120.5270, device='cuda:0')
episode: 49 training return: tensor(-209.9776, device='cuda:0')
episode: 50 training return: tensor(-270.6750, device='cuda:0')
episode: 51 training return: tensor(173.8732, device='cuda:0')
epoch: 13 test_true_pfm: 3086.9509884421636 sim_pfm: 345.8197763233523
episode: 52 training return: tensor(370.6563, device='cuda:0')
episode: 53 training return: tensor(318.8993, device='cuda:0')
episode: 54 training return: tensor(-226.4221, device='cuda:0')
episode: 55 training return: tensor(325.1502, device='cuda:0')
epoch: 14 test_true_pfm: 3101.8417504268764 sim_pfm: 191.34849937291196
episode: 56 training return: tensor(294.9829, device='cuda:0')
episode: 57 training return: tensor(-105.9417, device='cuda:0')
episode: 58 training return: tensor(303.5813, device='cuda:0')
episode: 59 training return: tensor(19.3122, device='cuda:0')
epoch: 15 test_true_pfm: 2842.915356820471 sim_pfm: 175.38966068421723
episode: 60 training return: tensor(333.1407, device='cuda:0')
episode: 61 training return: tensor(324.7718, device='cuda:0')
episode: 62 training return: tensor(330.5267, device='cuda:0')
episode: 63 training return: tensor(289.7719, device='cuda:0')
epoch: 16 test_true_pfm: 3254.4570756337885 sim_pfm: 152.89266930654412
episode: 64 training return: tensor(115.8042, device='cuda:0')
episode: 65 training return: tensor(370.7689, device='cuda:0')
episode: 66 training return: tensor(302.2639, device='cuda:0')
episode: 67 training return: tensor(318.5954, device='cuda:0')
epoch: 17 test_true_pfm: 2428.464127746512 sim_pfm: 307.78165593130205
episode: 68 training return: tensor(-23.9312, device='cuda:0')
episode: 69 training return: tensor(287.6737, device='cuda:0')
episode: 70 training return: tensor(-134.8883, device='cuda:0')
episode: 71 training return: tensor(-100.7726, device='cuda:0')
epoch: 18 test_true_pfm: 3298.567319457485 sim_pfm: 163.02337292928132
episode: 72 training return: tensor(302.5040, device='cuda:0')
episode: 73 training return: tensor(360.1036, device='cuda:0')
episode: 74 training return: tensor(-89.9290, device='cuda:0')
episode: 75 training return: tensor(273.3031, device='cuda:0')
epoch: 19 test_true_pfm: 2785.034924075611 sim_pfm: 185.70748344356738
episode: 76 training return: tensor(330.9799, device='cuda:0')
episode: 77 training return: tensor(195.2619, device='cuda:0')
episode: 78 training return: tensor(170.1702, device='cuda:0')
episode: 79 training return: tensor(334.0623, device='cuda:0')
epoch: 20 test_true_pfm: 3334.2096931776246 sim_pfm: -97.12030414769349
episode: 80 training return: tensor(-86.7285, device='cuda:0')
episode: 81 training return: tensor(-12.6605, device='cuda:0')
episode: 82 training return: tensor(282.0068, device='cuda:0')
episode: 83 training return: tensor(323.7935, device='cuda:0')
epoch: 21 test_true_pfm: 2920.0215926897413 sim_pfm: 9.981264717100808
episode: 84 training return: tensor(242.8933, device='cuda:0')
episode: 85 training return: tensor(-260.3185, device='cuda:0')
episode: 86 training return: tensor(-216.5638, device='cuda:0')
episode: 87 training return: tensor(302.3947, device='cuda:0')
epoch: 22 test_true_pfm: 3335.272044228757 sim_pfm: 158.82604712536946
episode: 88 training return: tensor(368.0903, device='cuda:0')
episode: 89 training return: tensor(294.3167, device='cuda:0')
episode: 90 training return: tensor(-102.3561, device='cuda:0')
episode: 91 training return: tensor(342.4164, device='cuda:0')
epoch: 23 test_true_pfm: 3345.513718220003 sim_pfm: 233.76375336156343
episode: 92 training return: tensor(323.2867, device='cuda:0')
episode: 93 training return: tensor(246.2880, device='cuda:0')
episode: 94 training return: tensor(318.2651, device='cuda:0')
episode: 95 training return: tensor(344.9226, device='cuda:0')
epoch: 24 test_true_pfm: 3126.1425900801646 sim_pfm: 200.8989282179779
episode: 96 training return: tensor(287.9685, device='cuda:0')
episode: 97 training return: tensor(306.0019, device='cuda:0')
episode: 98 training return: tensor(-148.8667, device='cuda:0')
episode: 99 training return: tensor(186.2853, device='cuda:0')
epoch: 25 test_true_pfm: 1871.9014657902474 sim_pfm: 123.35059752497666
episode: 100 training return: tensor(294.9700, device='cuda:0')
episode: 101 training return: tensor(248.4374, device='cuda:0')
episode: 102 training return: tensor(199.0007, device='cuda:0')
episode: 103 training return: tensor(283.7455, device='cuda:0')
epoch: 26 test_true_pfm: 2790.2595025659575 sim_pfm: 327.45998608120135
episode: 104 training return: tensor(368.0997, device='cuda:0')
episode: 105 training return: tensor(309.5956, device='cuda:0')
episode: 106 training return: tensor(293.5043, device='cuda:0')
episode: 107 training return: tensor(-212.2780, device='cuda:0')
epoch: 27 test_true_pfm: 2906.797748378352 sim_pfm: -48.32948116686506
episode: 108 training return: tensor(25.3303, device='cuda:0')
episode: 109 training return: tensor(333.8672, device='cuda:0')
episode: 110 training return: tensor(-137.0377, device='cuda:0')
episode: 111 training return: tensor(321.2022, device='cuda:0')
epoch: 28 test_true_pfm: 3343.1137055141903 sim_pfm: 189.91700101271272
episode: 112 training return: tensor(408.1259, device='cuda:0')
episode: 113 training return: tensor(-147.8471, device='cuda:0')
episode: 114 training return: tensor(267.5078, device='cuda:0')
episode: 115 training return: tensor(367.7473, device='cuda:0')
epoch: 29 test_true_pfm: 2812.2766677545446 sim_pfm: 219.01581739242343
episode: 116 training return: tensor(355.1522, device='cuda:0')
episode: 117 training return: tensor(-197.4344, device='cuda:0')
episode: 118 training return: tensor(-80.8867, device='cuda:0')
episode: 119 training return: tensor(-140.4102, device='cuda:0')
epoch: 30 test_true_pfm: 3336.9444566577963 sim_pfm: 294.53428078474826
episode: 120 training return: tensor(325.8579, device='cuda:0')
episode: 121 training return: tensor(38.6460, device='cuda:0')
episode: 122 training return: tensor(328.0876, device='cuda:0')
episode: 123 training return: tensor(313.6153, device='cuda:0')
epoch: 31 test_true_pfm: 2907.004454158085 sim_pfm: 217.9410499058819
episode: 124 training return: tensor(-127.8227, device='cuda:0')
episode: 125 training return: tensor(314.8318, device='cuda:0')
episode: 126 training return: tensor(-185.2168, device='cuda:0')
episode: 127 training return: tensor(342.5701, device='cuda:0')
epoch: 32 test_true_pfm: 3049.3458611433703 sim_pfm: 337.1406351889406
episode: 128 training return: tensor(329.9725, device='cuda:0')
episode: 129 training return: tensor(322.3524, device='cuda:0')
episode: 130 training return: tensor(313.0909, device='cuda:0')
episode: 131 training return: tensor(26.3394, device='cuda:0')
epoch: 33 test_true_pfm: 2919.649586049907 sim_pfm: 253.70084892015439
episode: 132 training return: tensor(321.9200, device='cuda:0')
episode: 133 training return: tensor(1.2800, device='cuda:0')
episode: 134 training return: tensor(314.8191, device='cuda:0')
episode: 135 training return: tensor(323.9709, device='cuda:0')
epoch: 34 test_true_pfm: 3355.3838715393526 sim_pfm: 50.50881016260246
episode: 136 training return: tensor(286.9752, device='cuda:0')
episode: 137 training return: tensor(318.2790, device='cuda:0')
episode: 138 training return: tensor(-166.2209, device='cuda:0')
episode: 139 training return: tensor(328.0308, device='cuda:0')
epoch: 35 test_true_pfm: 2924.6128095420804 sim_pfm: 341.48849622670485
episode: 140 training return: tensor(305.4289, device='cuda:0')
episode: 141 training return: tensor(-115.5449, device='cuda:0')
episode: 142 training return: tensor(381.0020, device='cuda:0')
episode: 143 training return: tensor(306.2738, device='cuda:0')
epoch: 36 test_true_pfm: 1975.5329626859548 sim_pfm: 168.2502416059627
episode: 144 training return: tensor(270.6041, device='cuda:0')
episode: 145 training return: tensor(285.4977, device='cuda:0')
episode: 146 training return: tensor(99.8545, device='cuda:0')
episode: 147 training return: tensor(396.0032, device='cuda:0')
epoch: 37 test_true_pfm: 3142.190360962961 sim_pfm: 318.73419870907674
episode: 148 training return: tensor(-219.0286, device='cuda:0')
episode: 149 training return: tensor(284.0108, device='cuda:0')
episode: 150 training return: tensor(12.5607, device='cuda:0')
episode: 151 training return: tensor(314.6174, device='cuda:0')
epoch: 38 test_true_pfm: 2810.180487612432 sim_pfm: 154.57843328521508
episode: 152 training return: tensor(-134.5108, device='cuda:0')
episode: 153 training return: tensor(387.3760, device='cuda:0')
episode: 154 training return: tensor(155.2352, device='cuda:0')
episode: 155 training return: tensor(77.1457, device='cuda:0')
epoch: 39 test_true_pfm: 2361.944246843345 sim_pfm: 297.4643625043682
episode: 156 training return: tensor(330.3539, device='cuda:0')
episode: 157 training return: tensor(299.4375, device='cuda:0')
episode: 158 training return: tensor(-154.4466, device='cuda:0')
episode: 159 training return: tensor(-180.3792, device='cuda:0')
epoch: 40 test_true_pfm: 3378.9780394480185 sim_pfm: 322.23132801102474
episode: 160 training return: tensor(311.0858, device='cuda:0')
episode: 161 training return: tensor(308.6191, device='cuda:0')
episode: 162 training return: tensor(331.2082, device='cuda:0')
episode: 163 training return: tensor(309.3770, device='cuda:0')
epoch: 41 test_true_pfm: 2950.396555437646 sim_pfm: 148.9189682524108
episode: 164 training return: tensor(-150.3961, device='cuda:0')
episode: 165 training return: tensor(180.2896, device='cuda:0')
episode: 166 training return: tensor(307.9493, device='cuda:0')
episode: 167 training return: tensor(56.4503, device='cuda:0')
epoch: 42 test_true_pfm: 2825.0912995856866 sim_pfm: 249.92988485715856
episode: 168 training return: tensor(-144.5288, device='cuda:0')
episode: 169 training return: tensor(-180.7856, device='cuda:0')
episode: 170 training return: tensor(337.6798, device='cuda:0')
episode: 171 training return: tensor(-144.0195, device='cuda:0')
epoch: 43 test_true_pfm: 2241.503317458617 sim_pfm: 128.325126147926
episode: 172 training return: tensor(13.9988, device='cuda:0')
episode: 173 training return: tensor(-192.7469, device='cuda:0')
episode: 174 training return: tensor(318.9165, device='cuda:0')
episode: 175 training return: tensor(289.4745, device='cuda:0')
epoch: 44 test_true_pfm: 3350.690346939084 sim_pfm: 176.15273948657946
episode: 176 training return: tensor(258.1238, device='cuda:0')
episode: 177 training return: tensor(-112.4864, device='cuda:0')
episode: 178 training return: tensor(340.4479, device='cuda:0')
episode: 179 training return: tensor(-113.9428, device='cuda:0')
epoch: 45 test_true_pfm: 3342.6666574603437 sim_pfm: 351.1482365047171
episode: 180 training return: tensor(300.9615, device='cuda:0')
episode: 181 training return: tensor(346.4250, device='cuda:0')
episode: 182 training return: tensor(354.9007, device='cuda:0')
episode: 183 training return: tensor(373.2578, device='cuda:0')
epoch: 46 test_true_pfm: 2854.401576829578 sim_pfm: 141.78492854671399
episode: 184 training return: tensor(-34.6205, device='cuda:0')
episode: 185 training return: tensor(-109.6550, device='cuda:0')
episode: 186 training return: tensor(303.3461, device='cuda:0')
episode: 187 training return: tensor(47.2992, device='cuda:0')
epoch: 47 test_true_pfm: 3360.166572779021 sim_pfm: 304.71749140657874
episode: 188 training return: tensor(382.8744, device='cuda:0')
episode: 189 training return: tensor(349.6852, device='cuda:0')
episode: 190 training return: tensor(326.6561, device='cuda:0')
episode: 191 training return: tensor(378.1810, device='cuda:0')
epoch: 48 test_true_pfm: 2409.207643742186 sim_pfm: 228.97595928556984
episode: 192 training return: tensor(350.1205, device='cuda:0')
episode: 193 training return: tensor(300.3032, device='cuda:0')
episode: 194 training return: tensor(-115.4386, device='cuda:0')
episode: 195 training return: tensor(305.9591, device='cuda:0')
epoch: 49 test_true_pfm: 3004.169346633736 sim_pfm: 196.5042107050734
episode: 196 training return: tensor(-152.0022, device='cuda:0')
episode: 197 training return: tensor(-251.1707, device='cuda:0')
episode: 198 training return: tensor(316.7672, device='cuda:0')
episode: 199 training return: tensor(329.0703, device='cuda:0')
epoch: 50 test_true_pfm: 3297.525856841301 sim_pfm: 253.63454135266753
episode: 200 training return: tensor(-185.5857, device='cuda:0')
episode: 201 training return: tensor(316.0235, device='cuda:0')
episode: 202 training return: tensor(-151.4885, device='cuda:0')
episode: 203 training return: tensor(337.9428, device='cuda:0')
epoch: 51 test_true_pfm: 3082.846587749415 sim_pfm: 86.27295656246133
episode: 204 training return: tensor(340.5844, device='cuda:0')
episode: 205 training return: tensor(337.2352, device='cuda:0')
episode: 206 training return: tensor(326.7980, device='cuda:0')
episode: 207 training return: tensor(348.3327, device='cuda:0')
epoch: 52 test_true_pfm: 3321.955935886514 sim_pfm: 48.02487401710823
episode: 208 training return: tensor(288.0662, device='cuda:0')
episode: 209 training return: tensor(275.4219, device='cuda:0')
episode: 210 training return: tensor(-95.7121, device='cuda:0')
episode: 211 training return: tensor(-139.9901, device='cuda:0')
epoch: 53 test_true_pfm: 3337.041761666313 sim_pfm: 217.90626262491182
episode: 212 training return: tensor(-85.8159, device='cuda:0')
episode: 213 training return: tensor(307.4326, device='cuda:0')
episode: 214 training return: tensor(233.3456, device='cuda:0')
episode: 215 training return: tensor(284.1501, device='cuda:0')
epoch: 54 test_true_pfm: 3405.1972714928143 sim_pfm: 349.13348420385347
episode: 216 training return: tensor(328.5571, device='cuda:0')
episode: 217 training return: tensor(131.3820, device='cuda:0')
episode: 218 training return: tensor(271.5772, device='cuda:0')
episode: 219 training return: tensor(-172.5863, device='cuda:0')
epoch: 55 test_true_pfm: 3329.1413177478166 sim_pfm: -25.553557037647504
episode: 220 training return: tensor(279.2391, device='cuda:0')
episode: 221 training return: tensor(325.4780, device='cuda:0')
episode: 222 training return: tensor(283.2097, device='cuda:0')
episode: 223 training return: tensor(267.8716, device='cuda:0')
epoch: 56 test_true_pfm: 3291.518627731515 sim_pfm: 328.119648574638
episode: 224 training return: tensor(303.6192, device='cuda:0')
episode: 225 training return: tensor(-167.0534, device='cuda:0')
episode: 226 training return: tensor(346.6867, device='cuda:0')
episode: 227 training return: tensor(342.3294, device='cuda:0')
epoch: 57 test_true_pfm: 2868.918197382544 sim_pfm: 347.1187774105153
episode: 228 training return: tensor(364.2177, device='cuda:0')
episode: 229 training return: tensor(256.4320, device='cuda:0')
episode: 230 training return: tensor(-69.9989, device='cuda:0')
episode: 231 training return: tensor(304.2239, device='cuda:0')
epoch: 58 test_true_pfm: 2944.4103249186555 sim_pfm: 108.75893643238426
episode: 232 training return: tensor(-138.0710, device='cuda:0')
episode: 233 training return: tensor(-6.3712, device='cuda:0')
episode: 234 training return: tensor(326.7887, device='cuda:0')
episode: 235 training return: tensor(281.4869, device='cuda:0')
epoch: 59 test_true_pfm: 3378.5007844578654 sim_pfm: 172.51157812065017
episode: 236 training return: tensor(391.2896, device='cuda:0')
episode: 237 training return: tensor(-0.9773, device='cuda:0')
episode: 238 training return: tensor(333.9623, device='cuda:0')
episode: 239 training return: tensor(298.3283, device='cuda:0')
epoch: 60 test_true_pfm: 3181.824672542432 sim_pfm: 320.2756309697094
episode: 240 training return: tensor(-241.9833, device='cuda:0')
episode: 241 training return: tensor(299.9500, device='cuda:0')
episode: 242 training return: tensor(313.5781, device='cuda:0')
episode: 243 training return: tensor(347.0517, device='cuda:0')
epoch: 61 test_true_pfm: 2415.0652915980036 sim_pfm: 191.24103578117015
episode: 244 training return: tensor(308.4655, device='cuda:0')
episode: 245 training return: tensor(314.2003, device='cuda:0')
episode: 246 training return: tensor(-78.8838, device='cuda:0')
episode: 247 training return: tensor(310.5055, device='cuda:0')
epoch: 62 test_true_pfm: 3350.285831704617 sim_pfm: 361.2534009105293
episode: 248 training return: tensor(342.2062, device='cuda:0')
episode: 249 training return: tensor(359.3984, device='cuda:0')
episode: 250 training return: tensor(-174.5549, device='cuda:0')
episode: 251 training return: tensor(330.2791, device='cuda:0')
epoch: 63 test_true_pfm: 2866.885906115001 sim_pfm: 271.98693825149286
episode: 252 training return: tensor(356.9211, device='cuda:0')
episode: 253 training return: tensor(321.2807, device='cuda:0')
episode: 254 training return: tensor(355.7389, device='cuda:0')
episode: 255 training return: tensor(292.8948, device='cuda:0')
epoch: 64 test_true_pfm: 2499.220553380328 sim_pfm: 326.19535802714137
episode: 256 training return: tensor(293.8343, device='cuda:0')
episode: 257 training return: tensor(277.9822, device='cuda:0')
episode: 258 training return: tensor(347.0245, device='cuda:0')
episode: 259 training return: tensor(267.0480, device='cuda:0')
epoch: 65 test_true_pfm: 3325.8786178661576 sim_pfm: 172.47316590482174
episode: 260 training return: tensor(304.3746, device='cuda:0')
episode: 261 training return: tensor(396.9967, device='cuda:0')
episode: 262 training return: tensor(344.2688, device='cuda:0')
episode: 263 training return: tensor(270.2161, device='cuda:0')
epoch: 66 test_true_pfm: 2919.0789980759255 sim_pfm: 219.64960471084729
episode: 264 training return: tensor(272.6845, device='cuda:0')
episode: 265 training return: tensor(328.8679, device='cuda:0')
episode: 266 training return: tensor(385.8230, device='cuda:0')
episode: 267 training return: tensor(297.1005, device='cuda:0')
epoch: 67 test_true_pfm: 3208.4865766688076 sim_pfm: 322.49807806976605
episode: 268 training return: tensor(350.2049, device='cuda:0')
episode: 269 training return: tensor(306.8961, device='cuda:0')
episode: 270 training return: tensor(-121.4015, device='cuda:0')
episode: 271 training return: tensor(281.4985, device='cuda:0')
epoch: 68 test_true_pfm: 3352.3373987006266 sim_pfm: 317.6626494964682
episode: 272 training return: tensor(285.4699, device='cuda:0')
episode: 273 training return: tensor(364.7035, device='cuda:0')
episode: 274 training return: tensor(325.0700, device='cuda:0')
episode: 275 training return: tensor(298.2250, device='cuda:0')
epoch: 69 test_true_pfm: 3316.7229220797376 sim_pfm: 342.5937469039636
episode: 276 training return: tensor(-161.9520, device='cuda:0')
episode: 277 training return: tensor(290.5656, device='cuda:0')
episode: 278 training return: tensor(278.4616, device='cuda:0')
episode: 279 training return: tensor(-198.0973, device='cuda:0')
epoch: 70 test_true_pfm: 3337.2079904675825 sim_pfm: 324.8819542088895
episode: 280 training return: tensor(337.0063, device='cuda:0')
episode: 281 training return: tensor(181.3237, device='cuda:0')
episode: 282 training return: tensor(294.3084, device='cuda:0')
episode: 283 training return: tensor(336.3694, device='cuda:0')
epoch: 71 test_true_pfm: 3013.940748456657 sim_pfm: 2.404771718545817
episode: 284 training return: tensor(290.1451, device='cuda:0')
episode: 285 training return: tensor(342.0359, device='cuda:0')
episode: 286 training return: tensor(356.6172, device='cuda:0')
episode: 287 training return: tensor(308.2321, device='cuda:0')
epoch: 72 test_true_pfm: 3344.7080437192776 sim_pfm: 311.14648825954646
episode: 288 training return: tensor(333.5238, device='cuda:0')
episode: 289 training return: tensor(260.2607, device='cuda:0')
episode: 290 training return: tensor(344.3806, device='cuda:0')
episode: 291 training return: tensor(235.7641, device='cuda:0')
epoch: 73 test_true_pfm: 2974.6869289946794 sim_pfm: 258.3590266035947
episode: 292 training return: tensor(335.9815, device='cuda:0')
episode: 293 training return: tensor(318.6284, device='cuda:0')
episode: 294 training return: tensor(355.7287, device='cuda:0')
episode: 295 training return: tensor(269.9263, device='cuda:0')
epoch: 74 test_true_pfm: 3382.1036668059373 sim_pfm: 317.2948488777717
episode: 296 training return: tensor(392.8727, device='cuda:0')
episode: 297 training return: tensor(371.3502, device='cuda:0')
episode: 298 training return: tensor(339.8734, device='cuda:0')
episode: 299 training return: tensor(269.0323, device='cuda:0')
epoch: 75 test_true_pfm: 3310.6234737730774 sim_pfm: 205.23760611140946
episode: 300 training return: tensor(325.5953, device='cuda:0')
episode: 301 training return: tensor(-121.5712, device='cuda:0')
episode: 302 training return: tensor(334.3098, device='cuda:0')
episode: 303 training return: tensor(345.2872, device='cuda:0')
epoch: 76 test_true_pfm: 3287.211816198313 sim_pfm: 153.41961054853164
episode: 304 training return: tensor(370.9411, device='cuda:0')
episode: 305 training return: tensor(269.4171, device='cuda:0')
episode: 306 training return: tensor(306.9274, device='cuda:0')
episode: 307 training return: tensor(288.1449, device='cuda:0')
epoch: 77 test_true_pfm: 3274.7827356649964 sim_pfm: 360.39857104217907
episode: 308 training return: tensor(280.4594, device='cuda:0')
episode: 309 training return: tensor(294.9712, device='cuda:0')
episode: 310 training return: tensor(302.2668, device='cuda:0')
episode: 311 training return: tensor(323.2521, device='cuda:0')
epoch: 78 test_true_pfm: 3340.622107673676 sim_pfm: 313.8333919758831
episode: 312 training return: tensor(340.8296, device='cuda:0')
episode: 313 training return: tensor(286.7999, device='cuda:0')
episode: 314 training return: tensor(286.9870, device='cuda:0')
episode: 315 training return: tensor(300.4178, device='cuda:0')
epoch: 79 test_true_pfm: 3021.257379641504 sim_pfm: 344.2714497319345
episode: 316 training return: tensor(196.5941, device='cuda:0')
episode: 317 training return: tensor(345.3452, device='cuda:0')
episode: 318 training return: tensor(298.2144, device='cuda:0')
episode: 319 training return: tensor(314.2323, device='cuda:0')
epoch: 80 test_true_pfm: 3327.434830372186 sim_pfm: 241.20301433759354
episode: 320 training return: tensor(53.3684, device='cuda:0')
episode: 321 training return: tensor(-92.7463, device='cuda:0')
episode: 322 training return: tensor(345.8045, device='cuda:0')
episode: 323 training return: tensor(373.2029, device='cuda:0')
epoch: 81 test_true_pfm: 3366.627505749823 sim_pfm: 312.18495987040416
episode: 324 training return: tensor(330.9916, device='cuda:0')
episode: 325 training return: tensor(-40.4748, device='cuda:0')
episode: 326 training return: tensor(-170.3059, device='cuda:0')
episode: 327 training return: tensor(277.8878, device='cuda:0')
epoch: 82 test_true_pfm: 3353.4051673386516 sim_pfm: 329.7757754365933
episode: 328 training return: tensor(-224.6703, device='cuda:0')
episode: 329 training return: tensor(286.4341, device='cuda:0')
episode: 330 training return: tensor(316.2938, device='cuda:0')
episode: 331 training return: tensor(270.1670, device='cuda:0')
epoch: 83 test_true_pfm: 3349.4986601827345 sim_pfm: 35.00505515090966
episode: 332 training return: tensor(299.4084, device='cuda:0')
episode: 333 training return: tensor(329.6474, device='cuda:0')
episode: 334 training return: tensor(300.4042, device='cuda:0')
episode: 335 training return: tensor(306.9970, device='cuda:0')
epoch: 84 test_true_pfm: 3346.9644095775657 sim_pfm: 327.1954608401963
episode: 336 training return: tensor(340.2007, device='cuda:0')
episode: 337 training return: tensor(449.7168, device='cuda:0')
episode: 338 training return: tensor(-196.3920, device='cuda:0')
episode: 339 training return: tensor(307.1431, device='cuda:0')
epoch: 85 test_true_pfm: 3359.86349486431 sim_pfm: 172.5958684050711
episode: 340 training return: tensor(339.6415, device='cuda:0')
episode: 341 training return: tensor(302.5133, device='cuda:0')
episode: 342 training return: tensor(171.1662, device='cuda:0')
episode: 343 training return: tensor(301.5547, device='cuda:0')
epoch: 86 test_true_pfm: 3325.299743816513 sim_pfm: 359.78859946112306
episode: 344 training return: tensor(224.1970, device='cuda:0')
episode: 345 training return: tensor(314.8150, device='cuda:0')
episode: 346 training return: tensor(343.9311, device='cuda:0')
episode: 347 training return: tensor(310.2290, device='cuda:0')
epoch: 87 test_true_pfm: 2795.1945755276706 sim_pfm: 326.45952432135044
episode: 348 training return: tensor(-276.1685, device='cuda:0')
episode: 349 training return: tensor(315.0598, device='cuda:0')
episode: 350 training return: tensor(319.7246, device='cuda:0')
episode: 351 training return: tensor(306.7841, device='cuda:0')
epoch: 88 test_true_pfm: 3329.499928850703 sim_pfm: 180.61852748018768
episode: 352 training return: tensor(326.3836, device='cuda:0')
episode: 353 training return: tensor(284.2830, device='cuda:0')
episode: 354 training return: tensor(299.8087, device='cuda:0')
episode: 355 training return: tensor(-252.7120, device='cuda:0')
epoch: 89 test_true_pfm: 3283.1346978978527 sim_pfm: 182.52392288985234
episode: 356 training return: tensor(324.4744, device='cuda:0')
episode: 357 training return: tensor(140.8161, device='cuda:0')
episode: 358 training return: tensor(294.9518, device='cuda:0')
episode: 359 training return: tensor(-269.4956, device='cuda:0')
epoch: 90 test_true_pfm: 2905.9752324354827 sim_pfm: 313.2442246605642
episode: 360 training return: tensor(305.4375, device='cuda:0')
episode: 361 training return: tensor(-75.0969, device='cuda:0')
episode: 362 training return: tensor(380.9534, device='cuda:0')
episode: 363 training return: tensor(320.0582, device='cuda:0')
epoch: 91 test_true_pfm: 3374.9120701818233 sim_pfm: 308.9250810504697
episode: 364 training return: tensor(-106.5497, device='cuda:0')
episode: 365 training return: tensor(-50.7987, device='cuda:0')
episode: 366 training return: tensor(336.5229, device='cuda:0')
episode: 367 training return: tensor(386.2307, device='cuda:0')
epoch: 92 test_true_pfm: 3325.589701766456 sim_pfm: 341.9022148050135
episode: 368 training return: tensor(334.4962, device='cuda:0')
episode: 369 training return: tensor(291.0739, device='cuda:0')
episode: 370 training return: tensor(318.0776, device='cuda:0')
episode: 371 training return: tensor(336.6254, device='cuda:0')
epoch: 93 test_true_pfm: 2941.649801612435 sim_pfm: 168.52918764095133
episode: 372 training return: tensor(332.6270, device='cuda:0')
episode: 373 training return: tensor(373.8007, device='cuda:0')
episode: 374 training return: tensor(348.8121, device='cuda:0')
episode: 375 training return: tensor(288.5679, device='cuda:0')
epoch: 94 test_true_pfm: 2907.8554549106007 sim_pfm: 338.04903173115844
episode: 376 training return: tensor(313.7784, device='cuda:0')
episode: 377 training return: tensor(301.5121, device='cuda:0')
episode: 378 training return: tensor(338.7046, device='cuda:0')
episode: 379 training return: tensor(338.6579, device='cuda:0')
epoch: 95 test_true_pfm: 3362.674638018713 sim_pfm: 327.08274721133057
episode: 380 training return: tensor(-156.5168, device='cuda:0')
episode: 381 training return: tensor(301.0075, device='cuda:0')
episode: 382 training return: tensor(315.7017, device='cuda:0')
episode: 383 training return: tensor(285.4879, device='cuda:0')
epoch: 96 test_true_pfm: 3306.614745920115 sim_pfm: 343.46435954772943
episode: 384 training return: tensor(302.1380, device='cuda:0')
episode: 385 training return: tensor(255.4393, device='cuda:0')
episode: 386 training return: tensor(10.7191, device='cuda:0')
episode: 387 training return: tensor(261.8035, device='cuda:0')
epoch: 97 test_true_pfm: 3362.647092429844 sim_pfm: 143.70588862585524
episode: 388 training return: tensor(404.7139, device='cuda:0')
episode: 389 training return: tensor(385.9365, device='cuda:0')
episode: 390 training return: tensor(291.6252, device='cuda:0')
episode: 391 training return: tensor(315.3073, device='cuda:0')
epoch: 98 test_true_pfm: 2908.7570999128293 sim_pfm: 320.2054119606425
episode: 392 training return: tensor(329.4255, device='cuda:0')
episode: 393 training return: tensor(308.9118, device='cuda:0')
episode: 394 training return: tensor(307.6295, device='cuda:0')
episode: 395 training return: tensor(322.1757, device='cuda:0')
epoch: 99 test_true_pfm: 3274.0350135069 sim_pfm: 316.8497701227607
episode: 396 training return: tensor(335.7220, device='cuda:0')
episode: 397 training return: tensor(303.3465, device='cuda:0')
episode: 398 training return: tensor(284.9961, device='cuda:0')
episode: 399 training return: tensor(251.9841, device='cuda:0')
epoch: 100 test_true_pfm: 3356.1487189948625 sim_pfm: 190.16694432018753
episode: 400 training return: tensor(304.8039, device='cuda:0')
episode: 401 training return: tensor(331.4508, device='cuda:0')
episode: 402 training return: tensor(405.4763, device='cuda:0')
episode: 403 training return: tensor(246.9300, device='cuda:0')
epoch: 101 test_true_pfm: 2855.039378108861 sim_pfm: 310.05758453632006
episode: 404 training return: tensor(343.3523, device='cuda:0')
episode: 405 training return: tensor(290.1137, device='cuda:0')
episode: 406 training return: tensor(299.2465, device='cuda:0')
episode: 407 training return: tensor(229.7694, device='cuda:0')
epoch: 102 test_true_pfm: 3377.690773186474 sim_pfm: 341.79524797201157
episode: 408 training return: tensor(306.9844, device='cuda:0')
episode: 409 training return: tensor(298.4623, device='cuda:0')
episode: 410 training return: tensor(354.7322, device='cuda:0')
episode: 411 training return: tensor(322.5244, device='cuda:0')
epoch: 103 test_true_pfm: 3318.0825226599977 sim_pfm: 307.40371883212356
episode: 412 training return: tensor(357.8578, device='cuda:0')
episode: 413 training return: tensor(-165.5453, device='cuda:0')
episode: 414 training return: tensor(-71.1654, device='cuda:0')
episode: 415 training return: tensor(239.7038, device='cuda:0')
epoch: 104 test_true_pfm: 3337.8982884113325 sim_pfm: 341.4655225672371
episode: 416 training return: tensor(-129.6307, device='cuda:0')
episode: 417 training return: tensor(289.8397, device='cuda:0')
episode: 418 training return: tensor(325.3737, device='cuda:0')
episode: 419 training return: tensor(87.6278, device='cuda:0')
epoch: 105 test_true_pfm: 3334.7970453592516 sim_pfm: 320.86385722797905
episode: 420 training return: tensor(57.4841, device='cuda:0')
episode: 421 training return: tensor(-239.9387, device='cuda:0')
episode: 422 training return: tensor(308.8768, device='cuda:0')
episode: 423 training return: tensor(282.7964, device='cuda:0')
epoch: 106 test_true_pfm: 3332.0823640808717 sim_pfm: 306.4550469967168
episode: 424 training return: tensor(305.1447, device='cuda:0')
episode: 425 training return: tensor(310.5403, device='cuda:0')
episode: 426 training return: tensor(302.9084, device='cuda:0')
episode: 427 training return: tensor(116.5883, device='cuda:0')
epoch: 107 test_true_pfm: 3370.9149428845726 sim_pfm: 323.3269099368675
episode: 428 training return: tensor(-123.2343, device='cuda:0')
episode: 429 training return: tensor(325.6503, device='cuda:0')
episode: 430 training return: tensor(428.5144, device='cuda:0')
episode: 431 training return: tensor(321.4847, device='cuda:0')
epoch: 108 test_true_pfm: 3339.0637851944593 sim_pfm: 334.9514059937307
episode: 432 training return: tensor(335.8294, device='cuda:0')
episode: 433 training return: tensor(343.5478, device='cuda:0')
episode: 434 training return: tensor(247.2786, device='cuda:0')
episode: 435 training return: tensor(-263.1299, device='cuda:0')
epoch: 109 test_true_pfm: 3128.109041399001 sim_pfm: 323.1836920977512
episode: 436 training return: tensor(317.9732, device='cuda:0')
episode: 437 training return: tensor(292.7252, device='cuda:0')
episode: 438 training return: tensor(326.8735, device='cuda:0')
episode: 439 training return: tensor(256.2144, device='cuda:0')
epoch: 110 test_true_pfm: 3333.405014818078 sim_pfm: 335.6456119743331
episode: 440 training return: tensor(374.3918, device='cuda:0')
episode: 441 training return: tensor(318.7997, device='cuda:0')
episode: 442 training return: tensor(272.3700, device='cuda:0')
episode: 443 training return: tensor(-218.3628, device='cuda:0')
epoch: 111 test_true_pfm: 3331.980116268152 sim_pfm: 319.528104665922
episode: 444 training return: tensor(365.7144, device='cuda:0')
episode: 445 training return: tensor(-164.4115, device='cuda:0')
episode: 446 training return: tensor(305.9932, device='cuda:0')
episode: 447 training return: tensor(339.5086, device='cuda:0')
epoch: 112 test_true_pfm: 3336.0517133163216 sim_pfm: 323.8039814749791
episode: 448 training return: tensor(378.5375, device='cuda:0')
episode: 449 training return: tensor(254.9411, device='cuda:0')
episode: 450 training return: tensor(283.9704, device='cuda:0')
episode: 451 training return: tensor(316.3978, device='cuda:0')
epoch: 113 test_true_pfm: 3315.5351675523325 sim_pfm: 318.444680137715
episode: 452 training return: tensor(288.8616, device='cuda:0')
episode: 453 training return: tensor(75.0154, device='cuda:0')
episode: 454 training return: tensor(405.3964, device='cuda:0')
episode: 455 training return: tensor(276.3295, device='cuda:0')
epoch: 114 test_true_pfm: 3294.338340969645 sim_pfm: 334.6881084609874
episode: 456 training return: tensor(-157.6507, device='cuda:0')
episode: 457 training return: tensor(335.9076, device='cuda:0')
episode: 458 training return: tensor(272.8179, device='cuda:0')
episode: 459 training return: tensor(321.0186, device='cuda:0')
epoch: 115 test_true_pfm: 3325.4138134622895 sim_pfm: 335.0468811978062
episode: 460 training return: tensor(287.9514, device='cuda:0')
episode: 461 training return: tensor(323.8631, device='cuda:0')
episode: 462 training return: tensor(329.9039, device='cuda:0')
episode: 463 training return: tensor(336.2894, device='cuda:0')
epoch: 116 test_true_pfm: 2994.3218115778805 sim_pfm: 328.8162205712385
episode: 464 training return: tensor(353.3548, device='cuda:0')
episode: 465 training return: tensor(316.4606, device='cuda:0')
episode: 466 training return: tensor(-128.5030, device='cuda:0')
episode: 467 training return: tensor(380.1345, device='cuda:0')
epoch: 117 test_true_pfm: 3312.9386829684604 sim_pfm: 313.4449793601331
episode: 468 training return: tensor(322.9390, device='cuda:0')
episode: 469 training return: tensor(365.2813, device='cuda:0')
episode: 470 training return: tensor(324.2216, device='cuda:0')
episode: 471 training return: tensor(-123.9285, device='cuda:0')
epoch: 118 test_true_pfm: 3345.9887885937787 sim_pfm: 178.21167844491234
episode: 472 training return: tensor(301.9716, device='cuda:0')
episode: 473 training return: tensor(306.4480, device='cuda:0')
episode: 474 training return: tensor(365.4496, device='cuda:0')
episode: 475 training return: tensor(327.2254, device='cuda:0')
epoch: 119 test_true_pfm: 3340.406392944776 sim_pfm: 302.6295590379838
episode: 476 training return: tensor(299.0445, device='cuda:0')
episode: 477 training return: tensor(341.7123, device='cuda:0')
episode: 478 training return: tensor(311.4516, device='cuda:0')
episode: 479 training return: tensor(307.6085, device='cuda:0')
epoch: 120 test_true_pfm: 3357.726596196428 sim_pfm: 319.7669917613869
episode: 480 training return: tensor(314.3899, device='cuda:0')
episode: 481 training return: tensor(283.7831, device='cuda:0')
episode: 482 training return: tensor(-145.1356, device='cuda:0')
episode: 483 training return: tensor(61.8532, device='cuda:0')
epoch: 121 test_true_pfm: 3335.370630581307 sim_pfm: 328.27975328463555
episode: 484 training return: tensor(300.5009, device='cuda:0')
episode: 485 training return: tensor(331.2259, device='cuda:0')
episode: 486 training return: tensor(356.1320, device='cuda:0')
episode: 487 training return: tensor(286.5404, device='cuda:0')
epoch: 122 test_true_pfm: 3298.8080135783644 sim_pfm: 323.88332245411584
episode: 488 training return: tensor(380.7782, device='cuda:0')
episode: 489 training return: tensor(307.7817, device='cuda:0')
episode: 490 training return: tensor(353.4354, device='cuda:0')
episode: 491 training return: tensor(319.2499, device='cuda:0')
epoch: 123 test_true_pfm: 3355.623335998834 sim_pfm: 293.4109523203224
episode: 492 training return: tensor(307.5985, device='cuda:0')
episode: 493 training return: tensor(336.1833, device='cuda:0')
episode: 494 training return: tensor(316.5150, device='cuda:0')
episode: 495 training return: tensor(-61.5139, device='cuda:0')
epoch: 124 test_true_pfm: 2919.848436480657 sim_pfm: 204.57516338481219
episode: 496 training return: tensor(333.9662, device='cuda:0')
episode: 497 training return: tensor(332.2312, device='cuda:0')
episode: 498 training return: tensor(282.7225, device='cuda:0')
episode: 499 training return: tensor(392.1753, device='cuda:0')
epoch: 125 test_true_pfm: 3327.032943478109 sim_pfm: 306.9089731433778
episode: 500 training return: tensor(309.4777, device='cuda:0')
episode: 501 training return: tensor(315.5783, device='cuda:0')
episode: 502 training return: tensor(424.8941, device='cuda:0')
episode: 503 training return: tensor(299.2653, device='cuda:0')
epoch: 126 test_true_pfm: 2797.8835053438074 sim_pfm: 327.47134067786584
episode: 504 training return: tensor(-68.2155, device='cuda:0')
episode: 505 training return: tensor(303.8224, device='cuda:0')
episode: 506 training return: tensor(364.6648, device='cuda:0')
episode: 507 training return: tensor(280.6690, device='cuda:0')
epoch: 127 test_true_pfm: 3351.687179526651 sim_pfm: 306.81965114555595
episode: 508 training return: tensor(318.5504, device='cuda:0')
episode: 509 training return: tensor(335.3892, device='cuda:0')
episode: 510 training return: tensor(320.5357, device='cuda:0')
episode: 511 training return: tensor(400.7736, device='cuda:0')
epoch: 128 test_true_pfm: 3318.9327219573934 sim_pfm: 165.8928124939945
episode: 512 training return: tensor(276.0597, device='cuda:0')
episode: 513 training return: tensor(324.7769, device='cuda:0')
episode: 514 training return: tensor(304.5885, device='cuda:0')
episode: 515 training return: tensor(349.4359, device='cuda:0')
epoch: 129 test_true_pfm: 3323.107387821368 sim_pfm: 336.6628044155659
episode: 516 training return: tensor(343.4187, device='cuda:0')
episode: 517 training return: tensor(-164.6806, device='cuda:0')
episode: 518 training return: tensor(313.1536, device='cuda:0')
episode: 519 training return: tensor(311.9742, device='cuda:0')
epoch: 130 test_true_pfm: 3341.690334381025 sim_pfm: 332.5542039424084
episode: 520 training return: tensor(367.7039, device='cuda:0')
episode: 521 training return: tensor(268.7603, device='cuda:0')
episode: 522 training return: tensor(321.7598, device='cuda:0')
episode: 523 training return: tensor(309.1080, device='cuda:0')
epoch: 131 test_true_pfm: 3285.32064349869 sim_pfm: 323.2433561853929
episode: 524 training return: tensor(337.9586, device='cuda:0')
episode: 525 training return: tensor(165.9320, device='cuda:0')
episode: 526 training return: tensor(303.9579, device='cuda:0')
episode: 527 training return: tensor(121.6879, device='cuda:0')
epoch: 132 test_true_pfm: 3358.494882630621 sim_pfm: 325.7450728783151
episode: 528 training return: tensor(324.6290, device='cuda:0')
episode: 529 training return: tensor(297.0913, device='cuda:0')
episode: 530 training return: tensor(255.8398, device='cuda:0')
episode: 531 training return: tensor(333.0644, device='cuda:0')
epoch: 133 test_true_pfm: 3342.0891905410194 sim_pfm: 323.6136823508811
episode: 532 training return: tensor(291.3834, device='cuda:0')
episode: 533 training return: tensor(324.8737, device='cuda:0')
episode: 534 training return: tensor(278.9773, device='cuda:0')
episode: 535 training return: tensor(288.7472, device='cuda:0')
epoch: 134 test_true_pfm: 3332.738027862141 sim_pfm: 339.945724502623
episode: 536 training return: tensor(329.3597, device='cuda:0')
episode: 537 training return: tensor(325.6043, device='cuda:0')
episode: 538 training return: tensor(335.5310, device='cuda:0')
episode: 539 training return: tensor(387.2189, device='cuda:0')
epoch: 135 test_true_pfm: 2830.8251236563387 sim_pfm: 315.82949402489857
episode: 540 training return: tensor(225.1486, device='cuda:0')
episode: 541 training return: tensor(296.3865, device='cuda:0')
episode: 542 training return: tensor(307.2280, device='cuda:0')
episode: 543 training return: tensor(300.8589, device='cuda:0')
epoch: 136 test_true_pfm: 3316.099494599084 sim_pfm: 352.7398907775544
episode: 544 training return: tensor(327.3704, device='cuda:0')
episode: 545 training return: tensor(360.1251, device='cuda:0')
episode: 546 training return: tensor(304.3341, device='cuda:0')
episode: 547 training return: tensor(37.4508, device='cuda:0')
epoch: 137 test_true_pfm: 2950.1244461415176 sim_pfm: 164.4378274307625
episode: 548 training return: tensor(311.7203, device='cuda:0')
episode: 549 training return: tensor(298.9969, device='cuda:0')
episode: 550 training return: tensor(331.7571, device='cuda:0')
episode: 551 training return: tensor(289.6367, device='cuda:0')
epoch: 138 test_true_pfm: 3312.4461301591327 sim_pfm: 286.3267680889209
episode: 552 training return: tensor(289.9444, device='cuda:0')
episode: 553 training return: tensor(296.7604, device='cuda:0')
episode: 554 training return: tensor(262.9456, device='cuda:0')
episode: 555 training return: tensor(-124.7750, device='cuda:0')
epoch: 139 test_true_pfm: 3314.2077872552677 sim_pfm: 299.3658359530382
episode: 556 training return: tensor(354.1611, device='cuda:0')
episode: 557 training return: tensor(29.9750, device='cuda:0')
episode: 558 training return: tensor(-166.3189, device='cuda:0')
episode: 559 training return: tensor(308.2576, device='cuda:0')
epoch: 140 test_true_pfm: 3380.3114328569122 sim_pfm: 196.4564694669874
episode: 560 training return: tensor(390.0413, device='cuda:0')
episode: 561 training return: tensor(-154.9885, device='cuda:0')
episode: 562 training return: tensor(331.2922, device='cuda:0')
episode: 563 training return: tensor(300.0053, device='cuda:0')
epoch: 141 test_true_pfm: 3341.626952297421 sim_pfm: 267.7822815428469
episode: 564 training return: tensor(289.7160, device='cuda:0')
episode: 565 training return: tensor(306.0358, device='cuda:0')
episode: 566 training return: tensor(282.1077, device='cuda:0')
episode: 567 training return: tensor(368.1218, device='cuda:0')
epoch: 142 test_true_pfm: 3322.9950702184447 sim_pfm: 133.07520090031903
episode: 568 training return: tensor(315.6891, device='cuda:0')
episode: 569 training return: tensor(349.1593, device='cuda:0')
episode: 570 training return: tensor(357.7005, device='cuda:0')
episode: 571 training return: tensor(318.7976, device='cuda:0')
epoch: 143 test_true_pfm: 3157.006383420276 sim_pfm: 213.15847265566117
episode: 572 training return: tensor(-70.2974, device='cuda:0')
episode: 573 training return: tensor(382.2715, device='cuda:0')
episode: 574 training return: tensor(347.8842, device='cuda:0')
episode: 575 training return: tensor(287.4561, device='cuda:0')
epoch: 144 test_true_pfm: 2369.816314673283 sim_pfm: 20.90278167820846
episode: 576 training return: tensor(344.9821, device='cuda:0')
episode: 577 training return: tensor(-148.7114, device='cuda:0')
episode: 578 training return: tensor(-182.6000, device='cuda:0')
episode: 579 training return: tensor(252.0807, device='cuda:0')
epoch: 145 test_true_pfm: 3339.0934721696976 sim_pfm: 321.7698162464076
episode: 580 training return: tensor(307.0354, device='cuda:0')
episode: 581 training return: tensor(264.5468, device='cuda:0')
episode: 582 training return: tensor(295.7595, device='cuda:0')
episode: 583 training return: tensor(358.3525, device='cuda:0')
epoch: 146 test_true_pfm: 3279.3566789008073 sim_pfm: 158.4161096373961
episode: 584 training return: tensor(-42.6963, device='cuda:0')
episode: 585 training return: tensor(-218.6677, device='cuda:0')
episode: 586 training return: tensor(309.2090, device='cuda:0')
episode: 587 training return: tensor(347.4547, device='cuda:0')
epoch: 147 test_true_pfm: 3330.542063542549 sim_pfm: 316.4020457009319
episode: 588 training return: tensor(293.5074, device='cuda:0')
episode: 589 training return: tensor(329.0765, device='cuda:0')
episode: 590 training return: tensor(300.7466, device='cuda:0')
episode: 591 training return: tensor(-40.4142, device='cuda:0')
epoch: 148 test_true_pfm: 3335.112494755591 sim_pfm: 298.71284725291963
episode: 592 training return: tensor(281.7390, device='cuda:0')
episode: 593 training return: tensor(264.4394, device='cuda:0')
episode: 594 training return: tensor(316.0269, device='cuda:0')
episode: 595 training return: tensor(266.2153, device='cuda:0')
epoch: 149 test_true_pfm: 2859.8361741560107 sim_pfm: 354.0975909657427
episode: 596 training return: tensor(324.3172, device='cuda:0')
episode: 597 training return: tensor(352.4232, device='cuda:0')
episode: 598 training return: tensor(332.7747, device='cuda:0')
episode: 599 training return: tensor(304.3976, device='cuda:0')
epoch: 150 test_true_pfm: 3321.325099830581 sim_pfm: 294.59295626095263
