['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '2', '--data', '100000', '--regu', '0.05']
2711.682147097693
episode: 0 training return: tensor(245.2581, device='cuda:0')
episode: 1 training return: tensor(64.0254, device='cuda:0')
episode: 2 training return: tensor(-190.1372, device='cuda:0')
episode: 3 training return: tensor(180.3021, device='cuda:0')
epoch: 1 test_true_pfm: 2285.1395873947267 sim_pfm: -63.315081242937595
episode: 4 training return: tensor(261.8605, device='cuda:0')
episode: 5 training return: tensor(228.8579, device='cuda:0')
episode: 6 training return: tensor(281.1451, device='cuda:0')
episode: 7 training return: tensor(-23.4272, device='cuda:0')
epoch: 2 test_true_pfm: 2795.113074012788 sim_pfm: 11.696262387791649
episode: 8 training return: tensor(-198.2531, device='cuda:0')
episode: 9 training return: tensor(205.8963, device='cuda:0')
episode: 10 training return: tensor(-274.1535, device='cuda:0')
episode: 11 training return: tensor(247.8054, device='cuda:0')
epoch: 3 test_true_pfm: 3323.57993066505 sim_pfm: 192.1889193878742
episode: 12 training return: tensor(238.5620, device='cuda:0')
episode: 13 training return: tensor(-106.0707, device='cuda:0')
episode: 14 training return: tensor(259.5021, device='cuda:0')
episode: 15 training return: tensor(241.8047, device='cuda:0')
epoch: 4 test_true_pfm: 2363.3155834959352 sim_pfm: 58.96076329582138
episode: 16 training return: tensor(-256.1832, device='cuda:0')
episode: 17 training return: tensor(-187.8062, device='cuda:0')
episode: 18 training return: tensor(223.2615, device='cuda:0')
episode: 19 training return: tensor(-263.4325, device='cuda:0')
epoch: 5 test_true_pfm: 2703.577650956505 sim_pfm: -97.21095236503363
episode: 20 training return: tensor(-342.3729, device='cuda:0')
episode: 21 training return: tensor(39.1726, device='cuda:0')
episode: 22 training return: tensor(-62.0292, device='cuda:0')
episode: 23 training return: tensor(47.0597, device='cuda:0')
epoch: 6 test_true_pfm: 1708.9041907117637 sim_pfm: 115.00840022958194
episode: 24 training return: tensor(-30.0366, device='cuda:0')
episode: 25 training return: tensor(198.3290, device='cuda:0')
episode: 26 training return: tensor(-221.5177, device='cuda:0')
episode: 27 training return: tensor(132.5969, device='cuda:0')
epoch: 7 test_true_pfm: 2928.3714748450016 sim_pfm: 89.09421799950844
episode: 28 training return: tensor(220.0013, device='cuda:0')
episode: 29 training return: tensor(-72.7385, device='cuda:0')
episode: 30 training return: tensor(-287.4271, device='cuda:0')
episode: 31 training return: tensor(162.3350, device='cuda:0')
epoch: 8 test_true_pfm: 2933.0343212153216 sim_pfm: 158.1038433215193
episode: 32 training return: tensor(199.7560, device='cuda:0')
episode: 33 training return: tensor(-144.0161, device='cuda:0')
episode: 34 training return: tensor(190.6138, device='cuda:0')
episode: 35 training return: tensor(-407.3467, device='cuda:0')
epoch: 9 test_true_pfm: 2216.5510482314376 sim_pfm: 173.58066856605
episode: 36 training return: tensor(163.4382, device='cuda:0')
episode: 37 training return: tensor(-250.5180, device='cuda:0')
episode: 38 training return: tensor(253.7739, device='cuda:0')
episode: 39 training return: tensor(-305.7005, device='cuda:0')
epoch: 10 test_true_pfm: 2791.341434236479 sim_pfm: 176.26860474275114
episode: 40 training return: tensor(33.3121, device='cuda:0')
episode: 41 training return: tensor(201.1299, device='cuda:0')
episode: 42 training return: tensor(214.0595, device='cuda:0')
episode: 43 training return: tensor(244.0024, device='cuda:0')
epoch: 11 test_true_pfm: 2138.424981893278 sim_pfm: 166.2860775994874
episode: 44 training return: tensor(209.9805, device='cuda:0')
episode: 45 training return: tensor(212.8026, device='cuda:0')
episode: 46 training return: tensor(195.7874, device='cuda:0')
episode: 47 training return: tensor(-352.3311, device='cuda:0')
epoch: 12 test_true_pfm: 2842.02013881562 sim_pfm: 48.03006519932145
episode: 48 training return: tensor(213.5267, device='cuda:0')
episode: 49 training return: tensor(-30.7788, device='cuda:0')
episode: 50 training return: tensor(61.8183, device='cuda:0')
episode: 51 training return: tensor(299.8393, device='cuda:0')
epoch: 13 test_true_pfm: 2832.950841004635 sim_pfm: 22.597319285268895
episode: 52 training return: tensor(199.7281, device='cuda:0')
episode: 53 training return: tensor(-399.5993, device='cuda:0')
episode: 54 training return: tensor(-199.0860, device='cuda:0')
episode: 55 training return: tensor(188.2441, device='cuda:0')
epoch: 14 test_true_pfm: 2274.508587170101 sim_pfm: -15.219145180521688
episode: 56 training return: tensor(304.0900, device='cuda:0')
episode: 57 training return: tensor(280.9422, device='cuda:0')
episode: 58 training return: tensor(253.5205, device='cuda:0')
episode: 59 training return: tensor(-100.1968, device='cuda:0')
epoch: 15 test_true_pfm: 3347.626389082075 sim_pfm: -25.156680879260723
episode: 60 training return: tensor(209.5162, device='cuda:0')
episode: 61 training return: tensor(231.5429, device='cuda:0')
episode: 62 training return: tensor(-179.0746, device='cuda:0')
episode: 63 training return: tensor(238.8670, device='cuda:0')
epoch: 16 test_true_pfm: 3316.3933758076614 sim_pfm: 251.54230501831626
episode: 64 training return: tensor(181.6897, device='cuda:0')
episode: 65 training return: tensor(58.3394, device='cuda:0')
episode: 66 training return: tensor(344.0898, device='cuda:0')
episode: 67 training return: tensor(200.6664, device='cuda:0')
epoch: 17 test_true_pfm: 2691.754471304307 sim_pfm: 106.87686433829367
episode: 68 training return: tensor(-45.2856, device='cuda:0')
episode: 69 training return: tensor(-356.8381, device='cuda:0')
episode: 70 training return: tensor(202.5198, device='cuda:0')
episode: 71 training return: tensor(-256.7761, device='cuda:0')
epoch: 18 test_true_pfm: 2599.43026836327 sim_pfm: 221.5276874632303
episode: 72 training return: tensor(-126.2355, device='cuda:0')
episode: 73 training return: tensor(258.9484, device='cuda:0')
episode: 74 training return: tensor(147.9298, device='cuda:0')
episode: 75 training return: tensor(241.5376, device='cuda:0')
epoch: 19 test_true_pfm: 2768.0356493425315 sim_pfm: 187.43709497650465
episode: 76 training return: tensor(-27.1933, device='cuda:0')
episode: 77 training return: tensor(-287.9462, device='cuda:0')
episode: 78 training return: tensor(20.8416, device='cuda:0')
episode: 79 training return: tensor(24.5314, device='cuda:0')
epoch: 20 test_true_pfm: 3011.3610549038917 sim_pfm: 208.01572572280807
episode: 80 training return: tensor(170.8772, device='cuda:0')
episode: 81 training return: tensor(223.9476, device='cuda:0')
episode: 82 training return: tensor(-60.3277, device='cuda:0')
episode: 83 training return: tensor(-180.5295, device='cuda:0')
epoch: 21 test_true_pfm: 3176.987772393238 sim_pfm: 117.48835156894832
episode: 84 training return: tensor(-236.1504, device='cuda:0')
episode: 85 training return: tensor(219.3155, device='cuda:0')
episode: 86 training return: tensor(303.1841, device='cuda:0')
episode: 87 training return: tensor(223.0256, device='cuda:0')
epoch: 22 test_true_pfm: 2843.6526553977073 sim_pfm: 213.35193696247492
episode: 88 training return: tensor(231.0729, device='cuda:0')
episode: 89 training return: tensor(-409.0263, device='cuda:0')
episode: 90 training return: tensor(-162.9927, device='cuda:0')
episode: 91 training return: tensor(264.1382, device='cuda:0')
epoch: 23 test_true_pfm: 3357.116497493456 sim_pfm: 6.939648657377499
episode: 92 training return: tensor(8.3254, device='cuda:0')
episode: 93 training return: tensor(221.6647, device='cuda:0')
episode: 94 training return: tensor(237.4938, device='cuda:0')
episode: 95 training return: tensor(-351.6219, device='cuda:0')
epoch: 24 test_true_pfm: 2681.1253898054288 sim_pfm: 64.98925516901848
episode: 96 training return: tensor(190.3653, device='cuda:0')
episode: 97 training return: tensor(177.7412, device='cuda:0')
episode: 98 training return: tensor(279.6696, device='cuda:0')
episode: 99 training return: tensor(239.7673, device='cuda:0')
epoch: 25 test_true_pfm: 3130.5235608643766 sim_pfm: -34.93544194170196
episode: 100 training return: tensor(95.5500, device='cuda:0')
episode: 101 training return: tensor(223.2902, device='cuda:0')
episode: 102 training return: tensor(244.0346, device='cuda:0')
episode: 103 training return: tensor(229.4142, device='cuda:0')
epoch: 26 test_true_pfm: 2861.6489144963184 sim_pfm: -82.44159529850003
episode: 104 training return: tensor(148.1248, device='cuda:0')
episode: 105 training return: tensor(249.0933, device='cuda:0')
episode: 106 training return: tensor(167.2002, device='cuda:0')
episode: 107 training return: tensor(312.1425, device='cuda:0')
epoch: 27 test_true_pfm: 3167.6826076796965 sim_pfm: 54.77066262285613
episode: 108 training return: tensor(64.9151, device='cuda:0')
episode: 109 training return: tensor(-203.3398, device='cuda:0')
episode: 110 training return: tensor(166.5192, device='cuda:0')
episode: 111 training return: tensor(134.5606, device='cuda:0')
epoch: 28 test_true_pfm: 2791.4183446763 sim_pfm: 182.71390749936108
episode: 112 training return: tensor(14.6851, device='cuda:0')
episode: 113 training return: tensor(-159.7113, device='cuda:0')
episode: 114 training return: tensor(161.5120, device='cuda:0')
episode: 115 training return: tensor(-149.5373, device='cuda:0')
epoch: 29 test_true_pfm: 3309.5004225146445 sim_pfm: 97.52954517269973
episode: 116 training return: tensor(252.2015, device='cuda:0')
episode: 117 training return: tensor(192.6389, device='cuda:0')
episode: 118 training return: tensor(167.8944, device='cuda:0')
episode: 119 training return: tensor(307.1614, device='cuda:0')
epoch: 30 test_true_pfm: 2835.319902413727 sim_pfm: 8.454613024708427
episode: 120 training return: tensor(222.9098, device='cuda:0')
episode: 121 training return: tensor(238.2568, device='cuda:0')
episode: 122 training return: tensor(205.8562, device='cuda:0')
episode: 123 training return: tensor(240.1219, device='cuda:0')
epoch: 31 test_true_pfm: 2822.734450960935 sim_pfm: 235.6854790271027
episode: 124 training return: tensor(255.9571, device='cuda:0')
episode: 125 training return: tensor(-373.5575, device='cuda:0')
episode: 126 training return: tensor(-138.5500, device='cuda:0')
episode: 127 training return: tensor(252.8986, device='cuda:0')
epoch: 32 test_true_pfm: 2171.2799976015917 sim_pfm: 25.122928385273553
episode: 128 training return: tensor(-230.9203, device='cuda:0')
episode: 129 training return: tensor(221.6266, device='cuda:0')
episode: 130 training return: tensor(193.1007, device='cuda:0')
episode: 131 training return: tensor(251.6862, device='cuda:0')
epoch: 33 test_true_pfm: 3169.734523374509 sim_pfm: 224.8807190984177
episode: 132 training return: tensor(-381.3297, device='cuda:0')
episode: 133 training return: tensor(-166.8667, device='cuda:0')
episode: 134 training return: tensor(-264.4403, device='cuda:0')
episode: 135 training return: tensor(200.1367, device='cuda:0')
epoch: 34 test_true_pfm: 2351.8216192814175 sim_pfm: -0.3221717406607543
episode: 136 training return: tensor(-139.6650, device='cuda:0')
episode: 137 training return: tensor(147.4394, device='cuda:0')
episode: 138 training return: tensor(52.8383, device='cuda:0')
episode: 139 training return: tensor(194.1965, device='cuda:0')
epoch: 35 test_true_pfm: 2972.3029734070697 sim_pfm: 261.54698004408664
episode: 140 training return: tensor(290.1538, device='cuda:0')
episode: 141 training return: tensor(213.3436, device='cuda:0')
episode: 142 training return: tensor(-304.6218, device='cuda:0')
episode: 143 training return: tensor(255.5869, device='cuda:0')
epoch: 36 test_true_pfm: 2182.1708611140984 sim_pfm: 250.97213776155454
episode: 144 training return: tensor(128.0024, device='cuda:0')
episode: 145 training return: tensor(165.7919, device='cuda:0')
episode: 146 training return: tensor(173.7562, device='cuda:0')
episode: 147 training return: tensor(-275.3206, device='cuda:0')
epoch: 37 test_true_pfm: 3159.1825157967755 sim_pfm: 106.66159658809192
episode: 148 training return: tensor(-213.3398, device='cuda:0')
episode: 149 training return: tensor(217.5897, device='cuda:0')
episode: 150 training return: tensor(158.1259, device='cuda:0')
episode: 151 training return: tensor(202.0346, device='cuda:0')
epoch: 38 test_true_pfm: 3329.6156104525494 sim_pfm: 256.0889565044005
episode: 152 training return: tensor(163.9159, device='cuda:0')
episode: 153 training return: tensor(241.6614, device='cuda:0')
episode: 154 training return: tensor(220.0382, device='cuda:0')
episode: 155 training return: tensor(-229.7197, device='cuda:0')
epoch: 39 test_true_pfm: 2222.9020626285833 sim_pfm: -11.531215286658457
episode: 156 training return: tensor(164.3818, device='cuda:0')
episode: 157 training return: tensor(-183.7904, device='cuda:0')
episode: 158 training return: tensor(-100.3838, device='cuda:0')
episode: 159 training return: tensor(-269.5348, device='cuda:0')
epoch: 40 test_true_pfm: 3102.8195151968453 sim_pfm: -54.99168529755358
episode: 160 training return: tensor(-0.5582, device='cuda:0')
episode: 161 training return: tensor(-292.7753, device='cuda:0')
episode: 162 training return: tensor(-363.6718, device='cuda:0')
episode: 163 training return: tensor(264.7285, device='cuda:0')
epoch: 41 test_true_pfm: 3257.555698921649 sim_pfm: 13.267478198181683
episode: 164 training return: tensor(240.3643, device='cuda:0')
episode: 165 training return: tensor(256.9806, device='cuda:0')
episode: 166 training return: tensor(233.6484, device='cuda:0')
episode: 167 training return: tensor(209.3419, device='cuda:0')
epoch: 42 test_true_pfm: 3032.866533347717 sim_pfm: 27.08817812858615
episode: 168 training return: tensor(242.1477, device='cuda:0')
episode: 169 training return: tensor(245.3950, device='cuda:0')
episode: 170 training return: tensor(289.3834, device='cuda:0')
episode: 171 training return: tensor(-122.8480, device='cuda:0')
epoch: 43 test_true_pfm: 2303.968780595028 sim_pfm: 193.9905588750844
episode: 172 training return: tensor(-120.5230, device='cuda:0')
episode: 173 training return: tensor(213.3517, device='cuda:0')
episode: 174 training return: tensor(233.2516, device='cuda:0')
episode: 175 training return: tensor(32.7324, device='cuda:0')
epoch: 44 test_true_pfm: 2812.40079374336 sim_pfm: 136.4215233605937
episode: 176 training return: tensor(252.2446, device='cuda:0')
episode: 177 training return: tensor(147.6821, device='cuda:0')
episode: 178 training return: tensor(-226.2651, device='cuda:0')
episode: 179 training return: tensor(-136.1224, device='cuda:0')
epoch: 45 test_true_pfm: 2690.4783756252964 sim_pfm: 244.55904276804844
episode: 180 training return: tensor(265.6725, device='cuda:0')
episode: 181 training return: tensor(-41.4730, device='cuda:0')
episode: 182 training return: tensor(-134.9212, device='cuda:0')
episode: 183 training return: tensor(202.5916, device='cuda:0')
epoch: 46 test_true_pfm: 3372.537784453651 sim_pfm: 200.35672473264276
episode: 184 training return: tensor(205.4256, device='cuda:0')
episode: 185 training return: tensor(-196.3790, device='cuda:0')
episode: 186 training return: tensor(215.6261, device='cuda:0')
episode: 187 training return: tensor(261.6352, device='cuda:0')
epoch: 47 test_true_pfm: 3137.567640968066 sim_pfm: 131.03576741706152
episode: 188 training return: tensor(-265.2275, device='cuda:0')
episode: 189 training return: tensor(-211.5273, device='cuda:0')
episode: 190 training return: tensor(227.6615, device='cuda:0')
episode: 191 training return: tensor(211.8248, device='cuda:0')
epoch: 48 test_true_pfm: 3214.5727267083894 sim_pfm: 230.64721463079331
episode: 192 training return: tensor(-230.6911, device='cuda:0')
episode: 193 training return: tensor(259.5139, device='cuda:0')
episode: 194 training return: tensor(-108.5486, device='cuda:0')
episode: 195 training return: tensor(260.9084, device='cuda:0')
epoch: 49 test_true_pfm: 3185.3881641779394 sim_pfm: 44.574535977842366
episode: 196 training return: tensor(234.5183, device='cuda:0')
episode: 197 training return: tensor(244.7937, device='cuda:0')
episode: 198 training return: tensor(24.7668, device='cuda:0')
episode: 199 training return: tensor(256.1348, device='cuda:0')
epoch: 50 test_true_pfm: 2962.4157207653648 sim_pfm: -3.359709067211952
episode: 200 training return: tensor(271.0678, device='cuda:0')
episode: 201 training return: tensor(51.1003, device='cuda:0')
episode: 202 training return: tensor(-93.7514, device='cuda:0')
episode: 203 training return: tensor(224.0077, device='cuda:0')
epoch: 51 test_true_pfm: 2836.8452694608136 sim_pfm: 61.005998371363
episode: 204 training return: tensor(-230.9601, device='cuda:0')
episode: 205 training return: tensor(149.0633, device='cuda:0')
episode: 206 training return: tensor(-59.3867, device='cuda:0')
episode: 207 training return: tensor(249.0914, device='cuda:0')
epoch: 52 test_true_pfm: 3258.438223946087 sim_pfm: 104.17185122380033
episode: 208 training return: tensor(-109.9749, device='cuda:0')
episode: 209 training return: tensor(19.8050, device='cuda:0')
episode: 210 training return: tensor(-252.4051, device='cuda:0')
episode: 211 training return: tensor(265.0867, device='cuda:0')
epoch: 53 test_true_pfm: 2729.3298420523965 sim_pfm: 214.58123107996653
episode: 212 training return: tensor(259.3304, device='cuda:0')
episode: 213 training return: tensor(234.4179, device='cuda:0')
episode: 214 training return: tensor(235.7796, device='cuda:0')
episode: 215 training return: tensor(74.5808, device='cuda:0')
epoch: 54 test_true_pfm: 2067.172768441574 sim_pfm: 272.59097840319737
episode: 216 training return: tensor(188.3978, device='cuda:0')
episode: 217 training return: tensor(-120.6856, device='cuda:0')
episode: 218 training return: tensor(271.2303, device='cuda:0')
episode: 219 training return: tensor(311.0155, device='cuda:0')
epoch: 55 test_true_pfm: 3353.2048307843506 sim_pfm: 221.38660146117522
episode: 220 training return: tensor(212.3693, device='cuda:0')
episode: 221 training return: tensor(232.0414, device='cuda:0')
episode: 222 training return: tensor(-280.6857, device='cuda:0')
episode: 223 training return: tensor(261.3841, device='cuda:0')
epoch: 56 test_true_pfm: 2658.8052151762654 sim_pfm: -12.734248935633028
episode: 224 training return: tensor(234.1488, device='cuda:0')
episode: 225 training return: tensor(179.6289, device='cuda:0')
episode: 226 training return: tensor(-237.2506, device='cuda:0')
episode: 227 training return: tensor(95.2651, device='cuda:0')
epoch: 57 test_true_pfm: 3391.684326090605 sim_pfm: 230.21748118848578
episode: 228 training return: tensor(47.6722, device='cuda:0')
episode: 229 training return: tensor(-272.0322, device='cuda:0')
episode: 230 training return: tensor(267.6278, device='cuda:0')
episode: 231 training return: tensor(200.8863, device='cuda:0')
epoch: 58 test_true_pfm: 3308.950923490022 sim_pfm: 230.9242816901921
episode: 232 training return: tensor(156.5985, device='cuda:0')
episode: 233 training return: tensor(237.0497, device='cuda:0')
episode: 234 training return: tensor(252.6728, device='cuda:0')
episode: 235 training return: tensor(-99.4081, device='cuda:0')
epoch: 59 test_true_pfm: 3050.917526017751 sim_pfm: 206.64456706715282
episode: 236 training return: tensor(246.3937, device='cuda:0')
episode: 237 training return: tensor(222.1839, device='cuda:0')
episode: 238 training return: tensor(253.1757, device='cuda:0')
episode: 239 training return: tensor(253.0022, device='cuda:0')
epoch: 60 test_true_pfm: 3272.004838585821 sim_pfm: 173.3360681985117
episode: 240 training return: tensor(186.3621, device='cuda:0')
episode: 241 training return: tensor(-131.6538, device='cuda:0')
episode: 242 training return: tensor(263.8751, device='cuda:0')
episode: 243 training return: tensor(222.9538, device='cuda:0')
epoch: 61 test_true_pfm: 2531.125968065255 sim_pfm: 221.29530582442143
episode: 244 training return: tensor(-340.9532, device='cuda:0')
episode: 245 training return: tensor(-202.3825, device='cuda:0')
episode: 246 training return: tensor(-29.5713, device='cuda:0')
episode: 247 training return: tensor(-53.7867, device='cuda:0')
epoch: 62 test_true_pfm: 3312.822022074377 sim_pfm: -62.00544122823825
episode: 248 training return: tensor(185.8198, device='cuda:0')
episode: 249 training return: tensor(-225.2347, device='cuda:0')
episode: 250 training return: tensor(191.9510, device='cuda:0')
episode: 251 training return: tensor(230.3285, device='cuda:0')
epoch: 63 test_true_pfm: 3122.778837410115 sim_pfm: 201.87388585252725
episode: 252 training return: tensor(-254.1225, device='cuda:0')
episode: 253 training return: tensor(202.9020, device='cuda:0')
episode: 254 training return: tensor(220.3488, device='cuda:0')
episode: 255 training return: tensor(-181.2788, device='cuda:0')
epoch: 64 test_true_pfm: 3285.4235525938443 sim_pfm: 224.01326803150974
episode: 256 training return: tensor(-307.3690, device='cuda:0')
episode: 257 training return: tensor(295.5736, device='cuda:0')
episode: 258 training return: tensor(225.9789, device='cuda:0')
episode: 259 training return: tensor(211.7821, device='cuda:0')
epoch: 65 test_true_pfm: 3289.1412095890096 sim_pfm: 124.77627787129798
episode: 260 training return: tensor(225.6396, device='cuda:0')
episode: 261 training return: tensor(230.8584, device='cuda:0')
episode: 262 training return: tensor(207.5926, device='cuda:0')
episode: 263 training return: tensor(217.7253, device='cuda:0')
epoch: 66 test_true_pfm: 2690.4786994719057 sim_pfm: 239.76571652695807
episode: 264 training return: tensor(-6.6534, device='cuda:0')
episode: 265 training return: tensor(253.7341, device='cuda:0')
episode: 266 training return: tensor(204.6087, device='cuda:0')
episode: 267 training return: tensor(218.3672, device='cuda:0')
epoch: 67 test_true_pfm: 3287.6975977854477 sim_pfm: -121.45599436942332
episode: 268 training return: tensor(-229.7334, device='cuda:0')
episode: 269 training return: tensor(210.3874, device='cuda:0')
episode: 270 training return: tensor(240.8598, device='cuda:0')
episode: 271 training return: tensor(232.5541, device='cuda:0')
epoch: 68 test_true_pfm: 3264.2407121301894 sim_pfm: 27.789325549228426
episode: 272 training return: tensor(-8.7835, device='cuda:0')
episode: 273 training return: tensor(262.7087, device='cuda:0')
episode: 274 training return: tensor(257.5360, device='cuda:0')
episode: 275 training return: tensor(-27.5619, device='cuda:0')
epoch: 69 test_true_pfm: 2769.145779062316 sim_pfm: 234.2717457980713
episode: 276 training return: tensor(153.6293, device='cuda:0')
episode: 277 training return: tensor(265.3727, device='cuda:0')
episode: 278 training return: tensor(212.8407, device='cuda:0')
episode: 279 training return: tensor(241.6991, device='cuda:0')
epoch: 70 test_true_pfm: 2619.518681007206 sim_pfm: -95.08035280240195
episode: 280 training return: tensor(204.1293, device='cuda:0')
episode: 281 training return: tensor(272.2640, device='cuda:0')
episode: 282 training return: tensor(-60.3778, device='cuda:0')
episode: 283 training return: tensor(-134.1739, device='cuda:0')
epoch: 71 test_true_pfm: 3265.2723323170717 sim_pfm: 14.323095579990573
episode: 284 training return: tensor(-77.3835, device='cuda:0')
episode: 285 training return: tensor(257.6172, device='cuda:0')
episode: 286 training return: tensor(-250.7268, device='cuda:0')
episode: 287 training return: tensor(135.5460, device='cuda:0')
epoch: 72 test_true_pfm: 2263.321481352766 sim_pfm: -96.8408279294769
episode: 288 training return: tensor(220.4136, device='cuda:0')
episode: 289 training return: tensor(41.9163, device='cuda:0')
episode: 290 training return: tensor(252.5240, device='cuda:0')
episode: 291 training return: tensor(256.9943, device='cuda:0')
epoch: 73 test_true_pfm: 3117.622663703434 sim_pfm: 12.333802011669226
episode: 292 training return: tensor(164.0984, device='cuda:0')
episode: 293 training return: tensor(-6.7241, device='cuda:0')
episode: 294 training return: tensor(207.7361, device='cuda:0')
episode: 295 training return: tensor(-156.0780, device='cuda:0')
epoch: 74 test_true_pfm: 2610.452297747493 sim_pfm: 236.6035551117384
episode: 296 training return: tensor(-246.8921, device='cuda:0')
episode: 297 training return: tensor(-494.5738, device='cuda:0')
episode: 298 training return: tensor(244.9092, device='cuda:0')
episode: 299 training return: tensor(-287.6884, device='cuda:0')
epoch: 75 test_true_pfm: 2573.8538748122432 sim_pfm: 109.46975400106749
episode: 300 training return: tensor(209.0325, device='cuda:0')
episode: 301 training return: tensor(-37.7849, device='cuda:0')
episode: 302 training return: tensor(309.2393, device='cuda:0')
episode: 303 training return: tensor(273.2915, device='cuda:0')
epoch: 76 test_true_pfm: 3334.5442061562935 sim_pfm: -52.72385194461094
episode: 304 training return: tensor(112.7500, device='cuda:0')
episode: 305 training return: tensor(-167.4938, device='cuda:0')
episode: 306 training return: tensor(250.9911, device='cuda:0')
episode: 307 training return: tensor(231.4904, device='cuda:0')
epoch: 77 test_true_pfm: 3061.0052439619135 sim_pfm: 257.3875482632623
episode: 308 training return: tensor(231.3389, device='cuda:0')
episode: 309 training return: tensor(129.9399, device='cuda:0')
episode: 310 training return: tensor(16.0828, device='cuda:0')
episode: 311 training return: tensor(256.3116, device='cuda:0')
epoch: 78 test_true_pfm: 2751.811137649039 sim_pfm: 191.59871491550197
episode: 312 training return: tensor(262.8310, device='cuda:0')
episode: 313 training return: tensor(114.3355, device='cuda:0')
episode: 314 training return: tensor(212.2428, device='cuda:0')
episode: 315 training return: tensor(234.5288, device='cuda:0')
epoch: 79 test_true_pfm: 3332.2207170561724 sim_pfm: 267.8971741326968
episode: 316 training return: tensor(143.4034, device='cuda:0')
episode: 317 training return: tensor(258.8999, device='cuda:0')
episode: 318 training return: tensor(197.9299, device='cuda:0')
episode: 319 training return: tensor(248.0026, device='cuda:0')
epoch: 80 test_true_pfm: 3343.536449951456 sim_pfm: 74.92400066335297
episode: 320 training return: tensor(303.5166, device='cuda:0')
episode: 321 training return: tensor(278.6704, device='cuda:0')
episode: 322 training return: tensor(229.9342, device='cuda:0')
episode: 323 training return: tensor(224.5144, device='cuda:0')
epoch: 81 test_true_pfm: 3184.5039238183485 sim_pfm: 233.29832106275717
episode: 324 training return: tensor(7.0313, device='cuda:0')
episode: 325 training return: tensor(255.0680, device='cuda:0')
episode: 326 training return: tensor(230.7281, device='cuda:0')
episode: 327 training return: tensor(238.6511, device='cuda:0')
epoch: 82 test_true_pfm: 3029.3179886212242 sim_pfm: -9.87844686358585
episode: 328 training return: tensor(271.1611, device='cuda:0')
episode: 329 training return: tensor(237.4622, device='cuda:0')
episode: 330 training return: tensor(162.5627, device='cuda:0')
episode: 331 training return: tensor(201.3666, device='cuda:0')
epoch: 83 test_true_pfm: 3191.7670427428916 sim_pfm: 22.628425998816965
episode: 332 training return: tensor(266.3741, device='cuda:0')
episode: 333 training return: tensor(180.8512, device='cuda:0')
episode: 334 training return: tensor(227.6784, device='cuda:0')
episode: 335 training return: tensor(269.9048, device='cuda:0')
epoch: 84 test_true_pfm: 3373.0540470914907 sim_pfm: 133.82856519522224
episode: 336 training return: tensor(83.7707, device='cuda:0')
episode: 337 training return: tensor(191.6838, device='cuda:0')
episode: 338 training return: tensor(244.4428, device='cuda:0')
episode: 339 training return: tensor(-132.6112, device='cuda:0')
epoch: 85 test_true_pfm: 3367.3349892279907 sim_pfm: 230.9142149150915
episode: 340 training return: tensor(333.1016, device='cuda:0')
episode: 341 training return: tensor(-45.5079, device='cuda:0')
episode: 342 training return: tensor(129.6614, device='cuda:0')
episode: 343 training return: tensor(236.9171, device='cuda:0')
epoch: 86 test_true_pfm: 2676.456339013466 sim_pfm: 275.368943300981
episode: 344 training return: tensor(223.8186, device='cuda:0')
episode: 345 training return: tensor(-168.4618, device='cuda:0')
episode: 346 training return: tensor(238.6902, device='cuda:0')
episode: 347 training return: tensor(242.3700, device='cuda:0')
epoch: 87 test_true_pfm: 3242.700787667922 sim_pfm: 247.0199831913536
episode: 348 training return: tensor(249.5166, device='cuda:0')
episode: 349 training return: tensor(226.5508, device='cuda:0')
episode: 350 training return: tensor(215.0748, device='cuda:0')
episode: 351 training return: tensor(-260.4319, device='cuda:0')
epoch: 88 test_true_pfm: 2783.961662668366 sim_pfm: 100.95173517172225
episode: 352 training return: tensor(230.1951, device='cuda:0')
episode: 353 training return: tensor(-242.1163, device='cuda:0')
episode: 354 training return: tensor(309.9821, device='cuda:0')
episode: 355 training return: tensor(189.6539, device='cuda:0')
epoch: 89 test_true_pfm: 2650.570265742481 sim_pfm: 254.76914246034963
episode: 356 training return: tensor(253.5813, device='cuda:0')
episode: 357 training return: tensor(61.7955, device='cuda:0')
episode: 358 training return: tensor(-238.4439, device='cuda:0')
episode: 359 training return: tensor(176.1311, device='cuda:0')
epoch: 90 test_true_pfm: 3234.5180728627724 sim_pfm: 257.20307956354617
episode: 360 training return: tensor(214.5209, device='cuda:0')
episode: 361 training return: tensor(-69.7081, device='cuda:0')
episode: 362 training return: tensor(281.0956, device='cuda:0')
episode: 363 training return: tensor(179.7736, device='cuda:0')
epoch: 91 test_true_pfm: 3205.8578598403906 sim_pfm: -8.85961659765841
episode: 364 training return: tensor(258.5598, device='cuda:0')
episode: 365 training return: tensor(-105.9374, device='cuda:0')
episode: 366 training return: tensor(200.2274, device='cuda:0')
episode: 367 training return: tensor(238.9139, device='cuda:0')
epoch: 92 test_true_pfm: 3109.7453143242915 sim_pfm: 67.56812869443092
episode: 368 training return: tensor(246.4610, device='cuda:0')
episode: 369 training return: tensor(247.1951, device='cuda:0')
episode: 370 training return: tensor(220.7282, device='cuda:0')
episode: 371 training return: tensor(228.0195, device='cuda:0')
epoch: 93 test_true_pfm: 3301.0670386059214 sim_pfm: 141.92098368836255
episode: 372 training return: tensor(206.8840, device='cuda:0')
episode: 373 training return: tensor(237.2813, device='cuda:0')
episode: 374 training return: tensor(324.3559, device='cuda:0')
episode: 375 training return: tensor(206.7348, device='cuda:0')
epoch: 94 test_true_pfm: 3292.6484794969524 sim_pfm: -53.53945394946883
episode: 376 training return: tensor(272.3673, device='cuda:0')
episode: 377 training return: tensor(237.2860, device='cuda:0')
episode: 378 training return: tensor(228.0252, device='cuda:0')
episode: 379 training return: tensor(-86.4666, device='cuda:0')
epoch: 95 test_true_pfm: 3315.235835806677 sim_pfm: -23.29478476786365
episode: 380 training return: tensor(269.1418, device='cuda:0')
episode: 381 training return: tensor(201.6623, device='cuda:0')
episode: 382 training return: tensor(230.4227, device='cuda:0')
episode: 383 training return: tensor(282.1189, device='cuda:0')
epoch: 96 test_true_pfm: 3335.419989573076 sim_pfm: 75.63405316672288
episode: 384 training return: tensor(131.3992, device='cuda:0')
episode: 385 training return: tensor(192.8196, device='cuda:0')
episode: 386 training return: tensor(208.3624, device='cuda:0')
episode: 387 training return: tensor(270.4297, device='cuda:0')
epoch: 97 test_true_pfm: 3369.7035547981063 sim_pfm: 124.0157598524723
episode: 388 training return: tensor(199.3504, device='cuda:0')
episode: 389 training return: tensor(142.6393, device='cuda:0')
episode: 390 training return: tensor(164.8183, device='cuda:0')
episode: 391 training return: tensor(96.1320, device='cuda:0')
epoch: 98 test_true_pfm: 3043.720573227179 sim_pfm: 105.81618365393176
episode: 392 training return: tensor(224.6454, device='cuda:0')
episode: 393 training return: tensor(-213.6072, device='cuda:0')
episode: 394 training return: tensor(208.2604, device='cuda:0')
episode: 395 training return: tensor(292.4713, device='cuda:0')
epoch: 99 test_true_pfm: 2584.7218327168116 sim_pfm: 174.0503948694289
episode: 396 training return: tensor(255.7123, device='cuda:0')
episode: 397 training return: tensor(-137.3925, device='cuda:0')
episode: 398 training return: tensor(236.8555, device='cuda:0')
episode: 399 training return: tensor(254.3651, device='cuda:0')
epoch: 100 test_true_pfm: 3176.4153014500193 sim_pfm: -105.2341463319802
episode: 400 training return: tensor(245.7426, device='cuda:0')
episode: 401 training return: tensor(-10.7750, device='cuda:0')
episode: 402 training return: tensor(254.3262, device='cuda:0')
episode: 403 training return: tensor(-258.4539, device='cuda:0')
epoch: 101 test_true_pfm: 2817.7428376693947 sim_pfm: 299.95780477571924
episode: 404 training return: tensor(122.9506, device='cuda:0')
episode: 405 training return: tensor(223.6970, device='cuda:0')
episode: 406 training return: tensor(238.6693, device='cuda:0')
episode: 407 training return: tensor(353.7006, device='cuda:0')
epoch: 102 test_true_pfm: 3344.078388522304 sim_pfm: 160.63080415882482
episode: 408 training return: tensor(300.3804, device='cuda:0')
episode: 409 training return: tensor(60.1453, device='cuda:0')
episode: 410 training return: tensor(-212.3676, device='cuda:0')
episode: 411 training return: tensor(34.1297, device='cuda:0')
epoch: 103 test_true_pfm: 2619.546808584036 sim_pfm: 82.07467537584792
episode: 412 training return: tensor(228.6037, device='cuda:0')
episode: 413 training return: tensor(136.0449, device='cuda:0')
episode: 414 training return: tensor(271.8639, device='cuda:0')
episode: 415 training return: tensor(252.7575, device='cuda:0')
epoch: 104 test_true_pfm: 1931.492983117412 sim_pfm: 50.758330046044044
episode: 416 training return: tensor(227.6907, device='cuda:0')
episode: 417 training return: tensor(-345.0616, device='cuda:0')
episode: 418 training return: tensor(192.6582, device='cuda:0')
episode: 419 training return: tensor(228.6212, device='cuda:0')
epoch: 105 test_true_pfm: 3389.9373545844087 sim_pfm: 93.97481591547451
episode: 420 training return: tensor(224.9253, device='cuda:0')
episode: 421 training return: tensor(251.3545, device='cuda:0')
episode: 422 training return: tensor(222.0339, device='cuda:0')
episode: 423 training return: tensor(268.8376, device='cuda:0')
epoch: 106 test_true_pfm: 2611.6487463049352 sim_pfm: 86.96807488928123
episode: 424 training return: tensor(118.1192, device='cuda:0')
episode: 425 training return: tensor(210.5468, device='cuda:0')
episode: 426 training return: tensor(246.6269, device='cuda:0')
episode: 427 training return: tensor(279.3339, device='cuda:0')
epoch: 107 test_true_pfm: 2644.5882772899654 sim_pfm: 242.20423395667845
episode: 428 training return: tensor(204.5330, device='cuda:0')
episode: 429 training return: tensor(-314.0618, device='cuda:0')
episode: 430 training return: tensor(247.4099, device='cuda:0')
episode: 431 training return: tensor(251.7428, device='cuda:0')
epoch: 108 test_true_pfm: 2573.175007146972 sim_pfm: 20.1874082426948
episode: 432 training return: tensor(285.5988, device='cuda:0')
episode: 433 training return: tensor(252.9187, device='cuda:0')
episode: 434 training return: tensor(222.6561, device='cuda:0')
episode: 435 training return: tensor(-117.3206, device='cuda:0')
epoch: 109 test_true_pfm: 2013.6807950411555 sim_pfm: 142.6208852500325
episode: 436 training return: tensor(276.1577, device='cuda:0')
episode: 437 training return: tensor(234.7409, device='cuda:0')
episode: 438 training return: tensor(273.3492, device='cuda:0')
episode: 439 training return: tensor(221.8660, device='cuda:0')
epoch: 110 test_true_pfm: 3349.867035893407 sim_pfm: 267.8813826447974
episode: 440 training return: tensor(247.2143, device='cuda:0')
episode: 441 training return: tensor(-125.9430, device='cuda:0')
episode: 442 training return: tensor(244.4805, device='cuda:0')
episode: 443 training return: tensor(164.7453, device='cuda:0')
epoch: 111 test_true_pfm: 3416.0918518628864 sim_pfm: 272.00814485972904
episode: 444 training return: tensor(234.5714, device='cuda:0')
episode: 445 training return: tensor(228.3579, device='cuda:0')
episode: 446 training return: tensor(278.0862, device='cuda:0')
episode: 447 training return: tensor(224.3916, device='cuda:0')
epoch: 112 test_true_pfm: 2502.876984701544 sim_pfm: 83.95869227646229
episode: 448 training return: tensor(-320.9129, device='cuda:0')
episode: 449 training return: tensor(218.4557, device='cuda:0')
episode: 450 training return: tensor(106.2113, device='cuda:0')
episode: 451 training return: tensor(250.0233, device='cuda:0')
epoch: 113 test_true_pfm: 3366.827835414293 sim_pfm: -72.45794920563155
episode: 452 training return: tensor(264.3689, device='cuda:0')
episode: 453 training return: tensor(-261.7329, device='cuda:0')
episode: 454 training return: tensor(-200.9512, device='cuda:0')
episode: 455 training return: tensor(-364.0108, device='cuda:0')
epoch: 114 test_true_pfm: 3228.1364343870773 sim_pfm: 214.53484874393325
episode: 456 training return: tensor(225.8791, device='cuda:0')
episode: 457 training return: tensor(271.6589, device='cuda:0')
episode: 458 training return: tensor(254.8145, device='cuda:0')
episode: 459 training return: tensor(-64.8798, device='cuda:0')
epoch: 115 test_true_pfm: 3354.8134822313064 sim_pfm: 105.40323419380002
episode: 460 training return: tensor(216.5111, device='cuda:0')
episode: 461 training return: tensor(133.1913, device='cuda:0')
episode: 462 training return: tensor(-5.7780, device='cuda:0')
episode: 463 training return: tensor(217.8080, device='cuda:0')
epoch: 116 test_true_pfm: 2658.43869916205 sim_pfm: 66.63584572688949
episode: 464 training return: tensor(209.6590, device='cuda:0')
episode: 465 training return: tensor(276.3990, device='cuda:0')
episode: 466 training return: tensor(240.6457, device='cuda:0')
episode: 467 training return: tensor(-228.5490, device='cuda:0')
epoch: 117 test_true_pfm: 2268.6622616765017 sim_pfm: -233.49560062628007
episode: 468 training return: tensor(231.2006, device='cuda:0')
episode: 469 training return: tensor(-290.4225, device='cuda:0')
episode: 470 training return: tensor(199.0440, device='cuda:0')
episode: 471 training return: tensor(235.2825, device='cuda:0')
epoch: 118 test_true_pfm: 3358.3568354074846 sim_pfm: 236.28142398347458
episode: 472 training return: tensor(-218.4723, device='cuda:0')
episode: 473 training return: tensor(158.8810, device='cuda:0')
episode: 474 training return: tensor(163.6914, device='cuda:0')
episode: 475 training return: tensor(282.2425, device='cuda:0')
epoch: 119 test_true_pfm: 3168.3816014112 sim_pfm: 261.54785939793027
episode: 476 training return: tensor(-177.1762, device='cuda:0')
episode: 477 training return: tensor(247.6063, device='cuda:0')
episode: 478 training return: tensor(350.1084, device='cuda:0')
episode: 479 training return: tensor(267.6599, device='cuda:0')
epoch: 120 test_true_pfm: 3388.5864351159653 sim_pfm: 91.58832492573613
episode: 480 training return: tensor(48.6572, device='cuda:0')
episode: 481 training return: tensor(-200.5686, device='cuda:0')
episode: 482 training return: tensor(265.1205, device='cuda:0')
episode: 483 training return: tensor(217.1884, device='cuda:0')
epoch: 121 test_true_pfm: 3364.7150419004756 sim_pfm: 93.015504943983
episode: 484 training return: tensor(222.8882, device='cuda:0')
episode: 485 training return: tensor(225.8339, device='cuda:0')
episode: 486 training return: tensor(233.2646, device='cuda:0')
episode: 487 training return: tensor(259.9007, device='cuda:0')
epoch: 122 test_true_pfm: 3369.477258632835 sim_pfm: 261.7805306248968
episode: 488 training return: tensor(202.2281, device='cuda:0')
episode: 489 training return: tensor(216.1343, device='cuda:0')
episode: 490 training return: tensor(245.6960, device='cuda:0')
episode: 491 training return: tensor(246.8433, device='cuda:0')
epoch: 123 test_true_pfm: 3308.17839396178 sim_pfm: 286.94245668216416
episode: 492 training return: tensor(-173.9815, device='cuda:0')
episode: 493 training return: tensor(239.3485, device='cuda:0')
episode: 494 training return: tensor(254.5715, device='cuda:0')
episode: 495 training return: tensor(196.7761, device='cuda:0')
epoch: 124 test_true_pfm: 3328.0719565847517 sim_pfm: 223.91472609971729
episode: 496 training return: tensor(-249.5787, device='cuda:0')
episode: 497 training return: tensor(86.0821, device='cuda:0')
episode: 498 training return: tensor(177.0401, device='cuda:0')
episode: 499 training return: tensor(247.1279, device='cuda:0')
epoch: 125 test_true_pfm: 3321.9235664666835 sim_pfm: 242.02291333220396
episode: 500 training return: tensor(198.9241, device='cuda:0')
episode: 501 training return: tensor(227.5009, device='cuda:0')
episode: 502 training return: tensor(-329.5017, device='cuda:0')
episode: 503 training return: tensor(292.0138, device='cuda:0')
epoch: 126 test_true_pfm: 2887.656596550713 sim_pfm: 276.4410009881637
episode: 504 training return: tensor(278.4767, device='cuda:0')
episode: 505 training return: tensor(216.5404, device='cuda:0')
episode: 506 training return: tensor(155.6392, device='cuda:0')
episode: 507 training return: tensor(239.7932, device='cuda:0')
epoch: 127 test_true_pfm: 3299.9887424060707 sim_pfm: 147.0835637131046
episode: 508 training return: tensor(-165.8863, device='cuda:0')
episode: 509 training return: tensor(-246.2265, device='cuda:0')
episode: 510 training return: tensor(224.9016, device='cuda:0')
episode: 511 training return: tensor(232.2541, device='cuda:0')
epoch: 128 test_true_pfm: 2984.5650815190093 sim_pfm: 206.72875205007344
episode: 512 training return: tensor(-266.3248, device='cuda:0')
episode: 513 training return: tensor(5.5998, device='cuda:0')
episode: 514 training return: tensor(160.9395, device='cuda:0')
episode: 515 training return: tensor(208.9874, device='cuda:0')
epoch: 129 test_true_pfm: 3338.5834502402336 sim_pfm: 118.98174500926204
episode: 516 training return: tensor(210.4930, device='cuda:0')
episode: 517 training return: tensor(298.6759, device='cuda:0')
episode: 518 training return: tensor(207.4100, device='cuda:0')
episode: 519 training return: tensor(249.6909, device='cuda:0')
epoch: 130 test_true_pfm: 2725.7519236361727 sim_pfm: 234.62265094317263
episode: 520 training return: tensor(213.9555, device='cuda:0')
episode: 521 training return: tensor(238.5998, device='cuda:0')
episode: 522 training return: tensor(190.1892, device='cuda:0')
episode: 523 training return: tensor(-259.2525, device='cuda:0')
epoch: 131 test_true_pfm: 3289.1637501444384 sim_pfm: 252.52043697736613
episode: 524 training return: tensor(248.6535, device='cuda:0')
episode: 525 training return: tensor(105.2979, device='cuda:0')
episode: 526 training return: tensor(239.3073, device='cuda:0')
episode: 527 training return: tensor(232.3909, device='cuda:0')
epoch: 132 test_true_pfm: 2504.099513215482 sim_pfm: 143.6942984982743
episode: 528 training return: tensor(244.9487, device='cuda:0')
episode: 529 training return: tensor(206.5876, device='cuda:0')
episode: 530 training return: tensor(256.8286, device='cuda:0')
episode: 531 training return: tensor(271.7895, device='cuda:0')
epoch: 133 test_true_pfm: 2711.2702852858447 sim_pfm: 247.79771053915223
episode: 532 training return: tensor(163.1273, device='cuda:0')
episode: 533 training return: tensor(265.6828, device='cuda:0')
episode: 534 training return: tensor(299.6844, device='cuda:0')
episode: 535 training return: tensor(207.1992, device='cuda:0')
epoch: 134 test_true_pfm: 2680.8129830450443 sim_pfm: 273.27405044817715
episode: 536 training return: tensor(220.7256, device='cuda:0')
episode: 537 training return: tensor(-59.9772, device='cuda:0')
episode: 538 training return: tensor(242.8608, device='cuda:0')
episode: 539 training return: tensor(205.5739, device='cuda:0')
epoch: 135 test_true_pfm: 2918.84004402286 sim_pfm: 21.254948791892577
episode: 540 training return: tensor(267.0045, device='cuda:0')
episode: 541 training return: tensor(261.3891, device='cuda:0')
episode: 542 training return: tensor(195.1819, device='cuda:0')
episode: 543 training return: tensor(43.8777, device='cuda:0')
epoch: 136 test_true_pfm: 2671.4187741653423 sim_pfm: 48.402134620264405
episode: 544 training return: tensor(218.4852, device='cuda:0')
episode: 545 training return: tensor(277.1307, device='cuda:0')
episode: 546 training return: tensor(165.7042, device='cuda:0')
episode: 547 training return: tensor(263.6039, device='cuda:0')
epoch: 137 test_true_pfm: 3378.3604510521163 sim_pfm: 84.01187388262285
episode: 548 training return: tensor(208.5835, device='cuda:0')
episode: 549 training return: tensor(79.5233, device='cuda:0')
episode: 550 training return: tensor(210.3392, device='cuda:0')
episode: 551 training return: tensor(351.6833, device='cuda:0')
epoch: 138 test_true_pfm: 3215.5104699977696 sim_pfm: 54.489532693405636
episode: 552 training return: tensor(305.1977, device='cuda:0')
episode: 553 training return: tensor(249.8482, device='cuda:0')
episode: 554 training return: tensor(288.4323, device='cuda:0')
episode: 555 training return: tensor(269.0952, device='cuda:0')
epoch: 139 test_true_pfm: 3043.109157994498 sim_pfm: 208.70739159496347
episode: 556 training return: tensor(248.3119, device='cuda:0')
episode: 557 training return: tensor(194.8658, device='cuda:0')
episode: 558 training return: tensor(160.9827, device='cuda:0')
episode: 559 training return: tensor(326.4049, device='cuda:0')
epoch: 140 test_true_pfm: 2744.18009176042 sim_pfm: 73.9925887468271
episode: 560 training return: tensor(234.8695, device='cuda:0')
episode: 561 training return: tensor(-83.9112, device='cuda:0')
episode: 562 training return: tensor(218.9097, device='cuda:0')
episode: 563 training return: tensor(163.2715, device='cuda:0')
epoch: 141 test_true_pfm: 3238.040868804363 sim_pfm: 17.819867295678705
episode: 564 training return: tensor(236.1644, device='cuda:0')
episode: 565 training return: tensor(254.5860, device='cuda:0')
episode: 566 training return: tensor(-276.1476, device='cuda:0')
episode: 567 training return: tensor(191.7653, device='cuda:0')
epoch: 142 test_true_pfm: 2575.147027945532 sim_pfm: 251.55014570474546
episode: 568 training return: tensor(241.8626, device='cuda:0')
episode: 569 training return: tensor(-285.1057, device='cuda:0')
episode: 570 training return: tensor(266.6468, device='cuda:0')
episode: 571 training return: tensor(-198.7498, device='cuda:0')
epoch: 143 test_true_pfm: 3218.88035035507 sim_pfm: -8.969317745165123
episode: 572 training return: tensor(209.4779, device='cuda:0')
episode: 573 training return: tensor(245.1896, device='cuda:0')
episode: 574 training return: tensor(213.1290, device='cuda:0')
episode: 575 training return: tensor(268.4178, device='cuda:0')
epoch: 144 test_true_pfm: 3348.3922470661582 sim_pfm: 271.75250250982936
episode: 576 training return: tensor(-273.6559, device='cuda:0')
episode: 577 training return: tensor(246.2525, device='cuda:0')
episode: 578 training return: tensor(209.4577, device='cuda:0')
episode: 579 training return: tensor(352.0119, device='cuda:0')
epoch: 145 test_true_pfm: 2780.190018450244 sim_pfm: 49.28757898290254
episode: 580 training return: tensor(129.0558, device='cuda:0')
episode: 581 training return: tensor(207.4270, device='cuda:0')
episode: 582 training return: tensor(58.3180, device='cuda:0')
episode: 583 training return: tensor(-222.8690, device='cuda:0')
epoch: 146 test_true_pfm: 3278.383975272029 sim_pfm: 103.58192884882253
episode: 584 training return: tensor(-236.4802, device='cuda:0')
episode: 585 training return: tensor(276.8178, device='cuda:0')
episode: 586 training return: tensor(210.9909, device='cuda:0')
episode: 587 training return: tensor(214.5128, device='cuda:0')
epoch: 147 test_true_pfm: 3317.7222531438783 sim_pfm: 259.31877620797604
episode: 588 training return: tensor(238.4116, device='cuda:0')
episode: 589 training return: tensor(-188.9582, device='cuda:0')
episode: 590 training return: tensor(237.9179, device='cuda:0')
episode: 591 training return: tensor(257.3292, device='cuda:0')
epoch: 148 test_true_pfm: 3376.5257574753928 sim_pfm: 287.64671501990716
episode: 592 training return: tensor(-31.5834, device='cuda:0')
episode: 593 training return: tensor(214.6169, device='cuda:0')
episode: 594 training return: tensor(-244.9675, device='cuda:0')
episode: 595 training return: tensor(219.8351, device='cuda:0')
epoch: 149 test_true_pfm: 2929.4129409494567 sim_pfm: 143.6212422225508
episode: 596 training return: tensor(146.2504, device='cuda:0')
episode: 597 training return: tensor(224.0054, device='cuda:0')
episode: 598 training return: tensor(263.7319, device='cuda:0')
episode: 599 training return: tensor(211.0309, device='cuda:0')
epoch: 150 test_true_pfm: 3027.844035983992 sim_pfm: 251.22479403256634
