['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'medium', '--seed', '0', '--data', '100000', '--regu', '0.05']
5113.433082928212
episode: 0 training return: tensor(31.8028, device='cuda:0')
episode: 1 training return: tensor(-21.5944, device='cuda:0')
episode: 2 training return: tensor(260.3371, device='cuda:0')
episode: 3 training return: tensor(65.9195, device='cuda:0')
epoch: 1 test_true_pfm: 5111.86991735316 sim_pfm: 28.02582351847862
episode: 4 training return: tensor(51.4628, device='cuda:0')
episode: 5 training return: tensor(-71.9231, device='cuda:0')
episode: 6 training return: tensor(109.9761, device='cuda:0')
episode: 7 training return: tensor(64.0378, device='cuda:0')
epoch: 2 test_true_pfm: 4942.899131894916 sim_pfm: 28.40661568089854
episode: 8 training return: tensor(-36.1271, device='cuda:0')
episode: 9 training return: tensor(90.2566, device='cuda:0')
episode: 10 training return: tensor(-25.7098, device='cuda:0')
episode: 11 training return: tensor(170.7661, device='cuda:0')
epoch: 3 test_true_pfm: 4997.866619245268 sim_pfm: 115.65032054301507
episode: 12 training return: tensor(125.8255, device='cuda:0')
episode: 13 training return: tensor(96.9043, device='cuda:0')
episode: 14 training return: tensor(78.2719, device='cuda:0')
episode: 15 training return: tensor(-12.6254, device='cuda:0')
epoch: 4 test_true_pfm: 5222.715594539106 sim_pfm: 120.97962671033262
episode: 16 training return: tensor(22.9794, device='cuda:0')
episode: 17 training return: tensor(130.7267, device='cuda:0')
episode: 18 training return: tensor(6.1652, device='cuda:0')
episode: 19 training return: tensor(127.5291, device='cuda:0')
epoch: 5 test_true_pfm: 5225.76926806467 sim_pfm: 185.3841725171272
episode: 20 training return: tensor(230.2414, device='cuda:0')
episode: 21 training return: tensor(-16.5547, device='cuda:0')
episode: 22 training return: tensor(20.6699, device='cuda:0')
episode: 23 training return: tensor(106.7615, device='cuda:0')
epoch: 6 test_true_pfm: 5132.323663319269 sim_pfm: 164.7497200880122
episode: 24 training return: tensor(110.9395, device='cuda:0')
episode: 25 training return: tensor(-116.9291, device='cuda:0')
episode: 26 training return: tensor(84.1988, device='cuda:0')
episode: 27 training return: tensor(161.7430, device='cuda:0')
epoch: 7 test_true_pfm: 5075.646127176952 sim_pfm: 106.03506677002103
episode: 28 training return: tensor(84.6747, device='cuda:0')
episode: 29 training return: tensor(118.5455, device='cuda:0')
episode: 30 training return: tensor(99.3276, device='cuda:0')
episode: 31 training return: tensor(43.0077, device='cuda:0')
epoch: 8 test_true_pfm: 5084.674213025669 sim_pfm: 68.79578576537703
episode: 32 training return: tensor(87.6575, device='cuda:0')
episode: 33 training return: tensor(34.7650, device='cuda:0')
episode: 34 training return: tensor(-30.8623, device='cuda:0')
episode: 35 training return: tensor(123.7167, device='cuda:0')
epoch: 9 test_true_pfm: 5024.343431182314 sim_pfm: 60.11337017754946
episode: 36 training return: tensor(139.1206, device='cuda:0')
episode: 37 training return: tensor(86.5228, device='cuda:0')
episode: 38 training return: tensor(120.0916, device='cuda:0')
episode: 39 training return: tensor(77.6282, device='cuda:0')
epoch: 10 test_true_pfm: 5200.898410010909 sim_pfm: 89.00652905879542
episode: 40 training return: tensor(134.2850, device='cuda:0')
episode: 41 training return: tensor(-7.3529, device='cuda:0')
episode: 42 training return: tensor(48.1553, device='cuda:0')
episode: 43 training return: tensor(78.7039, device='cuda:0')
epoch: 11 test_true_pfm: 5069.9238054650605 sim_pfm: 202.84642602009504
episode: 44 training return: tensor(92.4137, device='cuda:0')
episode: 45 training return: tensor(225.4452, device='cuda:0')
episode: 46 training return: tensor(294.4664, device='cuda:0')
episode: 47 training return: tensor(142.9145, device='cuda:0')
epoch: 12 test_true_pfm: 5094.1440848238735 sim_pfm: 140.505840630581
episode: 48 training return: tensor(150.0321, device='cuda:0')
episode: 49 training return: tensor(13.6882, device='cuda:0')
episode: 50 training return: tensor(84.4797, device='cuda:0')
episode: 51 training return: tensor(101.2918, device='cuda:0')
epoch: 13 test_true_pfm: 5206.835166381272 sim_pfm: 65.88984093330025
episode: 52 training return: tensor(85.8713, device='cuda:0')
episode: 53 training return: tensor(67.0546, device='cuda:0')
episode: 54 training return: tensor(283.9470, device='cuda:0')
episode: 55 training return: tensor(-35.8420, device='cuda:0')
epoch: 14 test_true_pfm: 5239.309363399632 sim_pfm: 141.22845446888823
episode: 56 training return: tensor(119.0333, device='cuda:0')
episode: 57 training return: tensor(56.4760, device='cuda:0')
episode: 58 training return: tensor(7.0667, device='cuda:0')
episode: 59 training return: tensor(170.9771, device='cuda:0')
epoch: 15 test_true_pfm: 5195.180579315948 sim_pfm: 117.89189057564363
episode: 60 training return: tensor(-15.9917, device='cuda:0')
episode: 61 training return: tensor(165.3944, device='cuda:0')
episode: 62 training return: tensor(72.0901, device='cuda:0')
episode: 63 training return: tensor(89.4193, device='cuda:0')
epoch: 16 test_true_pfm: 5193.292596037517 sim_pfm: 158.57285952808647
episode: 64 training return: tensor(191.0348, device='cuda:0')
episode: 65 training return: tensor(-73.2515, device='cuda:0')
episode: 66 training return: tensor(120.4742, device='cuda:0')
episode: 67 training return: tensor(168.1191, device='cuda:0')
epoch: 17 test_true_pfm: 5202.155822390835 sim_pfm: 217.81022260105237
episode: 68 training return: tensor(72.0795, device='cuda:0')
episode: 69 training return: tensor(45.2278, device='cuda:0')
episode: 70 training return: tensor(2.4161, device='cuda:0')
episode: 71 training return: tensor(170.4397, device='cuda:0')
epoch: 18 test_true_pfm: 5215.928967282344 sim_pfm: 178.30136643827427
episode: 72 training return: tensor(151.1581, device='cuda:0')
episode: 73 training return: tensor(87.8245, device='cuda:0')
episode: 74 training return: tensor(86.2151, device='cuda:0')
episode: 75 training return: tensor(19.8351, device='cuda:0')
epoch: 19 test_true_pfm: 5217.023309576786 sim_pfm: 174.02628446838935
episode: 76 training return: tensor(72.7716, device='cuda:0')
episode: 77 training return: tensor(221.8419, device='cuda:0')
episode: 78 training return: tensor(120.4478, device='cuda:0')
episode: 79 training return: tensor(25.0972, device='cuda:0')
epoch: 20 test_true_pfm: 5138.721029727123 sim_pfm: 83.47183468777803
episode: 80 training return: tensor(94.5547, device='cuda:0')
episode: 81 training return: tensor(116.5262, device='cuda:0')
episode: 82 training return: tensor(-39.6810, device='cuda:0')
episode: 83 training return: tensor(68.3655, device='cuda:0')
epoch: 21 test_true_pfm: 5275.695053009658 sim_pfm: 171.1303715967418
episode: 84 training return: tensor(114.8782, device='cuda:0')
episode: 85 training return: tensor(260.2635, device='cuda:0')
episode: 86 training return: tensor(32.5440, device='cuda:0')
episode: 87 training return: tensor(38.5535, device='cuda:0')
epoch: 22 test_true_pfm: 5167.540485111232 sim_pfm: 153.93706513017727
episode: 88 training return: tensor(40.1004, device='cuda:0')
episode: 89 training return: tensor(149.2669, device='cuda:0')
episode: 90 training return: tensor(93.5437, device='cuda:0')
episode: 91 training return: tensor(84.2181, device='cuda:0')
epoch: 23 test_true_pfm: 5245.566341607372 sim_pfm: 102.39891424769303
episode: 92 training return: tensor(119.9899, device='cuda:0')
episode: 93 training return: tensor(37.2188, device='cuda:0')
episode: 94 training return: tensor(-61.0730, device='cuda:0')
episode: 95 training return: tensor(-24.4422, device='cuda:0')
epoch: 24 test_true_pfm: 5230.234319452678 sim_pfm: 183.70800010026628
episode: 96 training return: tensor(60.9696, device='cuda:0')
episode: 97 training return: tensor(51.3281, device='cuda:0')
episode: 98 training return: tensor(47.6627, device='cuda:0')
episode: 99 training return: tensor(252.2267, device='cuda:0')
epoch: 25 test_true_pfm: 5237.097556202814 sim_pfm: 185.21304682970126
episode: 100 training return: tensor(176.3159, device='cuda:0')
episode: 101 training return: tensor(64.8520, device='cuda:0')
episode: 102 training return: tensor(112.9346, device='cuda:0')
episode: 103 training return: tensor(35.9076, device='cuda:0')
epoch: 26 test_true_pfm: 5230.453134788969 sim_pfm: 185.0783524902654
episode: 104 training return: tensor(289.6662, device='cuda:0')
episode: 105 training return: tensor(166.4593, device='cuda:0')
episode: 106 training return: tensor(66.4441, device='cuda:0')
episode: 107 training return: tensor(44.1738, device='cuda:0')
epoch: 27 test_true_pfm: 5159.617984535082 sim_pfm: 264.1272161575034
episode: 108 training return: tensor(84.1984, device='cuda:0')
episode: 109 training return: tensor(124.1372, device='cuda:0')
episode: 110 training return: tensor(19.0293, device='cuda:0')
episode: 111 training return: tensor(206.3065, device='cuda:0')
epoch: 28 test_true_pfm: 5226.485764237413 sim_pfm: 172.00036265454642
episode: 112 training return: tensor(59.4738, device='cuda:0')
episode: 113 training return: tensor(69.4452, device='cuda:0')
episode: 114 training return: tensor(142.6173, device='cuda:0')
episode: 115 training return: tensor(75.6670, device='cuda:0')
epoch: 29 test_true_pfm: 5267.735841384511 sim_pfm: 163.90532462118426
episode: 116 training return: tensor(160.8040, device='cuda:0')
episode: 117 training return: tensor(180.9203, device='cuda:0')
episode: 118 training return: tensor(122.3179, device='cuda:0')
episode: 119 training return: tensor(115.7106, device='cuda:0')
epoch: 30 test_true_pfm: 5187.11136249069 sim_pfm: 163.74832729574214
episode: 120 training return: tensor(196.0231, device='cuda:0')
episode: 121 training return: tensor(97.8757, device='cuda:0')
episode: 122 training return: tensor(83.5035, device='cuda:0')
episode: 123 training return: tensor(81.5270, device='cuda:0')
epoch: 31 test_true_pfm: 5265.516871951515 sim_pfm: 107.99958890035244
episode: 124 training return: tensor(-67.4871, device='cuda:0')
episode: 125 training return: tensor(80.7564, device='cuda:0')
episode: 126 training return: tensor(98.3953, device='cuda:0')
episode: 127 training return: tensor(142.6717, device='cuda:0')
epoch: 32 test_true_pfm: 5184.160155678505 sim_pfm: 165.07987182569923
episode: 128 training return: tensor(-31.1494, device='cuda:0')
episode: 129 training return: tensor(129.7642, device='cuda:0')
episode: 130 training return: tensor(104.2461, device='cuda:0')
episode: 131 training return: tensor(33.7249, device='cuda:0')
epoch: 33 test_true_pfm: 5248.454858340831 sim_pfm: 135.6633348444399
episode: 132 training return: tensor(131.8540, device='cuda:0')
episode: 133 training return: tensor(178.1832, device='cuda:0')
episode: 134 training return: tensor(202.6960, device='cuda:0')
episode: 135 training return: tensor(26.9228, device='cuda:0')
epoch: 34 test_true_pfm: 5274.543818455021 sim_pfm: 178.30377239458417
episode: 136 training return: tensor(89.0073, device='cuda:0')
episode: 137 training return: tensor(106.3933, device='cuda:0')
episode: 138 training return: tensor(141.1540, device='cuda:0')
episode: 139 training return: tensor(-81.9799, device='cuda:0')
epoch: 35 test_true_pfm: 5222.262780853108 sim_pfm: 175.1844756845579
episode: 140 training return: tensor(134.0254, device='cuda:0')
episode: 141 training return: tensor(28.1156, device='cuda:0')
episode: 142 training return: tensor(-38.6372, device='cuda:0')
episode: 143 training return: tensor(170.8357, device='cuda:0')
epoch: 36 test_true_pfm: 5242.086981118894 sim_pfm: 208.34243530846047
episode: 144 training return: tensor(60.8240, device='cuda:0')
episode: 145 training return: tensor(91.6157, device='cuda:0')
episode: 146 training return: tensor(155.2765, device='cuda:0')
episode: 147 training return: tensor(47.7284, device='cuda:0')
epoch: 37 test_true_pfm: 5275.393708488019 sim_pfm: 196.1736185339978
episode: 148 training return: tensor(140.6427, device='cuda:0')
episode: 149 training return: tensor(217.6344, device='cuda:0')
episode: 150 training return: tensor(40.1092, device='cuda:0')
episode: 151 training return: tensor(161.3988, device='cuda:0')
epoch: 38 test_true_pfm: 5283.970905793381 sim_pfm: 178.62148524324098
episode: 152 training return: tensor(118.3494, device='cuda:0')
episode: 153 training return: tensor(124.5350, device='cuda:0')
episode: 154 training return: tensor(176.5046, device='cuda:0')
episode: 155 training return: tensor(-19.2711, device='cuda:0')
epoch: 39 test_true_pfm: 5141.421683531343 sim_pfm: 216.67475437006215
episode: 156 training return: tensor(45.3675, device='cuda:0')
episode: 157 training return: tensor(185.5740, device='cuda:0')
episode: 158 training return: tensor(110.0461, device='cuda:0')
episode: 159 training return: tensor(-75.6118, device='cuda:0')
epoch: 40 test_true_pfm: 5264.871665729163 sim_pfm: 165.1934758941061
episode: 160 training return: tensor(93.9677, device='cuda:0')
episode: 161 training return: tensor(145.8293, device='cuda:0')
episode: 162 training return: tensor(68.7793, device='cuda:0')
episode: 163 training return: tensor(49.6543, device='cuda:0')
epoch: 41 test_true_pfm: 5211.3046365118935 sim_pfm: 206.4602564086963
episode: 164 training return: tensor(154.7939, device='cuda:0')
episode: 165 training return: tensor(143.3303, device='cuda:0')
episode: 166 training return: tensor(226.4302, device='cuda:0')
episode: 167 training return: tensor(187.5300, device='cuda:0')
epoch: 42 test_true_pfm: 5326.070050488928 sim_pfm: 148.1825217687874
episode: 168 training return: tensor(-158.0316, device='cuda:0')
episode: 169 training return: tensor(208.7522, device='cuda:0')
episode: 170 training return: tensor(70.9315, device='cuda:0')
episode: 171 training return: tensor(38.5629, device='cuda:0')
epoch: 43 test_true_pfm: 5197.5774913052455 sim_pfm: 165.65031623774362
episode: 172 training return: tensor(-45.0324, device='cuda:0')
episode: 173 training return: tensor(117.5108, device='cuda:0')
episode: 174 training return: tensor(160.5662, device='cuda:0')
episode: 175 training return: tensor(190.5864, device='cuda:0')
epoch: 44 test_true_pfm: 5249.005979667918 sim_pfm: 226.04887344607656
episode: 176 training return: tensor(163.1501, device='cuda:0')
episode: 177 training return: tensor(120.1016, device='cuda:0')
episode: 178 training return: tensor(113.4750, device='cuda:0')
episode: 179 training return: tensor(221.1015, device='cuda:0')
epoch: 45 test_true_pfm: 5293.296174466638 sim_pfm: 111.01639696845086
episode: 180 training return: tensor(124.8334, device='cuda:0')
episode: 181 training return: tensor(129.5357, device='cuda:0')
episode: 182 training return: tensor(110.8263, device='cuda:0')
episode: 183 training return: tensor(49.9044, device='cuda:0')
epoch: 46 test_true_pfm: 5384.860805845448 sim_pfm: 227.5703028230831
episode: 184 training return: tensor(129.8559, device='cuda:0')
episode: 185 training return: tensor(152.2952, device='cuda:0')
episode: 186 training return: tensor(23.4895, device='cuda:0')
episode: 187 training return: tensor(161.8250, device='cuda:0')
epoch: 47 test_true_pfm: 5237.936808762526 sim_pfm: 135.92506295797648
episode: 188 training return: tensor(130.9560, device='cuda:0')
episode: 189 training return: tensor(54.4411, device='cuda:0')
episode: 190 training return: tensor(147.1560, device='cuda:0')
episode: 191 training return: tensor(109.3525, device='cuda:0')
epoch: 48 test_true_pfm: 5270.731132385182 sim_pfm: 165.9018966454702
episode: 192 training return: tensor(179.4608, device='cuda:0')
episode: 193 training return: tensor(91.6354, device='cuda:0')
episode: 194 training return: tensor(111.5820, device='cuda:0')
episode: 195 training return: tensor(1.5436, device='cuda:0')
epoch: 49 test_true_pfm: 5202.685963750591 sim_pfm: 220.6926917041031
episode: 196 training return: tensor(-67.9610, device='cuda:0')
episode: 197 training return: tensor(123.4065, device='cuda:0')
episode: 198 training return: tensor(223.7135, device='cuda:0')
episode: 199 training return: tensor(12.6588, device='cuda:0')
epoch: 50 test_true_pfm: 5161.144153482813 sim_pfm: 201.7949237493643
episode: 200 training return: tensor(172.6748, device='cuda:0')
episode: 201 training return: tensor(-40.6968, device='cuda:0')
episode: 202 training return: tensor(248.4388, device='cuda:0')
episode: 203 training return: tensor(100.3732, device='cuda:0')
epoch: 51 test_true_pfm: 5266.012938965739 sim_pfm: 116.60185434752687
episode: 204 training return: tensor(108.6412, device='cuda:0')
episode: 205 training return: tensor(162.5551, device='cuda:0')
episode: 206 training return: tensor(136.6747, device='cuda:0')
episode: 207 training return: tensor(106.7903, device='cuda:0')
epoch: 52 test_true_pfm: 5273.844654792138 sim_pfm: 179.8451187942022
episode: 208 training return: tensor(187.2265, device='cuda:0')
episode: 209 training return: tensor(-8.2150, device='cuda:0')
episode: 210 training return: tensor(93.6474, device='cuda:0')
episode: 211 training return: tensor(95.9280, device='cuda:0')
epoch: 53 test_true_pfm: 5288.77217326349 sim_pfm: 199.6385671434109
episode: 212 training return: tensor(281.7311, device='cuda:0')
episode: 213 training return: tensor(117.1558, device='cuda:0')
episode: 214 training return: tensor(180.2533, device='cuda:0')
episode: 215 training return: tensor(185.3835, device='cuda:0')
epoch: 54 test_true_pfm: 5313.142745191309 sim_pfm: 205.64029478650386
episode: 216 training return: tensor(91.4077, device='cuda:0')
episode: 217 training return: tensor(122.2844, device='cuda:0')
episode: 218 training return: tensor(62.0069, device='cuda:0')
episode: 219 training return: tensor(128.5708, device='cuda:0')
epoch: 55 test_true_pfm: 5170.962152747309 sim_pfm: 231.2147990244363
episode: 220 training return: tensor(97.7616, device='cuda:0')
episode: 221 training return: tensor(161.8480, device='cuda:0')
episode: 222 training return: tensor(67.5706, device='cuda:0')
episode: 223 training return: tensor(134.8531, device='cuda:0')
epoch: 56 test_true_pfm: 5194.504284230344 sim_pfm: 111.62021979483931
episode: 224 training return: tensor(307.9689, device='cuda:0')
episode: 225 training return: tensor(157.0350, device='cuda:0')
episode: 226 training return: tensor(52.6577, device='cuda:0')
episode: 227 training return: tensor(98.6265, device='cuda:0')
epoch: 57 test_true_pfm: 5184.08215399249 sim_pfm: 231.75714477353418
episode: 228 training return: tensor(154.2714, device='cuda:0')
episode: 229 training return: tensor(171.7212, device='cuda:0')
episode: 230 training return: tensor(232.9054, device='cuda:0')
episode: 231 training return: tensor(72.6378, device='cuda:0')
epoch: 58 test_true_pfm: 5236.47895550375 sim_pfm: 175.66156080801738
episode: 232 training return: tensor(245.6473, device='cuda:0')
episode: 233 training return: tensor(103.4625, device='cuda:0')
episode: 234 training return: tensor(236.1729, device='cuda:0')
episode: 235 training return: tensor(253.7290, device='cuda:0')
epoch: 59 test_true_pfm: 5307.682120854863 sim_pfm: 205.1420054390328
episode: 236 training return: tensor(227.2532, device='cuda:0')
episode: 237 training return: tensor(142.5809, device='cuda:0')
episode: 238 training return: tensor(128.0510, device='cuda:0')
episode: 239 training return: tensor(91.6081, device='cuda:0')
epoch: 60 test_true_pfm: 5309.250960660537 sim_pfm: 120.20287738159338
episode: 240 training return: tensor(116.6676, device='cuda:0')
episode: 241 training return: tensor(169.8690, device='cuda:0')
episode: 242 training return: tensor(-624.2769, device='cuda:0')
episode: 243 training return: tensor(91.5391, device='cuda:0')
epoch: 61 test_true_pfm: 5186.047703700454 sim_pfm: 248.42668151284064
episode: 244 training return: tensor(242.5844, device='cuda:0')
episode: 245 training return: tensor(25.2564, device='cuda:0')
episode: 246 training return: tensor(250.3991, device='cuda:0')
episode: 247 training return: tensor(209.0883, device='cuda:0')
epoch: 62 test_true_pfm: 5306.115263631494 sim_pfm: 191.19769216132894
episode: 248 training return: tensor(216.9062, device='cuda:0')
episode: 249 training return: tensor(141.1023, device='cuda:0')
episode: 250 training return: tensor(241.3813, device='cuda:0')
episode: 251 training return: tensor(35.1125, device='cuda:0')
epoch: 63 test_true_pfm: 5210.277032320331 sim_pfm: 148.65300403537307
episode: 252 training return: tensor(202.8187, device='cuda:0')
episode: 253 training return: tensor(291.7488, device='cuda:0')
episode: 254 training return: tensor(52.9405, device='cuda:0')
episode: 255 training return: tensor(16.0309, device='cuda:0')
epoch: 64 test_true_pfm: 5315.040917154679 sim_pfm: 198.59639176549777
episode: 256 training return: tensor(264.7856, device='cuda:0')
episode: 257 training return: tensor(165.2966, device='cuda:0')
episode: 258 training return: tensor(157.5934, device='cuda:0')
episode: 259 training return: tensor(177.9509, device='cuda:0')
epoch: 65 test_true_pfm: 5276.605926251349 sim_pfm: 176.0401143047493
episode: 260 training return: tensor(180.8574, device='cuda:0')
episode: 261 training return: tensor(170.4908, device='cuda:0')
episode: 262 training return: tensor(64.4784, device='cuda:0')
episode: 263 training return: tensor(146.1204, device='cuda:0')
epoch: 66 test_true_pfm: 5259.585310197755 sim_pfm: 208.5520507164862
episode: 264 training return: tensor(208.0061, device='cuda:0')
episode: 265 training return: tensor(39.8929, device='cuda:0')
episode: 266 training return: tensor(305.1237, device='cuda:0')
episode: 267 training return: tensor(161.8413, device='cuda:0')
epoch: 67 test_true_pfm: 5262.842785987449 sim_pfm: 248.85309944201921
episode: 268 training return: tensor(164.3871, device='cuda:0')
episode: 269 training return: tensor(112.9327, device='cuda:0')
episode: 270 training return: tensor(-609.2031, device='cuda:0')
episode: 271 training return: tensor(193.7656, device='cuda:0')
epoch: 68 test_true_pfm: 5240.199213625015 sim_pfm: 105.65954386390513
episode: 272 training return: tensor(266.1820, device='cuda:0')
episode: 273 training return: tensor(114.4903, device='cuda:0')
episode: 274 training return: tensor(182.8212, device='cuda:0')
episode: 275 training return: tensor(143.4542, device='cuda:0')
epoch: 69 test_true_pfm: 5224.191845546318 sim_pfm: 181.12022616517302
episode: 276 training return: tensor(114.2000, device='cuda:0')
episode: 277 training return: tensor(133.2771, device='cuda:0')
episode: 278 training return: tensor(217.3849, device='cuda:0')
episode: 279 training return: tensor(173.3852, device='cuda:0')
epoch: 70 test_true_pfm: 5314.397732728253 sim_pfm: 172.5748508190154
episode: 280 training return: tensor(85.4873, device='cuda:0')
episode: 281 training return: tensor(86.4253, device='cuda:0')
episode: 282 training return: tensor(207.6591, device='cuda:0')
episode: 283 training return: tensor(181.0993, device='cuda:0')
epoch: 71 test_true_pfm: 5346.912374107526 sim_pfm: 199.04934505811738
episode: 284 training return: tensor(165.3912, device='cuda:0')
episode: 285 training return: tensor(322.4454, device='cuda:0')
episode: 286 training return: tensor(29.8610, device='cuda:0')
episode: 287 training return: tensor(189.3468, device='cuda:0')
epoch: 72 test_true_pfm: 5321.2996145087645 sim_pfm: 178.16890527144037
episode: 288 training return: tensor(104.8026, device='cuda:0')
episode: 289 training return: tensor(186.0899, device='cuda:0')
episode: 290 training return: tensor(138.5715, device='cuda:0')
episode: 291 training return: tensor(157.5381, device='cuda:0')
epoch: 73 test_true_pfm: 5285.308046374314 sim_pfm: 214.17727936297888
episode: 292 training return: tensor(155.2758, device='cuda:0')
episode: 293 training return: tensor(243.2947, device='cuda:0')
episode: 294 training return: tensor(132.5872, device='cuda:0')
episode: 295 training return: tensor(156.1626, device='cuda:0')
epoch: 74 test_true_pfm: 5255.362490919285 sim_pfm: 223.19378996405672
episode: 296 training return: tensor(233.9827, device='cuda:0')
episode: 297 training return: tensor(183.2160, device='cuda:0')
episode: 298 training return: tensor(37.0429, device='cuda:0')
episode: 299 training return: tensor(182.4478, device='cuda:0')
epoch: 75 test_true_pfm: 5336.475427682457 sim_pfm: 198.2541157132364
episode: 300 training return: tensor(82.0945, device='cuda:0')
episode: 301 training return: tensor(140.2137, device='cuda:0')
episode: 302 training return: tensor(245.6869, device='cuda:0')
episode: 303 training return: tensor(89.5749, device='cuda:0')
epoch: 76 test_true_pfm: 5247.538342245301 sim_pfm: -173.25923474841207
episode: 304 training return: tensor(204.7350, device='cuda:0')
episode: 305 training return: tensor(190.4921, device='cuda:0')
episode: 306 training return: tensor(91.4984, device='cuda:0')
episode: 307 training return: tensor(265.3181, device='cuda:0')
epoch: 77 test_true_pfm: 5279.964318780458 sim_pfm: 138.2565700051588
episode: 308 training return: tensor(103.2809, device='cuda:0')
episode: 309 training return: tensor(152.6798, device='cuda:0')
episode: 310 training return: tensor(209.4438, device='cuda:0')
episode: 311 training return: tensor(170.2776, device='cuda:0')
epoch: 78 test_true_pfm: 5300.434072127325 sim_pfm: 163.81118829310677
episode: 312 training return: tensor(154.9544, device='cuda:0')
episode: 313 training return: tensor(198.5335, device='cuda:0')
episode: 314 training return: tensor(196.0055, device='cuda:0')
episode: 315 training return: tensor(284.6545, device='cuda:0')
epoch: 79 test_true_pfm: 5362.203490540307 sim_pfm: 314.507197643766
episode: 316 training return: tensor(282.1799, device='cuda:0')
episode: 317 training return: tensor(101.1578, device='cuda:0')
episode: 318 training return: tensor(234.8451, device='cuda:0')
episode: 319 training return: tensor(225.3188, device='cuda:0')
epoch: 80 test_true_pfm: 5311.23966403901 sim_pfm: 223.07626511036264
episode: 320 training return: tensor(188.2350, device='cuda:0')
episode: 321 training return: tensor(184.8761, device='cuda:0')
episode: 322 training return: tensor(-26.2169, device='cuda:0')
episode: 323 training return: tensor(164.4191, device='cuda:0')
epoch: 81 test_true_pfm: 5175.665005972603 sim_pfm: 207.77725289263375
episode: 324 training return: tensor(236.2975, device='cuda:0')
episode: 325 training return: tensor(109.8636, device='cuda:0')
episode: 326 training return: tensor(163.8208, device='cuda:0')
episode: 327 training return: tensor(172.8226, device='cuda:0')
epoch: 82 test_true_pfm: 5339.484598043661 sim_pfm: 157.56818837788887
episode: 328 training return: tensor(109.0458, device='cuda:0')
episode: 329 training return: tensor(221.3304, device='cuda:0')
episode: 330 training return: tensor(111.2664, device='cuda:0')
episode: 331 training return: tensor(224.4359, device='cuda:0')
epoch: 83 test_true_pfm: 5265.250234267512 sim_pfm: 285.0330402685407
episode: 332 training return: tensor(168.9228, device='cuda:0')
episode: 333 training return: tensor(186.9448, device='cuda:0')
episode: 334 training return: tensor(213.0340, device='cuda:0')
episode: 335 training return: tensor(152.5545, device='cuda:0')
epoch: 84 test_true_pfm: 5330.4123416606335 sim_pfm: 249.65244882590682
episode: 336 training return: tensor(69.9047, device='cuda:0')
episode: 337 training return: tensor(203.9017, device='cuda:0')
episode: 338 training return: tensor(177.9101, device='cuda:0')
episode: 339 training return: tensor(162.7836, device='cuda:0')
epoch: 85 test_true_pfm: 5318.554128093383 sim_pfm: 171.24353109482522
episode: 340 training return: tensor(236.4419, device='cuda:0')
episode: 341 training return: tensor(212.5801, device='cuda:0')
episode: 342 training return: tensor(16.1373, device='cuda:0')
episode: 343 training return: tensor(136.9195, device='cuda:0')
epoch: 86 test_true_pfm: 5373.238353479386 sim_pfm: 135.49465482035885
episode: 344 training return: tensor(116.7672, device='cuda:0')
episode: 345 training return: tensor(176.7127, device='cuda:0')
episode: 346 training return: tensor(136.2831, device='cuda:0')
episode: 347 training return: tensor(55.3901, device='cuda:0')
epoch: 87 test_true_pfm: 5251.6014543419415 sim_pfm: 231.78629212525752
episode: 348 training return: tensor(174.6328, device='cuda:0')
episode: 349 training return: tensor(115.6526, device='cuda:0')
episode: 350 training return: tensor(77.5250, device='cuda:0')
episode: 351 training return: tensor(147.2868, device='cuda:0')
epoch: 88 test_true_pfm: 5350.351186449599 sim_pfm: 200.34543679699223
episode: 352 training return: tensor(288.4443, device='cuda:0')
episode: 353 training return: tensor(48.3144, device='cuda:0')
episode: 354 training return: tensor(167.3329, device='cuda:0')
episode: 355 training return: tensor(188.3524, device='cuda:0')
epoch: 89 test_true_pfm: 5206.8589582374325 sim_pfm: 170.1410537926519
episode: 356 training return: tensor(147.9250, device='cuda:0')
episode: 357 training return: tensor(105.9877, device='cuda:0')
episode: 358 training return: tensor(145.8888, device='cuda:0')
episode: 359 training return: tensor(-10.9472, device='cuda:0')
epoch: 90 test_true_pfm: 5273.425329794239 sim_pfm: 243.7293125774013
episode: 360 training return: tensor(82.6727, device='cuda:0')
episode: 361 training return: tensor(210.4073, device='cuda:0')
episode: 362 training return: tensor(1.8276, device='cuda:0')
episode: 363 training return: tensor(172.8783, device='cuda:0')
epoch: 91 test_true_pfm: 5288.362819869328 sim_pfm: 200.71405693032042
episode: 364 training return: tensor(125.9782, device='cuda:0')
episode: 365 training return: tensor(229.3735, device='cuda:0')
episode: 366 training return: tensor(163.6424, device='cuda:0')
episode: 367 training return: tensor(346.2521, device='cuda:0')
epoch: 92 test_true_pfm: 5264.215730287001 sim_pfm: 181.2100181003043
episode: 368 training return: tensor(97.7865, device='cuda:0')
episode: 369 training return: tensor(217.3894, device='cuda:0')
episode: 370 training return: tensor(168.8932, device='cuda:0')
episode: 371 training return: tensor(285.1118, device='cuda:0')
epoch: 93 test_true_pfm: 5254.040848331647 sim_pfm: 189.76586253677183
episode: 372 training return: tensor(198.1746, device='cuda:0')
episode: 373 training return: tensor(180.5770, device='cuda:0')
episode: 374 training return: tensor(305.5589, device='cuda:0')
episode: 375 training return: tensor(111.1024, device='cuda:0')
epoch: 94 test_true_pfm: 5301.325265572425 sim_pfm: 237.48446853885739
episode: 376 training return: tensor(110.0253, device='cuda:0')
episode: 377 training return: tensor(154.9087, device='cuda:0')
episode: 378 training return: tensor(243.5452, device='cuda:0')
episode: 379 training return: tensor(77.6524, device='cuda:0')
epoch: 95 test_true_pfm: 5248.511210120512 sim_pfm: 202.77555535817132
episode: 380 training return: tensor(79.1031, device='cuda:0')
episode: 381 training return: tensor(228.0223, device='cuda:0')
episode: 382 training return: tensor(134.6720, device='cuda:0')
episode: 383 training return: tensor(21.1109, device='cuda:0')
epoch: 96 test_true_pfm: 5234.3813608075325 sim_pfm: 213.33443875251882
episode: 384 training return: tensor(91.8306, device='cuda:0')
episode: 385 training return: tensor(35.3753, device='cuda:0')
episode: 386 training return: tensor(154.7050, device='cuda:0')
episode: 387 training return: tensor(21.4675, device='cuda:0')
epoch: 97 test_true_pfm: 5285.776773943017 sim_pfm: 118.07497617946744
episode: 388 training return: tensor(84.3791, device='cuda:0')
episode: 389 training return: tensor(238.1838, device='cuda:0')
episode: 390 training return: tensor(116.3356, device='cuda:0')
episode: 391 training return: tensor(129.0742, device='cuda:0')
epoch: 98 test_true_pfm: 5172.58569786631 sim_pfm: 130.60574194971318
episode: 392 training return: tensor(182.1590, device='cuda:0')
episode: 393 training return: tensor(182.3268, device='cuda:0')
episode: 394 training return: tensor(164.2437, device='cuda:0')
episode: 395 training return: tensor(158.1790, device='cuda:0')
epoch: 99 test_true_pfm: 5256.018310920982 sim_pfm: 274.48079373331467
episode: 396 training return: tensor(110.6641, device='cuda:0')
episode: 397 training return: tensor(196.7912, device='cuda:0')
episode: 398 training return: tensor(141.2440, device='cuda:0')
episode: 399 training return: tensor(176.4676, device='cuda:0')
epoch: 100 test_true_pfm: 5239.90989469648 sim_pfm: 285.4505340299026
episode: 400 training return: tensor(220.6119, device='cuda:0')
episode: 401 training return: tensor(18.5879, device='cuda:0')
episode: 402 training return: tensor(207.1616, device='cuda:0')
episode: 403 training return: tensor(217.4394, device='cuda:0')
epoch: 101 test_true_pfm: 5300.805693240019 sim_pfm: 234.12615728544188
episode: 404 training return: tensor(141.8337, device='cuda:0')
episode: 405 training return: tensor(48.6873, device='cuda:0')
episode: 406 training return: tensor(153.3081, device='cuda:0')
episode: 407 training return: tensor(-11.3208, device='cuda:0')
epoch: 102 test_true_pfm: 5321.846511534695 sim_pfm: 213.88792790010726
episode: 408 training return: tensor(136.2144, device='cuda:0')
episode: 409 training return: tensor(136.2558, device='cuda:0')
episode: 410 training return: tensor(120.6487, device='cuda:0')
episode: 411 training return: tensor(170.6169, device='cuda:0')
epoch: 103 test_true_pfm: 5316.891610971988 sim_pfm: 214.92507613015673
episode: 412 training return: tensor(156.1404, device='cuda:0')
episode: 413 training return: tensor(129.2004, device='cuda:0')
episode: 414 training return: tensor(179.1741, device='cuda:0')
episode: 415 training return: tensor(235.8612, device='cuda:0')
epoch: 104 test_true_pfm: 5300.377680955466 sim_pfm: 238.4974904683962
episode: 416 training return: tensor(99.6072, device='cuda:0')
episode: 417 training return: tensor(-15.9846, device='cuda:0')
episode: 418 training return: tensor(131.7478, device='cuda:0')
episode: 419 training return: tensor(146.0974, device='cuda:0')
epoch: 105 test_true_pfm: 5243.121711492669 sim_pfm: 144.24667052162113
episode: 420 training return: tensor(231.7673, device='cuda:0')
episode: 421 training return: tensor(199.2002, device='cuda:0')
episode: 422 training return: tensor(260.4513, device='cuda:0')
episode: 423 training return: tensor(199.0435, device='cuda:0')
epoch: 106 test_true_pfm: 5288.315187578387 sim_pfm: 242.0390545311384
episode: 424 training return: tensor(30.2814, device='cuda:0')
episode: 425 training return: tensor(110.5986, device='cuda:0')
episode: 426 training return: tensor(115.9351, device='cuda:0')
episode: 427 training return: tensor(193.0649, device='cuda:0')
epoch: 107 test_true_pfm: 5192.771671705376 sim_pfm: 255.87755498379315
episode: 428 training return: tensor(226.7192, device='cuda:0')
episode: 429 training return: tensor(235.9754, device='cuda:0')
episode: 430 training return: tensor(140.0044, device='cuda:0')
episode: 431 training return: tensor(165.1458, device='cuda:0')
epoch: 108 test_true_pfm: 5278.688548735135 sim_pfm: 196.22129784652498
episode: 432 training return: tensor(81.6998, device='cuda:0')
episode: 433 training return: tensor(196.5788, device='cuda:0')
episode: 434 training return: tensor(160.9906, device='cuda:0')
episode: 435 training return: tensor(110.7646, device='cuda:0')
epoch: 109 test_true_pfm: 5175.7929369256335 sim_pfm: 140.97223581197127
episode: 436 training return: tensor(242.5663, device='cuda:0')
episode: 437 training return: tensor(185.6178, device='cuda:0')
episode: 438 training return: tensor(40.8214, device='cuda:0')
episode: 439 training return: tensor(256.6041, device='cuda:0')
epoch: 110 test_true_pfm: 5308.770795528708 sim_pfm: 228.50656207485977
episode: 440 training return: tensor(259.5176, device='cuda:0')
episode: 441 training return: tensor(261.6958, device='cuda:0')
episode: 442 training return: tensor(3.5474, device='cuda:0')
episode: 443 training return: tensor(100.0448, device='cuda:0')
epoch: 111 test_true_pfm: 5351.6872106137425 sim_pfm: 271.4033066460591
episode: 444 training return: tensor(-4.5041, device='cuda:0')
episode: 445 training return: tensor(219.1256, device='cuda:0')
episode: 446 training return: tensor(231.9383, device='cuda:0')
episode: 447 training return: tensor(212.3079, device='cuda:0')
epoch: 112 test_true_pfm: 5262.097009530665 sim_pfm: 231.08356027283784
episode: 448 training return: tensor(55.5895, device='cuda:0')
episode: 449 training return: tensor(68.2088, device='cuda:0')
episode: 450 training return: tensor(240.0663, device='cuda:0')
episode: 451 training return: tensor(123.1642, device='cuda:0')
epoch: 113 test_true_pfm: 5373.4470350557995 sim_pfm: 124.68889345910672
episode: 452 training return: tensor(186.0789, device='cuda:0')
episode: 453 training return: tensor(137.1073, device='cuda:0')
episode: 454 training return: tensor(149.4940, device='cuda:0')
episode: 455 training return: tensor(204.1454, device='cuda:0')
epoch: 114 test_true_pfm: 5287.882541865455 sim_pfm: 171.55624691597768
episode: 456 training return: tensor(55.9688, device='cuda:0')
episode: 457 training return: tensor(144.8035, device='cuda:0')
episode: 458 training return: tensor(174.6448, device='cuda:0')
episode: 459 training return: tensor(225.7640, device='cuda:0')
epoch: 115 test_true_pfm: 5217.743171154224 sim_pfm: 137.08488254545955
episode: 460 training return: tensor(207.8926, device='cuda:0')
episode: 461 training return: tensor(157.1669, device='cuda:0')
episode: 462 training return: tensor(242.3109, device='cuda:0')
episode: 463 training return: tensor(195.2425, device='cuda:0')
epoch: 116 test_true_pfm: 5343.1672496743095 sim_pfm: 133.6951852584025
episode: 464 training return: tensor(51.2910, device='cuda:0')
episode: 465 training return: tensor(143.6175, device='cuda:0')
episode: 466 training return: tensor(101.5691, device='cuda:0')
episode: 467 training return: tensor(96.5846, device='cuda:0')
epoch: 117 test_true_pfm: 5196.4922695744835 sim_pfm: 234.6183645420533
episode: 468 training return: tensor(191.4655, device='cuda:0')
episode: 469 training return: tensor(108.5764, device='cuda:0')
episode: 470 training return: tensor(107.6714, device='cuda:0')
episode: 471 training return: tensor(122.6849, device='cuda:0')
epoch: 118 test_true_pfm: 5275.690279384356 sim_pfm: 243.4674843165461
episode: 472 training return: tensor(253.5979, device='cuda:0')
episode: 473 training return: tensor(209.6178, device='cuda:0')
episode: 474 training return: tensor(75.6868, device='cuda:0')
episode: 475 training return: tensor(194.7438, device='cuda:0')
epoch: 119 test_true_pfm: 5261.2509516668515 sim_pfm: 245.63474862564667
episode: 476 training return: tensor(220.7209, device='cuda:0')
episode: 477 training return: tensor(183.0519, device='cuda:0')
episode: 478 training return: tensor(116.9073, device='cuda:0')
episode: 479 training return: tensor(126.1084, device='cuda:0')
epoch: 120 test_true_pfm: 5318.804340708787 sim_pfm: 271.23746977362316
episode: 480 training return: tensor(102.3469, device='cuda:0')
episode: 481 training return: tensor(185.6617, device='cuda:0')
episode: 482 training return: tensor(115.4711, device='cuda:0')
episode: 483 training return: tensor(231.5818, device='cuda:0')
epoch: 121 test_true_pfm: 5247.764850105436 sim_pfm: 250.3446152562683
episode: 484 training return: tensor(154.7963, device='cuda:0')
episode: 485 training return: tensor(79.1148, device='cuda:0')
episode: 486 training return: tensor(153.0704, device='cuda:0')
episode: 487 training return: tensor(200.0169, device='cuda:0')
epoch: 122 test_true_pfm: 5324.013037182574 sim_pfm: 228.28513371331306
episode: 488 training return: tensor(204.6732, device='cuda:0')
episode: 489 training return: tensor(248.2834, device='cuda:0')
episode: 490 training return: tensor(252.0800, device='cuda:0')
episode: 491 training return: tensor(110.7028, device='cuda:0')
epoch: 123 test_true_pfm: 3688.949748247089 sim_pfm: 178.18460893984107
episode: 492 training return: tensor(215.0308, device='cuda:0')
episode: 493 training return: tensor(327.1181, device='cuda:0')
episode: 494 training return: tensor(129.7179, device='cuda:0')
episode: 495 training return: tensor(143.2407, device='cuda:0')
epoch: 124 test_true_pfm: 5266.4415662298115 sim_pfm: 158.14548267742308
episode: 496 training return: tensor(210.1318, device='cuda:0')
episode: 497 training return: tensor(147.4657, device='cuda:0')
episode: 498 training return: tensor(109.9150, device='cuda:0')
episode: 499 training return: tensor(279.1047, device='cuda:0')
epoch: 125 test_true_pfm: 5295.6651022534825 sim_pfm: 262.74706257578026
episode: 500 training return: tensor(126.9470, device='cuda:0')
episode: 501 training return: tensor(132.1340, device='cuda:0')
episode: 502 training return: tensor(168.2894, device='cuda:0')
episode: 503 training return: tensor(204.2097, device='cuda:0')
epoch: 126 test_true_pfm: 5289.238762982622 sim_pfm: 215.77658142788763
episode: 504 training return: tensor(130.9928, device='cuda:0')
episode: 505 training return: tensor(23.5660, device='cuda:0')
episode: 506 training return: tensor(71.4820, device='cuda:0')
episode: 507 training return: tensor(235.6455, device='cuda:0')
epoch: 127 test_true_pfm: 5295.155782687499 sim_pfm: 287.7111727233666
episode: 508 training return: tensor(122.4539, device='cuda:0')
episode: 509 training return: tensor(108.0079, device='cuda:0')
episode: 510 training return: tensor(169.4873, device='cuda:0')
episode: 511 training return: tensor(270.4056, device='cuda:0')
epoch: 128 test_true_pfm: 5380.42198960564 sim_pfm: 296.8632168302817
episode: 512 training return: tensor(134.3818, device='cuda:0')
episode: 513 training return: tensor(217.5622, device='cuda:0')
episode: 514 training return: tensor(234.9197, device='cuda:0')
episode: 515 training return: tensor(2.4944, device='cuda:0')
epoch: 129 test_true_pfm: 5405.26988592987 sim_pfm: 254.83289476732412
episode: 516 training return: tensor(133.4988, device='cuda:0')
episode: 517 training return: tensor(145.5279, device='cuda:0')
episode: 518 training return: tensor(128.6983, device='cuda:0')
episode: 519 training return: tensor(243.9252, device='cuda:0')
epoch: 130 test_true_pfm: 5281.921731421232 sim_pfm: 199.23900549227255
episode: 520 training return: tensor(131.0712, device='cuda:0')
episode: 521 training return: tensor(273.1190, device='cuda:0')
episode: 522 training return: tensor(285.2047, device='cuda:0')
episode: 523 training return: tensor(172.4946, device='cuda:0')
epoch: 131 test_true_pfm: 5284.18636105234 sim_pfm: 304.8613992157916
episode: 524 training return: tensor(199.8541, device='cuda:0')
episode: 525 training return: tensor(248.2926, device='cuda:0')
episode: 526 training return: tensor(157.3968, device='cuda:0')
episode: 527 training return: tensor(135.1117, device='cuda:0')
epoch: 132 test_true_pfm: 5404.128000885926 sim_pfm: 206.52905857415558
episode: 528 training return: tensor(229.6175, device='cuda:0')
episode: 529 training return: tensor(340.9863, device='cuda:0')
episode: 530 training return: tensor(317.2737, device='cuda:0')
episode: 531 training return: tensor(171.0437, device='cuda:0')
epoch: 133 test_true_pfm: 5193.710636267331 sim_pfm: 284.25842757614254
episode: 532 training return: tensor(138.3334, device='cuda:0')
episode: 533 training return: tensor(90.7130, device='cuda:0')
episode: 534 training return: tensor(129.0142, device='cuda:0')
episode: 535 training return: tensor(128.0885, device='cuda:0')
epoch: 134 test_true_pfm: 5216.288028964703 sim_pfm: 266.7568002753542
episode: 536 training return: tensor(246.6490, device='cuda:0')
episode: 537 training return: tensor(229.2178, device='cuda:0')
episode: 538 training return: tensor(117.1441, device='cuda:0')
episode: 539 training return: tensor(124.2838, device='cuda:0')
epoch: 135 test_true_pfm: 5305.072998433763 sim_pfm: 303.65923790002125
episode: 540 training return: tensor(222.4923, device='cuda:0')
episode: 541 training return: tensor(384.3715, device='cuda:0')
episode: 542 training return: tensor(191.1988, device='cuda:0')
episode: 543 training return: tensor(232.8855, device='cuda:0')
epoch: 136 test_true_pfm: 5238.305887233364 sim_pfm: 298.15941851381405
episode: 544 training return: tensor(187.6505, device='cuda:0')
episode: 545 training return: tensor(304.2930, device='cuda:0')
episode: 546 training return: tensor(223.8895, device='cuda:0')
episode: 547 training return: tensor(141.5658, device='cuda:0')
epoch: 137 test_true_pfm: 5311.140238839781 sim_pfm: 274.2829301733873
episode: 548 training return: tensor(112.5145, device='cuda:0')
episode: 549 training return: tensor(147.1036, device='cuda:0')
episode: 550 training return: tensor(61.8134, device='cuda:0')
episode: 551 training return: tensor(233.7471, device='cuda:0')
epoch: 138 test_true_pfm: 5303.326847407567 sim_pfm: 269.3738320446185
episode: 552 training return: tensor(190.5107, device='cuda:0')
episode: 553 training return: tensor(108.6187, device='cuda:0')
episode: 554 training return: tensor(152.9566, device='cuda:0')
episode: 555 training return: tensor(38.8427, device='cuda:0')
epoch: 139 test_true_pfm: 5244.324716737695 sim_pfm: 192.58033570221355
episode: 556 training return: tensor(187.6715, device='cuda:0')
episode: 557 training return: tensor(163.9352, device='cuda:0')
episode: 558 training return: tensor(62.0443, device='cuda:0')
episode: 559 training return: tensor(58.0263, device='cuda:0')
epoch: 140 test_true_pfm: 5330.660528925723 sim_pfm: 270.3287443293957
episode: 560 training return: tensor(205.0044, device='cuda:0')
episode: 561 training return: tensor(91.3102, device='cuda:0')
episode: 562 training return: tensor(160.5596, device='cuda:0')
episode: 563 training return: tensor(188.5631, device='cuda:0')
epoch: 141 test_true_pfm: 5322.425192857237 sim_pfm: 289.5657755979725
episode: 564 training return: tensor(206.5847, device='cuda:0')
episode: 565 training return: tensor(318.6853, device='cuda:0')
episode: 566 training return: tensor(224.9271, device='cuda:0')
episode: 567 training return: tensor(256.9194, device='cuda:0')
epoch: 142 test_true_pfm: 5315.358688192749 sim_pfm: 173.4091047399852
episode: 568 training return: tensor(280.8987, device='cuda:0')
episode: 569 training return: tensor(161.4478, device='cuda:0')
episode: 570 training return: tensor(229.1645, device='cuda:0')
episode: 571 training return: tensor(214.6920, device='cuda:0')
epoch: 143 test_true_pfm: 5334.953558344137 sim_pfm: 198.1969381807527
episode: 572 training return: tensor(342.7568, device='cuda:0')
episode: 573 training return: tensor(149.0466, device='cuda:0')
episode: 574 training return: tensor(66.4051, device='cuda:0')
episode: 575 training return: tensor(275.1632, device='cuda:0')
epoch: 144 test_true_pfm: 5399.816487957136 sim_pfm: 214.18210392239658
episode: 576 training return: tensor(207.8075, device='cuda:0')
episode: 577 training return: tensor(275.8554, device='cuda:0')
episode: 578 training return: tensor(198.0883, device='cuda:0')
episode: 579 training return: tensor(248.2504, device='cuda:0')
epoch: 145 test_true_pfm: 5394.903603909017 sim_pfm: 290.7775086372858
episode: 580 training return: tensor(201.4940, device='cuda:0')
episode: 581 training return: tensor(98.5220, device='cuda:0')
episode: 582 training return: tensor(144.0452, device='cuda:0')
episode: 583 training return: tensor(258.6505, device='cuda:0')
epoch: 146 test_true_pfm: 5397.750861734506 sim_pfm: 263.2208476703381
episode: 584 training return: tensor(259.6536, device='cuda:0')
episode: 585 training return: tensor(276.1017, device='cuda:0')
episode: 586 training return: tensor(182.6983, device='cuda:0')
episode: 587 training return: tensor(209.8137, device='cuda:0')
epoch: 147 test_true_pfm: 5290.281001106318 sim_pfm: 214.77761882957807
episode: 588 training return: tensor(164.0632, device='cuda:0')
episode: 589 training return: tensor(52.5762, device='cuda:0')
episode: 590 training return: tensor(160.1617, device='cuda:0')
episode: 591 training return: tensor(241.2263, device='cuda:0')
epoch: 148 test_true_pfm: 5311.7806689409335 sim_pfm: 246.55600753156855
episode: 592 training return: tensor(231.1935, device='cuda:0')
episode: 593 training return: tensor(331.2033, device='cuda:0')
episode: 594 training return: tensor(145.8552, device='cuda:0')
episode: 595 training return: tensor(111.6798, device='cuda:0')
epoch: 149 test_true_pfm: 5364.812011960073 sim_pfm: 168.40617258118195
episode: 596 training return: tensor(148.6096, device='cuda:0')
episode: 597 training return: tensor(189.9076, device='cuda:0')
episode: 598 training return: tensor(296.1559, device='cuda:0')
episode: 599 training return: tensor(176.7271, device='cuda:0')
epoch: 150 test_true_pfm: 5347.516976542708 sim_pfm: 204.95043202745728
