['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'baseline', '--traj', 'medium', '--seed', '4']
episode: 0 training return: tensor(-252.2105, device='cuda:0')
episode: 1 training return: tensor(-200.2799, device='cuda:0')
episode: 2 training return: tensor(-160.8481, device='cuda:0')
episode: 3 training return: tensor(-196.3042, device='cuda:0')
epoch: 1 test_true_pfm: 29.712259764109042 sim_pfm: 259.5160445286077
episode: 4 training return: tensor(-194.0865, device='cuda:0')
episode: 5 training return: tensor(-147.4128, device='cuda:0')
episode: 6 training return: tensor(-160.9596, device='cuda:0')
episode: 7 training return: tensor(-132.9861, device='cuda:0')
epoch: 2 test_true_pfm: 14.873138431996583 sim_pfm: -307.8958328709006
episode: 8 training return: tensor(-134.2588, device='cuda:0')
episode: 9 training return: tensor(-232.6305, device='cuda:0')
episode: 10 training return: tensor(-97.7889, device='cuda:0')
episode: 11 training return: tensor(-104.3179, device='cuda:0')
epoch: 3 test_true_pfm: 13.548611332855183 sim_pfm: -525.9558896195143
episode: 12 training return: tensor(-475.7793, device='cuda:0')
episode: 13 training return: tensor(-332.0686, device='cuda:0')
episode: 14 training return: tensor(-311.1876, device='cuda:0')
episode: 15 training return: tensor(-342.3406, device='cuda:0')
epoch: 4 test_true_pfm: -17.927779696994556 sim_pfm: -345.06930868029593
episode: 16 training return: tensor(-522.5330, device='cuda:0')
episode: 17 training return: tensor(-704.2400, device='cuda:0')
episode: 18 training return: tensor(-396.2185, device='cuda:0')
episode: 19 training return: tensor(-196.9095, device='cuda:0')
epoch: 5 test_true_pfm: 18.405355546389053 sim_pfm: -240.09226284408942
episode: 20 training return: tensor(-173.1168, device='cuda:0')
episode: 21 training return: tensor(-210.6457, device='cuda:0')
episode: 22 training return: tensor(-203.1676, device='cuda:0')
episode: 23 training return: tensor(-299.4816, device='cuda:0')
epoch: 6 test_true_pfm: 3.885876583904443 sim_pfm: -239.69930790290238
episode: 24 training return: tensor(-183.8404, device='cuda:0')
episode: 25 training return: tensor(-95.9294, device='cuda:0')
episode: 26 training return: tensor(-228.9610, device='cuda:0')
episode: 27 training return: tensor(-120.0356, device='cuda:0')
epoch: 7 test_true_pfm: 5.791436732601324 sim_pfm: 30.694177676003893
episode: 28 training return: tensor(-76.2113, device='cuda:0')
episode: 29 training return: tensor(181.2879, device='cuda:0')
episode: 30 training return: tensor(170.4325, device='cuda:0')
episode: 31 training return: tensor(31.2511, device='cuda:0')
epoch: 8 test_true_pfm: 48.977161050459344 sim_pfm: 139.69670143261902
episode: 32 training return: tensor(213.0251, device='cuda:0')
episode: 33 training return: tensor(194.5008, device='cuda:0')
episode: 34 training return: tensor(264.7354, device='cuda:0')
episode: 35 training return: tensor(180.7517, device='cuda:0')
epoch: 9 test_true_pfm: 7.837970137857442 sim_pfm: 210.80202427138573
episode: 36 training return: tensor(315.3987, device='cuda:0')
episode: 37 training return: tensor(145.0330, device='cuda:0')
episode: 38 training return: tensor(142.4153, device='cuda:0')
episode: 39 training return: tensor(125.2477, device='cuda:0')
epoch: 10 test_true_pfm: 15.358522956863487 sim_pfm: 18.697575246880298
episode: 40 training return: tensor(80.0667, device='cuda:0')
episode: 41 training return: tensor(-25.5238, device='cuda:0')
episode: 42 training return: tensor(419.7409, device='cuda:0')
episode: 43 training return: tensor(457.3672, device='cuda:0')
epoch: 11 test_true_pfm: 41.582135580050135 sim_pfm: -68.0491649159696
episode: 44 training return: tensor(-69.3597, device='cuda:0')
episode: 45 training return: tensor(248.9856, device='cuda:0')
episode: 46 training return: tensor(92.4923, device='cuda:0')
episode: 47 training return: tensor(42.2771, device='cuda:0')
epoch: 12 test_true_pfm: 124.65367944119387 sim_pfm: 617.9062421107898
episode: 48 training return: tensor(607.7476, device='cuda:0')
episode: 49 training return: tensor(456.1113, device='cuda:0')
episode: 50 training return: tensor(174.6929, device='cuda:0')
episode: 51 training return: tensor(216.4752, device='cuda:0')
epoch: 13 test_true_pfm: 45.670409330808695 sim_pfm: 90.8226860732655
episode: 52 training return: tensor(188.6968, device='cuda:0')
episode: 53 training return: tensor(253.7719, device='cuda:0')
episode: 54 training return: tensor(301.5999, device='cuda:0')
episode: 55 training return: tensor(184.2368, device='cuda:0')
epoch: 14 test_true_pfm: 32.83356217496045 sim_pfm: 51.18395607623388
episode: 56 training return: tensor(547.7125, device='cuda:0')
episode: 57 training return: tensor(523.0153, device='cuda:0')
episode: 58 training return: tensor(515.3536, device='cuda:0')
episode: 59 training return: tensor(599.1760, device='cuda:0')
epoch: 15 test_true_pfm: 81.24899779800405 sim_pfm: 640.0295924557373
episode: 60 training return: tensor(590.7334, device='cuda:0')
episode: 61 training return: tensor(590.5989, device='cuda:0')
episode: 62 training return: tensor(563.0433, device='cuda:0')
episode: 63 training return: tensor(531.0963, device='cuda:0')
epoch: 16 test_true_pfm: 33.65235621663125 sim_pfm: 568.9650439962046
episode: 64 training return: tensor(611.3275, device='cuda:0')
episode: 65 training return: tensor(606.4082, device='cuda:0')
episode: 66 training return: tensor(599.5737, device='cuda:0')
episode: 67 training return: tensor(430.8014, device='cuda:0')
epoch: 17 test_true_pfm: 144.00247423975557 sim_pfm: 475.7102863477077
episode: 68 training return: tensor(474.7440, device='cuda:0')
episode: 69 training return: tensor(564.5563, device='cuda:0')
episode: 70 training return: tensor(617.5941, device='cuda:0')
episode: 71 training return: tensor(572.5101, device='cuda:0')
epoch: 18 test_true_pfm: 25.006508143328666 sim_pfm: 600.0342493815348
episode: 72 training return: tensor(415.8151, device='cuda:0')
episode: 73 training return: tensor(631.9361, device='cuda:0')
episode: 74 training return: tensor(640.5760, device='cuda:0')
episode: 75 training return: tensor(542.1935, device='cuda:0')
epoch: 19 test_true_pfm: 74.17409070329157 sim_pfm: 331.29259370472283
episode: 76 training return: tensor(429.0635, device='cuda:0')
episode: 77 training return: tensor(14.4951, device='cuda:0')
episode: 78 training return: tensor(349.0892, device='cuda:0')
episode: 79 training return: tensor(670.0364, device='cuda:0')
epoch: 20 test_true_pfm: 110.60689978122204 sim_pfm: 654.7489502628334
episode: 80 training return: tensor(659.5211, device='cuda:0')
episode: 81 training return: tensor(551.4929, device='cuda:0')
episode: 82 training return: tensor(674.7376, device='cuda:0')
episode: 83 training return: tensor(678.7654, device='cuda:0')
epoch: 21 test_true_pfm: 157.9646787720312 sim_pfm: 618.3367145344964
episode: 84 training return: tensor(611.1230, device='cuda:0')
episode: 85 training return: tensor(606.3442, device='cuda:0')
episode: 86 training return: tensor(611.1636, device='cuda:0')
episode: 87 training return: tensor(619.0656, device='cuda:0')
epoch: 22 test_true_pfm: 134.74098653288542 sim_pfm: 659.5537844215403
episode: 88 training return: tensor(625.2427, device='cuda:0')
episode: 89 training return: tensor(657.9274, device='cuda:0')
episode: 90 training return: tensor(654.2295, device='cuda:0')
episode: 91 training return: tensor(683.1072, device='cuda:0')
epoch: 23 test_true_pfm: 131.18929480932584 sim_pfm: 717.0514906721189
episode: 92 training return: tensor(672.0776, device='cuda:0')
episode: 93 training return: tensor(679.0142, device='cuda:0')
episode: 94 training return: tensor(663.2316, device='cuda:0')
episode: 95 training return: tensor(659.8298, device='cuda:0')
epoch: 24 test_true_pfm: 140.47983475901228 sim_pfm: 709.6853536125273
episode: 96 training return: tensor(681.4536, device='cuda:0')
episode: 97 training return: tensor(675.7878, device='cuda:0')
episode: 98 training return: tensor(679.4348, device='cuda:0')
episode: 99 training return: tensor(672.7875, device='cuda:0')
epoch: 25 test_true_pfm: 146.2290464163578 sim_pfm: 692.6653367934748
episode: 100 training return: tensor(691.4821, device='cuda:0')
episode: 101 training return: tensor(651.3670, device='cuda:0')
episode: 102 training return: tensor(672.8990, device='cuda:0')
episode: 103 training return: tensor(696.5417, device='cuda:0')
epoch: 26 test_true_pfm: 103.37784280511741 sim_pfm: 694.1004150927067
episode: 104 training return: tensor(684.3466, device='cuda:0')
episode: 105 training return: tensor(705.3077, device='cuda:0')
episode: 106 training return: tensor(698.6647, device='cuda:0')
episode: 107 training return: tensor(704.7130, device='cuda:0')
epoch: 27 test_true_pfm: 110.285548984075 sim_pfm: 709.9045920380391
episode: 108 training return: tensor(703.5788, device='cuda:0')
episode: 109 training return: tensor(683.4479, device='cuda:0')
episode: 110 training return: tensor(686.0943, device='cuda:0')
episode: 111 training return: tensor(696.5052, device='cuda:0')
epoch: 28 test_true_pfm: 110.56321072480401 sim_pfm: 729.3662270920352
episode: 112 training return: tensor(693.0027, device='cuda:0')
episode: 113 training return: tensor(690.5814, device='cuda:0')
episode: 114 training return: tensor(749.1678, device='cuda:0')
episode: 115 training return: tensor(726.9132, device='cuda:0')
epoch: 29 test_true_pfm: 84.58336991810442 sim_pfm: 730.6556701086462
episode: 116 training return: tensor(719.6292, device='cuda:0')
episode: 117 training return: tensor(723.6159, device='cuda:0')
episode: 118 training return: tensor(703.2928, device='cuda:0')
episode: 119 training return: tensor(716.0146, device='cuda:0')
epoch: 30 test_true_pfm: 102.49997495412212 sim_pfm: 718.3174812889658
episode: 120 training return: tensor(724.1713, device='cuda:0')
episode: 121 training return: tensor(685.1393, device='cuda:0')
episode: 122 training return: tensor(665.6160, device='cuda:0')
episode: 123 training return: tensor(731.1807, device='cuda:0')
epoch: 31 test_true_pfm: 85.60647068560536 sim_pfm: 753.1207522977144
episode: 124 training return: tensor(763.8097, device='cuda:0')
episode: 125 training return: tensor(798.3277, device='cuda:0')
episode: 126 training return: tensor(823.2374, device='cuda:0')
episode: 127 training return: tensor(824.7620, device='cuda:0')
epoch: 32 test_true_pfm: 65.92815726365559 sim_pfm: 842.8293848991627
episode: 128 training return: tensor(823.7327, device='cuda:0')
episode: 129 training return: tensor(813.8448, device='cuda:0')
episode: 130 training return: tensor(817.5169, device='cuda:0')
episode: 131 training return: tensor(815.8531, device='cuda:0')
epoch: 33 test_true_pfm: 38.91199881182603 sim_pfm: 829.9731528799981
episode: 132 training return: tensor(819.3502, device='cuda:0')
episode: 133 training return: tensor(815.0485, device='cuda:0')
episode: 134 training return: tensor(828.4664, device='cuda:0')
episode: 135 training return: tensor(787.5319, device='cuda:0')
epoch: 34 test_true_pfm: 79.63560777744763 sim_pfm: 815.5172044971957
episode: 136 training return: tensor(829.5078, device='cuda:0')
episode: 137 training return: tensor(825.4887, device='cuda:0')
episode: 138 training return: tensor(825.9215, device='cuda:0')
episode: 139 training return: tensor(824.9054, device='cuda:0')
epoch: 35 test_true_pfm: 61.0101994710311 sim_pfm: 856.301925015822
episode: 140 training return: tensor(797.2949, device='cuda:0')
episode: 141 training return: tensor(832.2260, device='cuda:0')
episode: 142 training return: tensor(838.0962, device='cuda:0')
episode: 143 training return: tensor(802.1716, device='cuda:0')
epoch: 36 test_true_pfm: 76.36824217482425 sim_pfm: 838.4966311331839
episode: 144 training return: tensor(834.3550, device='cuda:0')
episode: 145 training return: tensor(807.7212, device='cuda:0')
episode: 146 training return: tensor(801.4719, device='cuda:0')
episode: 147 training return: tensor(788.8069, device='cuda:0')
epoch: 37 test_true_pfm: 86.84697498896855 sim_pfm: 836.6965215101488
episode: 148 training return: tensor(823.7440, device='cuda:0')
episode: 149 training return: tensor(830.8316, device='cuda:0')
episode: 150 training return: tensor(813.7025, device='cuda:0')
episode: 151 training return: tensor(844.7815, device='cuda:0')
epoch: 38 test_true_pfm: 92.30959226865205 sim_pfm: 819.039100348996
episode: 152 training return: tensor(804.5898, device='cuda:0')
episode: 153 training return: tensor(811.1370, device='cuda:0')
episode: 154 training return: tensor(753.5377, device='cuda:0')
episode: 155 training return: tensor(815.1730, device='cuda:0')
epoch: 39 test_true_pfm: 93.09257131487388 sim_pfm: 844.3539387917147
episode: 156 training return: tensor(811.7394, device='cuda:0')
episode: 157 training return: tensor(827.2118, device='cuda:0')
episode: 158 training return: tensor(819.8435, device='cuda:0')
episode: 159 training return: tensor(813.0035, device='cuda:0')
epoch: 40 test_true_pfm: 71.02068497564076 sim_pfm: 853.1493307957426
episode: 160 training return: tensor(822.8182, device='cuda:0')
episode: 161 training return: tensor(792.1138, device='cuda:0')
episode: 162 training return: tensor(818.3249, device='cuda:0')
episode: 163 training return: tensor(846.4251, device='cuda:0')
epoch: 41 test_true_pfm: 63.023004953020084 sim_pfm: 822.8430232355837
episode: 164 training return: tensor(847.8522, device='cuda:0')
episode: 165 training return: tensor(796.3622, device='cuda:0')
episode: 166 training return: tensor(797.7807, device='cuda:0')
episode: 167 training return: tensor(732.6096, device='cuda:0')
epoch: 42 test_true_pfm: 77.10499079869479 sim_pfm: 769.0447535534855
episode: 168 training return: tensor(728.0791, device='cuda:0')
episode: 169 training return: tensor(716.9175, device='cuda:0')
episode: 170 training return: tensor(694.2622, device='cuda:0')
episode: 171 training return: tensor(703.3924, device='cuda:0')
epoch: 43 test_true_pfm: 73.80135539189709 sim_pfm: 697.3585584662854
episode: 172 training return: tensor(664.8169, device='cuda:0')
episode: 173 training return: tensor(691.9970, device='cuda:0')
episode: 174 training return: tensor(674.2817, device='cuda:0')
episode: 175 training return: tensor(797.9484, device='cuda:0')
epoch: 44 test_true_pfm: 67.5160779959336 sim_pfm: 689.2450922480784
episode: 176 training return: tensor(622.2357, device='cuda:0')
episode: 177 training return: tensor(680.6001, device='cuda:0')
episode: 178 training return: tensor(768.4378, device='cuda:0')
episode: 179 training return: tensor(796.0216, device='cuda:0')
epoch: 45 test_true_pfm: 71.29669784271108 sim_pfm: 845.6492034629104
episode: 180 training return: tensor(836.5821, device='cuda:0')
episode: 181 training return: tensor(836.8693, device='cuda:0')
episode: 182 training return: tensor(852.8708, device='cuda:0')
episode: 183 training return: tensor(805.5244, device='cuda:0')
epoch: 46 test_true_pfm: 54.85223285008844 sim_pfm: 802.5599543333985
episode: 184 training return: tensor(827.7117, device='cuda:0')
episode: 185 training return: tensor(848.3264, device='cuda:0')
episode: 186 training return: tensor(830.0161, device='cuda:0')
episode: 187 training return: tensor(831.6121, device='cuda:0')
epoch: 47 test_true_pfm: 74.97126268742238 sim_pfm: 859.1454845115543
episode: 188 training return: tensor(838.9003, device='cuda:0')
episode: 189 training return: tensor(837.3812, device='cuda:0')
episode: 190 training return: tensor(834.2074, device='cuda:0')
episode: 191 training return: tensor(811.7054, device='cuda:0')
epoch: 48 test_true_pfm: 70.30606140844779 sim_pfm: 813.374714837037
episode: 192 training return: tensor(813.2353, device='cuda:0')
episode: 193 training return: tensor(807.3219, device='cuda:0')
episode: 194 training return: tensor(826.6429, device='cuda:0')
episode: 195 training return: tensor(820.5752, device='cuda:0')
epoch: 49 test_true_pfm: 76.1704113407689 sim_pfm: 822.9782529968768
episode: 196 training return: tensor(829.3700, device='cuda:0')
episode: 197 training return: tensor(821.8554, device='cuda:0')
episode: 198 training return: tensor(821.1094, device='cuda:0')
episode: 199 training return: tensor(823.9963, device='cuda:0')
epoch: 50 test_true_pfm: 68.8181659636796 sim_pfm: 825.6237272987607
episode: 200 training return: tensor(841.4614, device='cuda:0')
episode: 201 training return: tensor(838.2275, device='cuda:0')
episode: 202 training return: tensor(834.5862, device='cuda:0')
episode: 203 training return: tensor(834.2573, device='cuda:0')
epoch: 51 test_true_pfm: 72.36729653502867 sim_pfm: 845.6442074443272
episode: 204 training return: tensor(846.8577, device='cuda:0')
episode: 205 training return: tensor(837.8030, device='cuda:0')
episode: 206 training return: tensor(825.0166, device='cuda:0')
episode: 207 training return: tensor(840.5903, device='cuda:0')
epoch: 52 test_true_pfm: 84.7083821173183 sim_pfm: 850.4083882399834
episode: 208 training return: tensor(835.5551, device='cuda:0')
episode: 209 training return: tensor(839.2847, device='cuda:0')
episode: 210 training return: tensor(846.2635, device='cuda:0')
episode: 211 training return: tensor(845.4545, device='cuda:0')
epoch: 53 test_true_pfm: 87.97030510967463 sim_pfm: 872.3433736108243
episode: 212 training return: tensor(850.3520, device='cuda:0')
episode: 213 training return: tensor(842.9285, device='cuda:0')
episode: 214 training return: tensor(844.4446, device='cuda:0')
episode: 215 training return: tensor(838.6997, device='cuda:0')
epoch: 54 test_true_pfm: 83.00130899156376 sim_pfm: 851.6325551586226
episode: 216 training return: tensor(840.8001, device='cuda:0')
episode: 217 training return: tensor(836.9841, device='cuda:0')
episode: 218 training return: tensor(834.9884, device='cuda:0')
episode: 219 training return: tensor(851.7419, device='cuda:0')
epoch: 55 test_true_pfm: 75.32924305818445 sim_pfm: 872.339238298405
episode: 220 training return: tensor(825.6014, device='cuda:0')
episode: 221 training return: tensor(836.3608, device='cuda:0')
episode: 222 training return: tensor(834.6354, device='cuda:0')
episode: 223 training return: tensor(832.2912, device='cuda:0')
epoch: 56 test_true_pfm: 63.85029824220224 sim_pfm: 862.6684485376347
episode: 224 training return: tensor(821.0149, device='cuda:0')
episode: 225 training return: tensor(837.2815, device='cuda:0')
episode: 226 training return: tensor(816.7128, device='cuda:0')
episode: 227 training return: tensor(830.4619, device='cuda:0')
epoch: 57 test_true_pfm: 70.10040268066932 sim_pfm: 841.7945687461645
episode: 228 training return: tensor(846.0675, device='cuda:0')
episode: 229 training return: tensor(816.9835, device='cuda:0')
episode: 230 training return: tensor(829.9186, device='cuda:0')
episode: 231 training return: tensor(832.2921, device='cuda:0')
epoch: 58 test_true_pfm: 101.52178341007506 sim_pfm: 852.423929662106
episode: 232 training return: tensor(820.8439, device='cuda:0')
episode: 233 training return: tensor(826.9204, device='cuda:0')
episode: 234 training return: tensor(811.3158, device='cuda:0')
episode: 235 training return: tensor(825.3895, device='cuda:0')
epoch: 59 test_true_pfm: 104.87809760270143 sim_pfm: 850.9396860927343
episode: 236 training return: tensor(804.9385, device='cuda:0')
episode: 237 training return: tensor(832.0108, device='cuda:0')
episode: 238 training return: tensor(795.0410, device='cuda:0')
episode: 239 training return: tensor(789.2740, device='cuda:0')
epoch: 60 test_true_pfm: 90.96591638347127 sim_pfm: 845.7178027411923
episode: 240 training return: tensor(810.8745, device='cuda:0')
episode: 241 training return: tensor(833.0013, device='cuda:0')
episode: 242 training return: tensor(815.8934, device='cuda:0')
episode: 243 training return: tensor(826.0110, device='cuda:0')
epoch: 61 test_true_pfm: 88.07935399327656 sim_pfm: 840.4784803955001
episode: 244 training return: tensor(801.0663, device='cuda:0')
episode: 245 training return: tensor(846.5311, device='cuda:0')
episode: 246 training return: tensor(816.9487, device='cuda:0')
episode: 247 training return: tensor(834.6606, device='cuda:0')
epoch: 62 test_true_pfm: 108.42384500874797 sim_pfm: 854.7471862928476
episode: 248 training return: tensor(787.1571, device='cuda:0')
episode: 249 training return: tensor(790.0129, device='cuda:0')
episode: 250 training return: tensor(808.9703, device='cuda:0')
episode: 251 training return: tensor(825.5578, device='cuda:0')
epoch: 63 test_true_pfm: 85.23455491865991 sim_pfm: 847.7023034796118
episode: 252 training return: tensor(826.2223, device='cuda:0')
episode: 253 training return: tensor(827.3492, device='cuda:0')
episode: 254 training return: tensor(822.9567, device='cuda:0')
episode: 255 training return: tensor(813.8281, device='cuda:0')
epoch: 64 test_true_pfm: 89.93772477300391 sim_pfm: 827.7197127468826
episode: 256 training return: tensor(797.8753, device='cuda:0')
episode: 257 training return: tensor(811.8508, device='cuda:0')
episode: 258 training return: tensor(845.1837, device='cuda:0')
episode: 259 training return: tensor(836.8101, device='cuda:0')
epoch: 65 test_true_pfm: 85.72270293417485 sim_pfm: 867.3444210748654
episode: 260 training return: tensor(845.9607, device='cuda:0')
episode: 261 training return: tensor(853.6760, device='cuda:0')
episode: 262 training return: tensor(831.0530, device='cuda:0')
episode: 263 training return: tensor(840.4889, device='cuda:0')
epoch: 66 test_true_pfm: 77.32499048432702 sim_pfm: 831.1161334536679
episode: 264 training return: tensor(832.7096, device='cuda:0')
episode: 265 training return: tensor(845.2701, device='cuda:0')
episode: 266 training return: tensor(834.6788, device='cuda:0')
episode: 267 training return: tensor(849.5872, device='cuda:0')
epoch: 67 test_true_pfm: 84.00959600687243 sim_pfm: 845.8066838966682
episode: 268 training return: tensor(827.0613, device='cuda:0')
episode: 269 training return: tensor(821.5457, device='cuda:0')
episode: 270 training return: tensor(836.8093, device='cuda:0')
episode: 271 training return: tensor(852.3002, device='cuda:0')
epoch: 68 test_true_pfm: 79.21895887940369 sim_pfm: 859.855785134621
episode: 272 training return: tensor(826.8109, device='cuda:0')
episode: 273 training return: tensor(833.2319, device='cuda:0')
episode: 274 training return: tensor(827.8795, device='cuda:0')
episode: 275 training return: tensor(839.0356, device='cuda:0')
epoch: 69 test_true_pfm: 81.22098435612763 sim_pfm: 851.1498066491913
episode: 276 training return: tensor(816.1219, device='cuda:0')
episode: 277 training return: tensor(806.4515, device='cuda:0')
episode: 278 training return: tensor(804.6936, device='cuda:0')
episode: 279 training return: tensor(746.7049, device='cuda:0')
epoch: 70 test_true_pfm: 66.97089918068298 sim_pfm: 829.304235613253
episode: 280 training return: tensor(759.3264, device='cuda:0')
episode: 281 training return: tensor(785.0773, device='cuda:0')
episode: 282 training return: tensor(765.0020, device='cuda:0')
episode: 283 training return: tensor(775.4956, device='cuda:0')
epoch: 71 test_true_pfm: 61.895593138333744 sim_pfm: 841.167919057724
episode: 284 training return: tensor(801.9151, device='cuda:0')
episode: 285 training return: tensor(784.0105, device='cuda:0')
episode: 286 training return: tensor(776.6505, device='cuda:0')
episode: 287 training return: tensor(807.8354, device='cuda:0')
epoch: 72 test_true_pfm: 59.976732811750914 sim_pfm: 823.322897612187
episode: 288 training return: tensor(786.7186, device='cuda:0')
episode: 289 training return: tensor(754.8253, device='cuda:0')
episode: 290 training return: tensor(791.7363, device='cuda:0')
episode: 291 training return: tensor(819.3095, device='cuda:0')
epoch: 73 test_true_pfm: 91.24300006460247 sim_pfm: 827.7772054406814
episode: 292 training return: tensor(816.1147, device='cuda:0')
episode: 293 training return: tensor(799.9686, device='cuda:0')
episode: 294 training return: tensor(814.4120, device='cuda:0')
episode: 295 training return: tensor(788.7583, device='cuda:0')
epoch: 74 test_true_pfm: 84.78163233271482 sim_pfm: 836.3724733530078
episode: 296 training return: tensor(818.1279, device='cuda:0')
episode: 297 training return: tensor(825.8257, device='cuda:0')
episode: 298 training return: tensor(802.3147, device='cuda:0')
episode: 299 training return: tensor(779.2889, device='cuda:0')
epoch: 75 test_true_pfm: 79.49056198258714 sim_pfm: 831.7718033913756
episode: 300 training return: tensor(814.6316, device='cuda:0')
episode: 301 training return: tensor(801.6962, device='cuda:0')
episode: 302 training return: tensor(814.5811, device='cuda:0')
episode: 303 training return: tensor(813.8591, device='cuda:0')
epoch: 76 test_true_pfm: 88.93388803064337 sim_pfm: 845.70902904924
episode: 304 training return: tensor(823.2110, device='cuda:0')
episode: 305 training return: tensor(829.2330, device='cuda:0')
episode: 306 training return: tensor(817.6372, device='cuda:0')
episode: 307 training return: tensor(818.9158, device='cuda:0')
epoch: 77 test_true_pfm: 97.7173331798958 sim_pfm: 808.1982494904718
episode: 308 training return: tensor(815.5297, device='cuda:0')
episode: 309 training return: tensor(818.6163, device='cuda:0')
episode: 310 training return: tensor(805.7344, device='cuda:0')
episode: 311 training return: tensor(833.2916, device='cuda:0')
epoch: 78 test_true_pfm: 78.07216283064975 sim_pfm: 862.5521594295278
episode: 312 training return: tensor(804.8784, device='cuda:0')
episode: 313 training return: tensor(807.8073, device='cuda:0')
episode: 314 training return: tensor(806.5228, device='cuda:0')
episode: 315 training return: tensor(803.8337, device='cuda:0')
epoch: 79 test_true_pfm: 84.15611176241217 sim_pfm: 830.6567710983567
episode: 316 training return: tensor(827.8337, device='cuda:0')
episode: 317 training return: tensor(792.6002, device='cuda:0')
episode: 318 training return: tensor(779.2859, device='cuda:0')
episode: 319 training return: tensor(773.1646, device='cuda:0')
epoch: 80 test_true_pfm: 62.07270976628072 sim_pfm: 790.3766506416257
episode: 320 training return: tensor(818.4742, device='cuda:0')
episode: 321 training return: tensor(821.9380, device='cuda:0')
episode: 322 training return: tensor(812.2761, device='cuda:0')
episode: 323 training return: tensor(832.6916, device='cuda:0')
epoch: 81 test_true_pfm: 76.83094819241889 sim_pfm: 827.879415064305
episode: 324 training return: tensor(822.1829, device='cuda:0')
episode: 325 training return: tensor(792.5768, device='cuda:0')
episode: 326 training return: tensor(817.2361, device='cuda:0')
episode: 327 training return: tensor(830.4230, device='cuda:0')
epoch: 82 test_true_pfm: 57.91679866613879 sim_pfm: 774.7618017510395
episode: 328 training return: tensor(798.6760, device='cuda:0')
episode: 329 training return: tensor(837.0777, device='cuda:0')
episode: 330 training return: tensor(841.8175, device='cuda:0')
episode: 331 training return: tensor(486.4608, device='cuda:0')
epoch: 83 test_true_pfm: 69.84109964838129 sim_pfm: 864.9918002091348
episode: 332 training return: tensor(829.6976, device='cuda:0')
episode: 333 training return: tensor(-72.5264, device='cuda:0')
episode: 334 training return: tensor(770.5889, device='cuda:0')
episode: 335 training return: tensor(542.8795, device='cuda:0')
epoch: 84 test_true_pfm: 56.49258624443072 sim_pfm: 389.56557202917173
episode: 336 training return: tensor(167.3488, device='cuda:0')
episode: 337 training return: tensor(842.1206, device='cuda:0')
episode: 338 training return: tensor(771.2268, device='cuda:0')
episode: 339 training return: tensor(826.7579, device='cuda:0')
epoch: 85 test_true_pfm: 86.77222401412891 sim_pfm: 860.526001412142
episode: 340 training return: tensor(826.7413, device='cuda:0')
episode: 341 training return: tensor(813.9009, device='cuda:0')
episode: 342 training return: tensor(815.0574, device='cuda:0')
episode: 343 training return: tensor(810.3690, device='cuda:0')
epoch: 86 test_true_pfm: 83.01593669090315 sim_pfm: 833.0647207736969
episode: 344 training return: tensor(829.6448, device='cuda:0')
episode: 345 training return: tensor(790.7705, device='cuda:0')
episode: 346 training return: tensor(799.9816, device='cuda:0')
episode: 347 training return: tensor(828.2844, device='cuda:0')
epoch: 87 test_true_pfm: 87.13420906345162 sim_pfm: 853.7748750832397
episode: 348 training return: tensor(818.6400, device='cuda:0')
episode: 349 training return: tensor(811.2721, device='cuda:0')
episode: 350 training return: tensor(799.5116, device='cuda:0')
episode: 351 training return: tensor(808.3787, device='cuda:0')
epoch: 88 test_true_pfm: 91.51608788514213 sim_pfm: 819.3463828939945
episode: 352 training return: tensor(795.7186, device='cuda:0')
episode: 353 training return: tensor(805.3970, device='cuda:0')
episode: 354 training return: tensor(786.4434, device='cuda:0')
episode: 355 training return: tensor(780.7336, device='cuda:0')
epoch: 89 test_true_pfm: 93.20896580945187 sim_pfm: 826.5292361743807
episode: 356 training return: tensor(792.0806, device='cuda:0')
episode: 357 training return: tensor(805.8762, device='cuda:0')
episode: 358 training return: tensor(826.1948, device='cuda:0')
episode: 359 training return: tensor(801.6070, device='cuda:0')
epoch: 90 test_true_pfm: 76.57254991568752 sim_pfm: 817.864555531554
episode: 360 training return: tensor(804.4694, device='cuda:0')
episode: 361 training return: tensor(826.7203, device='cuda:0')
episode: 362 training return: tensor(807.0891, device='cuda:0')
episode: 363 training return: tensor(811.7625, device='cuda:0')
epoch: 91 test_true_pfm: 84.78803526146211 sim_pfm: 855.5688516661991
episode: 364 training return: tensor(822.8911, device='cuda:0')
episode: 365 training return: tensor(799.1494, device='cuda:0')
episode: 366 training return: tensor(815.3687, device='cuda:0')
episode: 367 training return: tensor(820.3906, device='cuda:0')
epoch: 92 test_true_pfm: 76.53481045397261 sim_pfm: 839.7506143761799
episode: 368 training return: tensor(816.7098, device='cuda:0')
episode: 369 training return: tensor(796.4017, device='cuda:0')
episode: 370 training return: tensor(828.2417, device='cuda:0')
episode: 371 training return: tensor(825.8270, device='cuda:0')
epoch: 93 test_true_pfm: 83.08253667958216 sim_pfm: 838.520378460642
episode: 372 training return: tensor(824.5531, device='cuda:0')
episode: 373 training return: tensor(816.2811, device='cuda:0')
episode: 374 training return: tensor(797.7112, device='cuda:0')
episode: 375 training return: tensor(807.4765, device='cuda:0')
epoch: 94 test_true_pfm: 90.22888586155362 sim_pfm: 830.8456566496286
episode: 376 training return: tensor(813.8621, device='cuda:0')
episode: 377 training return: tensor(820.1605, device='cuda:0')
episode: 378 training return: tensor(817.5927, device='cuda:0')
episode: 379 training return: tensor(816.1338, device='cuda:0')
epoch: 95 test_true_pfm: 88.1693463475072 sim_pfm: 833.8002948178677
episode: 380 training return: tensor(819.9640, device='cuda:0')
episode: 381 training return: tensor(818.3739, device='cuda:0')
episode: 382 training return: tensor(821.5685, device='cuda:0')
episode: 383 training return: tensor(849.7452, device='cuda:0')
epoch: 96 test_true_pfm: 91.97313611194821 sim_pfm: 864.5770955684595
episode: 384 training return: tensor(843.5484, device='cuda:0')
episode: 385 training return: tensor(820.2095, device='cuda:0')
episode: 386 training return: tensor(830.5085, device='cuda:0')
episode: 387 training return: tensor(830.1407, device='cuda:0')
epoch: 97 test_true_pfm: 93.74512883502044 sim_pfm: 853.7685399186215
episode: 388 training return: tensor(838.5770, device='cuda:0')
episode: 389 training return: tensor(833.2692, device='cuda:0')
episode: 390 training return: tensor(841.7203, device='cuda:0')
episode: 391 training return: tensor(841.9586, device='cuda:0')
epoch: 98 test_true_pfm: 89.34302770445554 sim_pfm: 853.7216066374443
episode: 392 training return: tensor(821.7443, device='cuda:0')
episode: 393 training return: tensor(831.9673, device='cuda:0')
episode: 394 training return: tensor(828.3323, device='cuda:0')
episode: 395 training return: tensor(815.5472, device='cuda:0')
epoch: 99 test_true_pfm: 101.40693158947224 sim_pfm: 850.3270946739591
episode: 396 training return: tensor(834.0074, device='cuda:0')
episode: 397 training return: tensor(838.9105, device='cuda:0')
episode: 398 training return: tensor(815.3893, device='cuda:0')
episode: 399 training return: tensor(840.4256, device='cuda:0')
epoch: 100 test_true_pfm: 89.20610943503604 sim_pfm: 841.8471165967173
episode: 400 training return: tensor(833.1879, device='cuda:0')
episode: 401 training return: tensor(838.6271, device='cuda:0')
episode: 402 training return: tensor(827.6906, device='cuda:0')
episode: 403 training return: tensor(814.9445, device='cuda:0')
epoch: 101 test_true_pfm: 98.79715087826887 sim_pfm: 851.1192874549888
episode: 404 training return: tensor(818.9208, device='cuda:0')
episode: 405 training return: tensor(829.6135, device='cuda:0')
episode: 406 training return: tensor(827.4774, device='cuda:0')
episode: 407 training return: tensor(814.2695, device='cuda:0')
epoch: 102 test_true_pfm: 98.59233479527107 sim_pfm: 856.3497188546229
episode: 408 training return: tensor(831.1688, device='cuda:0')
episode: 409 training return: tensor(821.9635, device='cuda:0')
episode: 410 training return: tensor(827.0869, device='cuda:0')
episode: 411 training return: tensor(816.5035, device='cuda:0')
epoch: 103 test_true_pfm: 93.34506759791677 sim_pfm: 851.5994176482782
episode: 412 training return: tensor(830.3094, device='cuda:0')
episode: 413 training return: tensor(816.1367, device='cuda:0')
episode: 414 training return: tensor(811.9670, device='cuda:0')
episode: 415 training return: tensor(812.0692, device='cuda:0')
epoch: 104 test_true_pfm: 88.13352034517234 sim_pfm: 834.0803552727914
episode: 416 training return: tensor(827.4523, device='cuda:0')
episode: 417 training return: tensor(830.4896, device='cuda:0')
episode: 418 training return: tensor(823.1924, device='cuda:0')
episode: 419 training return: tensor(817.1957, device='cuda:0')
epoch: 105 test_true_pfm: 93.5122423147283 sim_pfm: 845.3353326858487
episode: 420 training return: tensor(817.2872, device='cuda:0')
episode: 421 training return: tensor(811.7828, device='cuda:0')
episode: 422 training return: tensor(812.6651, device='cuda:0')
episode: 423 training return: tensor(836.6376, device='cuda:0')
epoch: 106 test_true_pfm: 94.50413503898217 sim_pfm: 849.6626341643511
episode: 424 training return: tensor(833.5416, device='cuda:0')
episode: 425 training return: tensor(818.3257, device='cuda:0')
episode: 426 training return: tensor(833.0641, device='cuda:0')
episode: 427 training return: tensor(824.4888, device='cuda:0')
epoch: 107 test_true_pfm: 104.25598865653717 sim_pfm: 853.5593978713499
episode: 428 training return: tensor(835.4252, device='cuda:0')
episode: 429 training return: tensor(838.3333, device='cuda:0')
episode: 430 training return: tensor(839.3454, device='cuda:0')
episode: 431 training return: tensor(817.1469, device='cuda:0')
epoch: 108 test_true_pfm: 97.79727232699372 sim_pfm: 858.6308006233536
episode: 432 training return: tensor(836.8986, device='cuda:0')
episode: 433 training return: tensor(831.2875, device='cuda:0')
episode: 434 training return: tensor(840.3730, device='cuda:0')
episode: 435 training return: tensor(824.6460, device='cuda:0')
epoch: 109 test_true_pfm: 84.94782105609784 sim_pfm: 853.2206581167877
episode: 436 training return: tensor(813.5131, device='cuda:0')
episode: 437 training return: tensor(818.0044, device='cuda:0')
episode: 438 training return: tensor(809.3428, device='cuda:0')
episode: 439 training return: tensor(813.4144, device='cuda:0')
epoch: 110 test_true_pfm: 90.31371252950123 sim_pfm: 841.2130242785672
episode: 440 training return: tensor(808.6594, device='cuda:0')
episode: 441 training return: tensor(812.7620, device='cuda:0')
episode: 442 training return: tensor(818.4960, device='cuda:0')
episode: 443 training return: tensor(791.2545, device='cuda:0')
epoch: 111 test_true_pfm: 74.80481769693327 sim_pfm: 839.8012259963783
episode: 444 training return: tensor(826.1398, device='cuda:0')
episode: 445 training return: tensor(834.9368, device='cuda:0')
episode: 446 training return: tensor(803.3333, device='cuda:0')
episode: 447 training return: tensor(802.7272, device='cuda:0')
epoch: 112 test_true_pfm: 97.91185004165666 sim_pfm: 846.9591568653472
episode: 448 training return: tensor(836.2157, device='cuda:0')
episode: 449 training return: tensor(833.9292, device='cuda:0')
episode: 450 training return: tensor(823.5593, device='cuda:0')
episode: 451 training return: tensor(824.4665, device='cuda:0')
epoch: 113 test_true_pfm: 94.17879990245876 sim_pfm: 855.0666033498943
episode: 452 training return: tensor(822.5469, device='cuda:0')
episode: 453 training return: tensor(823.7153, device='cuda:0')
episode: 454 training return: tensor(807.9157, device='cuda:0')
episode: 455 training return: tensor(819.6082, device='cuda:0')
epoch: 114 test_true_pfm: 99.76365123410729 sim_pfm: 847.4919080344029
episode: 456 training return: tensor(830.7549, device='cuda:0')
episode: 457 training return: tensor(842.2475, device='cuda:0')
episode: 458 training return: tensor(832.5116, device='cuda:0')
episode: 459 training return: tensor(813.9069, device='cuda:0')
epoch: 115 test_true_pfm: 92.09446978397727 sim_pfm: 854.377661219798
episode: 460 training return: tensor(815.0241, device='cuda:0')
episode: 461 training return: tensor(814.8723, device='cuda:0')
episode: 462 training return: tensor(817.4102, device='cuda:0')
episode: 463 training return: tensor(788.9388, device='cuda:0')
epoch: 116 test_true_pfm: 81.41961309996209 sim_pfm: 845.628522242792
episode: 464 training return: tensor(823.7017, device='cuda:0')
episode: 465 training return: tensor(821.7814, device='cuda:0')
episode: 466 training return: tensor(810.8979, device='cuda:0')
episode: 467 training return: tensor(836.8460, device='cuda:0')
epoch: 117 test_true_pfm: 88.81573890777051 sim_pfm: 862.5612117067445
episode: 468 training return: tensor(817.1331, device='cuda:0')
episode: 469 training return: tensor(830.3016, device='cuda:0')
episode: 470 training return: tensor(827.2114, device='cuda:0')
episode: 471 training return: tensor(808.4103, device='cuda:0')
epoch: 118 test_true_pfm: 82.3516925211778 sim_pfm: 842.0750508643687
episode: 472 training return: tensor(815.2057, device='cuda:0')
episode: 473 training return: tensor(814.6188, device='cuda:0')
episode: 474 training return: tensor(821.7643, device='cuda:0')
episode: 475 training return: tensor(829.2037, device='cuda:0')
epoch: 119 test_true_pfm: 77.65738321069504 sim_pfm: 843.8209796365351
episode: 476 training return: tensor(832.4774, device='cuda:0')
episode: 477 training return: tensor(830.9701, device='cuda:0')
episode: 478 training return: tensor(826.4788, device='cuda:0')
episode: 479 training return: tensor(833.4261, device='cuda:0')
epoch: 120 test_true_pfm: 94.64601646226203 sim_pfm: 853.987512784428
episode: 480 training return: tensor(824.7124, device='cuda:0')
episode: 481 training return: tensor(840.5344, device='cuda:0')
episode: 482 training return: tensor(808.1058, device='cuda:0')
episode: 483 training return: tensor(811.9982, device='cuda:0')
epoch: 121 test_true_pfm: 90.36865225672153 sim_pfm: 855.3088212262373
episode: 484 training return: tensor(804.1357, device='cuda:0')
episode: 485 training return: tensor(818.5236, device='cuda:0')
episode: 486 training return: tensor(805.2524, device='cuda:0')
episode: 487 training return: tensor(808.0152, device='cuda:0')
epoch: 122 test_true_pfm: 90.04603428421235 sim_pfm: 843.965287464857
episode: 488 training return: tensor(813.7379, device='cuda:0')
episode: 489 training return: tensor(815.6890, device='cuda:0')
episode: 490 training return: tensor(799.7861, device='cuda:0')
episode: 491 training return: tensor(826.4714, device='cuda:0')
epoch: 123 test_true_pfm: 88.36897069419307 sim_pfm: 839.4912635117653
episode: 492 training return: tensor(819.1826, device='cuda:0')
episode: 493 training return: tensor(814.5428, device='cuda:0')
episode: 494 training return: tensor(832.0989, device='cuda:0')
episode: 495 training return: tensor(822.9940, device='cuda:0')
epoch: 124 test_true_pfm: 73.32171091024767 sim_pfm: 831.8509059385397
episode: 496 training return: tensor(815.0618, device='cuda:0')
episode: 497 training return: tensor(786.3917, device='cuda:0')
episode: 498 training return: tensor(808.3379, device='cuda:0')
episode: 499 training return: tensor(823.5979, device='cuda:0')
epoch: 125 test_true_pfm: 74.63717101504392 sim_pfm: 828.163917825889
episode: 500 training return: tensor(829.1105, device='cuda:0')
episode: 501 training return: tensor(836.9513, device='cuda:0')
episode: 502 training return: tensor(834.7175, device='cuda:0')
episode: 503 training return: tensor(803.6212, device='cuda:0')
epoch: 126 test_true_pfm: 81.22017614711277 sim_pfm: 838.2661082773935
episode: 504 training return: tensor(815.3116, device='cuda:0')
episode: 505 training return: tensor(841.2297, device='cuda:0')
episode: 506 training return: tensor(800.7311, device='cuda:0')
episode: 507 training return: tensor(819.4691, device='cuda:0')
epoch: 127 test_true_pfm: 83.52246922605289 sim_pfm: 846.0946500679711
episode: 508 training return: tensor(801.0713, device='cuda:0')
episode: 509 training return: tensor(796.2186, device='cuda:0')
episode: 510 training return: tensor(815.3953, device='cuda:0')
episode: 511 training return: tensor(829.7343, device='cuda:0')
epoch: 128 test_true_pfm: 67.97329579524401 sim_pfm: 823.1780934413895
episode: 512 training return: tensor(826.6817, device='cuda:0')
episode: 513 training return: tensor(804.8643, device='cuda:0')
episode: 514 training return: tensor(815.3404, device='cuda:0')
episode: 515 training return: tensor(827.3772, device='cuda:0')
epoch: 129 test_true_pfm: 83.73275740974289 sim_pfm: 837.2945678429329
episode: 516 training return: tensor(799.1653, device='cuda:0')
episode: 517 training return: tensor(812.4844, device='cuda:0')
episode: 518 training return: tensor(811.9694, device='cuda:0')
episode: 519 training return: tensor(813.5683, device='cuda:0')
epoch: 130 test_true_pfm: 78.23636787317855 sim_pfm: 849.1113267329987
episode: 520 training return: tensor(821.1529, device='cuda:0')
episode: 521 training return: tensor(820.5486, device='cuda:0')
episode: 522 training return: tensor(819.8129, device='cuda:0')
episode: 523 training return: tensor(820.4135, device='cuda:0')
epoch: 131 test_true_pfm: 69.5595118867407 sim_pfm: 835.0464177164249
episode: 524 training return: tensor(804.6749, device='cuda:0')
episode: 525 training return: tensor(804.2601, device='cuda:0')
episode: 526 training return: tensor(813.5331, device='cuda:0')
episode: 527 training return: tensor(805.6820, device='cuda:0')
epoch: 132 test_true_pfm: 69.05143197708655 sim_pfm: 828.8152827579761
episode: 528 training return: tensor(779.6836, device='cuda:0')
episode: 529 training return: tensor(782.2028, device='cuda:0')
episode: 530 training return: tensor(773.6495, device='cuda:0')
episode: 531 training return: tensor(805.5270, device='cuda:0')
epoch: 133 test_true_pfm: 78.60616067308499 sim_pfm: 844.9749132428318
episode: 532 training return: tensor(812.3606, device='cuda:0')
episode: 533 training return: tensor(807.0957, device='cuda:0')
episode: 534 training return: tensor(819.1998, device='cuda:0')
episode: 535 training return: tensor(818.8643, device='cuda:0')
epoch: 134 test_true_pfm: 88.72506912671736 sim_pfm: 852.8274608781212
episode: 536 training return: tensor(804.0300, device='cuda:0')
episode: 537 training return: tensor(808.3735, device='cuda:0')
episode: 538 training return: tensor(797.6021, device='cuda:0')
episode: 539 training return: tensor(803.3986, device='cuda:0')
epoch: 135 test_true_pfm: 83.13359825820868 sim_pfm: 847.3645309695509
episode: 540 training return: tensor(834.0087, device='cuda:0')
episode: 541 training return: tensor(819.1899, device='cuda:0')
episode: 542 training return: tensor(783.4728, device='cuda:0')
episode: 543 training return: tensor(819.6135, device='cuda:0')
epoch: 136 test_true_pfm: 62.04236053483154 sim_pfm: 810.69958267599
episode: 544 training return: tensor(798.8031, device='cuda:0')
episode: 545 training return: tensor(814.8978, device='cuda:0')
episode: 546 training return: tensor(777.8648, device='cuda:0')
episode: 547 training return: tensor(805.3725, device='cuda:0')
epoch: 137 test_true_pfm: 62.4838186750118 sim_pfm: 830.6341423514299
episode: 548 training return: tensor(809.4357, device='cuda:0')
episode: 549 training return: tensor(831.6711, device='cuda:0')
episode: 550 training return: tensor(806.6627, device='cuda:0')
episode: 551 training return: tensor(814.1827, device='cuda:0')
epoch: 138 test_true_pfm: 83.59033648862255 sim_pfm: 856.8640073018148
episode: 552 training return: tensor(806.6194, device='cuda:0')
episode: 553 training return: tensor(838.7292, device='cuda:0')
episode: 554 training return: tensor(829.2551, device='cuda:0')
episode: 555 training return: tensor(822.5949, device='cuda:0')
epoch: 139 test_true_pfm: 81.75656011065931 sim_pfm: 836.295823637303
episode: 556 training return: tensor(827.0377, device='cuda:0')
episode: 557 training return: tensor(835.7054, device='cuda:0')
episode: 558 training return: tensor(838.8969, device='cuda:0')
episode: 559 training return: tensor(830.8730, device='cuda:0')
epoch: 140 test_true_pfm: 98.86731393540072 sim_pfm: 857.9441810239107
episode: 560 training return: tensor(846.3630, device='cuda:0')
episode: 561 training return: tensor(827.0658, device='cuda:0')
episode: 562 training return: tensor(819.5759, device='cuda:0')
episode: 563 training return: tensor(818.9963, device='cuda:0')
epoch: 141 test_true_pfm: 101.00642207990433 sim_pfm: 859.6617377617047
episode: 564 training return: tensor(834.4235, device='cuda:0')
episode: 565 training return: tensor(837.7247, device='cuda:0')
episode: 566 training return: tensor(840.8035, device='cuda:0')
episode: 567 training return: tensor(826.7751, device='cuda:0')
epoch: 142 test_true_pfm: 93.69880791191291 sim_pfm: 852.4393570343382
episode: 568 training return: tensor(835.3205, device='cuda:0')
episode: 569 training return: tensor(803.0991, device='cuda:0')
episode: 570 training return: tensor(824.4329, device='cuda:0')
episode: 571 training return: tensor(826.2422, device='cuda:0')
epoch: 143 test_true_pfm: 87.05676562245229 sim_pfm: 831.2902737892116
episode: 572 training return: tensor(813.7755, device='cuda:0')
episode: 573 training return: tensor(811.7437, device='cuda:0')
episode: 574 training return: tensor(823.4040, device='cuda:0')
episode: 575 training return: tensor(816.4629, device='cuda:0')
epoch: 144 test_true_pfm: 96.61348168295896 sim_pfm: 860.6437119489535
episode: 576 training return: tensor(824.7332, device='cuda:0')
episode: 577 training return: tensor(802.6678, device='cuda:0')
episode: 578 training return: tensor(816.6265, device='cuda:0')
episode: 579 training return: tensor(827.9330, device='cuda:0')
epoch: 145 test_true_pfm: 105.3485862484517 sim_pfm: 864.9439185518771
episode: 580 training return: tensor(840.6139, device='cuda:0')
episode: 581 training return: tensor(819.8636, device='cuda:0')
episode: 582 training return: tensor(815.4661, device='cuda:0')
episode: 583 training return: tensor(826.4907, device='cuda:0')
epoch: 146 test_true_pfm: 98.1112386512938 sim_pfm: 846.1623096253722
episode: 584 training return: tensor(842.6072, device='cuda:0')
episode: 585 training return: tensor(821.7527, device='cuda:0')
episode: 586 training return: tensor(820.2434, device='cuda:0')
episode: 587 training return: tensor(827.0419, device='cuda:0')
epoch: 147 test_true_pfm: 98.42741812044389 sim_pfm: 861.4733951663599
episode: 588 training return: tensor(834.4744, device='cuda:0')
episode: 589 training return: tensor(828.3102, device='cuda:0')
episode: 590 training return: tensor(835.3080, device='cuda:0')
episode: 591 training return: tensor(848.9966, device='cuda:0')
epoch: 148 test_true_pfm: 90.14516290121784 sim_pfm: 855.6665824310854
episode: 592 training return: tensor(819.7011, device='cuda:0')
episode: 593 training return: tensor(824.6498, device='cuda:0')
episode: 594 training return: tensor(826.5271, device='cuda:0')
episode: 595 training return: tensor(845.2646, device='cuda:0')
epoch: 149 test_true_pfm: 98.89665467482634 sim_pfm: 860.8190840328577
episode: 596 training return: tensor(822.1438, device='cuda:0')
episode: 597 training return: tensor(847.0101, device='cuda:0')
episode: 598 training return: tensor(821.8047, device='cuda:0')
episode: 599 training return: tensor(833.5052, device='cuda:0')
epoch: 150 test_true_pfm: 93.6197117550713 sim_pfm: 860.0912351783365
