['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '4', '--data', '100000', '--regu', '0.3']
2399.830811618772
episode: 0 training return: tensor(-263.2221, device='cuda:0')
episode: 1 training return: tensor(61.3875, device='cuda:0')
episode: 2 training return: tensor(-265.1095, device='cuda:0')
episode: 3 training return: tensor(5.6347, device='cuda:0')
epoch: 1 test_true_pfm: 1537.6827460483066 sim_pfm: -205.2940751487234
episode: 4 training return: tensor(-351.1682, device='cuda:0')
episode: 5 training return: tensor(-92.7693, device='cuda:0')
episode: 6 training return: tensor(-409.5055, device='cuda:0')
episode: 7 training return: tensor(-407.6401, device='cuda:0')
epoch: 2 test_true_pfm: 1225.9400582724581 sim_pfm: -211.2540894879494
episode: 8 training return: tensor(-409.6235, device='cuda:0')
episode: 9 training return: tensor(-53.8151, device='cuda:0')
episode: 10 training return: tensor(-271.6848, device='cuda:0')
episode: 11 training return: tensor(-343.4457, device='cuda:0')
epoch: 3 test_true_pfm: 1461.9892266518198 sim_pfm: -321.0542319698531
episode: 12 training return: tensor(-431.7073, device='cuda:0')
episode: 13 training return: tensor(-424.9123, device='cuda:0')
episode: 14 training return: tensor(-327.3996, device='cuda:0')
episode: 15 training return: tensor(-210.5896, device='cuda:0')
epoch: 4 test_true_pfm: 1805.8743813577373 sim_pfm: -262.8183820595344
episode: 16 training return: tensor(-324.9483, device='cuda:0')
episode: 17 training return: tensor(-396.2057, device='cuda:0')
episode: 18 training return: tensor(-334.9674, device='cuda:0')
episode: 19 training return: tensor(-395.1208, device='cuda:0')
epoch: 5 test_true_pfm: 934.1146608283574 sim_pfm: -519.8444512461816
episode: 20 training return: tensor(-325.6096, device='cuda:0')
episode: 21 training return: tensor(-394.4545, device='cuda:0')
episode: 22 training return: tensor(208.1528, device='cuda:0')
episode: 23 training return: tensor(-243.2832, device='cuda:0')
epoch: 6 test_true_pfm: 1213.592959705779 sim_pfm: -419.29125476966146
episode: 24 training return: tensor(-358.0068, device='cuda:0')
episode: 25 training return: tensor(-336.8200, device='cuda:0')
episode: 26 training return: tensor(-317.7059, device='cuda:0')
episode: 27 training return: tensor(-405.4446, device='cuda:0')
epoch: 7 test_true_pfm: 1325.1956193720569 sim_pfm: -396.3157916337174
episode: 28 training return: tensor(-335.5328, device='cuda:0')
episode: 29 training return: tensor(-65.2941, device='cuda:0')
episode: 30 training return: tensor(-330.2044, device='cuda:0')
episode: 31 training return: tensor(-284.4272, device='cuda:0')
epoch: 8 test_true_pfm: 1893.0669102779211 sim_pfm: -154.50994933655602
episode: 32 training return: tensor(-397.3817, device='cuda:0')
episode: 33 training return: tensor(-229.0727, device='cuda:0')
episode: 34 training return: tensor(-405.0850, device='cuda:0')
episode: 35 training return: tensor(-426.3661, device='cuda:0')
epoch: 9 test_true_pfm: 1316.1000987053446 sim_pfm: -376.03021069340565
episode: 36 training return: tensor(-350.7742, device='cuda:0')
episode: 37 training return: tensor(-328.4850, device='cuda:0')
episode: 38 training return: tensor(-298.4511, device='cuda:0')
episode: 39 training return: tensor(-407.8903, device='cuda:0')
epoch: 10 test_true_pfm: 1278.9523648545767 sim_pfm: -393.15278815547936
episode: 40 training return: tensor(-330.4644, device='cuda:0')
episode: 41 training return: tensor(-359.8217, device='cuda:0')
episode: 42 training return: tensor(-168.2662, device='cuda:0')
episode: 43 training return: tensor(-357.9057, device='cuda:0')
epoch: 11 test_true_pfm: 1380.1500654829117 sim_pfm: -365.031424916524
episode: 44 training return: tensor(-244.7223, device='cuda:0')
episode: 45 training return: tensor(-332.6206, device='cuda:0')
episode: 46 training return: tensor(-200.1986, device='cuda:0')
episode: 47 training return: tensor(-416.3081, device='cuda:0')
epoch: 12 test_true_pfm: 2336.543928349807 sim_pfm: 66.1715207012021
episode: 48 training return: tensor(-395.9814, device='cuda:0')
episode: 49 training return: tensor(-371.9926, device='cuda:0')
episode: 50 training return: tensor(-217.8521, device='cuda:0')
episode: 51 training return: tensor(-258.0426, device='cuda:0')
epoch: 13 test_true_pfm: 1980.756042629094 sim_pfm: -158.19301755542014
episode: 52 training return: tensor(-329.0874, device='cuda:0')
episode: 53 training return: tensor(-307.0580, device='cuda:0')
episode: 54 training return: tensor(-59.7562, device='cuda:0')
episode: 55 training return: tensor(-290.2567, device='cuda:0')
epoch: 14 test_true_pfm: 2463.8000135917123 sim_pfm: -135.69560280591637
episode: 56 training return: tensor(-328.7964, device='cuda:0')
episode: 57 training return: tensor(-398.3936, device='cuda:0')
episode: 58 training return: tensor(-157.4134, device='cuda:0')
episode: 59 training return: tensor(-231.5899, device='cuda:0')
epoch: 15 test_true_pfm: 2031.5709153337848 sim_pfm: -68.93550722456227
episode: 60 training return: tensor(-280.6725, device='cuda:0')
episode: 61 training return: tensor(-53.9689, device='cuda:0')
episode: 62 training return: tensor(-143.0939, device='cuda:0')
episode: 63 training return: tensor(-29.1685, device='cuda:0')
epoch: 16 test_true_pfm: 2258.4382423511042 sim_pfm: -271.69635796605144
episode: 64 training return: tensor(-135.9043, device='cuda:0')
episode: 65 training return: tensor(-215.7241, device='cuda:0')
episode: 66 training return: tensor(-5.4388, device='cuda:0')
episode: 67 training return: tensor(-347.7497, device='cuda:0')
epoch: 17 test_true_pfm: 1904.9068793584968 sim_pfm: 50.761591618900034
episode: 68 training return: tensor(-167.1933, device='cuda:0')
episode: 69 training return: tensor(147.5740, device='cuda:0')
episode: 70 training return: tensor(-348.0406, device='cuda:0')
episode: 71 training return: tensor(-398.5514, device='cuda:0')
epoch: 18 test_true_pfm: 2253.7565176392636 sim_pfm: -93.13321417896077
episode: 72 training return: tensor(-403.5120, device='cuda:0')
episode: 73 training return: tensor(-191.2095, device='cuda:0')
episode: 74 training return: tensor(105.8719, device='cuda:0')
episode: 75 training return: tensor(-450.0341, device='cuda:0')
epoch: 19 test_true_pfm: 2257.8483370587173 sim_pfm: -6.272306505590677
episode: 76 training return: tensor(-144.4341, device='cuda:0')
episode: 77 training return: tensor(-137.8803, device='cuda:0')
episode: 78 training return: tensor(-247.8739, device='cuda:0')
episode: 79 training return: tensor(-405.5294, device='cuda:0')
epoch: 20 test_true_pfm: 2229.9083393004944 sim_pfm: -117.12888094565521
episode: 80 training return: tensor(-395.5191, device='cuda:0')
episode: 81 training return: tensor(-323.2230, device='cuda:0')
episode: 82 training return: tensor(-56.6145, device='cuda:0')
episode: 83 training return: tensor(-143.0029, device='cuda:0')
epoch: 21 test_true_pfm: 2415.7469109416293 sim_pfm: -147.18513928325652
episode: 84 training return: tensor(-141.0065, device='cuda:0')
episode: 85 training return: tensor(-288.9470, device='cuda:0')
episode: 86 training return: tensor(-403.7704, device='cuda:0')
episode: 87 training return: tensor(-240.4536, device='cuda:0')
epoch: 22 test_true_pfm: 1764.539502048853 sim_pfm: 78.48495400615502
episode: 88 training return: tensor(-227.9747, device='cuda:0')
episode: 89 training return: tensor(-375.1744, device='cuda:0')
episode: 90 training return: tensor(-310.6382, device='cuda:0')
episode: 91 training return: tensor(-413.4230, device='cuda:0')
epoch: 23 test_true_pfm: 2232.041530691837 sim_pfm: -42.18862535711378
episode: 92 training return: tensor(-413.7836, device='cuda:0')
episode: 93 training return: tensor(-400.2296, device='cuda:0')
episode: 94 training return: tensor(-366.4981, device='cuda:0')
episode: 95 training return: tensor(-362.5059, device='cuda:0')
epoch: 24 test_true_pfm: 2403.2350606339 sim_pfm: 81.12389199484217
episode: 96 training return: tensor(86.3760, device='cuda:0')
episode: 97 training return: tensor(-377.5954, device='cuda:0')
episode: 98 training return: tensor(-165.8110, device='cuda:0')
episode: 99 training return: tensor(-367.2527, device='cuda:0')
epoch: 25 test_true_pfm: 2379.3277078468486 sim_pfm: -139.0329731683014
episode: 100 training return: tensor(-318.7724, device='cuda:0')
episode: 101 training return: tensor(-115.2680, device='cuda:0')
episode: 102 training return: tensor(-395.2731, device='cuda:0')
episode: 103 training return: tensor(-145.4708, device='cuda:0')
epoch: 26 test_true_pfm: 2289.0366226834394 sim_pfm: -99.87798613946264
episode: 104 training return: tensor(-122.8257, device='cuda:0')
episode: 105 training return: tensor(-222.3928, device='cuda:0')
episode: 106 training return: tensor(-11.5343, device='cuda:0')
episode: 107 training return: tensor(-165.2883, device='cuda:0')
epoch: 27 test_true_pfm: 1652.945743648342 sim_pfm: -252.37962556638135
episode: 108 training return: tensor(-361.5251, device='cuda:0')
episode: 109 training return: tensor(100.0459, device='cuda:0')
episode: 110 training return: tensor(-221.6834, device='cuda:0')
episode: 111 training return: tensor(-370.5214, device='cuda:0')
epoch: 28 test_true_pfm: 1881.1821136386764 sim_pfm: -130.98180682018088
episode: 112 training return: tensor(-163.9718, device='cuda:0')
episode: 113 training return: tensor(168.3953, device='cuda:0')
episode: 114 training return: tensor(-189.7601, device='cuda:0')
episode: 115 training return: tensor(-233.1747, device='cuda:0')
epoch: 29 test_true_pfm: 1369.0773664113951 sim_pfm: 31.862268550981145
episode: 116 training return: tensor(-382.3293, device='cuda:0')
episode: 117 training return: tensor(-117.0485, device='cuda:0')
episode: 118 training return: tensor(-407.2092, device='cuda:0')
episode: 119 training return: tensor(-67.5787, device='cuda:0')
epoch: 30 test_true_pfm: 3009.5731141320025 sim_pfm: -175.27144659107822
episode: 120 training return: tensor(-144.4749, device='cuda:0')
episode: 121 training return: tensor(-402.6297, device='cuda:0')
episode: 122 training return: tensor(17.4758, device='cuda:0')
episode: 123 training return: tensor(-238.4956, device='cuda:0')
epoch: 31 test_true_pfm: 2334.2310979593826 sim_pfm: -276.3436583944713
episode: 124 training return: tensor(246.0842, device='cuda:0')
episode: 125 training return: tensor(-122.7070, device='cuda:0')
episode: 126 training return: tensor(-131.7401, device='cuda:0')
episode: 127 training return: tensor(-187.2478, device='cuda:0')
epoch: 32 test_true_pfm: 2318.721278813702 sim_pfm: -30.5949369748317
episode: 128 training return: tensor(-176.9046, device='cuda:0')
episode: 129 training return: tensor(-405.8145, device='cuda:0')
episode: 130 training return: tensor(162.5995, device='cuda:0')
episode: 131 training return: tensor(-20.2622, device='cuda:0')
epoch: 33 test_true_pfm: 2668.3995889246144 sim_pfm: -97.98420939978678
episode: 132 training return: tensor(-263.1130, device='cuda:0')
episode: 133 training return: tensor(109.2082, device='cuda:0')
episode: 134 training return: tensor(-293.4544, device='cuda:0')
episode: 135 training return: tensor(-102.6386, device='cuda:0')
epoch: 34 test_true_pfm: 2636.7465417008357 sim_pfm: -238.98480123541472
episode: 136 training return: tensor(-0.7929, device='cuda:0')
episode: 137 training return: tensor(-410.2983, device='cuda:0')
episode: 138 training return: tensor(-74.9086, device='cuda:0')
episode: 139 training return: tensor(-145.5745, device='cuda:0')
epoch: 35 test_true_pfm: 1587.0373412944684 sim_pfm: -187.6594622716657
episode: 140 training return: tensor(245.0674, device='cuda:0')
episode: 141 training return: tensor(-165.0364, device='cuda:0')
episode: 142 training return: tensor(253.5160, device='cuda:0')
episode: 143 training return: tensor(179.6147, device='cuda:0')
epoch: 36 test_true_pfm: 1992.9979164389358 sim_pfm: -26.54295012342239
episode: 144 training return: tensor(-185.1450, device='cuda:0')
episode: 145 training return: tensor(283.0636, device='cuda:0')
episode: 146 training return: tensor(-238.5313, device='cuda:0')
episode: 147 training return: tensor(229.8929, device='cuda:0')
epoch: 37 test_true_pfm: 2450.5769247407534 sim_pfm: 134.26483225784614
episode: 148 training return: tensor(-312.6706, device='cuda:0')
episode: 149 training return: tensor(-391.4590, device='cuda:0')
episode: 150 training return: tensor(-286.6686, device='cuda:0')
episode: 151 training return: tensor(-328.4921, device='cuda:0')
epoch: 38 test_true_pfm: 1592.398927140065 sim_pfm: -191.83368748480765
episode: 152 training return: tensor(266.1580, device='cuda:0')
episode: 153 training return: tensor(-394.0582, device='cuda:0')
episode: 154 training return: tensor(277.8999, device='cuda:0')
episode: 155 training return: tensor(-405.5907, device='cuda:0')
epoch: 39 test_true_pfm: 2367.571312202787 sim_pfm: -15.677199805960603
episode: 156 training return: tensor(-343.7924, device='cuda:0')
episode: 157 training return: tensor(-37.4173, device='cuda:0')
episode: 158 training return: tensor(-187.2573, device='cuda:0')
episode: 159 training return: tensor(-387.6353, device='cuda:0')
epoch: 40 test_true_pfm: 2372.113370976105 sim_pfm: -298.2551802526771
episode: 160 training return: tensor(-261.1986, device='cuda:0')
episode: 161 training return: tensor(-400.2990, device='cuda:0')
episode: 162 training return: tensor(-325.3387, device='cuda:0')
episode: 163 training return: tensor(55.7591, device='cuda:0')
epoch: 41 test_true_pfm: 2071.1800258695766 sim_pfm: -321.2973587967572
episode: 164 training return: tensor(-390.0298, device='cuda:0')
episode: 165 training return: tensor(-419.6405, device='cuda:0')
episode: 166 training return: tensor(-265.9725, device='cuda:0')
episode: 167 training return: tensor(-77.4308, device='cuda:0')
epoch: 42 test_true_pfm: 1877.0379544736943 sim_pfm: -289.81257740375196
episode: 168 training return: tensor(18.5729, device='cuda:0')
episode: 169 training return: tensor(-126.7474, device='cuda:0')
episode: 170 training return: tensor(-455.1412, device='cuda:0')
episode: 171 training return: tensor(149.4988, device='cuda:0')
epoch: 43 test_true_pfm: 1573.8346652181874 sim_pfm: 64.24225207684019
episode: 172 training return: tensor(-278.6585, device='cuda:0')
episode: 173 training return: tensor(-72.9382, device='cuda:0')
episode: 174 training return: tensor(-7.7645, device='cuda:0')
episode: 175 training return: tensor(251.4704, device='cuda:0')
epoch: 44 test_true_pfm: 1560.5912644350883 sim_pfm: -134.82565516691344
episode: 176 training return: tensor(125.7752, device='cuda:0')
episode: 177 training return: tensor(-381.7779, device='cuda:0')
episode: 178 training return: tensor(233.5305, device='cuda:0')
episode: 179 training return: tensor(174.8281, device='cuda:0')
epoch: 45 test_true_pfm: 1954.1384902980233 sim_pfm: -285.50415495666675
episode: 180 training return: tensor(-416.9272, device='cuda:0')
episode: 181 training return: tensor(-178.8900, device='cuda:0')
episode: 182 training return: tensor(-13.1493, device='cuda:0')
episode: 183 training return: tensor(-389.2854, device='cuda:0')
epoch: 46 test_true_pfm: 2139.645358674686 sim_pfm: -162.27684374390324
episode: 184 training return: tensor(16.4977, device='cuda:0')
episode: 185 training return: tensor(249.9642, device='cuda:0')
episode: 186 training return: tensor(6.9149, device='cuda:0')
episode: 187 training return: tensor(-175.7174, device='cuda:0')
epoch: 47 test_true_pfm: 1920.9702033775 sim_pfm: -10.298080938033914
episode: 188 training return: tensor(-362.9156, device='cuda:0')
episode: 189 training return: tensor(77.5596, device='cuda:0')
episode: 190 training return: tensor(238.2018, device='cuda:0')
episode: 191 training return: tensor(-320.6302, device='cuda:0')
epoch: 48 test_true_pfm: 2128.6395816406716 sim_pfm: -288.8816799870304
episode: 192 training return: tensor(-313.6564, device='cuda:0')
episode: 193 training return: tensor(266.8994, device='cuda:0')
episode: 194 training return: tensor(-243.9538, device='cuda:0')
episode: 195 training return: tensor(264.4204, device='cuda:0')
epoch: 49 test_true_pfm: 1675.1964290782162 sim_pfm: -114.56401422757578
episode: 196 training return: tensor(-155.6513, device='cuda:0')
episode: 197 training return: tensor(-287.0582, device='cuda:0')
episode: 198 training return: tensor(-28.4135, device='cuda:0')
episode: 199 training return: tensor(-405.2172, device='cuda:0')
epoch: 50 test_true_pfm: 1785.2725106523785 sim_pfm: -302.9512908573961
episode: 200 training return: tensor(-359.2212, device='cuda:0')
episode: 201 training return: tensor(211.1148, device='cuda:0')
episode: 202 training return: tensor(60.8762, device='cuda:0')
episode: 203 training return: tensor(-339.2649, device='cuda:0')
epoch: 51 test_true_pfm: 1702.3742233839203 sim_pfm: 0.7086347027022081
episode: 204 training return: tensor(269.3775, device='cuda:0')
episode: 205 training return: tensor(-69.7636, device='cuda:0')
episode: 206 training return: tensor(-335.9283, device='cuda:0')
episode: 207 training return: tensor(276.6663, device='cuda:0')
epoch: 52 test_true_pfm: 1888.4928713262743 sim_pfm: -285.17445973462117
episode: 208 training return: tensor(-229.2052, device='cuda:0')
episode: 209 training return: tensor(248.7326, device='cuda:0')
episode: 210 training return: tensor(221.1862, device='cuda:0')
episode: 211 training return: tensor(-249.1498, device='cuda:0')
epoch: 53 test_true_pfm: 1606.7083155503824 sim_pfm: -289.9583839496093
episode: 212 training return: tensor(-407.6855, device='cuda:0')
episode: 213 training return: tensor(-157.7798, device='cuda:0')
episode: 214 training return: tensor(-18.5427, device='cuda:0')
episode: 215 training return: tensor(11.2747, device='cuda:0')
epoch: 54 test_true_pfm: 1540.3240104617928 sim_pfm: -117.36481316558395
episode: 216 training return: tensor(186.9940, device='cuda:0')
episode: 217 training return: tensor(-234.2901, device='cuda:0')
episode: 218 training return: tensor(-110.8228, device='cuda:0')
episode: 219 training return: tensor(-346.3506, device='cuda:0')
epoch: 55 test_true_pfm: 1742.4853525848064 sim_pfm: -310.3873616281101
episode: 220 training return: tensor(-128.1922, device='cuda:0')
episode: 221 training return: tensor(-101.9504, device='cuda:0')
episode: 222 training return: tensor(-223.8242, device='cuda:0')
episode: 223 training return: tensor(-187.8769, device='cuda:0')
epoch: 56 test_true_pfm: 1560.5433788053904 sim_pfm: -110.12134394441576
episode: 224 training return: tensor(115.5681, device='cuda:0')
episode: 225 training return: tensor(250.2953, device='cuda:0')
episode: 226 training return: tensor(78.1669, device='cuda:0')
episode: 227 training return: tensor(-20.6954, device='cuda:0')
epoch: 57 test_true_pfm: 1993.164600551331 sim_pfm: -96.18538270690867
episode: 228 training return: tensor(73.9457, device='cuda:0')
episode: 229 training return: tensor(299.0217, device='cuda:0')
episode: 230 training return: tensor(-240.5154, device='cuda:0')
episode: 231 training return: tensor(254.4441, device='cuda:0')
epoch: 58 test_true_pfm: 2475.150849787729 sim_pfm: -105.60005683638155
episode: 232 training return: tensor(301.4177, device='cuda:0')
episode: 233 training return: tensor(-384.2597, device='cuda:0')
episode: 234 training return: tensor(-187.9732, device='cuda:0')
episode: 235 training return: tensor(-120.2626, device='cuda:0')
epoch: 59 test_true_pfm: 1683.6586693762474 sim_pfm: -188.0046403091304
episode: 236 training return: tensor(88.8590, device='cuda:0')
episode: 237 training return: tensor(-451.6164, device='cuda:0')
episode: 238 training return: tensor(-53.3786, device='cuda:0')
episode: 239 training return: tensor(-317.0886, device='cuda:0')
epoch: 60 test_true_pfm: 3331.895607757527 sim_pfm: -262.27005540738656
episode: 240 training return: tensor(238.4880, device='cuda:0')
episode: 241 training return: tensor(-205.6558, device='cuda:0')
episode: 242 training return: tensor(-271.6948, device='cuda:0')
episode: 243 training return: tensor(-54.2488, device='cuda:0')
epoch: 61 test_true_pfm: 2456.4367249061083 sim_pfm: -175.00111828519343
episode: 244 training return: tensor(128.3790, device='cuda:0')
episode: 245 training return: tensor(17.6888, device='cuda:0')
episode: 246 training return: tensor(-395.1869, device='cuda:0')
episode: 247 training return: tensor(-271.4977, device='cuda:0')
epoch: 62 test_true_pfm: 1581.0730489574287 sim_pfm: -103.64116038454813
episode: 248 training return: tensor(-212.8424, device='cuda:0')
episode: 249 training return: tensor(222.7572, device='cuda:0')
episode: 250 training return: tensor(-89.6295, device='cuda:0')
episode: 251 training return: tensor(-248.2458, device='cuda:0')
epoch: 63 test_true_pfm: 1636.1123096733625 sim_pfm: -57.28383455591393
episode: 252 training return: tensor(-197.6234, device='cuda:0')
episode: 253 training return: tensor(-148.8568, device='cuda:0')
episode: 254 training return: tensor(-326.6116, device='cuda:0')
episode: 255 training return: tensor(67.3847, device='cuda:0')
epoch: 64 test_true_pfm: 2389.973999986639 sim_pfm: -111.34212795860367
episode: 256 training return: tensor(60.1394, device='cuda:0')
episode: 257 training return: tensor(-223.7129, device='cuda:0')
episode: 258 training return: tensor(-389.8148, device='cuda:0')
episode: 259 training return: tensor(237.9065, device='cuda:0')
epoch: 65 test_true_pfm: 1592.8426257852825 sim_pfm: -304.305452338187
episode: 260 training return: tensor(67.6855, device='cuda:0')
episode: 261 training return: tensor(-178.3028, device='cuda:0')
episode: 262 training return: tensor(-290.5334, device='cuda:0')
episode: 263 training return: tensor(249.3266, device='cuda:0')
epoch: 66 test_true_pfm: 1656.8749218919256 sim_pfm: -231.13289511584057
episode: 264 training return: tensor(29.0086, device='cuda:0')
episode: 265 training return: tensor(-358.4755, device='cuda:0')
episode: 266 training return: tensor(-251.5571, device='cuda:0')
episode: 267 training return: tensor(-125.5246, device='cuda:0')
epoch: 67 test_true_pfm: 1689.4229617831381 sim_pfm: -99.34070782664155
episode: 268 training return: tensor(7.9778, device='cuda:0')
episode: 269 training return: tensor(-178.6135, device='cuda:0')
episode: 270 training return: tensor(-296.1413, device='cuda:0')
episode: 271 training return: tensor(-183.7455, device='cuda:0')
epoch: 68 test_true_pfm: 2250.6748051267455 sim_pfm: -265.10117157542845
episode: 272 training return: tensor(-286.5818, device='cuda:0')
episode: 273 training return: tensor(49.4904, device='cuda:0')
episode: 274 training return: tensor(-371.9622, device='cuda:0')
episode: 275 training return: tensor(-161.1068, device='cuda:0')
epoch: 69 test_true_pfm: 1617.0670393558787 sim_pfm: -90.6578476499805
episode: 276 training return: tensor(-209.3433, device='cuda:0')
episode: 277 training return: tensor(-127.1764, device='cuda:0')
episode: 278 training return: tensor(-2.2903, device='cuda:0')
episode: 279 training return: tensor(-298.2248, device='cuda:0')
epoch: 70 test_true_pfm: 2794.376977279903 sim_pfm: 81.19956902397098
episode: 280 training return: tensor(-408.4802, device='cuda:0')
episode: 281 training return: tensor(-20.6668, device='cuda:0')
episode: 282 training return: tensor(-324.6478, device='cuda:0')
episode: 283 training return: tensor(-201.9174, device='cuda:0')
epoch: 71 test_true_pfm: 1534.3938862274488 sim_pfm: -298.0678002897378
episode: 284 training return: tensor(-380.2660, device='cuda:0')
episode: 285 training return: tensor(140.2361, device='cuda:0')
episode: 286 training return: tensor(-260.3228, device='cuda:0')
episode: 287 training return: tensor(-323.6657, device='cuda:0')
epoch: 72 test_true_pfm: 2560.027756371456 sim_pfm: -67.29560251630998
episode: 288 training return: tensor(-269.2137, device='cuda:0')
episode: 289 training return: tensor(-31.2526, device='cuda:0')
episode: 290 training return: tensor(-285.0453, device='cuda:0')
episode: 291 training return: tensor(-107.0550, device='cuda:0')
epoch: 73 test_true_pfm: 2748.4911048086547 sim_pfm: -0.11281287163728848
episode: 292 training return: tensor(-43.0750, device='cuda:0')
episode: 293 training return: tensor(-154.3583, device='cuda:0')
episode: 294 training return: tensor(-350.6372, device='cuda:0')
episode: 295 training return: tensor(-212.9916, device='cuda:0')
epoch: 74 test_true_pfm: 2261.0433943109083 sim_pfm: -257.2228409102342
episode: 296 training return: tensor(-185.8289, device='cuda:0')
episode: 297 training return: tensor(-27.0748, device='cuda:0')
episode: 298 training return: tensor(-9.6354, device='cuda:0')
episode: 299 training return: tensor(-4.6928, device='cuda:0')
epoch: 75 test_true_pfm: 1807.8438036680438 sim_pfm: -24.380105622966465
episode: 300 training return: tensor(-432.7960, device='cuda:0')
episode: 301 training return: tensor(-417.7571, device='cuda:0')
episode: 302 training return: tensor(226.0537, device='cuda:0')
episode: 303 training return: tensor(-322.3195, device='cuda:0')
epoch: 76 test_true_pfm: 2413.348720577691 sim_pfm: -39.5516070164158
episode: 304 training return: tensor(-256.0454, device='cuda:0')
episode: 305 training return: tensor(254.1966, device='cuda:0')
episode: 306 training return: tensor(-228.6440, device='cuda:0')
episode: 307 training return: tensor(-395.1745, device='cuda:0')
epoch: 77 test_true_pfm: 1626.759188578424 sim_pfm: -105.0405660639226
episode: 308 training return: tensor(-349.2984, device='cuda:0')
episode: 309 training return: tensor(-239.5620, device='cuda:0')
episode: 310 training return: tensor(-234.6141, device='cuda:0')
episode: 311 training return: tensor(-249.4009, device='cuda:0')
epoch: 78 test_true_pfm: 1570.8595104635633 sim_pfm: 9.008890489853608
episode: 312 training return: tensor(-56.6703, device='cuda:0')
episode: 313 training return: tensor(-303.9510, device='cuda:0')
episode: 314 training return: tensor(-153.5353, device='cuda:0')
episode: 315 training return: tensor(262.1423, device='cuda:0')
epoch: 79 test_true_pfm: 1846.346297642873 sim_pfm: 83.40328047720443
episode: 316 training return: tensor(-387.7993, device='cuda:0')
episode: 317 training return: tensor(-242.5199, device='cuda:0')
episode: 318 training return: tensor(73.7471, device='cuda:0')
episode: 319 training return: tensor(-76.5556, device='cuda:0')
epoch: 80 test_true_pfm: 2351.279113917622 sim_pfm: -45.47596367572745
episode: 320 training return: tensor(-55.5581, device='cuda:0')
episode: 321 training return: tensor(-135.3170, device='cuda:0')
episode: 322 training return: tensor(-332.7654, device='cuda:0')
episode: 323 training return: tensor(-272.5820, device='cuda:0')
epoch: 81 test_true_pfm: 3129.839692822399 sim_pfm: 280.76908685741364
episode: 324 training return: tensor(-318.1601, device='cuda:0')
episode: 325 training return: tensor(-237.0627, device='cuda:0')
episode: 326 training return: tensor(-76.2007, device='cuda:0')
episode: 327 training return: tensor(-133.2861, device='cuda:0')
epoch: 82 test_true_pfm: 2317.0749195977937 sim_pfm: -227.30819242540747
episode: 328 training return: tensor(-161.9679, device='cuda:0')
episode: 329 training return: tensor(-257.5309, device='cuda:0')
episode: 330 training return: tensor(-415.2931, device='cuda:0')
episode: 331 training return: tensor(-331.3706, device='cuda:0')
epoch: 83 test_true_pfm: 1556.788586473988 sim_pfm: -295.0149131338112
episode: 332 training return: tensor(-383.8819, device='cuda:0')
episode: 333 training return: tensor(235.5135, device='cuda:0')
episode: 334 training return: tensor(269.3574, device='cuda:0')
episode: 335 training return: tensor(-195.1603, device='cuda:0')
epoch: 84 test_true_pfm: 2153.6877853223477 sim_pfm: -238.61276260965192
episode: 336 training return: tensor(13.7498, device='cuda:0')
episode: 337 training return: tensor(-331.5275, device='cuda:0')
episode: 338 training return: tensor(-60.8318, device='cuda:0')
episode: 339 training return: tensor(-250.0234, device='cuda:0')
epoch: 85 test_true_pfm: 2198.0316339809597 sim_pfm: -236.92208981292788
episode: 340 training return: tensor(-262.3301, device='cuda:0')
episode: 341 training return: tensor(168.8110, device='cuda:0')
episode: 342 training return: tensor(-122.9637, device='cuda:0')
episode: 343 training return: tensor(-361.9131, device='cuda:0')
epoch: 86 test_true_pfm: 2230.246479843917 sim_pfm: -293.2575287730433
episode: 344 training return: tensor(210.5332, device='cuda:0')
episode: 345 training return: tensor(-43.0533, device='cuda:0')
episode: 346 training return: tensor(-18.7676, device='cuda:0')
episode: 347 training return: tensor(-169.4403, device='cuda:0')
epoch: 87 test_true_pfm: 2004.935861163638 sim_pfm: -105.33190173630526
episode: 348 training return: tensor(256.8586, device='cuda:0')
episode: 349 training return: tensor(-339.7569, device='cuda:0')
episode: 350 training return: tensor(-249.1870, device='cuda:0')
episode: 351 training return: tensor(109.7933, device='cuda:0')
epoch: 88 test_true_pfm: 1683.1401036874988 sim_pfm: -99.75042526007746
episode: 352 training return: tensor(-135.4382, device='cuda:0')
episode: 353 training return: tensor(-145.4550, device='cuda:0')
episode: 354 training return: tensor(-494.9393, device='cuda:0')
episode: 355 training return: tensor(-77.8968, device='cuda:0')
epoch: 89 test_true_pfm: 1581.7248163399481 sim_pfm: 80.61441170959733
episode: 356 training return: tensor(-183.6440, device='cuda:0')
episode: 357 training return: tensor(-81.4867, device='cuda:0')
episode: 358 training return: tensor(-287.6050, device='cuda:0')
episode: 359 training return: tensor(-301.3862, device='cuda:0')
epoch: 90 test_true_pfm: 1661.713931652462 sim_pfm: -231.8308599005783
episode: 360 training return: tensor(-216.6063, device='cuda:0')
episode: 361 training return: tensor(-351.7000, device='cuda:0')
episode: 362 training return: tensor(252.2293, device='cuda:0')
episode: 363 training return: tensor(-102.2833, device='cuda:0')
epoch: 91 test_true_pfm: 3222.0806065784504 sim_pfm: 153.83437643708507
episode: 364 training return: tensor(-265.5874, device='cuda:0')
episode: 365 training return: tensor(-314.9067, device='cuda:0')
episode: 366 training return: tensor(269.5354, device='cuda:0')
episode: 367 training return: tensor(-142.9796, device='cuda:0')
epoch: 92 test_true_pfm: 2899.603954420665 sim_pfm: 63.33333926613947
episode: 368 training return: tensor(-277.0513, device='cuda:0')
episode: 369 training return: tensor(303.0734, device='cuda:0')
episode: 370 training return: tensor(-145.0263, device='cuda:0')
episode: 371 training return: tensor(-273.6829, device='cuda:0')
epoch: 93 test_true_pfm: 2336.100322793463 sim_pfm: -47.4018896916726
episode: 372 training return: tensor(-140.9342, device='cuda:0')
episode: 373 training return: tensor(-60.3208, device='cuda:0')
episode: 374 training return: tensor(174.6683, device='cuda:0')
episode: 375 training return: tensor(-408.8272, device='cuda:0')
epoch: 94 test_true_pfm: 3295.285090715906 sim_pfm: 85.45590020168068
episode: 376 training return: tensor(-294.9704, device='cuda:0')
episode: 377 training return: tensor(141.4595, device='cuda:0')
episode: 378 training return: tensor(109.2455, device='cuda:0')
episode: 379 training return: tensor(-170.2891, device='cuda:0')
epoch: 95 test_true_pfm: 2081.6411525202734 sim_pfm: -244.2388230152525
episode: 380 training return: tensor(-223.4366, device='cuda:0')
episode: 381 training return: tensor(213.7971, device='cuda:0')
episode: 382 training return: tensor(-200.9223, device='cuda:0')
episode: 383 training return: tensor(-322.1421, device='cuda:0')
epoch: 96 test_true_pfm: 1883.6262869910013 sim_pfm: -96.80537146234808
episode: 384 training return: tensor(-32.2068, device='cuda:0')
episode: 385 training return: tensor(95.1243, device='cuda:0')
episode: 386 training return: tensor(-271.9396, device='cuda:0')
episode: 387 training return: tensor(-430.5665, device='cuda:0')
epoch: 97 test_true_pfm: 2708.6993108874094 sim_pfm: -183.35649709745
episode: 388 training return: tensor(9.8509, device='cuda:0')
episode: 389 training return: tensor(273.1907, device='cuda:0')
episode: 390 training return: tensor(-144.6366, device='cuda:0')
episode: 391 training return: tensor(-144.6878, device='cuda:0')
epoch: 98 test_true_pfm: 2508.286948280664 sim_pfm: 274.8003798363109
episode: 392 training return: tensor(197.9744, device='cuda:0')
episode: 393 training return: tensor(289.5529, device='cuda:0')
episode: 394 training return: tensor(-404.9817, device='cuda:0')
episode: 395 training return: tensor(-74.8468, device='cuda:0')
epoch: 99 test_true_pfm: 1676.0859556174394 sim_pfm: -220.6248878496505
episode: 396 training return: tensor(13.3340, device='cuda:0')
episode: 397 training return: tensor(109.2346, device='cuda:0')
episode: 398 training return: tensor(14.8817, device='cuda:0')
episode: 399 training return: tensor(32.7537, device='cuda:0')
epoch: 100 test_true_pfm: 1545.4777001290197 sim_pfm: -257.2089703035114
episode: 400 training return: tensor(16.3118, device='cuda:0')
episode: 401 training return: tensor(-352.1586, device='cuda:0')
episode: 402 training return: tensor(-214.4855, device='cuda:0')
episode: 403 training return: tensor(123.2454, device='cuda:0')
epoch: 101 test_true_pfm: 2811.400961815549 sim_pfm: -238.21010850346647
episode: 404 training return: tensor(-315.3364, device='cuda:0')
episode: 405 training return: tensor(98.3525, device='cuda:0')
episode: 406 training return: tensor(149.4278, device='cuda:0')
episode: 407 training return: tensor(-60.4546, device='cuda:0')
epoch: 102 test_true_pfm: 3102.310767181472 sim_pfm: -95.5645590115649
episode: 408 training return: tensor(36.0716, device='cuda:0')
episode: 409 training return: tensor(295.5664, device='cuda:0')
episode: 410 training return: tensor(-50.5011, device='cuda:0')
episode: 411 training return: tensor(246.3631, device='cuda:0')
epoch: 103 test_true_pfm: 1799.4820311551102 sim_pfm: -150.10371434983486
episode: 412 training return: tensor(-261.4211, device='cuda:0')
episode: 413 training return: tensor(-206.2018, device='cuda:0')
episode: 414 training return: tensor(24.0158, device='cuda:0')
episode: 415 training return: tensor(-428.6811, device='cuda:0')
epoch: 104 test_true_pfm: 2700.9361371784307 sim_pfm: 178.18493615060774
episode: 416 training return: tensor(-156.2563, device='cuda:0')
episode: 417 training return: tensor(-123.8377, device='cuda:0')
episode: 418 training return: tensor(-166.2379, device='cuda:0')
episode: 419 training return: tensor(-156.9171, device='cuda:0')
epoch: 105 test_true_pfm: 2661.691774398667 sim_pfm: 305.4918516627319
episode: 420 training return: tensor(-137.0043, device='cuda:0')
episode: 421 training return: tensor(-252.5802, device='cuda:0')
episode: 422 training return: tensor(-184.5929, device='cuda:0')
episode: 423 training return: tensor(-246.9991, device='cuda:0')
epoch: 106 test_true_pfm: 1873.3806166781244 sim_pfm: -194.6905024248796
episode: 424 training return: tensor(278.7169, device='cuda:0')
episode: 425 training return: tensor(-274.6023, device='cuda:0')
episode: 426 training return: tensor(-416.2735, device='cuda:0')
episode: 427 training return: tensor(-85.5848, device='cuda:0')
epoch: 107 test_true_pfm: 2049.6900474777417 sim_pfm: -38.048951843481824
episode: 428 training return: tensor(-318.4362, device='cuda:0')
episode: 429 training return: tensor(-132.8922, device='cuda:0')
episode: 430 training return: tensor(305.2051, device='cuda:0')
episode: 431 training return: tensor(-393.9062, device='cuda:0')
epoch: 108 test_true_pfm: 2800.144942936468 sim_pfm: 95.96498387995719
episode: 432 training return: tensor(-181.0922, device='cuda:0')
episode: 433 training return: tensor(-163.1070, device='cuda:0')
episode: 434 training return: tensor(-275.1161, device='cuda:0')
episode: 435 training return: tensor(-332.3000, device='cuda:0')
epoch: 109 test_true_pfm: 2492.8516323760505 sim_pfm: 130.66941918772258
episode: 436 training return: tensor(-165.3647, device='cuda:0')
episode: 437 training return: tensor(181.0960, device='cuda:0')
episode: 438 training return: tensor(18.3780, device='cuda:0')
episode: 439 training return: tensor(-110.2102, device='cuda:0')
epoch: 110 test_true_pfm: 2193.1894290466685 sim_pfm: 20.713849223820336
episode: 440 training return: tensor(-395.8458, device='cuda:0')
episode: 441 training return: tensor(-304.1721, device='cuda:0')
episode: 442 training return: tensor(112.6015, device='cuda:0')
episode: 443 training return: tensor(-232.2729, device='cuda:0')
epoch: 111 test_true_pfm: 3254.549722397003 sim_pfm: 244.188467426536
episode: 444 training return: tensor(-377.9930, device='cuda:0')
episode: 445 training return: tensor(-167.6883, device='cuda:0')
episode: 446 training return: tensor(-108.1761, device='cuda:0')
episode: 447 training return: tensor(-253.6726, device='cuda:0')
epoch: 112 test_true_pfm: 2039.9316670353503 sim_pfm: -87.91822942338574
episode: 448 training return: tensor(122.1137, device='cuda:0')
episode: 449 training return: tensor(-130.2760, device='cuda:0')
episode: 450 training return: tensor(-60.6735, device='cuda:0')
episode: 451 training return: tensor(238.7774, device='cuda:0')
epoch: 113 test_true_pfm: 1630.0199705245302 sim_pfm: -132.2811085950816
episode: 452 training return: tensor(293.3323, device='cuda:0')
episode: 453 training return: tensor(-277.4930, device='cuda:0')
episode: 454 training return: tensor(-240.9388, device='cuda:0')
episode: 455 training return: tensor(-256.9821, device='cuda:0')
epoch: 114 test_true_pfm: 2303.1159711892387 sim_pfm: 155.1672181338945
episode: 456 training return: tensor(-183.3114, device='cuda:0')
episode: 457 training return: tensor(-321.9065, device='cuda:0')
episode: 458 training return: tensor(-336.0899, device='cuda:0')
episode: 459 training return: tensor(-270.2501, device='cuda:0')
epoch: 115 test_true_pfm: 2101.750512203545 sim_pfm: -216.56842014271146
episode: 460 training return: tensor(-320.7700, device='cuda:0')
episode: 461 training return: tensor(-326.6664, device='cuda:0')
episode: 462 training return: tensor(-234.3649, device='cuda:0')
episode: 463 training return: tensor(308.1382, device='cuda:0')
epoch: 116 test_true_pfm: 2001.9370965907444 sim_pfm: 28.752146639337298
episode: 464 training return: tensor(-246.9842, device='cuda:0')
episode: 465 training return: tensor(152.6867, device='cuda:0')
episode: 466 training return: tensor(-75.0391, device='cuda:0')
episode: 467 training return: tensor(-401.2687, device='cuda:0')
epoch: 117 test_true_pfm: 1840.696601644878 sim_pfm: -197.78497313077483
episode: 468 training return: tensor(-297.4151, device='cuda:0')
episode: 469 training return: tensor(245.0862, device='cuda:0')
episode: 470 training return: tensor(96.4552, device='cuda:0')
episode: 471 training return: tensor(-170.3058, device='cuda:0')
epoch: 118 test_true_pfm: 3062.712608045458 sim_pfm: -80.1479571495632
episode: 472 training return: tensor(-266.3721, device='cuda:0')
episode: 473 training return: tensor(-251.0098, device='cuda:0')
episode: 474 training return: tensor(59.8666, device='cuda:0')
episode: 475 training return: tensor(-81.9726, device='cuda:0')
epoch: 119 test_true_pfm: 1609.1435026392448 sim_pfm: 76.73994499716598
episode: 476 training return: tensor(18.1331, device='cuda:0')
episode: 477 training return: tensor(95.5957, device='cuda:0')
episode: 478 training return: tensor(-196.8956, device='cuda:0')
episode: 479 training return: tensor(-320.0737, device='cuda:0')
epoch: 120 test_true_pfm: 2679.078412288605 sim_pfm: 86.48845052868516
episode: 480 training return: tensor(-339.9945, device='cuda:0')
episode: 481 training return: tensor(113.3565, device='cuda:0')
episode: 482 training return: tensor(-296.5845, device='cuda:0')
episode: 483 training return: tensor(-71.3729, device='cuda:0')
epoch: 121 test_true_pfm: 2406.6973910665197 sim_pfm: -129.12187684412734
episode: 484 training return: tensor(-154.7246, device='cuda:0')
episode: 485 training return: tensor(-306.8982, device='cuda:0')
episode: 486 training return: tensor(-249.0121, device='cuda:0')
episode: 487 training return: tensor(-169.2971, device='cuda:0')
epoch: 122 test_true_pfm: 2954.2011731936122 sim_pfm: 222.59754357360848
episode: 488 training return: tensor(-346.3441, device='cuda:0')
episode: 489 training return: tensor(-319.6277, device='cuda:0')
episode: 490 training return: tensor(-242.6668, device='cuda:0')
episode: 491 training return: tensor(27.1727, device='cuda:0')
epoch: 123 test_true_pfm: 1867.1066363691414 sim_pfm: -219.2935623721763
episode: 492 training return: tensor(-150.3914, device='cuda:0')
episode: 493 training return: tensor(-238.4217, device='cuda:0')
episode: 494 training return: tensor(-330.6153, device='cuda:0')
episode: 495 training return: tensor(-227.5865, device='cuda:0')
epoch: 124 test_true_pfm: 1946.708561809538 sim_pfm: -157.20302572547612
episode: 496 training return: tensor(-71.5615, device='cuda:0')
episode: 497 training return: tensor(-104.8464, device='cuda:0')
episode: 498 training return: tensor(-70.6900, device='cuda:0')
episode: 499 training return: tensor(-184.7616, device='cuda:0')
epoch: 125 test_true_pfm: 1745.7785030902432 sim_pfm: 252.9245879019921
episode: 500 training return: tensor(-281.8705, device='cuda:0')
episode: 501 training return: tensor(-364.7802, device='cuda:0')
episode: 502 training return: tensor(-147.0499, device='cuda:0')
episode: 503 training return: tensor(-352.0710, device='cuda:0')
epoch: 126 test_true_pfm: 2743.9606742729393 sim_pfm: -52.80188305078385
episode: 504 training return: tensor(134.3314, device='cuda:0')
episode: 505 training return: tensor(-247.2886, device='cuda:0')
episode: 506 training return: tensor(-317.6969, device='cuda:0')
episode: 507 training return: tensor(148.0133, device='cuda:0')
epoch: 127 test_true_pfm: 3288.3006618481145 sim_pfm: 124.49525730340004
episode: 508 training return: tensor(-244.8769, device='cuda:0')
episode: 509 training return: tensor(-117.2584, device='cuda:0')
episode: 510 training return: tensor(-372.9995, device='cuda:0')
episode: 511 training return: tensor(-406.8603, device='cuda:0')
epoch: 128 test_true_pfm: 2786.743818718203 sim_pfm: 85.8198017255636
episode: 512 training return: tensor(-66.4521, device='cuda:0')
episode: 513 training return: tensor(-68.3286, device='cuda:0')
episode: 514 training return: tensor(10.7857, device='cuda:0')
episode: 515 training return: tensor(-95.9112, device='cuda:0')
epoch: 129 test_true_pfm: 2241.8796253214527 sim_pfm: -80.93287871781892
episode: 516 training return: tensor(-74.4603, device='cuda:0')
episode: 517 training return: tensor(-238.3605, device='cuda:0')
episode: 518 training return: tensor(-41.7512, device='cuda:0')
episode: 519 training return: tensor(-351.6148, device='cuda:0')
epoch: 130 test_true_pfm: 2205.7024871047674 sim_pfm: 97.8719435011056
episode: 520 training return: tensor(-417.3240, device='cuda:0')
episode: 521 training return: tensor(-250.5013, device='cuda:0')
episode: 522 training return: tensor(-250.2026, device='cuda:0')
episode: 523 training return: tensor(-387.0253, device='cuda:0')
epoch: 131 test_true_pfm: 2728.133975569461 sim_pfm: -90.13634107106675
episode: 524 training return: tensor(-63.3077, device='cuda:0')
episode: 525 training return: tensor(319.8608, device='cuda:0')
episode: 526 training return: tensor(-282.8513, device='cuda:0')
episode: 527 training return: tensor(-155.0427, device='cuda:0')
epoch: 132 test_true_pfm: 3246.5024777336803 sim_pfm: -25.352151846095996
episode: 528 training return: tensor(-275.5592, device='cuda:0')
episode: 529 training return: tensor(-426.4361, device='cuda:0')
episode: 530 training return: tensor(-333.7039, device='cuda:0')
episode: 531 training return: tensor(-125.7800, device='cuda:0')
epoch: 133 test_true_pfm: 1726.0721053146274 sim_pfm: -257.8059595093946
episode: 532 training return: tensor(-249.9811, device='cuda:0')
episode: 533 training return: tensor(286.9352, device='cuda:0')
episode: 534 training return: tensor(-334.1015, device='cuda:0')
episode: 535 training return: tensor(-238.7380, device='cuda:0')
epoch: 134 test_true_pfm: 2515.5037439772427 sim_pfm: -32.55776757312318
episode: 536 training return: tensor(-80.0823, device='cuda:0')
episode: 537 training return: tensor(-19.6805, device='cuda:0')
episode: 538 training return: tensor(-242.0360, device='cuda:0')
episode: 539 training return: tensor(-113.2089, device='cuda:0')
epoch: 135 test_true_pfm: 2553.922476617799 sim_pfm: -26.244702931891272
episode: 540 training return: tensor(-0.8376, device='cuda:0')
episode: 541 training return: tensor(-241.5182, device='cuda:0')
episode: 542 training return: tensor(-337.0424, device='cuda:0')
episode: 543 training return: tensor(-282.1175, device='cuda:0')
epoch: 136 test_true_pfm: 2784.3865215566416 sim_pfm: 6.230968899908476
episode: 544 training return: tensor(-345.6252, device='cuda:0')
episode: 545 training return: tensor(-160.6536, device='cuda:0')
episode: 546 training return: tensor(-76.7649, device='cuda:0')
episode: 547 training return: tensor(-424.8356, device='cuda:0')
epoch: 137 test_true_pfm: 2444.954580305213 sim_pfm: -130.4046312563199
episode: 548 training return: tensor(-68.5753, device='cuda:0')
episode: 549 training return: tensor(-179.6159, device='cuda:0')
episode: 550 training return: tensor(-221.3955, device='cuda:0')
episode: 551 training return: tensor(-153.6638, device='cuda:0')
epoch: 138 test_true_pfm: 2700.0947509528596 sim_pfm: 85.6309785748211
episode: 552 training return: tensor(309.2904, device='cuda:0')
episode: 553 training return: tensor(-224.6765, device='cuda:0')
episode: 554 training return: tensor(-29.5279, device='cuda:0')
episode: 555 training return: tensor(-102.1639, device='cuda:0')
epoch: 139 test_true_pfm: 2601.52366939262 sim_pfm: -54.39436285640113
episode: 556 training return: tensor(9.7921, device='cuda:0')
episode: 557 training return: tensor(32.0290, device='cuda:0')
episode: 558 training return: tensor(-13.3012, device='cuda:0')
episode: 559 training return: tensor(-89.8211, device='cuda:0')
epoch: 140 test_true_pfm: 1711.64384807468 sim_pfm: -284.2363617716862
episode: 560 training return: tensor(-254.9624, device='cuda:0')
episode: 561 training return: tensor(-285.8139, device='cuda:0')
episode: 562 training return: tensor(-189.9170, device='cuda:0')
episode: 563 training return: tensor(-300.9799, device='cuda:0')
epoch: 141 test_true_pfm: 2673.782783025216 sim_pfm: 149.3406574142864
episode: 564 training return: tensor(-160.4365, device='cuda:0')
episode: 565 training return: tensor(-425.0446, device='cuda:0')
episode: 566 training return: tensor(-202.6724, device='cuda:0')
episode: 567 training return: tensor(174.1440, device='cuda:0')
epoch: 142 test_true_pfm: 2720.545827372112 sim_pfm: -70.26039333451384
episode: 568 training return: tensor(-177.1327, device='cuda:0')
episode: 569 training return: tensor(28.4958, device='cuda:0')
episode: 570 training return: tensor(-440.6490, device='cuda:0')
episode: 571 training return: tensor(-166.6084, device='cuda:0')
epoch: 143 test_true_pfm: 3136.1879488534228 sim_pfm: 125.05553125186513
episode: 572 training return: tensor(-265.2452, device='cuda:0')
episode: 573 training return: tensor(-256.4181, device='cuda:0')
episode: 574 training return: tensor(-305.1385, device='cuda:0')
episode: 575 training return: tensor(-64.5786, device='cuda:0')
epoch: 144 test_true_pfm: 3025.9926058779492 sim_pfm: 88.57169433066156
episode: 576 training return: tensor(-408.3376, device='cuda:0')
episode: 577 training return: tensor(-234.7926, device='cuda:0')
episode: 578 training return: tensor(-35.8758, device='cuda:0')
episode: 579 training return: tensor(-294.6164, device='cuda:0')
epoch: 145 test_true_pfm: 3210.2503592593353 sim_pfm: 226.1717991453479
episode: 580 training return: tensor(282.4070, device='cuda:0')
episode: 581 training return: tensor(298.3728, device='cuda:0')
episode: 582 training return: tensor(-317.6765, device='cuda:0')
episode: 583 training return: tensor(-305.0348, device='cuda:0')
epoch: 146 test_true_pfm: 2393.399172621713 sim_pfm: -48.12070047416879
episode: 584 training return: tensor(-352.8818, device='cuda:0')
episode: 585 training return: tensor(-377.7779, device='cuda:0')
episode: 586 training return: tensor(-186.9859, device='cuda:0')
episode: 587 training return: tensor(-67.7929, device='cuda:0')
epoch: 147 test_true_pfm: 1850.7941137970122 sim_pfm: -90.39530983272319
episode: 588 training return: tensor(10.6595, device='cuda:0')
episode: 589 training return: tensor(-328.3598, device='cuda:0')
episode: 590 training return: tensor(-64.4047, device='cuda:0')
episode: 591 training return: tensor(-36.7870, device='cuda:0')
epoch: 148 test_true_pfm: 3359.677974881875 sim_pfm: -73.73754004662624
episode: 592 training return: tensor(-310.9684, device='cuda:0')
episode: 593 training return: tensor(-258.0568, device='cuda:0')
episode: 594 training return: tensor(-77.7542, device='cuda:0')
episode: 595 training return: tensor(-232.0874, device='cuda:0')
epoch: 149 test_true_pfm: 1625.9865344357374 sim_pfm: 17.37674417136198
episode: 596 training return: tensor(-222.8323, device='cuda:0')
episode: 597 training return: tensor(-148.5844, device='cuda:0')
episode: 598 training return: tensor(25.9272, device='cuda:0')
episode: 599 training return: tensor(-41.4490, device='cuda:0')
epoch: 150 test_true_pfm: 2679.979478936213 sim_pfm: 10.78168106956097
