['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '1', '--data', '100000', '--regu', '0.3']
2645.738826569662
episode: 0 training return: tensor(-59.6702, device='cuda:0')
episode: 1 training return: tensor(-448.4433, device='cuda:0')
episode: 2 training return: tensor(-74.3833, device='cuda:0')
episode: 3 training return: tensor(-226.9105, device='cuda:0')
epoch: 1 test_true_pfm: 2395.54956830812 sim_pfm: 15.474636785045732
episode: 4 training return: tensor(-92.3380, device='cuda:0')
episode: 5 training return: tensor(-384.2070, device='cuda:0')
episode: 6 training return: tensor(-265.8748, device='cuda:0')
episode: 7 training return: tensor(-199.9763, device='cuda:0')
epoch: 2 test_true_pfm: 1934.1686886097257 sim_pfm: -262.4594054647605
episode: 8 training return: tensor(-425.5541, device='cuda:0')
episode: 9 training return: tensor(-296.4851, device='cuda:0')
episode: 10 training return: tensor(-534.1990, device='cuda:0')
episode: 11 training return: tensor(-355.4546, device='cuda:0')
epoch: 3 test_true_pfm: 1572.25060764311 sim_pfm: -409.6394760743715
episode: 12 training return: tensor(-445.5292, device='cuda:0')
episode: 13 training return: tensor(-516.1117, device='cuda:0')
episode: 14 training return: tensor(-475.9177, device='cuda:0')
episode: 15 training return: tensor(-562.5851, device='cuda:0')
epoch: 4 test_true_pfm: 953.6313798312067 sim_pfm: -513.8091841098698
episode: 16 training return: tensor(-449.5822, device='cuda:0')
episode: 17 training return: tensor(-503.2388, device='cuda:0')
episode: 18 training return: tensor(-554.6864, device='cuda:0')
episode: 19 training return: tensor(-556.5216, device='cuda:0')
epoch: 5 test_true_pfm: 936.3766756706781 sim_pfm: -534.132912306115
episode: 20 training return: tensor(-520.6586, device='cuda:0')
episode: 21 training return: tensor(-570.5340, device='cuda:0')
episode: 22 training return: tensor(-481.7722, device='cuda:0')
episode: 23 training return: tensor(-448.9555, device='cuda:0')
epoch: 6 test_true_pfm: 1904.1665038720391 sim_pfm: -348.56628967354
episode: 24 training return: tensor(-270.0601, device='cuda:0')
episode: 25 training return: tensor(-528.0942, device='cuda:0')
episode: 26 training return: tensor(-432.6152, device='cuda:0')
episode: 27 training return: tensor(-548.8090, device='cuda:0')
epoch: 7 test_true_pfm: 1373.6293719982707 sim_pfm: -366.22035063500516
episode: 28 training return: tensor(-380.8469, device='cuda:0')
episode: 29 training return: tensor(-385.9151, device='cuda:0')
episode: 30 training return: tensor(-528.5717, device='cuda:0')
episode: 31 training return: tensor(-506.0396, device='cuda:0')
epoch: 8 test_true_pfm: 1395.0054455319307 sim_pfm: -457.88778962249245
episode: 32 training return: tensor(-400.8138, device='cuda:0')
episode: 33 training return: tensor(-482.4346, device='cuda:0')
episode: 34 training return: tensor(-507.2538, device='cuda:0')
episode: 35 training return: tensor(-356.7672, device='cuda:0')
epoch: 9 test_true_pfm: 1154.0410676758822 sim_pfm: -453.39203563538223
episode: 36 training return: tensor(-476.5048, device='cuda:0')
episode: 37 training return: tensor(-390.6401, device='cuda:0')
episode: 38 training return: tensor(-284.1765, device='cuda:0')
episode: 39 training return: tensor(-236.0491, device='cuda:0')
epoch: 10 test_true_pfm: 1753.1902599627902 sim_pfm: -271.3592419098907
episode: 40 training return: tensor(-464.9616, device='cuda:0')
episode: 41 training return: tensor(-455.1268, device='cuda:0')
episode: 42 training return: tensor(-471.0476, device='cuda:0')
episode: 43 training return: tensor(86.8544, device='cuda:0')
epoch: 11 test_true_pfm: 2328.356193182424 sim_pfm: -124.74797680502525
episode: 44 training return: tensor(-77.8989, device='cuda:0')
episode: 45 training return: tensor(-268.0422, device='cuda:0')
episode: 46 training return: tensor(-427.2669, device='cuda:0')
episode: 47 training return: tensor(-419.2950, device='cuda:0')
epoch: 12 test_true_pfm: 1525.8795668123203 sim_pfm: -257.2922259448096
episode: 48 training return: tensor(-70.1937, device='cuda:0')
episode: 49 training return: tensor(-359.9922, device='cuda:0')
episode: 50 training return: tensor(-314.1826, device='cuda:0')
episode: 51 training return: tensor(-221.5527, device='cuda:0')
epoch: 13 test_true_pfm: 2089.505484705284 sim_pfm: -69.98901047536249
episode: 52 training return: tensor(-255.1225, device='cuda:0')
episode: 53 training return: tensor(-361.2837, device='cuda:0')
episode: 54 training return: tensor(-346.0394, device='cuda:0')
episode: 55 training return: tensor(-383.0668, device='cuda:0')
epoch: 14 test_true_pfm: 1381.2600074354805 sim_pfm: -294.491739965335
episode: 56 training return: tensor(-389.4934, device='cuda:0')
episode: 57 training return: tensor(-373.3436, device='cuda:0')
episode: 58 training return: tensor(205.6660, device='cuda:0')
episode: 59 training return: tensor(-158.8536, device='cuda:0')
epoch: 15 test_true_pfm: 2163.923478218668 sim_pfm: -329.7511272188276
episode: 60 training return: tensor(-392.8211, device='cuda:0')
episode: 61 training return: tensor(-11.4308, device='cuda:0')
episode: 62 training return: tensor(-426.6099, device='cuda:0')
episode: 63 training return: tensor(286.4211, device='cuda:0')
epoch: 16 test_true_pfm: 2938.746502979319 sim_pfm: 35.82757811774112
episode: 64 training return: tensor(-247.8849, device='cuda:0')
episode: 65 training return: tensor(-424.0361, device='cuda:0')
episode: 66 training return: tensor(123.8612, device='cuda:0')
episode: 67 training return: tensor(-241.8644, device='cuda:0')
epoch: 17 test_true_pfm: 2754.856054559584 sim_pfm: 130.31515222981884
episode: 68 training return: tensor(-167.9809, device='cuda:0')
episode: 69 training return: tensor(-341.2737, device='cuda:0')
episode: 70 training return: tensor(-94.2361, device='cuda:0')
episode: 71 training return: tensor(-190.8659, device='cuda:0')
epoch: 18 test_true_pfm: 2933.4280267410213 sim_pfm: 83.9895778100472
episode: 72 training return: tensor(-133.1853, device='cuda:0')
episode: 73 training return: tensor(-376.6286, device='cuda:0')
episode: 74 training return: tensor(-0.9826, device='cuda:0')
episode: 75 training return: tensor(-455.5215, device='cuda:0')
epoch: 19 test_true_pfm: 2478.98590160527 sim_pfm: -80.61299937249471
episode: 76 training return: tensor(-6.3598, device='cuda:0')
episode: 77 training return: tensor(-362.4890, device='cuda:0')
episode: 78 training return: tensor(-223.5010, device='cuda:0')
episode: 79 training return: tensor(1.1657, device='cuda:0')
epoch: 20 test_true_pfm: 2450.475778247133 sim_pfm: 118.54743616694275
episode: 80 training return: tensor(-233.1441, device='cuda:0')
episode: 81 training return: tensor(-393.5538, device='cuda:0')
episode: 82 training return: tensor(-118.4092, device='cuda:0')
episode: 83 training return: tensor(-18.6713, device='cuda:0')
epoch: 21 test_true_pfm: 2540.287081558988 sim_pfm: 157.4988930252924
episode: 84 training return: tensor(248.9902, device='cuda:0')
episode: 85 training return: tensor(59.2575, device='cuda:0')
episode: 86 training return: tensor(-235.5441, device='cuda:0')
episode: 87 training return: tensor(-373.5885, device='cuda:0')
epoch: 22 test_true_pfm: 2805.3568862264724 sim_pfm: 166.2317344097731
episode: 88 training return: tensor(-461.5686, device='cuda:0')
episode: 89 training return: tensor(262.1987, device='cuda:0')
episode: 90 training return: tensor(-103.1638, device='cuda:0')
episode: 91 training return: tensor(-286.0425, device='cuda:0')
epoch: 23 test_true_pfm: 2405.2297383688674 sim_pfm: 100.45274836660246
episode: 92 training return: tensor(-43.7466, device='cuda:0')
episode: 93 training return: tensor(-90.8052, device='cuda:0')
episode: 94 training return: tensor(-267.9340, device='cuda:0')
episode: 95 training return: tensor(-373.8160, device='cuda:0')
epoch: 24 test_true_pfm: 2797.3123766963176 sim_pfm: 58.684874793437
episode: 96 training return: tensor(-397.3175, device='cuda:0')
episode: 97 training return: tensor(-221.7960, device='cuda:0')
episode: 98 training return: tensor(-220.2096, device='cuda:0')
episode: 99 training return: tensor(-31.3228, device='cuda:0')
epoch: 25 test_true_pfm: 2745.8515038181376 sim_pfm: -177.91022156272084
episode: 100 training return: tensor(-367.0016, device='cuda:0')
episode: 101 training return: tensor(-122.1023, device='cuda:0')
episode: 102 training return: tensor(-392.2905, device='cuda:0')
episode: 103 training return: tensor(26.2889, device='cuda:0')
epoch: 26 test_true_pfm: 1735.4201873529407 sim_pfm: -91.8138222748239
episode: 104 training return: tensor(-328.5081, device='cuda:0')
episode: 105 training return: tensor(-239.5620, device='cuda:0')
episode: 106 training return: tensor(-246.0216, device='cuda:0')
episode: 107 training return: tensor(-261.7614, device='cuda:0')
epoch: 27 test_true_pfm: 1322.8079762925715 sim_pfm: -306.28967205481604
episode: 108 training return: tensor(-280.3273, device='cuda:0')
episode: 109 training return: tensor(-279.0282, device='cuda:0')
episode: 110 training return: tensor(-405.9592, device='cuda:0')
episode: 111 training return: tensor(-475.9674, device='cuda:0')
epoch: 28 test_true_pfm: 2102.4440552771616 sim_pfm: -79.50286188975831
episode: 112 training return: tensor(-417.9244, device='cuda:0')
episode: 113 training return: tensor(-260.9222, device='cuda:0')
episode: 114 training return: tensor(-293.1209, device='cuda:0')
episode: 115 training return: tensor(-268.3675, device='cuda:0')
epoch: 29 test_true_pfm: 1632.2648571571306 sim_pfm: -242.90352311668298
episode: 116 training return: tensor(-105.4085, device='cuda:0')
episode: 117 training return: tensor(-403.9118, device='cuda:0')
episode: 118 training return: tensor(-283.6282, device='cuda:0')
episode: 119 training return: tensor(-268.3315, device='cuda:0')
epoch: 30 test_true_pfm: 1948.4204626538012 sim_pfm: 6.14380075490529
episode: 120 training return: tensor(-266.5559, device='cuda:0')
episode: 121 training return: tensor(-277.9029, device='cuda:0')
episode: 122 training return: tensor(-175.9617, device='cuda:0')
episode: 123 training return: tensor(-430.6443, device='cuda:0')
epoch: 31 test_true_pfm: 2527.1331996811896 sim_pfm: 108.80734096614954
episode: 124 training return: tensor(-308.5443, device='cuda:0')
episode: 125 training return: tensor(-237.0995, device='cuda:0')
episode: 126 training return: tensor(130.9930, device='cuda:0')
episode: 127 training return: tensor(-270.6261, device='cuda:0')
epoch: 32 test_true_pfm: 1286.5902056136943 sim_pfm: -415.0541286908653
episode: 128 training return: tensor(-30.0838, device='cuda:0')
episode: 129 training return: tensor(-100.6208, device='cuda:0')
episode: 130 training return: tensor(-251.9434, device='cuda:0')
episode: 131 training return: tensor(-308.2369, device='cuda:0')
epoch: 33 test_true_pfm: 1339.10669267695 sim_pfm: -371.53774435422383
episode: 132 training return: tensor(-204.0809, device='cuda:0')
episode: 133 training return: tensor(-27.7058, device='cuda:0')
episode: 134 training return: tensor(-311.2097, device='cuda:0')
episode: 135 training return: tensor(248.7746, device='cuda:0')
epoch: 34 test_true_pfm: 1318.5149047308353 sim_pfm: -418.1446308633701
episode: 136 training return: tensor(-305.0646, device='cuda:0')
episode: 137 training return: tensor(231.4694, device='cuda:0')
episode: 138 training return: tensor(-332.1375, device='cuda:0')
episode: 139 training return: tensor(-365.7426, device='cuda:0')
epoch: 35 test_true_pfm: 1301.4047425072542 sim_pfm: -386.66804481724586
episode: 140 training return: tensor(-175.5448, device='cuda:0')
episode: 141 training return: tensor(-50.7540, device='cuda:0')
episode: 142 training return: tensor(-401.8712, device='cuda:0')
episode: 143 training return: tensor(-376.4424, device='cuda:0')
epoch: 36 test_true_pfm: 2097.9535910774225 sim_pfm: -182.71317628681814
episode: 144 training return: tensor(-135.3532, device='cuda:0')
episode: 145 training return: tensor(-300.5644, device='cuda:0')
episode: 146 training return: tensor(-292.6643, device='cuda:0')
episode: 147 training return: tensor(-76.1890, device='cuda:0')
epoch: 37 test_true_pfm: 1530.6915147713905 sim_pfm: -283.0078154810083
episode: 148 training return: tensor(-253.6668, device='cuda:0')
episode: 149 training return: tensor(-400.9001, device='cuda:0')
episode: 150 training return: tensor(-264.5630, device='cuda:0')
episode: 151 training return: tensor(-431.0165, device='cuda:0')
epoch: 38 test_true_pfm: 1289.6852923039457 sim_pfm: -407.04538615760003
episode: 152 training return: tensor(-224.9240, device='cuda:0')
episode: 153 training return: tensor(-192.1663, device='cuda:0')
episode: 154 training return: tensor(-460.7049, device='cuda:0')
episode: 155 training return: tensor(-223.9170, device='cuda:0')
epoch: 39 test_true_pfm: 1416.8997395713786 sim_pfm: -300.8810151745177
episode: 156 training return: tensor(128.3963, device='cuda:0')
episode: 157 training return: tensor(-329.4102, device='cuda:0')
episode: 158 training return: tensor(-318.0642, device='cuda:0')
episode: 159 training return: tensor(-405.5681, device='cuda:0')
epoch: 40 test_true_pfm: 1605.5483990548662 sim_pfm: -276.0041563703853
episode: 160 training return: tensor(-107.4648, device='cuda:0')
episode: 161 training return: tensor(-413.9335, device='cuda:0')
episode: 162 training return: tensor(-475.4534, device='cuda:0')
episode: 163 training return: tensor(-500.2922, device='cuda:0')
epoch: 41 test_true_pfm: 1301.740256524983 sim_pfm: -359.6124937419081
episode: 164 training return: tensor(-177.0031, device='cuda:0')
episode: 165 training return: tensor(-266.5052, device='cuda:0')
episode: 166 training return: tensor(-256.1151, device='cuda:0')
episode: 167 training return: tensor(-338.0753, device='cuda:0')
epoch: 42 test_true_pfm: 1763.8774868200417 sim_pfm: 60.01309117379909
episode: 168 training return: tensor(-324.3799, device='cuda:0')
episode: 169 training return: tensor(-389.1625, device='cuda:0')
episode: 170 training return: tensor(-537.6151, device='cuda:0')
episode: 171 training return: tensor(-456.0240, device='cuda:0')
epoch: 43 test_true_pfm: 1393.5266398573297 sim_pfm: -377.1189233060674
episode: 172 training return: tensor(-375.9907, device='cuda:0')
episode: 173 training return: tensor(-269.3444, device='cuda:0')
episode: 174 training return: tensor(-450.9539, device='cuda:0')
episode: 175 training return: tensor(-230.1751, device='cuda:0')
epoch: 44 test_true_pfm: 1547.4038540632125 sim_pfm: -35.10499535012059
episode: 176 training return: tensor(-211.0401, device='cuda:0')
episode: 177 training return: tensor(-110.6354, device='cuda:0')
episode: 178 training return: tensor(-450.0229, device='cuda:0')
episode: 179 training return: tensor(-344.4117, device='cuda:0')
epoch: 45 test_true_pfm: 2432.6069360883043 sim_pfm: -145.05101763086472
episode: 180 training return: tensor(-436.0294, device='cuda:0')
episode: 181 training return: tensor(-424.5599, device='cuda:0')
episode: 182 training return: tensor(-457.3235, device='cuda:0')
episode: 183 training return: tensor(-276.9791, device='cuda:0')
epoch: 46 test_true_pfm: 1366.5766717505292 sim_pfm: -356.71378140788875
episode: 184 training return: tensor(-261.6861, device='cuda:0')
episode: 185 training return: tensor(-167.8831, device='cuda:0')
episode: 186 training return: tensor(-446.1152, device='cuda:0')
episode: 187 training return: tensor(-188.8003, device='cuda:0')
epoch: 47 test_true_pfm: 1892.132202325108 sim_pfm: -288.40631858985097
episode: 188 training return: tensor(-277.1309, device='cuda:0')
episode: 189 training return: tensor(-109.7920, device='cuda:0')
episode: 190 training return: tensor(-402.8987, device='cuda:0')
episode: 191 training return: tensor(-310.4358, device='cuda:0')
epoch: 48 test_true_pfm: 2360.94971542136 sim_pfm: -115.10562167111009
episode: 192 training return: tensor(-230.4309, device='cuda:0')
episode: 193 training return: tensor(-330.3509, device='cuda:0')
episode: 194 training return: tensor(-436.5410, device='cuda:0')
episode: 195 training return: tensor(-385.6996, device='cuda:0')
epoch: 49 test_true_pfm: 1767.4300586286042 sim_pfm: 82.81464550617966
episode: 196 training return: tensor(-266.6927, device='cuda:0')
episode: 197 training return: tensor(-244.8752, device='cuda:0')
episode: 198 training return: tensor(-278.4634, device='cuda:0')
episode: 199 training return: tensor(-461.1983, device='cuda:0')
epoch: 50 test_true_pfm: 1400.0879422939415 sim_pfm: -384.74371197388973
episode: 200 training return: tensor(-308.5195, device='cuda:0')
episode: 201 training return: tensor(-354.8715, device='cuda:0')
episode: 202 training return: tensor(-271.4411, device='cuda:0')
episode: 203 training return: tensor(-387.9619, device='cuda:0')
epoch: 51 test_true_pfm: 1938.3329542736549 sim_pfm: -230.8013912992319
episode: 204 training return: tensor(-242.5105, device='cuda:0')
episode: 205 training return: tensor(-255.5253, device='cuda:0')
episode: 206 training return: tensor(-272.7698, device='cuda:0')
episode: 207 training return: tensor(-268.3973, device='cuda:0')
epoch: 52 test_true_pfm: 2076.705536893351 sim_pfm: -51.788745211194815
episode: 208 training return: tensor(-453.2554, device='cuda:0')
episode: 209 training return: tensor(-299.2573, device='cuda:0')
episode: 210 training return: tensor(-335.7952, device='cuda:0')
episode: 211 training return: tensor(-272.9527, device='cuda:0')
epoch: 53 test_true_pfm: 2008.4955368053697 sim_pfm: -119.53508836589754
episode: 212 training return: tensor(-409.5918, device='cuda:0')
episode: 213 training return: tensor(-458.7937, device='cuda:0')
episode: 214 training return: tensor(-457.0507, device='cuda:0')
episode: 215 training return: tensor(-349.5759, device='cuda:0')
epoch: 54 test_true_pfm: 1486.716746917679 sim_pfm: -290.9893131250865
episode: 216 training return: tensor(-283.9841, device='cuda:0')
episode: 217 training return: tensor(-384.5500, device='cuda:0')
episode: 218 training return: tensor(-184.6027, device='cuda:0')
episode: 219 training return: tensor(-445.8759, device='cuda:0')
epoch: 55 test_true_pfm: 1882.7800678352996 sim_pfm: 176.8879246118886
episode: 220 training return: tensor(-406.7957, device='cuda:0')
episode: 221 training return: tensor(-388.1366, device='cuda:0')
episode: 222 training return: tensor(-382.5228, device='cuda:0')
episode: 223 training return: tensor(-256.8665, device='cuda:0')
epoch: 56 test_true_pfm: 2596.468337337705 sim_pfm: 174.72062984277727
episode: 224 training return: tensor(-164.8315, device='cuda:0')
episode: 225 training return: tensor(-374.8270, device='cuda:0')
episode: 226 training return: tensor(-273.9070, device='cuda:0')
episode: 227 training return: tensor(-290.9292, device='cuda:0')
epoch: 57 test_true_pfm: 2092.927725063078 sim_pfm: 105.79405084180569
episode: 228 training return: tensor(-312.6391, device='cuda:0')
episode: 229 training return: tensor(-447.8197, device='cuda:0')
episode: 230 training return: tensor(-254.4591, device='cuda:0')
episode: 231 training return: tensor(-258.4741, device='cuda:0')
epoch: 58 test_true_pfm: 2192.6220470535045 sim_pfm: -39.78140202839859
episode: 232 training return: tensor(-134.6938, device='cuda:0')
episode: 233 training return: tensor(-445.9148, device='cuda:0')
episode: 234 training return: tensor(-55.8931, device='cuda:0')
episode: 235 training return: tensor(-492.2516, device='cuda:0')
epoch: 59 test_true_pfm: 1745.9323943523484 sim_pfm: -226.99649772189636
episode: 236 training return: tensor(-251.8859, device='cuda:0')
episode: 237 training return: tensor(-178.8029, device='cuda:0')
episode: 238 training return: tensor(-26.4241, device='cuda:0')
episode: 239 training return: tensor(-290.9834, device='cuda:0')
epoch: 60 test_true_pfm: 3248.948780596094 sim_pfm: 79.94892063637963
episode: 240 training return: tensor(-420.2856, device='cuda:0')
episode: 241 training return: tensor(-184.2710, device='cuda:0')
episode: 242 training return: tensor(-475.3094, device='cuda:0')
episode: 243 training return: tensor(-390.2205, device='cuda:0')
epoch: 61 test_true_pfm: 1779.9966369624642 sim_pfm: -326.22095977677964
episode: 244 training return: tensor(-512.2122, device='cuda:0')
episode: 245 training return: tensor(-280.9750, device='cuda:0')
episode: 246 training return: tensor(-318.8651, device='cuda:0')
episode: 247 training return: tensor(-383.2575, device='cuda:0')
epoch: 62 test_true_pfm: 1575.3527293817924 sim_pfm: -318.5240712679806
episode: 248 training return: tensor(-356.7155, device='cuda:0')
episode: 249 training return: tensor(-216.2166, device='cuda:0')
episode: 250 training return: tensor(-270.5504, device='cuda:0')
episode: 251 training return: tensor(-232.6071, device='cuda:0')
epoch: 63 test_true_pfm: 2778.823169985171 sim_pfm: -9.546639536099974
episode: 252 training return: tensor(-276.4136, device='cuda:0')
episode: 253 training return: tensor(-327.6330, device='cuda:0')
episode: 254 training return: tensor(-93.9323, device='cuda:0')
episode: 255 training return: tensor(313.9238, device='cuda:0')
epoch: 64 test_true_pfm: 2280.5855030986745 sim_pfm: -167.37935880490113
episode: 256 training return: tensor(-130.1319, device='cuda:0')
episode: 257 training return: tensor(-433.6822, device='cuda:0')
episode: 258 training return: tensor(-391.7487, device='cuda:0')
episode: 259 training return: tensor(-354.2195, device='cuda:0')
epoch: 65 test_true_pfm: 1947.31908357512 sim_pfm: -166.1064447957324
episode: 260 training return: tensor(-465.8624, device='cuda:0')
episode: 261 training return: tensor(-383.5096, device='cuda:0')
episode: 262 training return: tensor(-186.0698, device='cuda:0')
episode: 263 training return: tensor(-445.9201, device='cuda:0')
epoch: 66 test_true_pfm: 2067.8515749335947 sim_pfm: 229.02803927646406
episode: 264 training return: tensor(-430.5555, device='cuda:0')
episode: 265 training return: tensor(-145.9537, device='cuda:0')
episode: 266 training return: tensor(-450.7926, device='cuda:0')
episode: 267 training return: tensor(-292.6259, device='cuda:0')
epoch: 67 test_true_pfm: 2083.9170646033367 sim_pfm: -18.223982515502332
episode: 268 training return: tensor(-328.3668, device='cuda:0')
episode: 269 training return: tensor(-39.7926, device='cuda:0')
episode: 270 training return: tensor(-147.3134, device='cuda:0')
episode: 271 training return: tensor(-371.5061, device='cuda:0')
epoch: 68 test_true_pfm: 2016.7181482031508 sim_pfm: 21.1727324447517
episode: 272 training return: tensor(-286.5461, device='cuda:0')
episode: 273 training return: tensor(-455.4383, device='cuda:0')
episode: 274 training return: tensor(-449.3359, device='cuda:0')
episode: 275 training return: tensor(-246.1859, device='cuda:0')
epoch: 69 test_true_pfm: 1557.8948105055988 sim_pfm: -303.1290931483575
episode: 276 training return: tensor(-401.3691, device='cuda:0')
episode: 277 training return: tensor(-181.2912, device='cuda:0')
episode: 278 training return: tensor(-280.6962, device='cuda:0')
episode: 279 training return: tensor(-358.4699, device='cuda:0')
epoch: 70 test_true_pfm: 3288.397160883049 sim_pfm: -26.200194361328613
episode: 280 training return: tensor(-236.7574, device='cuda:0')
episode: 281 training return: tensor(-146.6811, device='cuda:0')
episode: 282 training return: tensor(-275.4087, device='cuda:0')
episode: 283 training return: tensor(-80.2051, device='cuda:0')
epoch: 71 test_true_pfm: 2459.785829525065 sim_pfm: 213.38725522865812
episode: 284 training return: tensor(-264.1594, device='cuda:0')
episode: 285 training return: tensor(-508.6839, device='cuda:0')
episode: 286 training return: tensor(-205.0781, device='cuda:0')
episode: 287 training return: tensor(-251.9099, device='cuda:0')
epoch: 72 test_true_pfm: 2304.393067510881 sim_pfm: 143.03884610601622
episode: 288 training return: tensor(-210.9777, device='cuda:0')
episode: 289 training return: tensor(-426.4669, device='cuda:0')
episode: 290 training return: tensor(-234.2408, device='cuda:0')
episode: 291 training return: tensor(-441.3620, device='cuda:0')
epoch: 73 test_true_pfm: 2155.7781453407333 sim_pfm: -46.95038491111094
episode: 292 training return: tensor(-406.9824, device='cuda:0')
episode: 293 training return: tensor(-302.0605, device='cuda:0')
episode: 294 training return: tensor(-285.5373, device='cuda:0')
episode: 295 training return: tensor(-262.7683, device='cuda:0')
epoch: 74 test_true_pfm: 2513.513219038816 sim_pfm: -134.30044999009502
episode: 296 training return: tensor(-252.1572, device='cuda:0')
episode: 297 training return: tensor(-297.6902, device='cuda:0')
episode: 298 training return: tensor(-444.0724, device='cuda:0')
episode: 299 training return: tensor(-458.0183, device='cuda:0')
epoch: 75 test_true_pfm: 1986.4451290790269 sim_pfm: -102.09159232998111
episode: 300 training return: tensor(-354.7225, device='cuda:0')
episode: 301 training return: tensor(-439.0767, device='cuda:0')
episode: 302 training return: tensor(-507.8397, device='cuda:0')
episode: 303 training return: tensor(-392.7433, device='cuda:0')
epoch: 76 test_true_pfm: 3123.5515080084915 sim_pfm: 131.09163189283572
episode: 304 training return: tensor(-399.5866, device='cuda:0')
episode: 305 training return: tensor(-421.8740, device='cuda:0')
episode: 306 training return: tensor(-244.4715, device='cuda:0')
episode: 307 training return: tensor(-89.1305, device='cuda:0')
epoch: 77 test_true_pfm: 3015.9536818271185 sim_pfm: 130.78684541139714
episode: 308 training return: tensor(-253.8047, device='cuda:0')
episode: 309 training return: tensor(-431.9929, device='cuda:0')
episode: 310 training return: tensor(-377.0695, device='cuda:0')
episode: 311 training return: tensor(-352.9219, device='cuda:0')
epoch: 78 test_true_pfm: 2542.7851632865727 sim_pfm: 147.78592726186616
episode: 312 training return: tensor(-378.1490, device='cuda:0')
episode: 313 training return: tensor(200.7129, device='cuda:0')
episode: 314 training return: tensor(-256.3151, device='cuda:0')
episode: 315 training return: tensor(-407.2323, device='cuda:0')
epoch: 79 test_true_pfm: 1672.3655751632352 sim_pfm: -314.0703731754232
episode: 316 training return: tensor(-191.2741, device='cuda:0')
episode: 317 training return: tensor(-127.6418, device='cuda:0')
episode: 318 training return: tensor(-208.9285, device='cuda:0')
episode: 319 training return: tensor(-404.5375, device='cuda:0')
epoch: 80 test_true_pfm: 2600.3476782968487 sim_pfm: 49.491247936850414
episode: 320 training return: tensor(-298.0464, device='cuda:0')
episode: 321 training return: tensor(-309.3314, device='cuda:0')
episode: 322 training return: tensor(-338.0812, device='cuda:0')
episode: 323 training return: tensor(-254.7195, device='cuda:0')
epoch: 81 test_true_pfm: 1599.3281773155843 sim_pfm: -234.77928906970192
episode: 324 training return: tensor(-166.0636, device='cuda:0')
episode: 325 training return: tensor(-131.9666, device='cuda:0')
episode: 326 training return: tensor(-370.4308, device='cuda:0')
episode: 327 training return: tensor(-284.0789, device='cuda:0')
epoch: 82 test_true_pfm: 1941.3370663284015 sim_pfm: -186.00999753247015
episode: 328 training return: tensor(-355.8392, device='cuda:0')
episode: 329 training return: tensor(-257.1797, device='cuda:0')
episode: 330 training return: tensor(74.8158, device='cuda:0')
episode: 331 training return: tensor(-402.4039, device='cuda:0')
epoch: 83 test_true_pfm: 2099.1856375722277 sim_pfm: -296.5075836777299
episode: 332 training return: tensor(40.5695, device='cuda:0')
episode: 333 training return: tensor(-351.0308, device='cuda:0')
episode: 334 training return: tensor(-457.2736, device='cuda:0')
episode: 335 training return: tensor(-261.6750, device='cuda:0')
epoch: 84 test_true_pfm: 2849.673135956094 sim_pfm: 4.762141306612951
episode: 336 training return: tensor(-443.7814, device='cuda:0')
episode: 337 training return: tensor(-411.6002, device='cuda:0')
episode: 338 training return: tensor(-290.1272, device='cuda:0')
episode: 339 training return: tensor(-455.7241, device='cuda:0')
epoch: 85 test_true_pfm: 1810.4331176613166 sim_pfm: -52.82225942207151
episode: 340 training return: tensor(-373.8604, device='cuda:0')
episode: 341 training return: tensor(-98.7612, device='cuda:0')
episode: 342 training return: tensor(-256.3175, device='cuda:0')
episode: 343 training return: tensor(-474.0785, device='cuda:0')
epoch: 86 test_true_pfm: 2868.8139727406747 sim_pfm: 170.95935269639207
episode: 344 training return: tensor(-263.5982, device='cuda:0')
episode: 345 training return: tensor(-443.7856, device='cuda:0')
episode: 346 training return: tensor(-75.1658, device='cuda:0')
episode: 347 training return: tensor(-474.2827, device='cuda:0')
epoch: 87 test_true_pfm: 2685.471126181518 sim_pfm: -210.56895688393465
episode: 348 training return: tensor(-335.8880, device='cuda:0')
episode: 349 training return: tensor(-250.0193, device='cuda:0')
episode: 350 training return: tensor(-304.4590, device='cuda:0')
episode: 351 training return: tensor(-441.7126, device='cuda:0')
epoch: 88 test_true_pfm: 2774.495508766769 sim_pfm: 287.52606458160636
episode: 352 training return: tensor(-256.4265, device='cuda:0')
episode: 353 training return: tensor(-451.1106, device='cuda:0')
episode: 354 training return: tensor(-373.6089, device='cuda:0')
episode: 355 training return: tensor(-437.6329, device='cuda:0')
epoch: 89 test_true_pfm: 1657.8275885802323 sim_pfm: 15.77237317780964
episode: 356 training return: tensor(15.0862, device='cuda:0')
episode: 357 training return: tensor(-366.9614, device='cuda:0')
episode: 358 training return: tensor(-453.3327, device='cuda:0')
episode: 359 training return: tensor(-442.6882, device='cuda:0')
epoch: 90 test_true_pfm: 1797.8251787092006 sim_pfm: -218.89574661895554
episode: 360 training return: tensor(-191.0352, device='cuda:0')
episode: 361 training return: tensor(-363.7255, device='cuda:0')
episode: 362 training return: tensor(-274.7213, device='cuda:0')
episode: 363 training return: tensor(-196.7043, device='cuda:0')
epoch: 91 test_true_pfm: 1744.0631712557013 sim_pfm: -266.5185632433374
episode: 364 training return: tensor(-273.9561, device='cuda:0')
episode: 365 training return: tensor(-132.0251, device='cuda:0')
episode: 366 training return: tensor(-334.5862, device='cuda:0')
episode: 367 training return: tensor(-387.5592, device='cuda:0')
epoch: 92 test_true_pfm: 3063.5537658144917 sim_pfm: 279.0019607533468
episode: 368 training return: tensor(-256.3146, device='cuda:0')
episode: 369 training return: tensor(-336.8131, device='cuda:0')
episode: 370 training return: tensor(289.8615, device='cuda:0')
episode: 371 training return: tensor(-96.7313, device='cuda:0')
epoch: 93 test_true_pfm: 3072.532941085403 sim_pfm: 139.55845004691705
episode: 372 training return: tensor(-184.0097, device='cuda:0')
episode: 373 training return: tensor(-203.6683, device='cuda:0')
episode: 374 training return: tensor(-344.5008, device='cuda:0')
episode: 375 training return: tensor(-311.8097, device='cuda:0')
epoch: 94 test_true_pfm: 2775.1017756123965 sim_pfm: 184.8245912284668
episode: 376 training return: tensor(-304.5591, device='cuda:0')
episode: 377 training return: tensor(-460.2868, device='cuda:0')
episode: 378 training return: tensor(-434.3766, device='cuda:0')
episode: 379 training return: tensor(-254.3667, device='cuda:0')
epoch: 95 test_true_pfm: 2497.2172381656114 sim_pfm: 83.86431821995454
episode: 380 training return: tensor(-280.9956, device='cuda:0')
episode: 381 training return: tensor(-444.5087, device='cuda:0')
episode: 382 training return: tensor(-237.4228, device='cuda:0')
episode: 383 training return: tensor(-360.7151, device='cuda:0')
epoch: 96 test_true_pfm: 2389.5965521090716 sim_pfm: 236.10378387845898
episode: 384 training return: tensor(-314.9914, device='cuda:0')
episode: 385 training return: tensor(81.8040, device='cuda:0')
episode: 386 training return: tensor(-377.9789, device='cuda:0')
episode: 387 training return: tensor(-396.6583, device='cuda:0')
epoch: 97 test_true_pfm: 2726.8205069717574 sim_pfm: 95.93785297633924
episode: 388 training return: tensor(-469.6547, device='cuda:0')
episode: 389 training return: tensor(124.4616, device='cuda:0')
episode: 390 training return: tensor(-282.9981, device='cuda:0')
episode: 391 training return: tensor(-305.8409, device='cuda:0')
epoch: 98 test_true_pfm: 2876.1136547434457 sim_pfm: 89.52362135487299
episode: 392 training return: tensor(-24.7754, device='cuda:0')
episode: 393 training return: tensor(-234.6706, device='cuda:0')
episode: 394 training return: tensor(-386.7086, device='cuda:0')
episode: 395 training return: tensor(-272.1526, device='cuda:0')
epoch: 99 test_true_pfm: 2860.3478142921545 sim_pfm: 294.72072610732477
episode: 396 training return: tensor(-205.0174, device='cuda:0')
episode: 397 training return: tensor(-262.0005, device='cuda:0')
episode: 398 training return: tensor(-381.9102, device='cuda:0')
episode: 399 training return: tensor(-253.1371, device='cuda:0')
epoch: 100 test_true_pfm: 2055.7545044179774 sim_pfm: -50.1173839016119
episode: 400 training return: tensor(-452.0769, device='cuda:0')
episode: 401 training return: tensor(-332.6843, device='cuda:0')
episode: 402 training return: tensor(-321.5865, device='cuda:0')
episode: 403 training return: tensor(-269.0568, device='cuda:0')
epoch: 101 test_true_pfm: 2500.5437903504085 sim_pfm: -158.26703398816366
episode: 404 training return: tensor(-431.9882, device='cuda:0')
episode: 405 training return: tensor(-275.6937, device='cuda:0')
episode: 406 training return: tensor(-425.3199, device='cuda:0')
episode: 407 training return: tensor(-338.0885, device='cuda:0')
epoch: 102 test_true_pfm: 3306.082427542675 sim_pfm: 303.82159866487683
episode: 408 training return: tensor(-337.8957, device='cuda:0')
episode: 409 training return: tensor(-256.8971, device='cuda:0')
episode: 410 training return: tensor(-372.2602, device='cuda:0')
episode: 411 training return: tensor(-289.2541, device='cuda:0')
epoch: 103 test_true_pfm: 2397.4890760545263 sim_pfm: 27.012563391666237
episode: 412 training return: tensor(-385.8267, device='cuda:0')
episode: 413 training return: tensor(-359.9979, device='cuda:0')
episode: 414 training return: tensor(-459.0900, device='cuda:0')
episode: 415 training return: tensor(-428.1230, device='cuda:0')
epoch: 104 test_true_pfm: 3042.332407395236 sim_pfm: 217.5465690220784
episode: 416 training return: tensor(-282.1120, device='cuda:0')
episode: 417 training return: tensor(-376.8717, device='cuda:0')
episode: 418 training return: tensor(-430.6765, device='cuda:0')
episode: 419 training return: tensor(-307.7861, device='cuda:0')
epoch: 105 test_true_pfm: 2518.770419629593 sim_pfm: 47.962956223171204
episode: 420 training return: tensor(-219.5257, device='cuda:0')
episode: 421 training return: tensor(-369.0749, device='cuda:0')
episode: 422 training return: tensor(-329.7316, device='cuda:0')
episode: 423 training return: tensor(-270.2602, device='cuda:0')
epoch: 106 test_true_pfm: 2405.801996296363 sim_pfm: -27.747682611603523
episode: 424 training return: tensor(-410.7349, device='cuda:0')
episode: 425 training return: tensor(-256.3682, device='cuda:0')
episode: 426 training return: tensor(-288.1082, device='cuda:0')
episode: 427 training return: tensor(-273.2001, device='cuda:0')
epoch: 107 test_true_pfm: 1771.579710754693 sim_pfm: -247.243600983648
episode: 428 training return: tensor(-143.5042, device='cuda:0')
episode: 429 training return: tensor(-270.0168, device='cuda:0')
episode: 430 training return: tensor(-439.0707, device='cuda:0')
episode: 431 training return: tensor(-358.2070, device='cuda:0')
epoch: 108 test_true_pfm: 2585.9593258674286 sim_pfm: -157.89450577735747
episode: 432 training return: tensor(-148.6859, device='cuda:0')
episode: 433 training return: tensor(-262.9604, device='cuda:0')
episode: 434 training return: tensor(57.4092, device='cuda:0')
episode: 435 training return: tensor(-371.9385, device='cuda:0')
epoch: 109 test_true_pfm: 1736.3548775369484 sim_pfm: 30.81351656947906
episode: 436 training return: tensor(-351.6769, device='cuda:0')
episode: 437 training return: tensor(-194.9153, device='cuda:0')
episode: 438 training return: tensor(-279.6320, device='cuda:0')
episode: 439 training return: tensor(-425.1095, device='cuda:0')
epoch: 110 test_true_pfm: 2218.4876853404285 sim_pfm: 115.21372117763774
episode: 440 training return: tensor(-415.7017, device='cuda:0')
episode: 441 training return: tensor(-73.5564, device='cuda:0')
episode: 442 training return: tensor(-429.0341, device='cuda:0')
episode: 443 training return: tensor(-417.3074, device='cuda:0')
epoch: 111 test_true_pfm: 2153.006074642924 sim_pfm: 169.00417015546313
episode: 444 training return: tensor(-326.2523, device='cuda:0')
episode: 445 training return: tensor(-431.3831, device='cuda:0')
episode: 446 training return: tensor(-170.7825, device='cuda:0')
episode: 447 training return: tensor(-342.2673, device='cuda:0')
epoch: 112 test_true_pfm: 3011.4652893532425 sim_pfm: 270.58127338332514
episode: 448 training return: tensor(-277.6918, device='cuda:0')
episode: 449 training return: tensor(-176.8336, device='cuda:0')
episode: 450 training return: tensor(-105.2051, device='cuda:0')
episode: 451 training return: tensor(-317.1000, device='cuda:0')
epoch: 113 test_true_pfm: 2735.5556992349248 sim_pfm: 127.50244807869119
episode: 452 training return: tensor(-314.3947, device='cuda:0')
episode: 453 training return: tensor(-281.8407, device='cuda:0')
episode: 454 training return: tensor(-352.5310, device='cuda:0')
episode: 455 training return: tensor(-240.8161, device='cuda:0')
epoch: 114 test_true_pfm: 3132.0789586107835 sim_pfm: 267.649610981481
episode: 456 training return: tensor(-342.0305, device='cuda:0')
episode: 457 training return: tensor(-384.3368, device='cuda:0')
episode: 458 training return: tensor(-449.8589, device='cuda:0')
episode: 459 training return: tensor(-163.0844, device='cuda:0')
epoch: 115 test_true_pfm: 1816.2358363541655 sim_pfm: -102.39808382947619
episode: 460 training return: tensor(-283.0373, device='cuda:0')
episode: 461 training return: tensor(-275.7434, device='cuda:0')
episode: 462 training return: tensor(-442.6698, device='cuda:0')
episode: 463 training return: tensor(-245.2982, device='cuda:0')
epoch: 116 test_true_pfm: 3129.5645799941763 sim_pfm: 294.91660150846775
episode: 464 training return: tensor(-405.4945, device='cuda:0')
episode: 465 training return: tensor(-231.9101, device='cuda:0')
episode: 466 training return: tensor(191.8488, device='cuda:0')
episode: 467 training return: tensor(-453.1499, device='cuda:0')
epoch: 117 test_true_pfm: 1950.1780613009287 sim_pfm: -228.0147381563826
episode: 468 training return: tensor(-273.2310, device='cuda:0')
episode: 469 training return: tensor(-399.3888, device='cuda:0')
episode: 470 training return: tensor(85.5842, device='cuda:0')
episode: 471 training return: tensor(-371.9928, device='cuda:0')
epoch: 118 test_true_pfm: 2941.9904086372194 sim_pfm: 234.54862727915557
episode: 472 training return: tensor(-189.8517, device='cuda:0')
episode: 473 training return: tensor(-266.1540, device='cuda:0')
episode: 474 training return: tensor(-259.0789, device='cuda:0')
episode: 475 training return: tensor(-317.1979, device='cuda:0')
epoch: 119 test_true_pfm: 3335.462845084739 sim_pfm: 196.27918209818503
episode: 476 training return: tensor(-455.2018, device='cuda:0')
episode: 477 training return: tensor(-313.5489, device='cuda:0')
episode: 478 training return: tensor(-267.6182, device='cuda:0')
episode: 479 training return: tensor(-260.1241, device='cuda:0')
epoch: 120 test_true_pfm: 2948.856558523203 sim_pfm: 117.46635355235776
episode: 480 training return: tensor(-217.6471, device='cuda:0')
episode: 481 training return: tensor(-289.9584, device='cuda:0')
episode: 482 training return: tensor(-192.7881, device='cuda:0')
episode: 483 training return: tensor(-295.7092, device='cuda:0')
epoch: 121 test_true_pfm: 1943.59969407609 sim_pfm: 126.45338818216503
episode: 484 training return: tensor(-238.8337, device='cuda:0')
episode: 485 training return: tensor(-120.4438, device='cuda:0')
episode: 486 training return: tensor(-314.6462, device='cuda:0')
episode: 487 training return: tensor(-360.0380, device='cuda:0')
epoch: 122 test_true_pfm: 2408.48022971449 sim_pfm: -22.38594282672663
episode: 488 training return: tensor(-191.0407, device='cuda:0')
episode: 489 training return: tensor(-161.4637, device='cuda:0')
episode: 490 training return: tensor(-303.9478, device='cuda:0')
episode: 491 training return: tensor(-287.5229, device='cuda:0')
epoch: 123 test_true_pfm: 1993.5269814068542 sim_pfm: 41.66490681485933
episode: 492 training return: tensor(-433.2306, device='cuda:0')
episode: 493 training return: tensor(-344.0103, device='cuda:0')
episode: 494 training return: tensor(-380.9305, device='cuda:0')
episode: 495 training return: tensor(262.7911, device='cuda:0')
epoch: 124 test_true_pfm: 2657.8656699284074 sim_pfm: 218.8973808247441
episode: 496 training return: tensor(-290.3539, device='cuda:0')
episode: 497 training return: tensor(-252.1619, device='cuda:0')
episode: 498 training return: tensor(-274.4852, device='cuda:0')
episode: 499 training return: tensor(-346.1408, device='cuda:0')
epoch: 125 test_true_pfm: 2725.9971906177975 sim_pfm: -107.61426420913388
episode: 500 training return: tensor(-501.2844, device='cuda:0')
episode: 501 training return: tensor(-265.6082, device='cuda:0')
episode: 502 training return: tensor(-8.4880, device='cuda:0')
episode: 503 training return: tensor(-266.3482, device='cuda:0')
epoch: 126 test_true_pfm: 2056.1805023132797 sim_pfm: -92.38966348062968
episode: 504 training return: tensor(-249.3732, device='cuda:0')
episode: 505 training return: tensor(-366.1181, device='cuda:0')
episode: 506 training return: tensor(-213.9859, device='cuda:0')
episode: 507 training return: tensor(-172.7229, device='cuda:0')
epoch: 127 test_true_pfm: 2796.7066481771017 sim_pfm: 177.01536304327115
episode: 508 training return: tensor(-160.5627, device='cuda:0')
episode: 509 training return: tensor(-361.0988, device='cuda:0')
episode: 510 training return: tensor(-198.7070, device='cuda:0')
episode: 511 training return: tensor(-378.2784, device='cuda:0')
epoch: 128 test_true_pfm: 1682.3791646530826 sim_pfm: -215.10353204553635
episode: 512 training return: tensor(-250.9765, device='cuda:0')
episode: 513 training return: tensor(-362.0464, device='cuda:0')
episode: 514 training return: tensor(-272.8889, device='cuda:0')
episode: 515 training return: tensor(-214.7271, device='cuda:0')
epoch: 129 test_true_pfm: 2712.592121732895 sim_pfm: -184.54559615871403
episode: 516 training return: tensor(-433.9254, device='cuda:0')
episode: 517 training return: tensor(-373.7192, device='cuda:0')
episode: 518 training return: tensor(-290.0932, device='cuda:0')
episode: 519 training return: tensor(-252.5016, device='cuda:0')
epoch: 130 test_true_pfm: 2543.7277575202957 sim_pfm: -40.876557248062454
episode: 520 training return: tensor(-347.6600, device='cuda:0')
episode: 521 training return: tensor(-238.3267, device='cuda:0')
episode: 522 training return: tensor(-266.3934, device='cuda:0')
episode: 523 training return: tensor(-122.9506, device='cuda:0')
epoch: 131 test_true_pfm: 3320.0540809929366 sim_pfm: 211.19442136818543
episode: 524 training return: tensor(-342.4831, device='cuda:0')
episode: 525 training return: tensor(-165.4309, device='cuda:0')
episode: 526 training return: tensor(-348.3159, device='cuda:0')
episode: 527 training return: tensor(-179.0518, device='cuda:0')
epoch: 132 test_true_pfm: 1526.5684862429632 sim_pfm: -307.94593771368574
episode: 528 training return: tensor(-279.4658, device='cuda:0')
episode: 529 training return: tensor(-168.7298, device='cuda:0')
episode: 530 training return: tensor(-407.9855, device='cuda:0')
episode: 531 training return: tensor(-270.3044, device='cuda:0')
epoch: 133 test_true_pfm: 2675.3535935643495 sim_pfm: 259.12687233862624
episode: 532 training return: tensor(-416.9661, device='cuda:0')
episode: 533 training return: tensor(-320.3482, device='cuda:0')
episode: 534 training return: tensor(-456.3864, device='cuda:0')
episode: 535 training return: tensor(-319.9258, device='cuda:0')
epoch: 134 test_true_pfm: 2895.6492982642644 sim_pfm: 238.5157466541083
episode: 536 training return: tensor(-270.3503, device='cuda:0')
episode: 537 training return: tensor(-336.4262, device='cuda:0')
episode: 538 training return: tensor(-428.1043, device='cuda:0')
episode: 539 training return: tensor(-361.0396, device='cuda:0')
epoch: 135 test_true_pfm: 2716.58303936494 sim_pfm: 284.52187160990434
episode: 540 training return: tensor(-174.9254, device='cuda:0')
episode: 541 training return: tensor(-444.5672, device='cuda:0')
episode: 542 training return: tensor(-224.1616, device='cuda:0')
episode: 543 training return: tensor(-382.0433, device='cuda:0')
epoch: 136 test_true_pfm: 2024.8885586375466 sim_pfm: -62.045927012106404
episode: 544 training return: tensor(-360.2072, device='cuda:0')
episode: 545 training return: tensor(-266.0901, device='cuda:0')
episode: 546 training return: tensor(-315.1571, device='cuda:0')
episode: 547 training return: tensor(-167.7015, device='cuda:0')
epoch: 137 test_true_pfm: 2976.9698258593985 sim_pfm: -95.49873686442152
episode: 548 training return: tensor(-254.1989, device='cuda:0')
episode: 549 training return: tensor(-63.3267, device='cuda:0')
episode: 550 training return: tensor(-442.5827, device='cuda:0')
episode: 551 training return: tensor(-254.2104, device='cuda:0')
epoch: 138 test_true_pfm: 1871.0199145043723 sim_pfm: 13.343942894677943
episode: 552 training return: tensor(-339.7135, device='cuda:0')
episode: 553 training return: tensor(-401.7011, device='cuda:0')
episode: 554 training return: tensor(-222.7954, device='cuda:0')
episode: 555 training return: tensor(-355.2248, device='cuda:0')
epoch: 139 test_true_pfm: 3126.098843145645 sim_pfm: -59.00268032770449
episode: 556 training return: tensor(-374.0721, device='cuda:0')
episode: 557 training return: tensor(-308.6587, device='cuda:0')
episode: 558 training return: tensor(-218.2764, device='cuda:0')
episode: 559 training return: tensor(-423.0646, device='cuda:0')
epoch: 140 test_true_pfm: 2591.08591847991 sim_pfm: 252.19474143348634
episode: 560 training return: tensor(46.1457, device='cuda:0')
episode: 561 training return: tensor(-428.6311, device='cuda:0')
episode: 562 training return: tensor(-318.2345, device='cuda:0')
episode: 563 training return: tensor(-302.1114, device='cuda:0')
epoch: 141 test_true_pfm: 3008.701678319046 sim_pfm: 268.9157586110329
episode: 564 training return: tensor(-344.2299, device='cuda:0')
episode: 565 training return: tensor(-426.9427, device='cuda:0')
episode: 566 training return: tensor(-238.7324, device='cuda:0')
episode: 567 training return: tensor(-248.6332, device='cuda:0')
epoch: 142 test_true_pfm: 3169.208349314764 sim_pfm: 168.58956228906754
episode: 568 training return: tensor(-310.4184, device='cuda:0')
episode: 569 training return: tensor(-247.4415, device='cuda:0')
episode: 570 training return: tensor(-266.4467, device='cuda:0')
episode: 571 training return: tensor(-12.7591, device='cuda:0')
epoch: 143 test_true_pfm: 2207.5914656321625 sim_pfm: 19.210161609080387
episode: 572 training return: tensor(-157.1016, device='cuda:0')
episode: 573 training return: tensor(-427.6245, device='cuda:0')
episode: 574 training return: tensor(219.5521, device='cuda:0')
episode: 575 training return: tensor(-232.6632, device='cuda:0')
epoch: 144 test_true_pfm: 2547.125652601793 sim_pfm: -87.38182859894975
episode: 576 training return: tensor(-301.0953, device='cuda:0')
episode: 577 training return: tensor(-495.4728, device='cuda:0')
episode: 578 training return: tensor(82.5587, device='cuda:0')
episode: 579 training return: tensor(-381.3020, device='cuda:0')
epoch: 145 test_true_pfm: 3335.9652674778863 sim_pfm: 299.9136487013602
episode: 580 training return: tensor(-248.6616, device='cuda:0')
episode: 581 training return: tensor(-349.7339, device='cuda:0')
episode: 582 training return: tensor(-243.3745, device='cuda:0')
episode: 583 training return: tensor(-337.9247, device='cuda:0')
epoch: 146 test_true_pfm: 2256.8960830781048 sim_pfm: 35.86463066463087
episode: 584 training return: tensor(-291.3657, device='cuda:0')
episode: 585 training return: tensor(-358.3307, device='cuda:0')
episode: 586 training return: tensor(-215.6898, device='cuda:0')
episode: 587 training return: tensor(-379.5034, device='cuda:0')
epoch: 147 test_true_pfm: 2899.4876552336013 sim_pfm: 28.38202563477292
episode: 588 training return: tensor(-400.4390, device='cuda:0')
episode: 589 training return: tensor(-56.0778, device='cuda:0')
episode: 590 training return: tensor(-195.5947, device='cuda:0')
episode: 591 training return: tensor(-303.3383, device='cuda:0')
epoch: 148 test_true_pfm: 1690.956135977521 sim_pfm: -77.47428334047436
episode: 592 training return: tensor(-70.3592, device='cuda:0')
episode: 593 training return: tensor(-475.7199, device='cuda:0')
episode: 594 training return: tensor(-352.6898, device='cuda:0')
episode: 595 training return: tensor(-319.6914, device='cuda:0')
epoch: 149 test_true_pfm: 3178.8104737280705 sim_pfm: 9.910364284509948
episode: 596 training return: tensor(33.8949, device='cuda:0')
episode: 597 training return: tensor(-437.0308, device='cuda:0')
episode: 598 training return: tensor(193.4209, device='cuda:0')
episode: 599 training return: tensor(3.6273, device='cuda:0')
epoch: 150 test_true_pfm: 1580.5488422820074 sim_pfm: -283.70251815665205
