['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'baseline', '--traj', 'expert', '--seed', '4']
episode: 0 training return: tensor(-961.7776, device='cuda:0')
episode: 1 training return: tensor(-974.4611, device='cuda:0')
episode: 2 training return: tensor(-928.8177, device='cuda:0')
episode: 3 training return: tensor(-888.0734, device='cuda:0')
epoch: 1 test_true_pfm: -10.805828229990171 sim_pfm: -918.5406700912475
episode: 4 training return: tensor(-972.8292, device='cuda:0')
episode: 5 training return: tensor(-996.1214, device='cuda:0')
episode: 6 training return: tensor(-872.5775, device='cuda:0')
episode: 7 training return: tensor(-968.3850, device='cuda:0')
epoch: 2 test_true_pfm: 224.3330501333253 sim_pfm: -973.0347968538603
episode: 8 training return: tensor(-969.6758, device='cuda:0')
episode: 9 training return: tensor(-956.5460, device='cuda:0')
episode: 10 training return: tensor(-932.9046, device='cuda:0')
episode: 11 training return: tensor(-892.7316, device='cuda:0')
epoch: 3 test_true_pfm: 252.4773103006239 sim_pfm: -927.7040373203345
episode: 12 training return: tensor(-923.5985, device='cuda:0')
episode: 13 training return: tensor(-942.4803, device='cuda:0')
episode: 14 training return: tensor(-914.1908, device='cuda:0')
episode: 15 training return: tensor(-942.7159, device='cuda:0')
epoch: 4 test_true_pfm: 251.58095402732798 sim_pfm: -935.9747583073719
episode: 16 training return: tensor(-946.8442, device='cuda:0')
episode: 17 training return: tensor(-930.6498, device='cuda:0')
episode: 18 training return: tensor(-900.7966, device='cuda:0')
episode: 19 training return: tensor(-866.1486, device='cuda:0')
epoch: 5 test_true_pfm: 293.6696712137357 sim_pfm: -901.4139310827401
episode: 20 training return: tensor(-888.5967, device='cuda:0')
episode: 21 training return: tensor(-831.6409, device='cuda:0')
episode: 22 training return: tensor(-828.2991, device='cuda:0')
episode: 23 training return: tensor(-924.7582, device='cuda:0')
epoch: 6 test_true_pfm: 255.2991424225784 sim_pfm: -851.5003657939669
episode: 24 training return: tensor(-820.2128, device='cuda:0')
episode: 25 training return: tensor(-853.5139, device='cuda:0')
episode: 26 training return: tensor(-790.7570, device='cuda:0')
episode: 27 training return: tensor(-744.4796, device='cuda:0')
epoch: 7 test_true_pfm: 278.61271358954554 sim_pfm: -835.7576816709867
episode: 28 training return: tensor(-866.0807, device='cuda:0')
episode: 29 training return: tensor(-887.7903, device='cuda:0')
episode: 30 training return: tensor(-999.5309, device='cuda:0')
episode: 31 training return: tensor(-995.0846, device='cuda:0')
epoch: 8 test_true_pfm: 376.30379031289567 sim_pfm: -816.5000962691071
episode: 32 training return: tensor(-724.0256, device='cuda:0')
episode: 33 training return: tensor(-558.5017, device='cuda:0')
episode: 34 training return: tensor(-553.6274, device='cuda:0')
episode: 35 training return: tensor(-954.7704, device='cuda:0')
epoch: 9 test_true_pfm: 152.10836737731483 sim_pfm: -639.4190041263743
episode: 36 training return: tensor(-967.0853, device='cuda:0')
episode: 37 training return: tensor(-580.2875, device='cuda:0')
episode: 38 training return: tensor(-981.4897, device='cuda:0')
episode: 39 training return: tensor(-658.2943, device='cuda:0')
epoch: 10 test_true_pfm: 169.9688707149533 sim_pfm: -597.809677915473
episode: 40 training return: tensor(-620.3318, device='cuda:0')
episode: 41 training return: tensor(-607.1021, device='cuda:0')
episode: 42 training return: tensor(-569.3725, device='cuda:0')
episode: 43 training return: tensor(-654.3592, device='cuda:0')
epoch: 11 test_true_pfm: 294.8003924792495 sim_pfm: -868.7835426721722
episode: 44 training return: tensor(-731.5542, device='cuda:0')
episode: 45 training return: tensor(-770.5173, device='cuda:0')
episode: 46 training return: tensor(-689.1403, device='cuda:0')
episode: 47 training return: tensor(-638.9451, device='cuda:0')
epoch: 12 test_true_pfm: 269.42750255311296 sim_pfm: -874.512985107877
episode: 48 training return: tensor(-589.7701, device='cuda:0')
episode: 49 training return: tensor(-553.5809, device='cuda:0')
episode: 50 training return: tensor(-521.9305, device='cuda:0')
episode: 51 training return: tensor(-487.4538, device='cuda:0')
epoch: 13 test_true_pfm: 345.93309784670913 sim_pfm: -452.12952162870596
episode: 52 training return: tensor(-456.5696, device='cuda:0')
episode: 53 training return: tensor(-520.1031, device='cuda:0')
episode: 54 training return: tensor(-422.2874, device='cuda:0')
episode: 55 training return: tensor(-435.8779, device='cuda:0')
epoch: 14 test_true_pfm: 253.7215895176611 sim_pfm: -419.73783099053736
episode: 56 training return: tensor(-461.2336, device='cuda:0')
episode: 57 training return: tensor(-437.9167, device='cuda:0')
episode: 58 training return: tensor(-467.1171, device='cuda:0')
episode: 59 training return: tensor(-402.6226, device='cuda:0')
epoch: 15 test_true_pfm: 199.53992435664986 sim_pfm: -436.84330099244835
episode: 60 training return: tensor(-430.4787, device='cuda:0')
episode: 61 training return: tensor(-412.0723, device='cuda:0')
episode: 62 training return: tensor(-403.1570, device='cuda:0')
episode: 63 training return: tensor(-515.4460, device='cuda:0')
epoch: 16 test_true_pfm: 396.1481115948711 sim_pfm: -419.67974464502186
episode: 64 training return: tensor(-480.3550, device='cuda:0')
episode: 65 training return: tensor(-402.4079, device='cuda:0')
episode: 66 training return: tensor(-459.8458, device='cuda:0')
episode: 67 training return: tensor(-428.5843, device='cuda:0')
epoch: 17 test_true_pfm: 300.42777715180404 sim_pfm: -388.1305589429103
episode: 68 training return: tensor(-492.4109, device='cuda:0')
episode: 69 training return: tensor(-390.7763, device='cuda:0')
episode: 70 training return: tensor(-390.9944, device='cuda:0')
episode: 71 training return: tensor(-402.9306, device='cuda:0')
epoch: 18 test_true_pfm: 241.58760085774307 sim_pfm: -402.8985057019163
episode: 72 training return: tensor(-396.1585, device='cuda:0')
episode: 73 training return: tensor(-387.9999, device='cuda:0')
episode: 74 training return: tensor(-395.0128, device='cuda:0')
episode: 75 training return: tensor(-378.6736, device='cuda:0')
epoch: 19 test_true_pfm: 292.9135261909662 sim_pfm: -364.6211741522614
episode: 76 training return: tensor(-415.5093, device='cuda:0')
episode: 77 training return: tensor(-396.3694, device='cuda:0')
episode: 78 training return: tensor(-394.1486, device='cuda:0')
episode: 79 training return: tensor(-377.0283, device='cuda:0')
epoch: 20 test_true_pfm: 426.84740128548157 sim_pfm: -361.8269194739017
episode: 80 training return: tensor(-395.5435, device='cuda:0')
episode: 81 training return: tensor(-435.7220, device='cuda:0')
episode: 82 training return: tensor(-362.9662, device='cuda:0')
episode: 83 training return: tensor(-442.8311, device='cuda:0')
epoch: 21 test_true_pfm: 232.28673470379394 sim_pfm: -398.2346769389308
episode: 84 training return: tensor(-382.9290, device='cuda:0')
episode: 85 training return: tensor(-385.6975, device='cuda:0')
episode: 86 training return: tensor(-391.7850, device='cuda:0')
episode: 87 training return: tensor(-385.1632, device='cuda:0')
epoch: 22 test_true_pfm: 243.5316194308688 sim_pfm: -364.54434634407517
episode: 88 training return: tensor(-380.9061, device='cuda:0')
episode: 89 training return: tensor(-377.2458, device='cuda:0')
episode: 90 training return: tensor(-380.6684, device='cuda:0')
episode: 91 training return: tensor(-382.6342, device='cuda:0')
epoch: 23 test_true_pfm: 228.87835279397464 sim_pfm: -375.41341683718684
episode: 92 training return: tensor(-437.0515, device='cuda:0')
episode: 93 training return: tensor(-382.3568, device='cuda:0')
episode: 94 training return: tensor(-402.5805, device='cuda:0')
episode: 95 training return: tensor(-385.1168, device='cuda:0')
epoch: 24 test_true_pfm: 279.7326783104469 sim_pfm: -370.8071880112596
episode: 96 training return: tensor(-384.6239, device='cuda:0')
episode: 97 training return: tensor(-372.0860, device='cuda:0')
episode: 98 training return: tensor(-386.6729, device='cuda:0')
episode: 99 training return: tensor(-367.9992, device='cuda:0')
epoch: 25 test_true_pfm: 249.80648372272546 sim_pfm: -367.9656927708226
episode: 100 training return: tensor(-385.4108, device='cuda:0')
episode: 101 training return: tensor(-370.9154, device='cuda:0')
episode: 102 training return: tensor(-415.6850, device='cuda:0')
episode: 103 training return: tensor(-392.2451, device='cuda:0')
epoch: 26 test_true_pfm: 244.3671458553772 sim_pfm: -352.74071293123416
episode: 104 training return: tensor(-388.4437, device='cuda:0')
episode: 105 training return: tensor(-356.2213, device='cuda:0')
episode: 106 training return: tensor(-354.8296, device='cuda:0')
episode: 107 training return: tensor(-390.4257, device='cuda:0')
epoch: 27 test_true_pfm: 307.3585701413492 sim_pfm: -370.12461344501935
episode: 108 training return: tensor(-388.3069, device='cuda:0')
episode: 109 training return: tensor(-391.2327, device='cuda:0')
episode: 110 training return: tensor(-373.1483, device='cuda:0')
episode: 111 training return: tensor(-377.7976, device='cuda:0')
epoch: 28 test_true_pfm: 275.1222011300328 sim_pfm: -383.9979360586828
episode: 112 training return: tensor(-372.6555, device='cuda:0')
episode: 113 training return: tensor(-369.8898, device='cuda:0')
episode: 114 training return: tensor(-362.0548, device='cuda:0')
episode: 115 training return: tensor(-395.6091, device='cuda:0')
epoch: 29 test_true_pfm: 222.06216876252702 sim_pfm: -350.9977559011895
episode: 116 training return: tensor(-375.9518, device='cuda:0')
episode: 117 training return: tensor(-366.9180, device='cuda:0')
episode: 118 training return: tensor(-398.3527, device='cuda:0')
episode: 119 training return: tensor(-384.2151, device='cuda:0')
epoch: 30 test_true_pfm: 262.03094383758776 sim_pfm: -359.7529688208985
episode: 120 training return: tensor(-346.7430, device='cuda:0')
episode: 121 training return: tensor(-352.6505, device='cuda:0')
episode: 122 training return: tensor(-381.4074, device='cuda:0')
episode: 123 training return: tensor(-363.7302, device='cuda:0')
epoch: 31 test_true_pfm: 266.54601997394957 sim_pfm: -370.47478086253005
episode: 124 training return: tensor(-371.6534, device='cuda:0')
episode: 125 training return: tensor(-357.9691, device='cuda:0')
episode: 126 training return: tensor(-375.3608, device='cuda:0')
episode: 127 training return: tensor(-368.0784, device='cuda:0')
epoch: 32 test_true_pfm: 241.12025243233128 sim_pfm: -357.46598143134423
episode: 128 training return: tensor(-363.0854, device='cuda:0')
episode: 129 training return: tensor(-360.2822, device='cuda:0')
episode: 130 training return: tensor(-364.0432, device='cuda:0')
episode: 131 training return: tensor(-349.0601, device='cuda:0')
epoch: 33 test_true_pfm: 283.13723601611883 sim_pfm: -340.79145458825707
episode: 132 training return: tensor(-347.7074, device='cuda:0')
episode: 133 training return: tensor(-360.2990, device='cuda:0')
episode: 134 training return: tensor(-347.3269, device='cuda:0')
episode: 135 training return: tensor(-365.0567, device='cuda:0')
epoch: 34 test_true_pfm: 272.21939200882656 sim_pfm: -361.6137866886177
episode: 136 training return: tensor(-363.2053, device='cuda:0')
episode: 137 training return: tensor(-354.0854, device='cuda:0')
episode: 138 training return: tensor(-348.9335, device='cuda:0')
episode: 139 training return: tensor(-351.6973, device='cuda:0')
epoch: 35 test_true_pfm: 256.82498546987387 sim_pfm: -335.0188838899485
episode: 140 training return: tensor(-349.8386, device='cuda:0')
episode: 141 training return: tensor(-376.4608, device='cuda:0')
episode: 142 training return: tensor(-341.9163, device='cuda:0')
episode: 143 training return: tensor(-342.1377, device='cuda:0')
epoch: 36 test_true_pfm: 260.07446388177465 sim_pfm: -343.8986024668363
episode: 144 training return: tensor(-350.5010, device='cuda:0')
episode: 145 training return: tensor(-369.8282, device='cuda:0')
episode: 146 training return: tensor(-354.0832, device='cuda:0')
episode: 147 training return: tensor(-352.0364, device='cuda:0')
epoch: 37 test_true_pfm: 249.25133533838843 sim_pfm: -342.7428707194437
episode: 148 training return: tensor(-346.7050, device='cuda:0')
episode: 149 training return: tensor(-380.1961, device='cuda:0')
episode: 150 training return: tensor(-355.9987, device='cuda:0')
episode: 151 training return: tensor(-348.6227, device='cuda:0')
epoch: 38 test_true_pfm: 248.02654675653685 sim_pfm: -338.74613167477463
episode: 152 training return: tensor(-365.0378, device='cuda:0')
episode: 153 training return: tensor(-350.2192, device='cuda:0')
episode: 154 training return: tensor(-344.4328, device='cuda:0')
episode: 155 training return: tensor(-338.2967, device='cuda:0')
epoch: 39 test_true_pfm: 311.93933754148765 sim_pfm: -331.96236822692055
episode: 156 training return: tensor(-340.8605, device='cuda:0')
episode: 157 training return: tensor(-351.2502, device='cuda:0')
episode: 158 training return: tensor(-350.8928, device='cuda:0')
episode: 159 training return: tensor(-348.6020, device='cuda:0')
epoch: 40 test_true_pfm: 288.52273125450216 sim_pfm: -335.0181816854553
episode: 160 training return: tensor(-334.8934, device='cuda:0')
episode: 161 training return: tensor(-338.0143, device='cuda:0')
episode: 162 training return: tensor(-332.2483, device='cuda:0')
episode: 163 training return: tensor(-351.4473, device='cuda:0')
epoch: 41 test_true_pfm: 257.82906326089113 sim_pfm: -315.22172277762246
episode: 164 training return: tensor(-328.6387, device='cuda:0')
episode: 165 training return: tensor(-361.0866, device='cuda:0')
episode: 166 training return: tensor(-355.0154, device='cuda:0')
episode: 167 training return: tensor(-341.5574, device='cuda:0')
epoch: 42 test_true_pfm: 350.68584863989486 sim_pfm: -318.94275026840234
episode: 168 training return: tensor(-358.8181, device='cuda:0')
episode: 169 training return: tensor(-336.6776, device='cuda:0')
episode: 170 training return: tensor(-352.0204, device='cuda:0')
episode: 171 training return: tensor(-347.2755, device='cuda:0')
epoch: 43 test_true_pfm: 305.31047583215746 sim_pfm: -330.3470692078117
episode: 172 training return: tensor(-353.0084, device='cuda:0')
episode: 173 training return: tensor(-351.9130, device='cuda:0')
episode: 174 training return: tensor(-344.2271, device='cuda:0')
episode: 175 training return: tensor(-347.0747, device='cuda:0')
epoch: 44 test_true_pfm: 346.64953260559577 sim_pfm: -314.0294902903649
episode: 176 training return: tensor(-340.3325, device='cuda:0')
episode: 177 training return: tensor(-345.5969, device='cuda:0')
episode: 178 training return: tensor(-328.5169, device='cuda:0')
episode: 179 training return: tensor(-356.4145, device='cuda:0')
epoch: 45 test_true_pfm: 267.7079760634782 sim_pfm: -333.32204256928526
episode: 180 training return: tensor(-336.0681, device='cuda:0')
episode: 181 training return: tensor(-353.8842, device='cuda:0')
episode: 182 training return: tensor(-345.8979, device='cuda:0')
episode: 183 training return: tensor(-343.8757, device='cuda:0')
epoch: 46 test_true_pfm: 290.63179550470227 sim_pfm: -332.40432304883143
episode: 184 training return: tensor(-328.5142, device='cuda:0')
episode: 185 training return: tensor(-335.4658, device='cuda:0')
episode: 186 training return: tensor(-347.5648, device='cuda:0')
episode: 187 training return: tensor(-339.9965, device='cuda:0')
epoch: 47 test_true_pfm: 285.85320109445524 sim_pfm: -317.5044777902464
episode: 188 training return: tensor(-336.7325, device='cuda:0')
episode: 189 training return: tensor(-356.5449, device='cuda:0')
episode: 190 training return: tensor(-333.1313, device='cuda:0')
episode: 191 training return: tensor(-366.3208, device='cuda:0')
epoch: 48 test_true_pfm: 228.25334963851756 sim_pfm: -332.58597341358353
episode: 192 training return: tensor(-336.4865, device='cuda:0')
episode: 193 training return: tensor(-335.2196, device='cuda:0')
episode: 194 training return: tensor(-338.1181, device='cuda:0')
episode: 195 training return: tensor(-345.0048, device='cuda:0')
epoch: 49 test_true_pfm: 317.36549503989306 sim_pfm: -302.1266303564965
episode: 196 training return: tensor(-324.8009, device='cuda:0')
episode: 197 training return: tensor(-344.7946, device='cuda:0')
episode: 198 training return: tensor(-332.2276, device='cuda:0')
episode: 199 training return: tensor(-338.2480, device='cuda:0')
epoch: 50 test_true_pfm: 256.0218209558628 sim_pfm: -316.02638395697187
episode: 200 training return: tensor(-331.5448, device='cuda:0')
episode: 201 training return: tensor(-335.3501, device='cuda:0')
episode: 202 training return: tensor(-340.7993, device='cuda:0')
episode: 203 training return: tensor(-338.7132, device='cuda:0')
epoch: 51 test_true_pfm: 304.09033818162305 sim_pfm: -339.02733167020295
episode: 204 training return: tensor(-347.9137, device='cuda:0')
episode: 205 training return: tensor(-343.5374, device='cuda:0')
episode: 206 training return: tensor(-336.3990, device='cuda:0')
episode: 207 training return: tensor(-376.4148, device='cuda:0')
epoch: 52 test_true_pfm: 408.29839307426346 sim_pfm: -272.92995420790976
episode: 208 training return: tensor(-347.4748, device='cuda:0')
episode: 209 training return: tensor(-351.1405, device='cuda:0')
episode: 210 training return: tensor(-314.7540, device='cuda:0')
episode: 211 training return: tensor(-332.4758, device='cuda:0')
epoch: 53 test_true_pfm: 337.698681582685 sim_pfm: -297.97350353761186
episode: 212 training return: tensor(-318.0481, device='cuda:0')
episode: 213 training return: tensor(-327.7709, device='cuda:0')
episode: 214 training return: tensor(-329.7278, device='cuda:0')
episode: 215 training return: tensor(-329.6820, device='cuda:0')
epoch: 54 test_true_pfm: 259.78117041728973 sim_pfm: -322.5200426755473
episode: 216 training return: tensor(-323.4439, device='cuda:0')
episode: 217 training return: tensor(-335.7747, device='cuda:0')
episode: 218 training return: tensor(-332.8977, device='cuda:0')
episode: 219 training return: tensor(-337.9701, device='cuda:0')
epoch: 55 test_true_pfm: 338.65125575123716 sim_pfm: -297.4950974121651
episode: 220 training return: tensor(-329.5663, device='cuda:0')
episode: 221 training return: tensor(-321.8146, device='cuda:0')
episode: 222 training return: tensor(-310.2462, device='cuda:0')
episode: 223 training return: tensor(-332.1458, device='cuda:0')
epoch: 56 test_true_pfm: 350.84828831209006 sim_pfm: -271.19619733343524
episode: 224 training return: tensor(-335.8005, device='cuda:0')
episode: 225 training return: tensor(-346.0205, device='cuda:0')
episode: 226 training return: tensor(-333.9574, device='cuda:0')
episode: 227 training return: tensor(-350.5844, device='cuda:0')
epoch: 57 test_true_pfm: 260.02798684705846 sim_pfm: -325.83052104579593
episode: 228 training return: tensor(-349.9681, device='cuda:0')
episode: 229 training return: tensor(-337.6725, device='cuda:0')
episode: 230 training return: tensor(-333.9589, device='cuda:0')
episode: 231 training return: tensor(-340.0323, device='cuda:0')
epoch: 58 test_true_pfm: 309.5525766559722 sim_pfm: -278.1831764055726
episode: 232 training return: tensor(-336.5297, device='cuda:0')
episode: 233 training return: tensor(-333.6846, device='cuda:0')
episode: 234 training return: tensor(-333.5023, device='cuda:0')
episode: 235 training return: tensor(-343.4597, device='cuda:0')
epoch: 59 test_true_pfm: 247.3651818971119 sim_pfm: -301.8638396084037
episode: 236 training return: tensor(-342.4681, device='cuda:0')
episode: 237 training return: tensor(-342.0889, device='cuda:0')
episode: 238 training return: tensor(-335.8433, device='cuda:0')
episode: 239 training return: tensor(-325.1821, device='cuda:0')
epoch: 60 test_true_pfm: 312.789664277292 sim_pfm: -293.7634702628323
episode: 240 training return: tensor(-341.2118, device='cuda:0')
episode: 241 training return: tensor(-321.0572, device='cuda:0')
episode: 242 training return: tensor(-300.7145, device='cuda:0')
episode: 243 training return: tensor(-310.3117, device='cuda:0')
epoch: 61 test_true_pfm: 302.1973121045152 sim_pfm: -273.5403198429849
episode: 244 training return: tensor(-327.8691, device='cuda:0')
episode: 245 training return: tensor(-345.5098, device='cuda:0')
episode: 246 training return: tensor(-335.0298, device='cuda:0')
episode: 247 training return: tensor(-326.4220, device='cuda:0')
epoch: 62 test_true_pfm: 354.0957478918504 sim_pfm: -271.6064553053584
episode: 248 training return: tensor(-356.1238, device='cuda:0')
episode: 249 training return: tensor(-317.6873, device='cuda:0')
episode: 250 training return: tensor(-294.1374, device='cuda:0')
episode: 251 training return: tensor(-330.4188, device='cuda:0')
epoch: 63 test_true_pfm: 334.8646609026763 sim_pfm: -294.312001496631
episode: 252 training return: tensor(-334.6546, device='cuda:0')
episode: 253 training return: tensor(-321.8016, device='cuda:0')
episode: 254 training return: tensor(-311.9837, device='cuda:0')
episode: 255 training return: tensor(-318.7401, device='cuda:0')
epoch: 64 test_true_pfm: 355.61702299310855 sim_pfm: -255.2535116354314
episode: 256 training return: tensor(-320.1550, device='cuda:0')
episode: 257 training return: tensor(-338.7739, device='cuda:0')
episode: 258 training return: tensor(-327.8553, device='cuda:0')
episode: 259 training return: tensor(-326.6935, device='cuda:0')
epoch: 65 test_true_pfm: 419.6446364131673 sim_pfm: -240.57812203245703
episode: 260 training return: tensor(-344.8444, device='cuda:0')
episode: 261 training return: tensor(-308.3494, device='cuda:0')
episode: 262 training return: tensor(-319.5381, device='cuda:0')
episode: 263 training return: tensor(-320.4095, device='cuda:0')
epoch: 66 test_true_pfm: 318.7499886943303 sim_pfm: -294.8669435624227
episode: 264 training return: tensor(-325.7320, device='cuda:0')
episode: 265 training return: tensor(-329.2163, device='cuda:0')
episode: 266 training return: tensor(-340.7249, device='cuda:0')
episode: 267 training return: tensor(-329.0019, device='cuda:0')
epoch: 67 test_true_pfm: 444.5809184585296 sim_pfm: -245.0932904720345
episode: 268 training return: tensor(-319.2494, device='cuda:0')
episode: 269 training return: tensor(-321.5005, device='cuda:0')
episode: 270 training return: tensor(-293.9415, device='cuda:0')
episode: 271 training return: tensor(-319.8997, device='cuda:0')
epoch: 68 test_true_pfm: 379.1679497251207 sim_pfm: -249.74909193553808
episode: 272 training return: tensor(-330.1630, device='cuda:0')
episode: 273 training return: tensor(-287.7966, device='cuda:0')
episode: 274 training return: tensor(-325.0486, device='cuda:0')
episode: 275 training return: tensor(-327.4130, device='cuda:0')
epoch: 69 test_true_pfm: 325.0807717271939 sim_pfm: -272.2940258436526
episode: 276 training return: tensor(-320.4910, device='cuda:0')
episode: 277 training return: tensor(-325.7318, device='cuda:0')
episode: 278 training return: tensor(-319.9756, device='cuda:0')
episode: 279 training return: tensor(-294.4790, device='cuda:0')
epoch: 70 test_true_pfm: 362.9762048894231 sim_pfm: -275.78731642545125
episode: 280 training return: tensor(-329.1115, device='cuda:0')
episode: 281 training return: tensor(-278.4357, device='cuda:0')
episode: 282 training return: tensor(-304.2901, device='cuda:0')
episode: 283 training return: tensor(-299.2468, device='cuda:0')
epoch: 71 test_true_pfm: 367.361910880753 sim_pfm: -257.55153600826935
episode: 284 training return: tensor(-321.9743, device='cuda:0')
episode: 285 training return: tensor(-316.5567, device='cuda:0')
episode: 286 training return: tensor(-338.4076, device='cuda:0')
episode: 287 training return: tensor(-296.6361, device='cuda:0')
epoch: 72 test_true_pfm: 348.32005911777713 sim_pfm: -261.7119367480239
episode: 288 training return: tensor(-327.5710, device='cuda:0')
episode: 289 training return: tensor(-298.5052, device='cuda:0')
episode: 290 training return: tensor(-349.8965, device='cuda:0')
episode: 291 training return: tensor(-324.8398, device='cuda:0')
epoch: 73 test_true_pfm: 339.14312875801664 sim_pfm: -274.52287110468995
episode: 292 training return: tensor(-339.1042, device='cuda:0')
episode: 293 training return: tensor(-318.6300, device='cuda:0')
episode: 294 training return: tensor(-305.7604, device='cuda:0')
episode: 295 training return: tensor(-318.4974, device='cuda:0')
epoch: 74 test_true_pfm: 350.5022954489398 sim_pfm: -279.8146531538417
episode: 296 training return: tensor(-329.9187, device='cuda:0')
episode: 297 training return: tensor(-291.8619, device='cuda:0')
episode: 298 training return: tensor(-307.0137, device='cuda:0')
episode: 299 training return: tensor(-315.9660, device='cuda:0')
epoch: 75 test_true_pfm: 296.8973417248742 sim_pfm: -316.4243248157242
episode: 300 training return: tensor(-329.7754, device='cuda:0')
episode: 301 training return: tensor(-324.6604, device='cuda:0')
episode: 302 training return: tensor(-337.3971, device='cuda:0')
episode: 303 training return: tensor(-301.8510, device='cuda:0')
epoch: 76 test_true_pfm: 289.4993376752213 sim_pfm: -309.1931666083692
episode: 304 training return: tensor(-322.8080, device='cuda:0')
episode: 305 training return: tensor(-353.0684, device='cuda:0')
episode: 306 training return: tensor(-319.9474, device='cuda:0')
episode: 307 training return: tensor(-334.6212, device='cuda:0')
epoch: 77 test_true_pfm: 382.8887142526896 sim_pfm: -234.46551450918196
episode: 308 training return: tensor(-338.1557, device='cuda:0')
episode: 309 training return: tensor(-320.7217, device='cuda:0')
episode: 310 training return: tensor(-322.7841, device='cuda:0')
episode: 311 training return: tensor(-324.9321, device='cuda:0')
epoch: 78 test_true_pfm: 343.7209085084134 sim_pfm: -288.05643442625296
episode: 312 training return: tensor(-292.6139, device='cuda:0')
episode: 313 training return: tensor(-323.7095, device='cuda:0')
episode: 314 training return: tensor(-324.8991, device='cuda:0')
episode: 315 training return: tensor(-317.1335, device='cuda:0')
epoch: 79 test_true_pfm: 374.1287547912062 sim_pfm: -247.6176944207691
episode: 316 training return: tensor(-329.5342, device='cuda:0')
episode: 317 training return: tensor(-297.5113, device='cuda:0')
episode: 318 training return: tensor(-328.4610, device='cuda:0')
episode: 319 training return: tensor(-319.0978, device='cuda:0')
epoch: 80 test_true_pfm: 410.5752108885661 sim_pfm: -244.87497478385922
episode: 320 training return: tensor(-317.9362, device='cuda:0')
episode: 321 training return: tensor(-322.8465, device='cuda:0')
episode: 322 training return: tensor(-330.8215, device='cuda:0')
episode: 323 training return: tensor(-313.0081, device='cuda:0')
epoch: 81 test_true_pfm: 449.0257684539231 sim_pfm: -218.34371355346715
episode: 324 training return: tensor(-345.8849, device='cuda:0')
episode: 325 training return: tensor(-314.1589, device='cuda:0')
episode: 326 training return: tensor(-294.8880, device='cuda:0')
episode: 327 training return: tensor(-336.2758, device='cuda:0')
epoch: 82 test_true_pfm: 330.52926850334484 sim_pfm: -273.78120130837004
episode: 328 training return: tensor(-287.8353, device='cuda:0')
episode: 329 training return: tensor(-320.1408, device='cuda:0')
episode: 330 training return: tensor(-327.7994, device='cuda:0')
episode: 331 training return: tensor(-319.2706, device='cuda:0')
epoch: 83 test_true_pfm: 363.2719306289852 sim_pfm: -222.53093056729995
episode: 332 training return: tensor(-312.5285, device='cuda:0')
episode: 333 training return: tensor(-319.7714, device='cuda:0')
episode: 334 training return: tensor(-306.3388, device='cuda:0')
episode: 335 training return: tensor(-279.1658, device='cuda:0')
epoch: 84 test_true_pfm: 387.040490867746 sim_pfm: -230.64485293124258
episode: 336 training return: tensor(-337.7720, device='cuda:0')
episode: 337 training return: tensor(-306.5863, device='cuda:0')
episode: 338 training return: tensor(-316.5406, device='cuda:0')
episode: 339 training return: tensor(-322.2827, device='cuda:0')
epoch: 85 test_true_pfm: 394.3695540454915 sim_pfm: -259.1640432427327
episode: 340 training return: tensor(-271.4323, device='cuda:0')
episode: 341 training return: tensor(-285.6940, device='cuda:0')
episode: 342 training return: tensor(-317.2848, device='cuda:0')
episode: 343 training return: tensor(-330.5811, device='cuda:0')
epoch: 86 test_true_pfm: 426.99741151367306 sim_pfm: -214.22495451398814
episode: 344 training return: tensor(-311.1514, device='cuda:0')
episode: 345 training return: tensor(-321.0059, device='cuda:0')
episode: 346 training return: tensor(-306.6628, device='cuda:0')
episode: 347 training return: tensor(-322.6932, device='cuda:0')
epoch: 87 test_true_pfm: 336.80205929255624 sim_pfm: -297.8599889550824
episode: 348 training return: tensor(-338.9265, device='cuda:0')
episode: 349 training return: tensor(-308.4870, device='cuda:0')
episode: 350 training return: tensor(-328.7261, device='cuda:0')
episode: 351 training return: tensor(-310.8424, device='cuda:0')
epoch: 88 test_true_pfm: 378.80725545324896 sim_pfm: -253.1851108619788
episode: 352 training return: tensor(-325.6951, device='cuda:0')
episode: 353 training return: tensor(-295.7098, device='cuda:0')
episode: 354 training return: tensor(-342.8764, device='cuda:0')
episode: 355 training return: tensor(-289.1264, device='cuda:0')
epoch: 89 test_true_pfm: 314.57016151624265 sim_pfm: -288.0889263690139
episode: 356 training return: tensor(-348.9355, device='cuda:0')
episode: 357 training return: tensor(-328.1677, device='cuda:0')
episode: 358 training return: tensor(-305.8422, device='cuda:0')
episode: 359 training return: tensor(-276.2756, device='cuda:0')
epoch: 90 test_true_pfm: 441.0000592208499 sim_pfm: -243.68486756659695
episode: 360 training return: tensor(-328.3171, device='cuda:0')
episode: 361 training return: tensor(-312.3801, device='cuda:0')
episode: 362 training return: tensor(-307.6675, device='cuda:0')
episode: 363 training return: tensor(-290.8887, device='cuda:0')
epoch: 91 test_true_pfm: 358.5045100883793 sim_pfm: -253.35195720975753
episode: 364 training return: tensor(-275.3868, device='cuda:0')
episode: 365 training return: tensor(-307.5023, device='cuda:0')
episode: 366 training return: tensor(-329.5306, device='cuda:0')
episode: 367 training return: tensor(-283.7254, device='cuda:0')
epoch: 92 test_true_pfm: 393.9826513187427 sim_pfm: -256.74420376886457
episode: 368 training return: tensor(-305.8752, device='cuda:0')
episode: 369 training return: tensor(-290.9622, device='cuda:0')
episode: 370 training return: tensor(-329.8360, device='cuda:0')
episode: 371 training return: tensor(-340.6014, device='cuda:0')
epoch: 93 test_true_pfm: 427.33215014241296 sim_pfm: -225.16551520085582
episode: 372 training return: tensor(-316.9443, device='cuda:0')
episode: 373 training return: tensor(-350.9688, device='cuda:0')
episode: 374 training return: tensor(-339.7588, device='cuda:0')
episode: 375 training return: tensor(-329.1572, device='cuda:0')
epoch: 94 test_true_pfm: 399.62441208789295 sim_pfm: -256.9497550835561
episode: 376 training return: tensor(-314.2314, device='cuda:0')
episode: 377 training return: tensor(-349.8796, device='cuda:0')
episode: 378 training return: tensor(-335.8837, device='cuda:0')
episode: 379 training return: tensor(-321.5984, device='cuda:0')
epoch: 95 test_true_pfm: 483.56651914031846 sim_pfm: -199.94920919276774
episode: 380 training return: tensor(-340.2372, device='cuda:0')
episode: 381 training return: tensor(-292.5434, device='cuda:0')
episode: 382 training return: tensor(-310.2112, device='cuda:0')
episode: 383 training return: tensor(-290.4714, device='cuda:0')
epoch: 96 test_true_pfm: 477.6415442895768 sim_pfm: -220.98831197148925
episode: 384 training return: tensor(-324.8871, device='cuda:0')
episode: 385 training return: tensor(-331.3668, device='cuda:0')
episode: 386 training return: tensor(-325.4132, device='cuda:0')
episode: 387 training return: tensor(-275.7461, device='cuda:0')
epoch: 97 test_true_pfm: 482.57084357809305 sim_pfm: -213.7358313696459
episode: 388 training return: tensor(-315.1115, device='cuda:0')
episode: 389 training return: tensor(-291.6581, device='cuda:0')
episode: 390 training return: tensor(-328.6248, device='cuda:0')
episode: 391 training return: tensor(-318.2884, device='cuda:0')
epoch: 98 test_true_pfm: 440.14263297045653 sim_pfm: -236.362349450986
episode: 392 training return: tensor(-309.4194, device='cuda:0')
episode: 393 training return: tensor(-329.2643, device='cuda:0')
episode: 394 training return: tensor(-268.8284, device='cuda:0')
episode: 395 training return: tensor(-315.5057, device='cuda:0')
epoch: 99 test_true_pfm: 387.0012303937737 sim_pfm: -243.41965209454065
episode: 396 training return: tensor(-318.9955, device='cuda:0')
episode: 397 training return: tensor(-301.8928, device='cuda:0')
episode: 398 training return: tensor(-281.2774, device='cuda:0')
episode: 399 training return: tensor(-311.3477, device='cuda:0')
epoch: 100 test_true_pfm: 427.79002044045563 sim_pfm: -217.30461753427517
episode: 400 training return: tensor(-307.9716, device='cuda:0')
episode: 401 training return: tensor(-310.6971, device='cuda:0')
episode: 402 training return: tensor(-311.5709, device='cuda:0')
episode: 403 training return: tensor(-318.3710, device='cuda:0')
epoch: 101 test_true_pfm: 414.8938887979703 sim_pfm: -220.57256304596862
episode: 404 training return: tensor(-300.0219, device='cuda:0')
episode: 405 training return: tensor(-317.0634, device='cuda:0')
episode: 406 training return: tensor(-334.0677, device='cuda:0')
episode: 407 training return: tensor(-292.0635, device='cuda:0')
epoch: 102 test_true_pfm: 373.99079050582753 sim_pfm: -250.8679137910561
episode: 408 training return: tensor(-295.7283, device='cuda:0')
episode: 409 training return: tensor(-335.7709, device='cuda:0')
episode: 410 training return: tensor(-303.7287, device='cuda:0')
episode: 411 training return: tensor(-310.6661, device='cuda:0')
epoch: 103 test_true_pfm: 398.53144560484526 sim_pfm: -237.65484296524664
episode: 412 training return: tensor(-317.3546, device='cuda:0')
episode: 413 training return: tensor(-326.9621, device='cuda:0')
episode: 414 training return: tensor(-304.8757, device='cuda:0')
episode: 415 training return: tensor(-325.6158, device='cuda:0')
epoch: 104 test_true_pfm: 388.72990536320486 sim_pfm: -233.25986868185768
episode: 416 training return: tensor(-307.3178, device='cuda:0')
episode: 417 training return: tensor(-316.6706, device='cuda:0')
episode: 418 training return: tensor(-292.8365, device='cuda:0')
episode: 419 training return: tensor(-324.9190, device='cuda:0')
epoch: 105 test_true_pfm: 461.79507030415203 sim_pfm: -223.2001392917009
episode: 420 training return: tensor(-309.1759, device='cuda:0')
episode: 421 training return: tensor(-311.6028, device='cuda:0')
episode: 422 training return: tensor(-284.3031, device='cuda:0')
episode: 423 training return: tensor(-307.4490, device='cuda:0')
epoch: 106 test_true_pfm: 470.9176597494516 sim_pfm: -218.04689922996718
episode: 424 training return: tensor(-318.3702, device='cuda:0')
episode: 425 training return: tensor(-320.1591, device='cuda:0')
episode: 426 training return: tensor(-292.4782, device='cuda:0')
episode: 427 training return: tensor(-299.4993, device='cuda:0')
epoch: 107 test_true_pfm: 435.10431377150854 sim_pfm: -239.42536520197368
episode: 428 training return: tensor(-311.5845, device='cuda:0')
episode: 429 training return: tensor(-268.1404, device='cuda:0')
episode: 430 training return: tensor(-302.8802, device='cuda:0')
episode: 431 training return: tensor(-332.4715, device='cuda:0')
epoch: 108 test_true_pfm: 397.5822916238896 sim_pfm: -253.10469281394035
episode: 432 training return: tensor(-310.7876, device='cuda:0')
episode: 433 training return: tensor(-328.6249, device='cuda:0')
episode: 434 training return: tensor(-298.7113, device='cuda:0')
episode: 435 training return: tensor(-315.4090, device='cuda:0')
epoch: 109 test_true_pfm: 380.74555739672866 sim_pfm: -266.67983916254406
episode: 436 training return: tensor(-317.7016, device='cuda:0')
episode: 437 training return: tensor(-308.4149, device='cuda:0')
episode: 438 training return: tensor(-307.7047, device='cuda:0')
episode: 439 training return: tensor(-314.7628, device='cuda:0')
epoch: 110 test_true_pfm: 414.4167771563355 sim_pfm: -213.30525671706224
episode: 440 training return: tensor(-308.5008, device='cuda:0')
episode: 441 training return: tensor(-323.7779, device='cuda:0')
episode: 442 training return: tensor(-274.8008, device='cuda:0')
episode: 443 training return: tensor(-330.3044, device='cuda:0')
epoch: 111 test_true_pfm: 430.6101781532032 sim_pfm: -204.17687397177602
episode: 444 training return: tensor(-319.1558, device='cuda:0')
episode: 445 training return: tensor(-296.9077, device='cuda:0')
episode: 446 training return: tensor(-299.7657, device='cuda:0')
episode: 447 training return: tensor(-316.8281, device='cuda:0')
epoch: 112 test_true_pfm: 438.74730903533464 sim_pfm: -211.5304587208278
episode: 448 training return: tensor(-276.9231, device='cuda:0')
episode: 449 training return: tensor(-309.1908, device='cuda:0')
episode: 450 training return: tensor(-285.9306, device='cuda:0')
episode: 451 training return: tensor(-328.6312, device='cuda:0')
epoch: 113 test_true_pfm: 437.433690360065 sim_pfm: -207.64507858012803
episode: 452 training return: tensor(-309.1431, device='cuda:0')
episode: 453 training return: tensor(-301.3691, device='cuda:0')
episode: 454 training return: tensor(-288.0372, device='cuda:0')
episode: 455 training return: tensor(-316.3414, device='cuda:0')
epoch: 114 test_true_pfm: 499.58598887639795 sim_pfm: -215.66749698826848
episode: 456 training return: tensor(-314.7041, device='cuda:0')
episode: 457 training return: tensor(-327.9163, device='cuda:0')
episode: 458 training return: tensor(-272.3285, device='cuda:0')
episode: 459 training return: tensor(-322.9052, device='cuda:0')
epoch: 115 test_true_pfm: 419.10105066791203 sim_pfm: -223.16336385874698
episode: 460 training return: tensor(-260.9925, device='cuda:0')
episode: 461 training return: tensor(-325.0989, device='cuda:0')
episode: 462 training return: tensor(-320.3423, device='cuda:0')
episode: 463 training return: tensor(-320.7639, device='cuda:0')
epoch: 116 test_true_pfm: 415.47495874958344 sim_pfm: -229.02993289753795
episode: 464 training return: tensor(-324.6222, device='cuda:0')
episode: 465 training return: tensor(-310.6701, device='cuda:0')
episode: 466 training return: tensor(-308.0681, device='cuda:0')
episode: 467 training return: tensor(-317.5866, device='cuda:0')
epoch: 117 test_true_pfm: 378.35924663306474 sim_pfm: -244.45548536241404
episode: 468 training return: tensor(-311.1894, device='cuda:0')
episode: 469 training return: tensor(-310.3029, device='cuda:0')
episode: 470 training return: tensor(-271.7940, device='cuda:0')
episode: 471 training return: tensor(-310.2137, device='cuda:0')
epoch: 118 test_true_pfm: 450.94090536328116 sim_pfm: -227.59439126067446
episode: 472 training return: tensor(-315.7912, device='cuda:0')
episode: 473 training return: tensor(-324.8286, device='cuda:0')
episode: 474 training return: tensor(-306.3871, device='cuda:0')
episode: 475 training return: tensor(-334.3134, device='cuda:0')
epoch: 119 test_true_pfm: 367.89775911405513 sim_pfm: -249.67417750305808
episode: 476 training return: tensor(-261.2143, device='cuda:0')
episode: 477 training return: tensor(-299.2675, device='cuda:0')
episode: 478 training return: tensor(-291.6390, device='cuda:0')
episode: 479 training return: tensor(-305.5005, device='cuda:0')
epoch: 120 test_true_pfm: 420.8758363236338 sim_pfm: -212.518585523086
episode: 480 training return: tensor(-284.8989, device='cuda:0')
episode: 481 training return: tensor(-317.7079, device='cuda:0')
episode: 482 training return: tensor(-286.2267, device='cuda:0')
episode: 483 training return: tensor(-280.7640, device='cuda:0')
epoch: 121 test_true_pfm: 391.4141441100201 sim_pfm: -228.7527739041834
episode: 484 training return: tensor(-273.5997, device='cuda:0')
episode: 485 training return: tensor(-317.4054, device='cuda:0')
episode: 486 training return: tensor(-306.5455, device='cuda:0')
episode: 487 training return: tensor(-334.2122, device='cuda:0')
epoch: 122 test_true_pfm: 408.09375945375524 sim_pfm: -240.5412083536697
episode: 488 training return: tensor(-296.8506, device='cuda:0')
episode: 489 training return: tensor(-276.7480, device='cuda:0')
episode: 490 training return: tensor(-302.8656, device='cuda:0')
episode: 491 training return: tensor(-269.7076, device='cuda:0')
epoch: 123 test_true_pfm: 477.516982037113 sim_pfm: -214.5281944978051
episode: 492 training return: tensor(-315.9568, device='cuda:0')
episode: 493 training return: tensor(-320.6773, device='cuda:0')
episode: 494 training return: tensor(-320.8647, device='cuda:0')
episode: 495 training return: tensor(-311.5356, device='cuda:0')
epoch: 124 test_true_pfm: 395.99575419213267 sim_pfm: -234.04381644370733
episode: 496 training return: tensor(-320.4173, device='cuda:0')
episode: 497 training return: tensor(-312.1784, device='cuda:0')
episode: 498 training return: tensor(-307.2343, device='cuda:0')
episode: 499 training return: tensor(-275.5153, device='cuda:0')
epoch: 125 test_true_pfm: 414.29052507370517 sim_pfm: -220.47447146145473
episode: 500 training return: tensor(-269.2590, device='cuda:0')
episode: 501 training return: tensor(-293.9519, device='cuda:0')
episode: 502 training return: tensor(-298.7480, device='cuda:0')
episode: 503 training return: tensor(-313.3517, device='cuda:0')
epoch: 126 test_true_pfm: 465.57190263572596 sim_pfm: -228.496692814806
episode: 504 training return: tensor(-303.1743, device='cuda:0')
episode: 505 training return: tensor(-315.7089, device='cuda:0')
episode: 506 training return: tensor(-316.4573, device='cuda:0')
episode: 507 training return: tensor(-301.0294, device='cuda:0')
epoch: 127 test_true_pfm: 382.1351394558519 sim_pfm: -221.8630411901589
episode: 508 training return: tensor(-311.7091, device='cuda:0')
episode: 509 training return: tensor(-302.1710, device='cuda:0')
episode: 510 training return: tensor(-296.5478, device='cuda:0')
episode: 511 training return: tensor(-286.0644, device='cuda:0')
epoch: 128 test_true_pfm: 435.68638661544077 sim_pfm: -217.5242812516323
episode: 512 training return: tensor(-295.6551, device='cuda:0')
episode: 513 training return: tensor(-270.4323, device='cuda:0')
episode: 514 training return: tensor(-311.3619, device='cuda:0')
episode: 515 training return: tensor(-294.4410, device='cuda:0')
epoch: 129 test_true_pfm: 431.20636647034775 sim_pfm: -220.92270599398762
episode: 516 training return: tensor(-325.8220, device='cuda:0')
episode: 517 training return: tensor(-312.8893, device='cuda:0')
episode: 518 training return: tensor(-294.6651, device='cuda:0')
episode: 519 training return: tensor(-345.8314, device='cuda:0')
epoch: 130 test_true_pfm: 425.85848209682064 sim_pfm: -238.02373741553552
episode: 520 training return: tensor(-307.2067, device='cuda:0')
episode: 521 training return: tensor(-300.7961, device='cuda:0')
episode: 522 training return: tensor(-299.1117, device='cuda:0')
episode: 523 training return: tensor(-274.3686, device='cuda:0')
epoch: 131 test_true_pfm: 464.55839546395555 sim_pfm: -207.95666473956467
episode: 524 training return: tensor(-302.4122, device='cuda:0')
episode: 525 training return: tensor(-284.9592, device='cuda:0')
episode: 526 training return: tensor(-241.2838, device='cuda:0')
episode: 527 training return: tensor(-283.2864, device='cuda:0')
epoch: 132 test_true_pfm: 461.6972682171008 sim_pfm: -222.48999637085944
episode: 528 training return: tensor(-304.2143, device='cuda:0')
episode: 529 training return: tensor(-282.3128, device='cuda:0')
episode: 530 training return: tensor(-318.6680, device='cuda:0')
episode: 531 training return: tensor(-313.8941, device='cuda:0')
epoch: 133 test_true_pfm: 485.92138649719215 sim_pfm: -206.60746107040904
episode: 532 training return: tensor(-312.5403, device='cuda:0')
episode: 533 training return: tensor(-264.4721, device='cuda:0')
episode: 534 training return: tensor(-300.7027, device='cuda:0')
episode: 535 training return: tensor(-263.3564, device='cuda:0')
epoch: 134 test_true_pfm: 332.2032973856329 sim_pfm: -247.36089799869418
episode: 536 training return: tensor(-290.2198, device='cuda:0')
episode: 537 training return: tensor(-297.8092, device='cuda:0')
episode: 538 training return: tensor(-287.3832, device='cuda:0')
episode: 539 training return: tensor(-262.6415, device='cuda:0')
epoch: 135 test_true_pfm: 420.5815853417448 sim_pfm: -225.65725758722206
episode: 540 training return: tensor(-329.0727, device='cuda:0')
episode: 541 training return: tensor(-295.0126, device='cuda:0')
episode: 542 training return: tensor(-313.4511, device='cuda:0')
episode: 543 training return: tensor(-313.3977, device='cuda:0')
epoch: 136 test_true_pfm: 441.30984934889756 sim_pfm: -207.68347157605845
episode: 544 training return: tensor(-287.5319, device='cuda:0')
episode: 545 training return: tensor(-267.2158, device='cuda:0')
episode: 546 training return: tensor(-299.7916, device='cuda:0')
episode: 547 training return: tensor(-278.1207, device='cuda:0')
epoch: 137 test_true_pfm: 518.8119709870766 sim_pfm: -199.38478274672525
episode: 548 training return: tensor(-280.5380, device='cuda:0')
episode: 549 training return: tensor(-264.3283, device='cuda:0')
episode: 550 training return: tensor(-292.2202, device='cuda:0')
episode: 551 training return: tensor(-319.7365, device='cuda:0')
epoch: 138 test_true_pfm: 409.72082680588636 sim_pfm: -243.46102898849253
episode: 552 training return: tensor(-266.9533, device='cuda:0')
episode: 553 training return: tensor(-256.7349, device='cuda:0')
episode: 554 training return: tensor(-319.7924, device='cuda:0')
episode: 555 training return: tensor(-312.0538, device='cuda:0')
epoch: 139 test_true_pfm: 378.85973518472787 sim_pfm: -237.92112445753688
episode: 556 training return: tensor(-298.9839, device='cuda:0')
episode: 557 training return: tensor(-294.4419, device='cuda:0')
episode: 558 training return: tensor(-286.5754, device='cuda:0')
episode: 559 training return: tensor(-275.4875, device='cuda:0')
epoch: 140 test_true_pfm: 453.54922702717204 sim_pfm: -208.47050525145218
episode: 560 training return: tensor(-312.9022, device='cuda:0')
episode: 561 training return: tensor(-313.2123, device='cuda:0')
episode: 562 training return: tensor(-302.8043, device='cuda:0')
episode: 563 training return: tensor(-293.8608, device='cuda:0')
epoch: 141 test_true_pfm: 423.40744510967676 sim_pfm: -221.72151336919828
episode: 564 training return: tensor(-307.9874, device='cuda:0')
episode: 565 training return: tensor(-253.5727, device='cuda:0')
episode: 566 training return: tensor(-288.0683, device='cuda:0')
episode: 567 training return: tensor(-282.9537, device='cuda:0')
epoch: 142 test_true_pfm: 463.50719536446985 sim_pfm: -203.30603777770497
episode: 568 training return: tensor(-266.1505, device='cuda:0')
episode: 569 training return: tensor(-307.0896, device='cuda:0')
episode: 570 training return: tensor(-296.9596, device='cuda:0')
episode: 571 training return: tensor(-303.3888, device='cuda:0')
epoch: 143 test_true_pfm: 514.7135602916245 sim_pfm: -218.39971351985392
episode: 572 training return: tensor(-289.9671, device='cuda:0')
episode: 573 training return: tensor(-309.8416, device='cuda:0')
episode: 574 training return: tensor(-271.5741, device='cuda:0')
episode: 575 training return: tensor(-293.0501, device='cuda:0')
epoch: 144 test_true_pfm: 455.36346611989006 sim_pfm: -206.12783333534026
episode: 576 training return: tensor(-249.7130, device='cuda:0')
episode: 577 training return: tensor(-283.2263, device='cuda:0')
episode: 578 training return: tensor(-266.4751, device='cuda:0')
episode: 579 training return: tensor(-295.2788, device='cuda:0')
epoch: 145 test_true_pfm: 457.18692059711935 sim_pfm: -215.61879325394207
episode: 580 training return: tensor(-323.3083, device='cuda:0')
episode: 581 training return: tensor(-286.5729, device='cuda:0')
episode: 582 training return: tensor(-310.8913, device='cuda:0')
episode: 583 training return: tensor(-244.9221, device='cuda:0')
epoch: 146 test_true_pfm: 433.5420807444896 sim_pfm: -215.07540310026766
episode: 584 training return: tensor(-314.5416, device='cuda:0')
episode: 585 training return: tensor(-275.2760, device='cuda:0')
episode: 586 training return: tensor(-259.4241, device='cuda:0')
episode: 587 training return: tensor(-304.4495, device='cuda:0')
epoch: 147 test_true_pfm: 510.0715437216063 sim_pfm: -206.18924518713416
episode: 588 training return: tensor(-325.0701, device='cuda:0')
episode: 589 training return: tensor(-304.0626, device='cuda:0')
episode: 590 training return: tensor(-267.7366, device='cuda:0')
episode: 591 training return: tensor(-260.0320, device='cuda:0')
epoch: 148 test_true_pfm: 478.7276968193926 sim_pfm: -191.355018236946
episode: 592 training return: tensor(-301.0157, device='cuda:0')
episode: 593 training return: tensor(-257.4244, device='cuda:0')
episode: 594 training return: tensor(-274.9482, device='cuda:0')
episode: 595 training return: tensor(-305.8708, device='cuda:0')
epoch: 149 test_true_pfm: 497.383847848789 sim_pfm: -203.39249666365018
episode: 596 training return: tensor(-297.9207, device='cuda:0')
episode: 597 training return: tensor(-260.1431, device='cuda:0')
episode: 598 training return: tensor(-268.0789, device='cuda:0')
episode: 599 training return: tensor(-304.1735, device='cuda:0')
epoch: 150 test_true_pfm: 443.8202198728939 sim_pfm: -201.21342458143286
