['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '9', '--data', '100000', '--regu', '0.2']
epoch: 0 training_loss 0.27415375247597695 test_loss: 0.20415916442871093
epoch: 1 training_loss 0.2119940024614334 test_loss: 0.21072678565979003
epoch: 2 training_loss 0.1942401745915413 test_loss: 0.2686625957489014
epoch: 3 training_loss 0.20195677280426025 test_loss: 0.19590349197387696
epoch: 4 training_loss 0.19251476816833019 test_loss: 0.1954830050468445
epoch: 5 training_loss 0.19632810235023498 test_loss: 0.1865037202835083
epoch: 6 training_loss 0.18871327996253967 test_loss: 0.17748878002166749
epoch: 7 training_loss 0.2021781811118126 test_loss: 0.19242655038833617
epoch: 8 training_loss 0.19321152977645398 test_loss: 0.1861846089363098
epoch: 9 training_loss 0.18797248430550098 test_loss: 0.1761740565299988
epoch: 10 training_loss 0.19055970132350922 test_loss: 0.20314378738403321
epoch: 11 training_loss 0.1985855820029974 test_loss: 0.1881609797477722
epoch: 12 training_loss 0.18431129224598408 test_loss: 0.19025006294250488
epoch: 13 training_loss 0.18526985630393028 test_loss: 0.1843816637992859
epoch: 14 training_loss 0.181866462752223 test_loss: 0.1931508183479309
epoch: 15 training_loss 0.18839083530008793 test_loss: 0.20439114570617675
epoch: 16 training_loss 0.17823840238153935 test_loss: 0.18991774320602417
epoch: 17 training_loss 0.18836342811584472 test_loss: 0.17429180145263673
epoch: 18 training_loss 0.1883158352971077 test_loss: 0.17514641284942628
epoch: 19 training_loss 0.1899496302008629 test_loss: 0.18636903762817383
epoch: 20 training_loss 0.18442058861255645 test_loss: 0.20066280364990235
epoch: 21 training_loss 0.1948466894030571 test_loss: 0.16108996868133546
epoch: 22 training_loss 0.17590643979609014 test_loss: 0.18959951400756836
epoch: 23 training_loss 0.17979655243456363 test_loss: 0.1873422384262085
epoch: 24 training_loss 0.18115148045122623 test_loss: 0.20702955722808838
epoch: 25 training_loss 0.17770991176366807 test_loss: 0.192669141292572
epoch: 26 training_loss 0.17972337521612644 test_loss: 0.19362692832946776
epoch: 27 training_loss 0.1816358271241188 test_loss: 0.18406916856765748
epoch: 28 training_loss 0.1795317167788744 test_loss: 0.18249588012695311
epoch: 29 training_loss 0.18483640499413012 test_loss: 0.19975868463516236
epoch: 30 training_loss 0.1814851575344801 test_loss: 0.17329498529434204
epoch: 31 training_loss 0.17694576345384122 test_loss: 0.18255236148834228
epoch: 32 training_loss 0.17847267247736454 test_loss: 0.18400055170059204
epoch: 33 training_loss 0.180129584223032 test_loss: 0.18465681076049806
epoch: 34 training_loss 0.1860962525010109 test_loss: 0.1967573046684265
epoch: 35 training_loss 0.18499697431921958 test_loss: 0.17073782682418823
epoch: 36 training_loss 0.18635140657424926 test_loss: 0.19879298210144042
epoch: 37 training_loss 0.18540689535439014 test_loss: 0.19139860868453978
epoch: 38 training_loss 0.18565869010984898 test_loss: 0.18587539196014405
epoch: 39 training_loss 0.17781328082084655 test_loss: 0.18811928033828734
epoch: 40 training_loss 0.17843553438782692 test_loss: 0.17459614276885987
epoch: 41 training_loss 0.1826797830313444 test_loss: 0.19144582748413086
epoch: 42 training_loss 0.180506307259202 test_loss: 0.16411460638046266
epoch: 43 training_loss 0.18403179794549943 test_loss: 0.1830485463142395
epoch: 44 training_loss 0.1778664706647396 test_loss: 0.16622529029846192
epoch: 45 training_loss 0.19159054949879648 test_loss: 0.18876584768295288
epoch: 46 training_loss 0.18809048250317573 test_loss: 0.1944981813430786
epoch: 47 training_loss 0.1832553669810295 test_loss: 0.1843921184539795
epoch: 48 training_loss 0.18530030623078347 test_loss: 0.19433451890945436
epoch: 49 training_loss 0.17912733547389506 test_loss: 0.17441893815994264
epoch: 50 training_loss 0.1799384892731905 test_loss: 0.17297419309616088
epoch: 51 training_loss 0.17972531035542488 test_loss: 0.17842737436294556
epoch: 52 training_loss 0.18354318998754024 test_loss: 0.18311657905578613
epoch: 53 training_loss 0.18134087309241295 test_loss: 0.17148265838623047
epoch: 54 training_loss 0.1791809108108282 test_loss: 0.17558094263076782
epoch: 55 training_loss 0.17766244910657406 test_loss: 0.17231101989746095
epoch: 56 training_loss 0.18241373397409916 test_loss: 0.173617422580719
epoch: 57 training_loss 0.17931346356868744 test_loss: 0.19499198198318482
epoch: 58 training_loss 0.17183734536170958 test_loss: 0.1766716718673706
epoch: 59 training_loss 0.17562536969780923 test_loss: 0.18448970317840577
epoch: 60 training_loss 0.1823552418127656 test_loss: 0.18356539011001588
epoch: 61 training_loss 0.1796174632012844 test_loss: 0.20046260356903076
epoch: 62 training_loss 0.17133985303342342 test_loss: 0.20178804397583008
epoch: 63 training_loss 0.17415848903357983 test_loss: 0.18877934217453002
epoch: 64 training_loss 0.18287051878869534 test_loss: 0.17245090007781982
epoch: 65 training_loss 0.18663036979734898 test_loss: 0.17876391410827636
epoch: 66 training_loss 0.18104292213916778 test_loss: 0.20183231830596923
epoch: 67 training_loss 0.16888581804931163 test_loss: 0.18614165782928466
epoch: 68 training_loss 0.18383565440773963 test_loss: 0.18107881546020507
epoch: 69 training_loss 0.1837948651611805 test_loss: 0.17984176874160768
epoch: 70 training_loss 0.17726428784430026 test_loss: 0.18752015829086305
epoch: 71 training_loss 0.18283066436648368 test_loss: 0.19116562604904175
epoch: 72 training_loss 0.18169372700154782 test_loss: 0.1851361870765686
epoch: 73 training_loss 0.17834529533982277 test_loss: 0.19085408449172975
epoch: 74 training_loss 0.18407363340258598 test_loss: 0.18291906118392945
epoch: 75 training_loss 0.17802181608974935 test_loss: 0.1819535732269287
epoch: 76 training_loss 0.1877407695353031 test_loss: 0.19052770137786865
epoch: 77 training_loss 0.18349057219922543 test_loss: 0.17450101375579835
epoch: 78 training_loss 0.18565606992691755 test_loss: 0.16856173276901246
epoch: 79 training_loss 0.18782455071806908 test_loss: 0.1883804440498352
epoch: 80 training_loss 0.18162549547851087 test_loss: 0.18564053773880004
epoch: 81 training_loss 0.17700119078159332 test_loss: 0.1645005464553833
epoch: 82 training_loss 0.17888040997087956 test_loss: 0.17435368299484252
epoch: 83 training_loss 0.17959402829408647 test_loss: 0.17827093601226807
epoch: 84 training_loss 0.181553068831563 test_loss: 0.17837475538253783
epoch: 85 training_loss 0.17875467754900456 test_loss: 0.19094687700271606
epoch: 86 training_loss 0.18078112721443176 test_loss: 0.17648842334747314
epoch: 87 training_loss 0.17437627777457237 test_loss: 0.1907153010368347
epoch: 88 training_loss 0.17834262579679488 test_loss: 0.17899163961410522
epoch: 89 training_loss 0.18009030751883984 test_loss: 0.19506967067718506
epoch: 90 training_loss 0.18190018087625504 test_loss: 0.18746438026428222
epoch: 91 training_loss 0.1776349825412035 test_loss: 0.18062145709991456
epoch: 92 training_loss 0.17353407189249992 test_loss: 0.18732458353042603
epoch: 93 training_loss 0.16994927898049356 test_loss: 0.17228569984436035
epoch: 94 training_loss 0.17889917753636836 test_loss: 0.18064186573028565
epoch: 95 training_loss 0.178320442289114 test_loss: 0.18760373592376708
epoch: 96 training_loss 0.17793881304562092 test_loss: 0.18979860544204713
epoch: 97 training_loss 0.1835586331784725 test_loss: 0.17417480945587158
epoch: 98 training_loss 0.17672861479222773 test_loss: 0.1848774552345276
epoch: 99 training_loss 0.16958195991814137 test_loss: 0.19043020009994507
epoch: 100 training_loss 0.18196392394602298 test_loss: 0.1862184762954712
epoch: 101 training_loss 0.1813086223602295 test_loss: 0.1841797113418579
epoch: 102 training_loss 0.1785409615188837 test_loss: 0.1755651354789734
epoch: 103 training_loss 0.17256034091114997 test_loss: 0.16646541357040406
epoch: 104 training_loss 0.1754190854728222 test_loss: 0.17621589899063111
epoch: 105 training_loss 0.17494207210838794 test_loss: 0.19529691934585572
epoch: 106 training_loss 0.17796931706368924 test_loss: 0.18471630811691284
epoch: 107 training_loss 0.18165330961346626 test_loss: 0.17866110801696777
epoch: 108 training_loss 0.18632321365177631 test_loss: 0.17422659397125245
epoch: 109 training_loss 0.1818018688261509 test_loss: 0.1844886302947998
epoch: 110 training_loss 0.17899059012532234 test_loss: 0.1822136640548706
epoch: 111 training_loss 0.17891960315406322 test_loss: 0.17633627653121947
epoch: 112 training_loss 0.1782247844338417 test_loss: 0.18468486070632933
epoch: 113 training_loss 0.17916072182357312 test_loss: 0.17567381858825684
epoch: 114 training_loss 0.18024399429559707 test_loss: 0.16772048473358153
epoch: 115 training_loss 0.17859083883464336 test_loss: 0.16369967460632323
epoch: 116 training_loss 0.17938499085605145 test_loss: 0.18453673124313355
epoch: 117 training_loss 0.17566174678504468 test_loss: 0.1816003918647766
epoch: 118 training_loss 0.174570202678442 test_loss: 0.1787269115447998
epoch: 119 training_loss 0.1764378920942545 test_loss: 0.1779114842414856
epoch: 120 training_loss 0.18278662398457526 test_loss: 0.19375349283218385
epoch: 121 training_loss 0.17312447734177114 test_loss: 0.18585352897644042
epoch: 122 training_loss 0.17422235563397406 test_loss: 0.17925411462783813
epoch: 123 training_loss 0.17746420264244078 test_loss: 0.1921623945236206
epoch: 124 training_loss 0.17563642650842667 test_loss: 0.1708608388900757
epoch: 125 training_loss 0.1814098408818245 test_loss: 0.17369436025619506
epoch: 126 training_loss 0.17448877200484275 test_loss: 0.171592378616333
epoch: 127 training_loss 0.18596317753195762 test_loss: 0.18673462867736818
epoch: 128 training_loss 0.18063183709979058 test_loss: 0.18041504621505738
epoch: 129 training_loss 0.18488414019346236 test_loss: 0.17091513872146608
epoch: 130 training_loss 0.17431292057037354 test_loss: 0.18548597097396852
epoch: 131 training_loss 0.17950965903699398 test_loss: 0.16007176637649537
epoch: 132 training_loss 0.1736571764200926 test_loss: 0.1936769962310791
epoch: 133 training_loss 0.1767283058911562 test_loss: 0.2082935094833374
epoch: 134 training_loss 0.17950251907110215 test_loss: 0.17755295038223268
epoch: 135 training_loss 0.18049461990594864 test_loss: 0.18044244050979613
epoch: 136 training_loss 0.18431100487709046 test_loss: 0.18250732421875
epoch: 137 training_loss 0.170505004003644 test_loss: 0.18036168813705444
epoch: 138 training_loss 0.1809534451365471 test_loss: 0.1630951166152954
epoch: 139 training_loss 0.18500344060361384 test_loss: 0.17915611267089843
epoch: 140 training_loss 0.17555030927062035 test_loss: 0.1694775938987732
epoch: 141 training_loss 0.1862942536920309 test_loss: 0.19368743896484375
epoch: 142 training_loss 0.18070919282734393 test_loss: 0.16935124397277831
epoch: 143 training_loss 0.17789049603044987 test_loss: 0.18230233192443848
epoch: 144 training_loss 0.17572069004178048 test_loss: 0.1997588872909546
epoch: 145 training_loss 0.16990819104015828 test_loss: 0.19800121784210206
epoch: 146 training_loss 0.18204905427992343 test_loss: 0.18300049304962157
epoch: 147 training_loss 0.1800297375023365 test_loss: 0.17510949373245238
epoch: 148 training_loss 0.1729047593101859 test_loss: 0.1817103385925293
epoch: 149 training_loss 0.1748615963757038 test_loss: 0.17341119050979614
epoch: 0 training_loss 7.804482684135437 test_loss: 4.65643310546875
epoch: 1 training_loss 3.747379457950592 test_loss: 3.061259651184082
epoch: 2 training_loss 2.716301598548889 test_loss: 2.3205215454101564
epoch: 3 training_loss 2.119787732362747 test_loss: 1.9689249038696288
epoch: 4 training_loss 1.8133695495128632 test_loss: 1.6797523498535156
epoch: 5 training_loss 1.583972623348236 test_loss: 1.5188047409057617
epoch: 6 training_loss 1.452094885110855 test_loss: 1.414950466156006
epoch: 7 training_loss 1.3230002629756927 test_loss: 1.3179883003234862
epoch: 8 training_loss 1.2612937819957732 test_loss: 1.254709815979004
epoch: 9 training_loss 1.177533831000328 test_loss: 1.1352056503295898
epoch: 10 training_loss 1.11913225710392 test_loss: 1.120841884613037
epoch: 11 training_loss 1.1045247495174408 test_loss: 1.1034067153930665
epoch: 12 training_loss 1.045412768125534 test_loss: 1.0228928565979003
epoch: 13 training_loss 1.0120157951116562 test_loss: 1.0329405784606933
epoch: 14 training_loss 0.9859943658113479 test_loss: 0.9751528739929199
epoch: 15 training_loss 0.9557390981912612 test_loss: 0.955527400970459
epoch: 16 training_loss 0.9494505286216736 test_loss: 0.9140076637268066
epoch: 17 training_loss 0.9133794462680817 test_loss: 0.9215734481811524
epoch: 18 training_loss 0.909127796292305 test_loss: 0.8903421401977539
epoch: 19 training_loss 0.8941549491882325 test_loss: 0.8600278854370117
epoch: 20 training_loss 0.8543549424409866 test_loss: 0.8757855415344238
epoch: 21 training_loss 0.8584130448102951 test_loss: 0.8335668563842773
epoch: 22 training_loss 0.8292353022098541 test_loss: 0.8368549346923828
epoch: 23 training_loss 0.8130531835556031 test_loss: 0.8453330993652344
epoch: 24 training_loss 0.8063345032930375 test_loss: 0.8166298866271973
epoch: 25 training_loss 0.791566481590271 test_loss: 0.797395133972168
epoch: 26 training_loss 0.7907807385921478 test_loss: 0.7764854431152344
epoch: 27 training_loss 0.7783228045701981 test_loss: 0.8022830009460449
epoch: 28 training_loss 0.758766902089119 test_loss: 0.7426632881164551
epoch: 29 training_loss 0.7857029193639755 test_loss: 0.7324905395507812
epoch: 30 training_loss 0.7352268415689468 test_loss: 0.7437081336975098
epoch: 31 training_loss 0.7382867097854614 test_loss: 0.7353866100311279
epoch: 32 training_loss 0.7309774363040924 test_loss: 0.729073429107666
epoch: 33 training_loss 0.7109006053209305 test_loss: 0.6886240005493164
epoch: 34 training_loss 0.7098885542154312 test_loss: 0.7087608814239502
epoch: 35 training_loss 0.703784111738205 test_loss: 0.7230200290679931
epoch: 36 training_loss 0.7061933809518814 test_loss: 0.6865665912628174
epoch: 37 training_loss 0.6972080844640732 test_loss: 0.698953914642334
epoch: 38 training_loss 0.6965014779567719 test_loss: 0.6701586723327637
epoch: 39 training_loss 0.6770330280065536 test_loss: 0.6667016983032227
epoch: 40 training_loss 0.6817853844165802 test_loss: 0.71706223487854
epoch: 41 training_loss 0.6629896914958954 test_loss: 0.6779726982116699
epoch: 42 training_loss 0.6706413042545318 test_loss: 0.7184255123138428
epoch: 43 training_loss 0.6661221635341644 test_loss: 0.6488718032836914
epoch: 44 training_loss 0.6523911106586456 test_loss: 0.6723776817321777
epoch: 45 training_loss 0.6453186619281769 test_loss: 0.6523650646209717
epoch: 46 training_loss 0.6477720141410828 test_loss: 0.6638355731964112
epoch: 47 training_loss 0.6612601518630982 test_loss: 0.6300588607788086
epoch: 48 training_loss 0.6440174639225006 test_loss: 0.6354376316070557
epoch: 49 training_loss 0.6255116075277328 test_loss: 0.6098223209381104
epoch: 50 training_loss 0.6380907297134399 test_loss: 0.6166517734527588
epoch: 51 training_loss 0.6313883256912232 test_loss: 0.6210627079010009
epoch: 52 training_loss 0.6242605918645858 test_loss: 0.6300417900085449
epoch: 53 training_loss 0.6218491512537002 test_loss: 0.6101471900939941
epoch: 54 training_loss 0.6134931892156601 test_loss: 0.6374109745025635
epoch: 55 training_loss 0.6130620700120926 test_loss: 0.6079935073852539
epoch: 56 training_loss 0.6226871311664581 test_loss: 0.6359570503234864
epoch: 57 training_loss 0.6048356294631958 test_loss: 0.6081897735595703
epoch: 58 training_loss 0.6020018422603607 test_loss: 0.6083600521087646
epoch: 59 training_loss 0.6085962784290314 test_loss: 0.6115277290344239
epoch: 60 training_loss 0.5916586661338806 test_loss: 0.5729607105255127
epoch: 61 training_loss 0.5895831769704819 test_loss: 0.5794538974761962
epoch: 62 training_loss 0.5908243280649185 test_loss: 0.5791197299957276
epoch: 63 training_loss 0.5810012036561966 test_loss: 0.5941617965698243
epoch: 64 training_loss 0.5919140374660492 test_loss: 0.5652352333068847
epoch: 65 training_loss 0.5872124511003495 test_loss: 0.6271867752075195
epoch: 66 training_loss 0.5843334716558456 test_loss: 0.5753005027770997
epoch: 67 training_loss 0.5749447295069694 test_loss: 0.6401642799377442
epoch: 68 training_loss 0.5835489028692246 test_loss: 0.5697711944580078
epoch: 69 training_loss 0.57659462839365 test_loss: 0.5934367179870605
epoch: 70 training_loss 0.5807267045974731 test_loss: 0.5703972816467285
epoch: 71 training_loss 0.5652111884951592 test_loss: 0.5793842792510986
epoch: 72 training_loss 0.5663228034973145 test_loss: 0.5664420604705811
epoch: 73 training_loss 0.5621232637763023 test_loss: 0.5493313312530518
epoch: 74 training_loss 0.5613877725601196 test_loss: 0.5515639305114746
epoch: 75 training_loss 0.5664722582697869 test_loss: 0.551729154586792
epoch: 76 training_loss 0.5545611903071404 test_loss: 0.5648592948913574
epoch: 77 training_loss 0.5626338875293732 test_loss: 0.5470672607421875
epoch: 78 training_loss 0.5596600034832955 test_loss: 0.574630880355835
epoch: 79 training_loss 0.5625973528623581 test_loss: 0.5481943130493164
epoch: 80 training_loss 0.5489230424165725 test_loss: 0.5795236587524414
epoch: 81 training_loss 0.5611632972955704 test_loss: 0.5819778442382812
epoch: 82 training_loss 0.5415066301822662 test_loss: 0.565239143371582
epoch: 83 training_loss 0.5502126246690751 test_loss: 0.5295721530914307
epoch: 84 training_loss 0.5454786401987076 test_loss: 0.536644458770752
epoch: 85 training_loss 0.5529790732264519 test_loss: 0.5892171859741211
epoch: 86 training_loss 0.5448834985494614 test_loss: 0.5397779464721679
epoch: 87 training_loss 0.5388424488902092 test_loss: 0.5387153148651123
epoch: 88 training_loss 0.5367284432053566 test_loss: 0.5236637592315674
epoch: 89 training_loss 0.534097825884819 test_loss: 0.5475526809692383
epoch: 90 training_loss 0.5336523059010506 test_loss: 0.5399274349212646
epoch: 91 training_loss 0.5368436953425407 test_loss: 0.5294963836669921
epoch: 92 training_loss 0.5356305265426635 test_loss: 0.5259249687194825
epoch: 93 training_loss 0.5330035957694054 test_loss: 0.5324606418609619
epoch: 94 training_loss 0.531100282073021 test_loss: 0.5132535934448242
epoch: 95 training_loss 0.5343755879998207 test_loss: 0.5591527462005615
epoch: 96 training_loss 0.5362986174225807 test_loss: 0.547411584854126
epoch: 97 training_loss 0.5262004151940346 test_loss: 0.5385982036590576
epoch: 98 training_loss 0.5292278689146042 test_loss: 0.5301239490509033
epoch: 99 training_loss 0.5201844123005867 test_loss: 0.5477478981018067
epoch: 100 training_loss 0.5209651482105255 test_loss: 0.5326241493225098
epoch: 101 training_loss 0.5309000054001808 test_loss: 0.5302431106567382
epoch: 102 training_loss 0.5177542781829834 test_loss: 0.5110886096954346
epoch: 103 training_loss 0.530615696310997 test_loss: 0.5128760814666748
epoch: 104 training_loss 0.5117820185422898 test_loss: 0.518859577178955
epoch: 105 training_loss 0.5219946575164794 test_loss: 0.5167880058288574
epoch: 106 training_loss 0.51910011947155 test_loss: 0.5053267002105712
epoch: 107 training_loss 0.5098037603497505 test_loss: 0.5071007251739502
epoch: 108 training_loss 0.5094773128628731 test_loss: 0.5219590187072753
epoch: 109 training_loss 0.5196148502826691 test_loss: 0.5086288452148438
epoch: 110 training_loss 0.5068330752849579 test_loss: 0.5061142921447754
epoch: 111 training_loss 0.5156943076848983 test_loss: 0.5113451480865479
epoch: 112 training_loss 0.508996687233448 test_loss: 0.4983109474182129
epoch: 113 training_loss 0.5036403876543045 test_loss: 0.5193253517150879
epoch: 114 training_loss 0.5077174454927444 test_loss: 0.5253359794616699
epoch: 115 training_loss 0.5170884439349175 test_loss: 0.5125051021575928
epoch: 116 training_loss 0.5092533659934998 test_loss: 0.5237595081329346
epoch: 117 training_loss 0.497719546854496 test_loss: 0.537175989151001
epoch: 118 training_loss 0.5105953174829483 test_loss: 0.500944185256958
epoch: 119 training_loss 0.49978580981493 test_loss: 0.4971782207489014
epoch: 120 training_loss 0.5072357782721519 test_loss: 0.5018992900848389
epoch: 121 training_loss 0.5113197967410088 test_loss: 0.4981544494628906
epoch: 122 training_loss 0.5055112606287002 test_loss: 0.5067113876342774
epoch: 123 training_loss 0.4984281691908836 test_loss: 0.5101268291473389
epoch: 124 training_loss 0.49856515526771544 test_loss: 0.4995249271392822
epoch: 125 training_loss 0.508869354724884 test_loss: 0.5293378353118896
epoch: 126 training_loss 0.503337145447731 test_loss: 0.5073318958282471
epoch: 127 training_loss 0.4954813462495804 test_loss: 0.5072245597839355
epoch: 128 training_loss 0.49164292752742766 test_loss: 0.49373726844787597
epoch: 129 training_loss 0.49063377916812895 test_loss: 0.5092059135437011
epoch: 130 training_loss 0.49989770412445067 test_loss: 0.518348503112793
epoch: 131 training_loss 0.4967527008056641 test_loss: 0.48854403495788573
epoch: 132 training_loss 0.48503756791353225 test_loss: 0.49724578857421875
epoch: 133 training_loss 0.495000801384449 test_loss: 0.5117781162261963
epoch: 134 training_loss 0.5000989624857902 test_loss: 0.5099258899688721
epoch: 135 training_loss 0.5092702999711036 test_loss: 0.5625437736511231
epoch: 136 training_loss 0.49895587652921675 test_loss: 0.4768490791320801
epoch: 137 training_loss 0.4976980024576187 test_loss: 0.48769164085388184
epoch: 138 training_loss 0.48535247713327406 test_loss: 0.49988975524902346
epoch: 139 training_loss 0.49281858384609223 test_loss: 0.4873355388641357
epoch: 140 training_loss 0.4908322060108185 test_loss: 0.5032877922058105
epoch: 141 training_loss 0.49021976202726364 test_loss: 0.48961305618286133
epoch: 142 training_loss 0.4895773285627365 test_loss: 0.47430858612060545
epoch: 143 training_loss 0.48267330408096315 test_loss: 0.46671314239501954
epoch: 144 training_loss 0.4865477982163429 test_loss: 0.46417574882507323
epoch: 145 training_loss 0.4831583234667778 test_loss: 0.48553805351257323
epoch: 146 training_loss 0.4782788518071175 test_loss: 0.47455272674560545
epoch: 147 training_loss 0.48858754843473434 test_loss: 0.4845710277557373
epoch: 148 training_loss 0.48679445803165433 test_loss: 0.48301024436950685
epoch: 149 training_loss 0.48154882818460465 test_loss: 0.49274659156799316
2804.147233325448
episode: 0 training return: tensor(-137.2267, device='cuda:0')
episode: 1 training return: tensor(88.9002, device='cuda:0')
episode: 2 training return: tensor(112.9968, device='cuda:0')
episode: 3 training return: tensor(-340.5989, device='cuda:0')
epoch: 1 test_true_pfm: 2281.965102207259 sim_pfm: 84.90334401397074
episode: 4 training return: tensor(-454.3766, device='cuda:0')
episode: 5 training return: tensor(-309.3217, device='cuda:0')
episode: 6 training return: tensor(158.6718, device='cuda:0')
episode: 7 training return: tensor(-388.5743, device='cuda:0')
epoch: 2 test_true_pfm: 2404.3285684089215 sim_pfm: -92.19865028559191
episode: 8 training return: tensor(-372.3034, device='cuda:0')
episode: 9 training return: tensor(-333.9372, device='cuda:0')
episode: 10 training return: tensor(-457.2022, device='cuda:0')
episode: 11 training return: tensor(-456.3496, device='cuda:0')
epoch: 3 test_true_pfm: 2231.0922079604547 sim_pfm: 5.706623454442403
episode: 12 training return: tensor(-332.7755, device='cuda:0')
episode: 13 training return: tensor(-306.8098, device='cuda:0')
episode: 14 training return: tensor(-398.5910, device='cuda:0')
episode: 15 training return: tensor(-458.3336, device='cuda:0')
epoch: 4 test_true_pfm: 2946.312939194695 sim_pfm: -156.41585279532592
episode: 16 training return: tensor(105.5762, device='cuda:0')
episode: 17 training return: tensor(-537.5782, device='cuda:0')
episode: 18 training return: tensor(-526.7880, device='cuda:0')
episode: 19 training return: tensor(-424.4826, device='cuda:0')
epoch: 5 test_true_pfm: 1392.2317444490454 sim_pfm: -419.61761970913113
episode: 20 training return: tensor(-401.5626, device='cuda:0')
episode: 21 training return: tensor(-385.9783, device='cuda:0')
episode: 22 training return: tensor(-381.0776, device='cuda:0')
episode: 23 training return: tensor(-430.5176, device='cuda:0')
epoch: 6 test_true_pfm: 1628.681643239828 sim_pfm: -324.43631033486844
episode: 24 training return: tensor(-393.2158, device='cuda:0')
episode: 25 training return: tensor(-344.3972, device='cuda:0')
episode: 26 training return: tensor(-409.7875, device='cuda:0')
episode: 27 training return: tensor(-305.4771, device='cuda:0')
epoch: 7 test_true_pfm: 1683.8605828223688 sim_pfm: -289.2167431301593
episode: 28 training return: tensor(-434.8434, device='cuda:0')
episode: 29 training return: tensor(-421.8680, device='cuda:0')
episode: 30 training return: tensor(-446.6235, device='cuda:0')
episode: 31 training return: tensor(-403.7173, device='cuda:0')
epoch: 8 test_true_pfm: 1919.1216882871447 sim_pfm: -163.41182725009276
episode: 32 training return: tensor(-432.8286, device='cuda:0')
episode: 33 training return: tensor(-382.9112, device='cuda:0')
episode: 34 training return: tensor(-352.2350, device='cuda:0')
episode: 35 training return: tensor(-34.6214, device='cuda:0')
epoch: 9 test_true_pfm: 1814.0791514827727 sim_pfm: -284.3207952658413
episode: 36 training return: tensor(-57.5813, device='cuda:0')
episode: 37 training return: tensor(-359.0742, device='cuda:0')
episode: 38 training return: tensor(-447.2783, device='cuda:0')
episode: 39 training return: tensor(-321.8228, device='cuda:0')
epoch: 10 test_true_pfm: 1334.02255333088 sim_pfm: -368.49821390169865
episode: 40 training return: tensor(-315.0446, device='cuda:0')
episode: 41 training return: tensor(-358.2280, device='cuda:0')
episode: 42 training return: tensor(-452.6172, device='cuda:0')
episode: 43 training return: tensor(-426.6757, device='cuda:0')
epoch: 11 test_true_pfm: 2168.876939929831 sim_pfm: -327.5649055605948
episode: 44 training return: tensor(-346.6871, device='cuda:0')
episode: 45 training return: tensor(-384.5222, device='cuda:0')
episode: 46 training return: tensor(48.9322, device='cuda:0')
episode: 47 training return: tensor(-297.0700, device='cuda:0')
epoch: 12 test_true_pfm: 2711.4164297993843 sim_pfm: -246.56838352462123
episode: 48 training return: tensor(-317.7150, device='cuda:0')
episode: 49 training return: tensor(-240.0724, device='cuda:0')
episode: 50 training return: tensor(10.9565, device='cuda:0')
episode: 51 training return: tensor(-381.1847, device='cuda:0')
epoch: 13 test_true_pfm: 1475.0443131200718 sim_pfm: -322.0923007312619
episode: 52 training return: tensor(-134.2419, device='cuda:0')
episode: 53 training return: tensor(-431.3907, device='cuda:0')
episode: 54 training return: tensor(-463.3087, device='cuda:0')
episode: 55 training return: tensor(-232.3590, device='cuda:0')
epoch: 14 test_true_pfm: 2743.5977066499613 sim_pfm: 45.63061702626874
episode: 56 training return: tensor(-239.8252, device='cuda:0')
episode: 57 training return: tensor(-228.0243, device='cuda:0')
episode: 58 training return: tensor(-270.0002, device='cuda:0')
episode: 59 training return: tensor(-229.4825, device='cuda:0')
epoch: 15 test_true_pfm: 3067.977140754869 sim_pfm: -57.473307016082494
episode: 60 training return: tensor(-369.5106, device='cuda:0')
episode: 61 training return: tensor(-194.7169, device='cuda:0')
episode: 62 training return: tensor(46.0251, device='cuda:0')
episode: 63 training return: tensor(-280.4563, device='cuda:0')
epoch: 16 test_true_pfm: 2961.769905620835 sim_pfm: -34.31941965566754
episode: 64 training return: tensor(157.0762, device='cuda:0')
episode: 65 training return: tensor(-244.5208, device='cuda:0')
episode: 66 training return: tensor(-101.0357, device='cuda:0')
episode: 67 training return: tensor(-140.7767, device='cuda:0')
epoch: 17 test_true_pfm: 2498.4558501635347 sim_pfm: 116.68448220735688
episode: 68 training return: tensor(-425.4549, device='cuda:0')
episode: 69 training return: tensor(28.1677, device='cuda:0')
episode: 70 training return: tensor(1.4281, device='cuda:0')
episode: 71 training return: tensor(-159.5367, device='cuda:0')
epoch: 18 test_true_pfm: 2778.30094669289 sim_pfm: 113.89283940117457
episode: 72 training return: tensor(-452.5149, device='cuda:0')
episode: 73 training return: tensor(-87.1021, device='cuda:0')
episode: 74 training return: tensor(-70.4163, device='cuda:0')
episode: 75 training return: tensor(-344.0814, device='cuda:0')
epoch: 19 test_true_pfm: 2827.0452280029763 sim_pfm: 94.01589326952428
episode: 76 training return: tensor(-503.1052, device='cuda:0')
episode: 77 training return: tensor(95.9986, device='cuda:0')
episode: 78 training return: tensor(-295.5192, device='cuda:0')
episode: 79 training return: tensor(-29.1471, device='cuda:0')
epoch: 20 test_true_pfm: 2719.2894191446226 sim_pfm: -43.53634207732588
episode: 80 training return: tensor(-390.3355, device='cuda:0')
episode: 81 training return: tensor(-412.3872, device='cuda:0')
episode: 82 training return: tensor(-133.6568, device='cuda:0')
episode: 83 training return: tensor(-140.6507, device='cuda:0')
epoch: 21 test_true_pfm: 2053.7865018341786 sim_pfm: 10.999190686697451
episode: 84 training return: tensor(-428.6500, device='cuda:0')
episode: 85 training return: tensor(31.7632, device='cuda:0')
episode: 86 training return: tensor(-43.4269, device='cuda:0')
episode: 87 training return: tensor(154.9011, device='cuda:0')
epoch: 22 test_true_pfm: 2756.658803048707 sim_pfm: -32.847241110216906
episode: 88 training return: tensor(-284.2680, device='cuda:0')
episode: 89 training return: tensor(-5.6943, device='cuda:0')
episode: 90 training return: tensor(-369.6224, device='cuda:0')
episode: 91 training return: tensor(-241.0028, device='cuda:0')
epoch: 23 test_true_pfm: 2598.2882971061185 sim_pfm: 15.294265685350789
episode: 92 training return: tensor(-281.6883, device='cuda:0')
episode: 93 training return: tensor(-198.3159, device='cuda:0')
episode: 94 training return: tensor(-326.8405, device='cuda:0')
episode: 95 training return: tensor(-370.9861, device='cuda:0')
epoch: 24 test_true_pfm: 2978.5514519574067 sim_pfm: -7.9695454646231765
episode: 96 training return: tensor(-28.9534, device='cuda:0')
episode: 97 training return: tensor(177.6510, device='cuda:0')
episode: 98 training return: tensor(-319.4496, device='cuda:0')
episode: 99 training return: tensor(-190.4791, device='cuda:0')
epoch: 25 test_true_pfm: 2371.91650044038 sim_pfm: 20.278333047162352
episode: 100 training return: tensor(-426.3941, device='cuda:0')
episode: 101 training return: tensor(-118.6152, device='cuda:0')
episode: 102 training return: tensor(107.2807, device='cuda:0')
episode: 103 training return: tensor(-329.8226, device='cuda:0')
epoch: 26 test_true_pfm: 2611.8084478728783 sim_pfm: -261.34713683125057
episode: 104 training return: tensor(-268.4648, device='cuda:0')
episode: 105 training return: tensor(-363.3422, device='cuda:0')
episode: 106 training return: tensor(-375.1641, device='cuda:0')
episode: 107 training return: tensor(-220.4041, device='cuda:0')
epoch: 27 test_true_pfm: 2932.772555061527 sim_pfm: 110.74493496324673
episode: 108 training return: tensor(-112.7065, device='cuda:0')
episode: 109 training return: tensor(-389.3753, device='cuda:0')
episode: 110 training return: tensor(33.6820, device='cuda:0')
episode: 111 training return: tensor(-208.7678, device='cuda:0')
epoch: 28 test_true_pfm: 2450.161197725216 sim_pfm: 150.61055786766033
episode: 112 training return: tensor(-47.9054, device='cuda:0')
episode: 113 training return: tensor(24.7482, device='cuda:0')
episode: 114 training return: tensor(55.8585, device='cuda:0')
episode: 115 training return: tensor(-15.6995, device='cuda:0')
epoch: 29 test_true_pfm: 2982.955688763217 sim_pfm: -20.21087841357803
episode: 116 training return: tensor(-282.4426, device='cuda:0')
episode: 117 training return: tensor(-449.4068, device='cuda:0')
episode: 118 training return: tensor(-452.7251, device='cuda:0')
episode: 119 training return: tensor(-241.4919, device='cuda:0')
epoch: 30 test_true_pfm: 2789.003770771598 sim_pfm: 115.35227707960682
episode: 120 training return: tensor(111.9856, device='cuda:0')
episode: 121 training return: tensor(137.2074, device='cuda:0')
episode: 122 training return: tensor(-457.1049, device='cuda:0')
episode: 123 training return: tensor(-247.1700, device='cuda:0')
epoch: 31 test_true_pfm: 2937.876918071908 sim_pfm: 67.12370313415886
episode: 124 training return: tensor(-390.4689, device='cuda:0')
episode: 125 training return: tensor(114.0172, device='cuda:0')
episode: 126 training return: tensor(-447.1673, device='cuda:0')
episode: 127 training return: tensor(-262.9960, device='cuda:0')
epoch: 32 test_true_pfm: 3222.508857388164 sim_pfm: 68.91754090383377
episode: 128 training return: tensor(-374.2726, device='cuda:0')
episode: 129 training return: tensor(-411.3333, device='cuda:0')
episode: 130 training return: tensor(-334.7657, device='cuda:0')
episode: 131 training return: tensor(-446.6216, device='cuda:0')
epoch: 33 test_true_pfm: 2591.7537169504503 sim_pfm: 63.57710014648425
episode: 132 training return: tensor(87.0096, device='cuda:0')
episode: 133 training return: tensor(-215.4886, device='cuda:0')
episode: 134 training return: tensor(-446.5748, device='cuda:0')
episode: 135 training return: tensor(-86.2617, device='cuda:0')
epoch: 34 test_true_pfm: 2332.19207601985 sim_pfm: 9.524057676268663
episode: 136 training return: tensor(-241.9420, device='cuda:0')
episode: 137 training return: tensor(-285.8231, device='cuda:0')
episode: 138 training return: tensor(-373.5574, device='cuda:0')
episode: 139 training return: tensor(-185.0998, device='cuda:0')
epoch: 35 test_true_pfm: 2799.7635235994894 sim_pfm: 4.400796177370164
episode: 140 training return: tensor(-422.0088, device='cuda:0')
episode: 141 training return: tensor(134.4899, device='cuda:0')
episode: 142 training return: tensor(-439.2751, device='cuda:0')
episode: 143 training return: tensor(-447.9146, device='cuda:0')
epoch: 36 test_true_pfm: 3105.316851531217 sim_pfm: 5.519503876566887
episode: 144 training return: tensor(-262.2573, device='cuda:0')
episode: 145 training return: tensor(90.9675, device='cuda:0')
episode: 146 training return: tensor(114.3495, device='cuda:0')
episode: 147 training return: tensor(-127.5456, device='cuda:0')
epoch: 37 test_true_pfm: 3015.692519414492 sim_pfm: -0.8062948792648967
episode: 148 training return: tensor(-394.5272, device='cuda:0')
episode: 149 training return: tensor(144.7146, device='cuda:0')
episode: 150 training return: tensor(125.0237, device='cuda:0')
episode: 151 training return: tensor(-322.3658, device='cuda:0')
epoch: 38 test_true_pfm: 3016.419419305666 sim_pfm: 55.22369226294298
episode: 152 training return: tensor(-300.8377, device='cuda:0')
episode: 153 training return: tensor(-133.3871, device='cuda:0')
episode: 154 training return: tensor(-323.1385, device='cuda:0')
episode: 155 training return: tensor(-306.6691, device='cuda:0')
epoch: 39 test_true_pfm: 3198.9523069921447 sim_pfm: 8.701944973872742
episode: 156 training return: tensor(-427.2549, device='cuda:0')
episode: 157 training return: tensor(-393.9660, device='cuda:0')
episode: 158 training return: tensor(-309.0961, device='cuda:0')
episode: 159 training return: tensor(-290.7034, device='cuda:0')
epoch: 40 test_true_pfm: 3187.5834675627298 sim_pfm: 44.29905916689313
episode: 160 training return: tensor(-268.9319, device='cuda:0')
episode: 161 training return: tensor(-88.2933, device='cuda:0')
episode: 162 training return: tensor(-362.5727, device='cuda:0')
episode: 163 training return: tensor(-427.9838, device='cuda:0')
epoch: 41 test_true_pfm: 2469.4156262099477 sim_pfm: 25.321534758326987
episode: 164 training return: tensor(-337.7440, device='cuda:0')
episode: 165 training return: tensor(49.4404, device='cuda:0')
episode: 166 training return: tensor(-113.1215, device='cuda:0')
episode: 167 training return: tensor(-351.5937, device='cuda:0')
epoch: 42 test_true_pfm: 2177.929378010267 sim_pfm: 127.56436821032548
episode: 168 training return: tensor(-278.8182, device='cuda:0')
episode: 169 training return: tensor(-11.7867, device='cuda:0')
episode: 170 training return: tensor(-384.9796, device='cuda:0')
episode: 171 training return: tensor(102.3297, device='cuda:0')
epoch: 43 test_true_pfm: 2647.4910507303516 sim_pfm: -121.43531390671463
episode: 172 training return: tensor(-410.2524, device='cuda:0')
episode: 173 training return: tensor(-439.0630, device='cuda:0')
episode: 174 training return: tensor(-21.2261, device='cuda:0')
episode: 175 training return: tensor(-226.5769, device='cuda:0')
epoch: 44 test_true_pfm: 2847.415134384098 sim_pfm: -231.5449622105225
episode: 176 training return: tensor(-446.9125, device='cuda:0')
episode: 177 training return: tensor(-447.8723, device='cuda:0')
episode: 178 training return: tensor(183.0043, device='cuda:0')
episode: 179 training return: tensor(-309.5887, device='cuda:0')
epoch: 45 test_true_pfm: 2463.394500392443 sim_pfm: -265.7116590696193
episode: 180 training return: tensor(-176.9844, device='cuda:0')
episode: 181 training return: tensor(-167.7302, device='cuda:0')
episode: 182 training return: tensor(-269.9613, device='cuda:0')
episode: 183 training return: tensor(-454.3554, device='cuda:0')
epoch: 46 test_true_pfm: 3131.2277544894173 sim_pfm: 130.15581758638533
episode: 184 training return: tensor(-245.6067, device='cuda:0')
episode: 185 training return: tensor(-225.0527, device='cuda:0')
episode: 186 training return: tensor(43.1247, device='cuda:0')
episode: 187 training return: tensor(-337.0545, device='cuda:0')
epoch: 47 test_true_pfm: 2565.283387670075 sim_pfm: 32.46659777887786
episode: 188 training return: tensor(-380.8381, device='cuda:0')
episode: 189 training return: tensor(-389.3147, device='cuda:0')
episode: 190 training return: tensor(-333.5235, device='cuda:0')
episode: 191 training return: tensor(56.4749, device='cuda:0')
epoch: 48 test_true_pfm: 3054.5493745722215 sim_pfm: -13.912000605409654
episode: 192 training return: tensor(-321.1205, device='cuda:0')
episode: 193 training return: tensor(-82.6325, device='cuda:0')
episode: 194 training return: tensor(-333.5202, device='cuda:0')
episode: 195 training return: tensor(-455.3299, device='cuda:0')
epoch: 49 test_true_pfm: 2835.612180932732 sim_pfm: -91.22481644003226
episode: 196 training return: tensor(-333.9381, device='cuda:0')
episode: 197 training return: tensor(-116.5235, device='cuda:0')
episode: 198 training return: tensor(-275.8915, device='cuda:0')
episode: 199 training return: tensor(-250.9366, device='cuda:0')
epoch: 50 test_true_pfm: 2714.8767382429473 sim_pfm: -96.49579609474556
episode: 200 training return: tensor(-403.0317, device='cuda:0')
episode: 201 training return: tensor(-194.1564, device='cuda:0')
episode: 202 training return: tensor(-458.3318, device='cuda:0')
episode: 203 training return: tensor(-367.0604, device='cuda:0')
epoch: 51 test_true_pfm: 2599.547607863134 sim_pfm: -2.420450915621283
episode: 204 training return: tensor(-331.0133, device='cuda:0')
episode: 205 training return: tensor(45.4172, device='cuda:0')
episode: 206 training return: tensor(116.0990, device='cuda:0')
episode: 207 training return: tensor(-371.7991, device='cuda:0')
epoch: 52 test_true_pfm: 2241.593563257696 sim_pfm: -79.5668127782313
episode: 208 training return: tensor(-430.3063, device='cuda:0')
episode: 209 training return: tensor(30.4671, device='cuda:0')
episode: 210 training return: tensor(-14.1873, device='cuda:0')
episode: 211 training return: tensor(-401.8872, device='cuda:0')
epoch: 53 test_true_pfm: 2378.7641865087094 sim_pfm: -240.37019418395357
episode: 212 training return: tensor(-272.5828, device='cuda:0')
episode: 213 training return: tensor(-369.2989, device='cuda:0')
episode: 214 training return: tensor(-76.3789, device='cuda:0')
episode: 215 training return: tensor(-455.2538, device='cuda:0')
epoch: 54 test_true_pfm: 3207.4230969423443 sim_pfm: 46.39309686624134
episode: 216 training return: tensor(-441.2815, device='cuda:0')
episode: 217 training return: tensor(-365.5467, device='cuda:0')
episode: 218 training return: tensor(64.0521, device='cuda:0')
episode: 219 training return: tensor(-192.5850, device='cuda:0')
epoch: 55 test_true_pfm: 2228.267534347968 sim_pfm: 86.13370074572352
episode: 220 training return: tensor(-336.3185, device='cuda:0')
episode: 221 training return: tensor(-24.8980, device='cuda:0')
episode: 222 training return: tensor(185.3155, device='cuda:0')
episode: 223 training return: tensor(-264.9001, device='cuda:0')
epoch: 56 test_true_pfm: 2904.667353755995 sim_pfm: -60.38212622813686
episode: 224 training return: tensor(-451.9958, device='cuda:0')
episode: 225 training return: tensor(145.6730, device='cuda:0')
episode: 226 training return: tensor(-457.1141, device='cuda:0')
episode: 227 training return: tensor(-99.2563, device='cuda:0')
epoch: 57 test_true_pfm: 3282.7649606540767 sim_pfm: -76.46529968165366
episode: 228 training return: tensor(-429.4653, device='cuda:0')
episode: 229 training return: tensor(131.2255, device='cuda:0')
episode: 230 training return: tensor(-181.5813, device='cuda:0')
episode: 231 training return: tensor(-257.5776, device='cuda:0')
epoch: 58 test_true_pfm: 2566.954486721401 sim_pfm: -34.39793346915394
episode: 232 training return: tensor(-180.1790, device='cuda:0')
episode: 233 training return: tensor(-188.4596, device='cuda:0')
episode: 234 training return: tensor(-361.8583, device='cuda:0')
episode: 235 training return: tensor(-359.8259, device='cuda:0')
epoch: 59 test_true_pfm: 3236.960898547366 sim_pfm: 95.07836911929189
episode: 236 training return: tensor(-314.3026, device='cuda:0')
episode: 237 training return: tensor(-285.3515, device='cuda:0')
episode: 238 training return: tensor(-164.9556, device='cuda:0')
episode: 239 training return: tensor(-185.7982, device='cuda:0')
epoch: 60 test_true_pfm: 2663.5049444937317 sim_pfm: -51.27736107476327
episode: 240 training return: tensor(-451.8073, device='cuda:0')
episode: 241 training return: tensor(-353.0132, device='cuda:0')
episode: 242 training return: tensor(-281.4784, device='cuda:0')
episode: 243 training return: tensor(-319.8650, device='cuda:0')
epoch: 61 test_true_pfm: 2962.364455636798 sim_pfm: 37.45759971611551
episode: 244 training return: tensor(-359.3263, device='cuda:0')
episode: 245 training return: tensor(-214.7917, device='cuda:0')
episode: 246 training return: tensor(-261.9878, device='cuda:0')
episode: 247 training return: tensor(-127.0604, device='cuda:0')
epoch: 62 test_true_pfm: 2419.287704234232 sim_pfm: 54.73602821598373
episode: 248 training return: tensor(-412.0455, device='cuda:0')
episode: 249 training return: tensor(-450.2807, device='cuda:0')
episode: 250 training return: tensor(-244.2879, device='cuda:0')
episode: 251 training return: tensor(-384.0702, device='cuda:0')
epoch: 63 test_true_pfm: 2714.880348997139 sim_pfm: -54.83661947411019
episode: 252 training return: tensor(-182.1155, device='cuda:0')
episode: 253 training return: tensor(-129.6248, device='cuda:0')
episode: 254 training return: tensor(-368.8628, device='cuda:0')
episode: 255 training return: tensor(-172.3325, device='cuda:0')
epoch: 64 test_true_pfm: 2383.2693993631256 sim_pfm: -7.202599188865861
episode: 256 training return: tensor(-208.2936, device='cuda:0')
episode: 257 training return: tensor(-340.6245, device='cuda:0')
episode: 258 training return: tensor(-311.0392, device='cuda:0')
episode: 259 training return: tensor(159.8774, device='cuda:0')
epoch: 65 test_true_pfm: 3136.3519513672486 sim_pfm: 44.68302187073277
episode: 260 training return: tensor(-56.1179, device='cuda:0')
episode: 261 training return: tensor(-327.4097, device='cuda:0')
episode: 262 training return: tensor(-497.1422, device='cuda:0')
episode: 263 training return: tensor(-377.8774, device='cuda:0')
epoch: 66 test_true_pfm: 2637.9053027106706 sim_pfm: -231.0028139600181
episode: 264 training return: tensor(-455.0681, device='cuda:0')
episode: 265 training return: tensor(-363.3581, device='cuda:0')
episode: 266 training return: tensor(146.6605, device='cuda:0')
episode: 267 training return: tensor(-103.8743, device='cuda:0')
epoch: 67 test_true_pfm: 2008.4497712896598 sim_pfm: 58.280446694834005
episode: 268 training return: tensor(-87.6579, device='cuda:0')
episode: 269 training return: tensor(-153.6893, device='cuda:0')
episode: 270 training return: tensor(-184.4025, device='cuda:0')
episode: 271 training return: tensor(-348.6438, device='cuda:0')
epoch: 68 test_true_pfm: 3233.5389824591007 sim_pfm: -32.5440408916135
episode: 272 training return: tensor(-53.0678, device='cuda:0')
episode: 273 training return: tensor(-272.0627, device='cuda:0')
episode: 274 training return: tensor(146.1405, device='cuda:0')
episode: 275 training return: tensor(-272.8998, device='cuda:0')
epoch: 69 test_true_pfm: 3294.8063402836356 sim_pfm: -149.00130558001305
episode: 276 training return: tensor(-289.5886, device='cuda:0')
episode: 277 training return: tensor(130.0499, device='cuda:0')
episode: 278 training return: tensor(-52.4798, device='cuda:0')
episode: 279 training return: tensor(210.9692, device='cuda:0')
epoch: 70 test_true_pfm: 2090.3440597974623 sim_pfm: -340.32676459073747
episode: 280 training return: tensor(132.7671, device='cuda:0')
episode: 281 training return: tensor(124.7746, device='cuda:0')
episode: 282 training return: tensor(-295.9843, device='cuda:0')
episode: 283 training return: tensor(-341.5201, device='cuda:0')
epoch: 71 test_true_pfm: 2038.8068527014104 sim_pfm: 69.25073638393467
episode: 284 training return: tensor(-438.3908, device='cuda:0')
episode: 285 training return: tensor(-310.1896, device='cuda:0')
episode: 286 training return: tensor(-194.6786, device='cuda:0')
episode: 287 training return: tensor(-70.4735, device='cuda:0')
epoch: 72 test_true_pfm: 2884.4517431855234 sim_pfm: -75.36500674504593
episode: 288 training return: tensor(-379.0213, device='cuda:0')
episode: 289 training return: tensor(-94.7488, device='cuda:0')
episode: 290 training return: tensor(128.7634, device='cuda:0')
episode: 291 training return: tensor(-229.2164, device='cuda:0')
epoch: 73 test_true_pfm: 3228.9620452613112 sim_pfm: -27.48358395105849
episode: 292 training return: tensor(-238.2694, device='cuda:0')
episode: 293 training return: tensor(-247.8916, device='cuda:0')
episode: 294 training return: tensor(132.4209, device='cuda:0')
episode: 295 training return: tensor(-133.2536, device='cuda:0')
epoch: 74 test_true_pfm: 2996.7199983146106 sim_pfm: 14.745803286738616
episode: 296 training return: tensor(-379.1131, device='cuda:0')
episode: 297 training return: tensor(-414.2835, device='cuda:0')
episode: 298 training return: tensor(115.7175, device='cuda:0')
episode: 299 training return: tensor(53.2100, device='cuda:0')
epoch: 75 test_true_pfm: 2416.9135738095497 sim_pfm: 170.82725845974832
episode: 300 training return: tensor(-259.5864, device='cuda:0')
episode: 301 training return: tensor(-141.0969, device='cuda:0')
episode: 302 training return: tensor(-226.5542, device='cuda:0')
episode: 303 training return: tensor(-342.1004, device='cuda:0')
epoch: 76 test_true_pfm: 2262.109073294464 sim_pfm: -131.7029928229167
episode: 304 training return: tensor(-432.2246, device='cuda:0')
episode: 305 training return: tensor(-333.2380, device='cuda:0')
episode: 306 training return: tensor(-324.9778, device='cuda:0')
episode: 307 training return: tensor(-467.3242, device='cuda:0')
epoch: 77 test_true_pfm: 2273.7642320814143 sim_pfm: -96.04932458458158
episode: 308 training return: tensor(-221.1891, device='cuda:0')
episode: 309 training return: tensor(-177.4305, device='cuda:0')
episode: 310 training return: tensor(155.6231, device='cuda:0')
episode: 311 training return: tensor(-402.0271, device='cuda:0')
epoch: 78 test_true_pfm: 2077.9592340668173 sim_pfm: -385.98143583910615
episode: 312 training return: tensor(-381.0620, device='cuda:0')
episode: 313 training return: tensor(-503.7661, device='cuda:0')
episode: 314 training return: tensor(-342.1854, device='cuda:0')
episode: 315 training return: tensor(-71.1567, device='cuda:0')
epoch: 79 test_true_pfm: 1572.845355235678 sim_pfm: -355.2628391100831
episode: 316 training return: tensor(-139.3033, device='cuda:0')
episode: 317 training return: tensor(-407.0381, device='cuda:0')
episode: 318 training return: tensor(-259.4267, device='cuda:0')
episode: 319 training return: tensor(147.5494, device='cuda:0')
epoch: 80 test_true_pfm: 3285.578755256309 sim_pfm: -147.53462986810095
episode: 320 training return: tensor(-350.2346, device='cuda:0')
episode: 321 training return: tensor(2.3081, device='cuda:0')
episode: 322 training return: tensor(-415.7904, device='cuda:0')
episode: 323 training return: tensor(-408.0839, device='cuda:0')
epoch: 81 test_true_pfm: 3154.028625345964 sim_pfm: -246.5808398788795
episode: 324 training return: tensor(-91.7866, device='cuda:0')
episode: 325 training return: tensor(-70.1128, device='cuda:0')
episode: 326 training return: tensor(-335.0660, device='cuda:0')
episode: 327 training return: tensor(-280.0107, device='cuda:0')
epoch: 82 test_true_pfm: 2042.3642558769789 sim_pfm: -79.65695410439123
episode: 328 training return: tensor(-47.8586, device='cuda:0')
episode: 329 training return: tensor(-518.0425, device='cuda:0')
episode: 330 training return: tensor(-495.1991, device='cuda:0')
episode: 331 training return: tensor(-298.0479, device='cuda:0')
epoch: 83 test_true_pfm: 2295.629771613314 sim_pfm: -140.3785473065509
episode: 332 training return: tensor(176.8321, device='cuda:0')
episode: 333 training return: tensor(53.7467, device='cuda:0')
episode: 334 training return: tensor(145.0225, device='cuda:0')
episode: 335 training return: tensor(145.3461, device='cuda:0')
epoch: 84 test_true_pfm: 2975.2865677686495 sim_pfm: -28.09364688308172
episode: 336 training return: tensor(-172.2821, device='cuda:0')
episode: 337 training return: tensor(-49.4253, device='cuda:0')
episode: 338 training return: tensor(151.0697, device='cuda:0')
episode: 339 training return: tensor(-261.1520, device='cuda:0')
epoch: 85 test_true_pfm: 2169.682326790614 sim_pfm: 92.12206615154476
episode: 340 training return: tensor(-335.3892, device='cuda:0')
episode: 341 training return: tensor(-203.5664, device='cuda:0')
episode: 342 training return: tensor(68.7808, device='cuda:0')
episode: 343 training return: tensor(160.2309, device='cuda:0')
epoch: 86 test_true_pfm: 1603.7687571886363 sim_pfm: -154.1994435726859
episode: 344 training return: tensor(-97.0612, device='cuda:0')
episode: 345 training return: tensor(74.5870, device='cuda:0')
episode: 346 training return: tensor(65.8315, device='cuda:0')
episode: 347 training return: tensor(-286.2044, device='cuda:0')
epoch: 87 test_true_pfm: 2917.814075234169 sim_pfm: -195.86618515422256
episode: 348 training return: tensor(-377.9004, device='cuda:0')
episode: 349 training return: tensor(-324.6300, device='cuda:0')
episode: 350 training return: tensor(-143.1537, device='cuda:0')
episode: 351 training return: tensor(-403.6201, device='cuda:0')
epoch: 88 test_true_pfm: 1506.636915236175 sim_pfm: -412.3134536566892
episode: 352 training return: tensor(159.3083, device='cuda:0')
episode: 353 training return: tensor(-352.7295, device='cuda:0')
episode: 354 training return: tensor(-352.1918, device='cuda:0')
episode: 355 training return: tensor(-254.8398, device='cuda:0')
epoch: 89 test_true_pfm: 2723.448122066548 sim_pfm: -159.06290512893852
episode: 356 training return: tensor(-144.8444, device='cuda:0')
episode: 357 training return: tensor(-434.2692, device='cuda:0')
episode: 358 training return: tensor(-211.0405, device='cuda:0')
episode: 359 training return: tensor(-102.6978, device='cuda:0')
epoch: 90 test_true_pfm: 2040.8407773416966 sim_pfm: -185.4695004837316
episode: 360 training return: tensor(-189.5880, device='cuda:0')
episode: 361 training return: tensor(-138.5732, device='cuda:0')
episode: 362 training return: tensor(-504.2365, device='cuda:0')
episode: 363 training return: tensor(-398.3287, device='cuda:0')
epoch: 91 test_true_pfm: 2537.240021078587 sim_pfm: -99.5694698865215
episode: 364 training return: tensor(-100.9172, device='cuda:0')
episode: 365 training return: tensor(-251.4208, device='cuda:0')
episode: 366 training return: tensor(-434.5193, device='cuda:0')
episode: 367 training return: tensor(-206.4850, device='cuda:0')
epoch: 92 test_true_pfm: 2276.622415331146 sim_pfm: -32.999988675524946
episode: 368 training return: tensor(-153.1700, device='cuda:0')
episode: 369 training return: tensor(-215.2600, device='cuda:0')
episode: 370 training return: tensor(-253.5161, device='cuda:0')
episode: 371 training return: tensor(-404.9862, device='cuda:0')
epoch: 93 test_true_pfm: 2529.8485792755932 sim_pfm: 127.02651743389045
episode: 372 training return: tensor(-145.1736, device='cuda:0')
episode: 373 training return: tensor(-141.2644, device='cuda:0')
episode: 374 training return: tensor(-330.5869, device='cuda:0')
episode: 375 training return: tensor(-449.8871, device='cuda:0')
epoch: 94 test_true_pfm: 2269.2416072545216 sim_pfm: -378.2988395100304
episode: 376 training return: tensor(-279.9772, device='cuda:0')
episode: 377 training return: tensor(-182.7582, device='cuda:0')
episode: 378 training return: tensor(145.6136, device='cuda:0')
episode: 379 training return: tensor(-93.8396, device='cuda:0')
epoch: 95 test_true_pfm: 2086.440320844204 sim_pfm: -192.215500517011
episode: 380 training return: tensor(-378.2523, device='cuda:0')
episode: 381 training return: tensor(76.5185, device='cuda:0')
episode: 382 training return: tensor(-340.6873, device='cuda:0')
episode: 383 training return: tensor(-397.3261, device='cuda:0')
epoch: 96 test_true_pfm: 2244.032287519017 sim_pfm: -134.7361715566658
episode: 384 training return: tensor(-445.6319, device='cuda:0')
episode: 385 training return: tensor(-127.9944, device='cuda:0')
episode: 386 training return: tensor(-275.4745, device='cuda:0')
episode: 387 training return: tensor(-98.4217, device='cuda:0')
epoch: 97 test_true_pfm: 2967.9549261748784 sim_pfm: 104.77337322440387
episode: 388 training return: tensor(-303.0905, device='cuda:0')
episode: 389 training return: tensor(-181.4203, device='cuda:0')
episode: 390 training return: tensor(-168.4824, device='cuda:0')
episode: 391 training return: tensor(-355.0143, device='cuda:0')
epoch: 98 test_true_pfm: 1620.7345567221612 sim_pfm: -221.74454913055524
episode: 392 training return: tensor(-218.0031, device='cuda:0')
episode: 393 training return: tensor(-369.1672, device='cuda:0')
episode: 394 training return: tensor(168.2190, device='cuda:0')
episode: 395 training return: tensor(-173.3415, device='cuda:0')
epoch: 99 test_true_pfm: 2147.5775451083177 sim_pfm: -366.78037589468295
episode: 396 training return: tensor(-403.9312, device='cuda:0')
episode: 397 training return: tensor(161.7043, device='cuda:0')
episode: 398 training return: tensor(-269.3563, device='cuda:0')
episode: 399 training return: tensor(-345.1955, device='cuda:0')
epoch: 100 test_true_pfm: 2218.599587741454 sim_pfm: 32.715777659943946
episode: 400 training return: tensor(-289.5335, device='cuda:0')
episode: 401 training return: tensor(-224.3761, device='cuda:0')
episode: 402 training return: tensor(-296.8801, device='cuda:0')
episode: 403 training return: tensor(-331.5224, device='cuda:0')
epoch: 101 test_true_pfm: 2102.635556160028 sim_pfm: -195.61765570568
episode: 404 training return: tensor(-414.1258, device='cuda:0')
episode: 405 training return: tensor(-446.3502, device='cuda:0')
episode: 406 training return: tensor(-297.9918, device='cuda:0')
episode: 407 training return: tensor(-74.2505, device='cuda:0')
epoch: 102 test_true_pfm: 1539.3627982169285 sim_pfm: -37.45103102562522
episode: 408 training return: tensor(-263.8528, device='cuda:0')
episode: 409 training return: tensor(178.9362, device='cuda:0')
episode: 410 training return: tensor(123.8771, device='cuda:0')
episode: 411 training return: tensor(-217.1427, device='cuda:0')
epoch: 103 test_true_pfm: 2113.462531140525 sim_pfm: -405.07740624361514
episode: 412 training return: tensor(181.9616, device='cuda:0')
episode: 413 training return: tensor(-32.9143, device='cuda:0')
episode: 414 training return: tensor(-396.5320, device='cuda:0')
episode: 415 training return: tensor(-349.2664, device='cuda:0')
epoch: 104 test_true_pfm: 2336.468947929216 sim_pfm: -135.37122081896328
episode: 416 training return: tensor(-389.6614, device='cuda:0')
episode: 417 training return: tensor(-48.3906, device='cuda:0')
episode: 418 training return: tensor(-237.7987, device='cuda:0')
episode: 419 training return: tensor(-188.7654, device='cuda:0')
epoch: 105 test_true_pfm: 1813.2967824586892 sim_pfm: -123.4380724306878
episode: 420 training return: tensor(-199.9694, device='cuda:0')
episode: 421 training return: tensor(93.8559, device='cuda:0')
episode: 422 training return: tensor(133.7897, device='cuda:0')
episode: 423 training return: tensor(-273.7720, device='cuda:0')
epoch: 106 test_true_pfm: 2733.2967741967077 sim_pfm: 36.31975799793145
episode: 424 training return: tensor(-302.2148, device='cuda:0')
episode: 425 training return: tensor(-461.2259, device='cuda:0')
episode: 426 training return: tensor(-148.5465, device='cuda:0')
episode: 427 training return: tensor(-349.5512, device='cuda:0')
epoch: 107 test_true_pfm: 2289.4027867704917 sim_pfm: -261.69740029635915
episode: 428 training return: tensor(146.2397, device='cuda:0')
episode: 429 training return: tensor(47.8526, device='cuda:0')
episode: 430 training return: tensor(-420.9123, device='cuda:0')
episode: 431 training return: tensor(135.6265, device='cuda:0')
epoch: 108 test_true_pfm: 1731.159565781695 sim_pfm: -142.86168065834013
episode: 432 training return: tensor(-89.0795, device='cuda:0')
episode: 433 training return: tensor(-369.7321, device='cuda:0')
episode: 434 training return: tensor(53.7756, device='cuda:0')
episode: 435 training return: tensor(26.0236, device='cuda:0')
epoch: 109 test_true_pfm: 2681.9552766236575 sim_pfm: -42.21119848988019
episode: 436 training return: tensor(-141.4312, device='cuda:0')
episode: 437 training return: tensor(-224.7160, device='cuda:0')
episode: 438 training return: tensor(-257.5256, device='cuda:0')
episode: 439 training return: tensor(131.8354, device='cuda:0')
epoch: 110 test_true_pfm: 1914.2356140291893 sim_pfm: -189.7319438229703
episode: 440 training return: tensor(-373.0833, device='cuda:0')
episode: 441 training return: tensor(-144.0020, device='cuda:0')
episode: 442 training return: tensor(-305.8116, device='cuda:0')
episode: 443 training return: tensor(-321.2265, device='cuda:0')
epoch: 111 test_true_pfm: 2681.6245726033085 sim_pfm: -248.99410872732793
episode: 444 training return: tensor(183.2121, device='cuda:0')
episode: 445 training return: tensor(-446.4351, device='cuda:0')
episode: 446 training return: tensor(-84.0601, device='cuda:0')
episode: 447 training return: tensor(149.2038, device='cuda:0')
epoch: 112 test_true_pfm: 2352.711720952037 sim_pfm: -346.3018773327931
episode: 448 training return: tensor(-299.2386, device='cuda:0')
episode: 449 training return: tensor(-221.6422, device='cuda:0')
episode: 450 training return: tensor(-217.5350, device='cuda:0')
episode: 451 training return: tensor(-480.7435, device='cuda:0')
epoch: 113 test_true_pfm: 1599.4135894778574 sim_pfm: -367.0335113790546
episode: 452 training return: tensor(-352.0679, device='cuda:0')
episode: 453 training return: tensor(-269.8464, device='cuda:0')
episode: 454 training return: tensor(-408.4619, device='cuda:0')
episode: 455 training return: tensor(-223.4831, device='cuda:0')
epoch: 114 test_true_pfm: 3151.0052144996457 sim_pfm: -51.23619881447909
episode: 456 training return: tensor(-422.3823, device='cuda:0')
episode: 457 training return: tensor(-154.2302, device='cuda:0')
episode: 458 training return: tensor(169.0553, device='cuda:0')
episode: 459 training return: tensor(-401.1210, device='cuda:0')
epoch: 115 test_true_pfm: 2009.2055880255612 sim_pfm: 130.4863433999708
episode: 460 training return: tensor(-305.3264, device='cuda:0')
episode: 461 training return: tensor(-176.1900, device='cuda:0')
episode: 462 training return: tensor(-298.3042, device='cuda:0')
episode: 463 training return: tensor(-23.9282, device='cuda:0')
epoch: 116 test_true_pfm: 1633.0733341997257 sim_pfm: -297.4585656708805
episode: 464 training return: tensor(-173.4952, device='cuda:0')
episode: 465 training return: tensor(-490.7840, device='cuda:0')
episode: 466 training return: tensor(-202.5210, device='cuda:0')
episode: 467 training return: tensor(-327.3517, device='cuda:0')
epoch: 117 test_true_pfm: 1499.859019186435 sim_pfm: -247.36209336902053
episode: 468 training return: tensor(-402.9208, device='cuda:0')
episode: 469 training return: tensor(-170.4563, device='cuda:0')
episode: 470 training return: tensor(-323.9991, device='cuda:0')
episode: 471 training return: tensor(-202.5217, device='cuda:0')
epoch: 118 test_true_pfm: 2538.997104232887 sim_pfm: -0.3475953873906595
episode: 472 training return: tensor(-407.3430, device='cuda:0')
episode: 473 training return: tensor(-34.8976, device='cuda:0')
episode: 474 training return: tensor(-296.3429, device='cuda:0')
episode: 475 training return: tensor(125.5125, device='cuda:0')
epoch: 119 test_true_pfm: 2034.878497762942 sim_pfm: -280.1400224766888
episode: 476 training return: tensor(-425.2007, device='cuda:0')
episode: 477 training return: tensor(-250.1445, device='cuda:0')
episode: 478 training return: tensor(-104.3575, device='cuda:0')
episode: 479 training return: tensor(-121.2121, device='cuda:0')
epoch: 120 test_true_pfm: 2234.8411274979994 sim_pfm: -213.10530136784655
episode: 480 training return: tensor(-256.2144, device='cuda:0')
episode: 481 training return: tensor(-364.1996, device='cuda:0')
episode: 482 training return: tensor(-290.8764, device='cuda:0')
episode: 483 training return: tensor(-498.7759, device='cuda:0')
epoch: 121 test_true_pfm: 2144.408566912908 sim_pfm: -185.95363839614825
episode: 484 training return: tensor(-315.2979, device='cuda:0')
episode: 485 training return: tensor(-99.5636, device='cuda:0')
episode: 486 training return: tensor(-462.1714, device='cuda:0')
episode: 487 training return: tensor(132.7233, device='cuda:0')
epoch: 122 test_true_pfm: 1745.7981514281082 sim_pfm: -379.7880549809197
episode: 488 training return: tensor(193.4624, device='cuda:0')
episode: 489 training return: tensor(-302.0658, device='cuda:0')
episode: 490 training return: tensor(-369.9060, device='cuda:0')
episode: 491 training return: tensor(-270.6103, device='cuda:0')
epoch: 123 test_true_pfm: 1988.4961375282383 sim_pfm: -185.32373596931575
episode: 492 training return: tensor(-184.7269, device='cuda:0')
episode: 493 training return: tensor(129.1754, device='cuda:0')
episode: 494 training return: tensor(-140.2609, device='cuda:0')
episode: 495 training return: tensor(71.7394, device='cuda:0')
epoch: 124 test_true_pfm: 1478.0054804401116 sim_pfm: -383.95361793365254
episode: 496 training return: tensor(-293.5727, device='cuda:0')
episode: 497 training return: tensor(112.8202, device='cuda:0')
episode: 498 training return: tensor(-95.1663, device='cuda:0')
episode: 499 training return: tensor(-27.8795, device='cuda:0')
epoch: 125 test_true_pfm: 2370.0577798222835 sim_pfm: -415.88970933875925
episode: 500 training return: tensor(-445.4557, device='cuda:0')
episode: 501 training return: tensor(-188.4493, device='cuda:0')
episode: 502 training return: tensor(-206.8934, device='cuda:0')
episode: 503 training return: tensor(-295.1349, device='cuda:0')
epoch: 126 test_true_pfm: 2074.5674056195217 sim_pfm: -359.2778437612772
episode: 504 training return: tensor(-272.9341, device='cuda:0')
episode: 505 training return: tensor(-161.9338, device='cuda:0')
episode: 506 training return: tensor(135.2443, device='cuda:0')
episode: 507 training return: tensor(-339.2985, device='cuda:0')
epoch: 127 test_true_pfm: 2619.1039963158337 sim_pfm: 141.39255915556956
episode: 508 training return: tensor(-354.4109, device='cuda:0')
episode: 509 training return: tensor(-230.0056, device='cuda:0')
episode: 510 training return: tensor(-235.3342, device='cuda:0')
episode: 511 training return: tensor(-171.2010, device='cuda:0')
epoch: 128 test_true_pfm: 1988.719111075252 sim_pfm: -267.0081136069978
episode: 512 training return: tensor(165.8818, device='cuda:0')
episode: 513 training return: tensor(-164.7243, device='cuda:0')
episode: 514 training return: tensor(-11.9619, device='cuda:0')
episode: 515 training return: tensor(123.0846, device='cuda:0')
epoch: 129 test_true_pfm: 1698.5604374985878 sim_pfm: -330.2104230245847
episode: 516 training return: tensor(-262.0542, device='cuda:0')
episode: 517 training return: tensor(-172.0438, device='cuda:0')
episode: 518 training return: tensor(-372.7197, device='cuda:0')
episode: 519 training return: tensor(-42.2117, device='cuda:0')
epoch: 130 test_true_pfm: 2077.655696035601 sim_pfm: -370.09704122603097
episode: 520 training return: tensor(-422.9238, device='cuda:0')
episode: 521 training return: tensor(184.8886, device='cuda:0')
episode: 522 training return: tensor(-415.3288, device='cuda:0')
episode: 523 training return: tensor(-104.4368, device='cuda:0')
epoch: 131 test_true_pfm: 1444.1832040801507 sim_pfm: 47.14846055182473
episode: 524 training return: tensor(131.3106, device='cuda:0')
episode: 525 training return: tensor(-535.5094, device='cuda:0')
episode: 526 training return: tensor(-280.0947, device='cuda:0')
episode: 527 training return: tensor(-493.2663, device='cuda:0')
epoch: 132 test_true_pfm: 2141.317522089144 sim_pfm: -146.30182014739452
episode: 528 training return: tensor(-335.7897, device='cuda:0')
episode: 529 training return: tensor(-257.5655, device='cuda:0')
episode: 530 training return: tensor(-458.2032, device='cuda:0')
episode: 531 training return: tensor(-322.1872, device='cuda:0')
epoch: 133 test_true_pfm: 2862.5910533021724 sim_pfm: -313.64687109305913
episode: 532 training return: tensor(-286.9325, device='cuda:0')
episode: 533 training return: tensor(-371.9712, device='cuda:0')
episode: 534 training return: tensor(-381.9719, device='cuda:0')
episode: 535 training return: tensor(-277.1919, device='cuda:0')
epoch: 134 test_true_pfm: 3089.211451766576 sim_pfm: 14.927637609818097
episode: 536 training return: tensor(47.9848, device='cuda:0')
episode: 537 training return: tensor(-417.0530, device='cuda:0')
episode: 538 training return: tensor(-277.7821, device='cuda:0')
episode: 539 training return: tensor(-482.2524, device='cuda:0')
epoch: 135 test_true_pfm: 3122.202766819504 sim_pfm: 7.5767436317692045
episode: 540 training return: tensor(-332.7628, device='cuda:0')
episode: 541 training return: tensor(-86.8586, device='cuda:0')
episode: 542 training return: tensor(-185.4283, device='cuda:0')
episode: 543 training return: tensor(-123.2398, device='cuda:0')
epoch: 136 test_true_pfm: 2320.533283924629 sim_pfm: -69.70462622253883
episode: 544 training return: tensor(158.0502, device='cuda:0')
episode: 545 training return: tensor(-302.2514, device='cuda:0')
episode: 546 training return: tensor(-175.7810, device='cuda:0')
episode: 547 training return: tensor(160.4244, device='cuda:0')
epoch: 137 test_true_pfm: 2484.822120503685 sim_pfm: -128.96126458583362
episode: 548 training return: tensor(-339.8207, device='cuda:0')
episode: 549 training return: tensor(162.8306, device='cuda:0')
episode: 550 training return: tensor(-91.2569, device='cuda:0')
episode: 551 training return: tensor(-337.4475, device='cuda:0')
epoch: 138 test_true_pfm: 3090.7658192825475 sim_pfm: -53.994702315899
episode: 552 training return: tensor(-305.1000, device='cuda:0')
episode: 553 training return: tensor(-252.7038, device='cuda:0')
episode: 554 training return: tensor(-153.9892, device='cuda:0')
episode: 555 training return: tensor(-400.1083, device='cuda:0')
epoch: 139 test_true_pfm: 2718.3264708220427 sim_pfm: -58.44966732930819
episode: 556 training return: tensor(-142.9738, device='cuda:0')
episode: 557 training return: tensor(-326.4430, device='cuda:0')
episode: 558 training return: tensor(-371.3147, device='cuda:0')
episode: 559 training return: tensor(-302.1250, device='cuda:0')
epoch: 140 test_true_pfm: 2703.4195720143703 sim_pfm: -360.7702216332934
episode: 560 training return: tensor(119.5298, device='cuda:0')
episode: 561 training return: tensor(48.4284, device='cuda:0')
episode: 562 training return: tensor(-390.7437, device='cuda:0')
episode: 563 training return: tensor(-343.2810, device='cuda:0')
epoch: 141 test_true_pfm: 2861.883732599436 sim_pfm: -275.7835952335542
episode: 564 training return: tensor(-249.5313, device='cuda:0')
episode: 565 training return: tensor(-153.4832, device='cuda:0')
episode: 566 training return: tensor(-53.3378, device='cuda:0')
episode: 567 training return: tensor(-153.0249, device='cuda:0')
epoch: 142 test_true_pfm: 2507.914586974936 sim_pfm: -42.94630129078481
episode: 568 training return: tensor(-433.2825, device='cuda:0')
episode: 569 training return: tensor(-415.4601, device='cuda:0')
episode: 570 training return: tensor(-247.9995, device='cuda:0')
episode: 571 training return: tensor(-135.1756, device='cuda:0')
epoch: 143 test_true_pfm: 2550.8527530267206 sim_pfm: -12.122184886306059
episode: 572 training return: tensor(-351.2785, device='cuda:0')
episode: 573 training return: tensor(153.2279, device='cuda:0')
episode: 574 training return: tensor(-339.9018, device='cuda:0')
episode: 575 training return: tensor(-258.7810, device='cuda:0')
epoch: 144 test_true_pfm: 2205.83322121193 sim_pfm: -105.40318892498424
episode: 576 training return: tensor(-76.0087, device='cuda:0')
episode: 577 training return: tensor(-314.9845, device='cuda:0')
episode: 578 training return: tensor(-129.3387, device='cuda:0')
episode: 579 training return: tensor(-132.9916, device='cuda:0')
epoch: 145 test_true_pfm: 2333.1557379568317 sim_pfm: -217.6167898150452
episode: 580 training return: tensor(-371.0406, device='cuda:0')
episode: 581 training return: tensor(-449.0768, device='cuda:0')
episode: 582 training return: tensor(-314.9173, device='cuda:0')
episode: 583 training return: tensor(-299.0738, device='cuda:0')
epoch: 146 test_true_pfm: 2675.8828823612116 sim_pfm: -362.8258434930079
episode: 584 training return: tensor(-213.2330, device='cuda:0')
episode: 585 training return: tensor(138.8999, device='cuda:0')
episode: 586 training return: tensor(-370.2588, device='cuda:0')
episode: 587 training return: tensor(-227.7663, device='cuda:0')
epoch: 147 test_true_pfm: 2859.5524458290897 sim_pfm: 96.84231362487965
episode: 588 training return: tensor(-247.6106, device='cuda:0')
episode: 589 training return: tensor(-119.2439, device='cuda:0')
episode: 590 training return: tensor(-209.1776, device='cuda:0')
episode: 591 training return: tensor(-84.1474, device='cuda:0')
epoch: 148 test_true_pfm: 2323.382086125567 sim_pfm: 149.30250488110082
episode: 592 training return: tensor(-187.5555, device='cuda:0')
episode: 593 training return: tensor(-114.9234, device='cuda:0')
episode: 594 training return: tensor(-46.3643, device='cuda:0')
episode: 595 training return: tensor(-110.6379, device='cuda:0')
epoch: 149 test_true_pfm: 2093.873644817322 sim_pfm: -416.631358979784
episode: 596 training return: tensor(-313.8952, device='cuda:0')
episode: 597 training return: tensor(-360.2265, device='cuda:0')
episode: 598 training return: tensor(-96.3903, device='cuda:0')
episode: 599 training return: tensor(-403.3647, device='cuda:0')
epoch: 150 test_true_pfm: 3355.248316650033 sim_pfm: -304.1553575689225
