['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '5', '--data', '100000', '--regu', '0.2']
epoch: 0 training_loss 0.25198232762515543 test_loss: 0.21790533065795897
epoch: 1 training_loss 0.20868634678423403 test_loss: 0.19569871425628663
epoch: 2 training_loss 0.20256898529827594 test_loss: 0.19930328130722047
epoch: 3 training_loss 0.19724661886692046 test_loss: 0.1827265739440918
epoch: 4 training_loss 0.1930391702055931 test_loss: 0.1937918782234192
epoch: 5 training_loss 0.19036486066877842 test_loss: 0.18856712579727172
epoch: 6 training_loss 0.18927246622741223 test_loss: 0.18364908695220947
epoch: 7 training_loss 0.19522471606731415 test_loss: 0.19371211528778076
epoch: 8 training_loss 0.1919867417961359 test_loss: 0.20983872413635254
epoch: 9 training_loss 0.19008065968751908 test_loss: 0.21105375289916992
epoch: 10 training_loss 0.19421398133039475 test_loss: 0.18564248085021973
epoch: 11 training_loss 0.1804448466002941 test_loss: 0.18467549085617066
epoch: 12 training_loss 0.1874220137298107 test_loss: 0.17860461473464967
epoch: 13 training_loss 0.18928023092448712 test_loss: 0.19205497503280639
epoch: 14 training_loss 0.18947791174054146 test_loss: 0.17060213088989257
epoch: 15 training_loss 0.19107716016471385 test_loss: 0.19638047218322754
epoch: 16 training_loss 0.19066278345882892 test_loss: 0.2052828550338745
epoch: 17 training_loss 0.1897224546968937 test_loss: 0.18730348348617554
epoch: 18 training_loss 0.19521687425673007 test_loss: 0.1890942096710205
epoch: 19 training_loss 0.1837888740748167 test_loss: 0.18283926248550414
epoch: 20 training_loss 0.17861326359212398 test_loss: 0.20167951583862304
epoch: 21 training_loss 0.18624154888093472 test_loss: 0.20628459453582765
epoch: 22 training_loss 0.1897034416347742 test_loss: 0.18612174987792968
epoch: 23 training_loss 0.1846988795697689 test_loss: 0.1861759066581726
epoch: 24 training_loss 0.18189930990338327 test_loss: 0.17472307682037352
epoch: 25 training_loss 0.18541745200753212 test_loss: 0.19049484729766847
epoch: 26 training_loss 0.17368251353502273 test_loss: 0.19198166131973265
epoch: 27 training_loss 0.1773366006463766 test_loss: 0.18936537504196166
epoch: 28 training_loss 0.18208332419395445 test_loss: 0.1728633999824524
epoch: 29 training_loss 0.18819862902164458 test_loss: 0.18125704526901246
epoch: 30 training_loss 0.1828337377309799 test_loss: 0.17644264698028564
epoch: 31 training_loss 0.18652659937739371 test_loss: 0.18841969966888428
epoch: 32 training_loss 0.17545644454658033 test_loss: 0.18320759534835815
epoch: 33 training_loss 0.18356407389044763 test_loss: 0.18794909715652466
epoch: 34 training_loss 0.17702527970075607 test_loss: 0.163094425201416
epoch: 35 training_loss 0.18200388312339782 test_loss: 0.1929054617881775
epoch: 36 training_loss 0.18002727024257184 test_loss: 0.17584482431411744
epoch: 37 training_loss 0.1786679795384407 test_loss: 0.1819215774536133
epoch: 38 training_loss 0.1841447424888611 test_loss: 0.1624719977378845
epoch: 39 training_loss 0.1831466545164585 test_loss: 0.16668341159820557
epoch: 40 training_loss 0.17796491794288158 test_loss: 0.18918222188949585
epoch: 41 training_loss 0.17391942046582698 test_loss: 0.1702876567840576
epoch: 42 training_loss 0.18014367088675498 test_loss: 0.17111169099807738
epoch: 43 training_loss 0.176433727145195 test_loss: 0.17331594228744507
epoch: 44 training_loss 0.17591366447508336 test_loss: 0.18018896579742433
epoch: 45 training_loss 0.1803598190844059 test_loss: 0.19015684127807617
epoch: 46 training_loss 0.17568425342440605 test_loss: 0.20828614234924317
epoch: 47 training_loss 0.17969643115997314 test_loss: 0.17572036981582642
epoch: 48 training_loss 0.16816195756196975 test_loss: 0.17754122018814086
epoch: 49 training_loss 0.18148232981562615 test_loss: 0.17829729318618776
epoch: 50 training_loss 0.18115702494978905 test_loss: 0.1812535524368286
epoch: 51 training_loss 0.17797319583594798 test_loss: 0.16939128637313844
epoch: 52 training_loss 0.1769670832157135 test_loss: 0.16005562543869017
epoch: 53 training_loss 0.18012472219765185 test_loss: 0.17374099493026735
epoch: 54 training_loss 0.17741205118596554 test_loss: 0.16647398471832275
epoch: 55 training_loss 0.18182185798883438 test_loss: 0.18998334407806397
epoch: 56 training_loss 0.18607136391103268 test_loss: 0.1880982995033264
epoch: 57 training_loss 0.18665871143341065 test_loss: 0.1623738169670105
epoch: 58 training_loss 0.17553097508847715 test_loss: 0.1740460991859436
epoch: 59 training_loss 0.17979236662387849 test_loss: 0.20639433860778808
epoch: 60 training_loss 0.17491554610431195 test_loss: 0.1839737892150879
epoch: 61 training_loss 0.18281459160149097 test_loss: 0.18262696266174316
epoch: 62 training_loss 0.17961959548294545 test_loss: 0.1757749557495117
epoch: 63 training_loss 0.18070737414062024 test_loss: 0.19390645027160644
epoch: 64 training_loss 0.18479579109698535 test_loss: 0.18886775970458985
epoch: 65 training_loss 0.17857987470924855 test_loss: 0.1889781355857849
epoch: 66 training_loss 0.1778670760244131 test_loss: 0.17606288194656372
epoch: 67 training_loss 0.1819709685444832 test_loss: 0.1652951121330261
epoch: 68 training_loss 0.18524799577891826 test_loss: 0.19946165084838868
epoch: 69 training_loss 0.18334230154752731 test_loss: 0.18584810495376586
epoch: 70 training_loss 0.17546162880957128 test_loss: 0.17834961414337158
epoch: 71 training_loss 0.17829268597066403 test_loss: 0.18944381475448607
epoch: 72 training_loss 0.17412653632462025 test_loss: 0.17580868005752565
epoch: 73 training_loss 0.17867798618972303 test_loss: 0.16223645210266113
epoch: 74 training_loss 0.1834359796345234 test_loss: 0.16824302673339844
epoch: 75 training_loss 0.1756799629330635 test_loss: 0.19333068132400513
epoch: 76 training_loss 0.17212028101086616 test_loss: 0.17364029884338378
epoch: 77 training_loss 0.1789677307754755 test_loss: 0.18344749212265016
epoch: 78 training_loss 0.1849295227229595 test_loss: 0.18315294981002808
epoch: 79 training_loss 0.17991887860000133 test_loss: 0.1733794927597046
epoch: 80 training_loss 0.18161993712186814 test_loss: 0.1716725707054138
epoch: 81 training_loss 0.181585214138031 test_loss: 0.1767873764038086
epoch: 82 training_loss 0.1737344315648079 test_loss: 0.1762966275215149
epoch: 83 training_loss 0.17506037086248397 test_loss: 0.19619786739349365
epoch: 84 training_loss 0.17375405617058276 test_loss: 0.1773722767829895
epoch: 85 training_loss 0.1719079829752445 test_loss: 0.17851319313049316
epoch: 86 training_loss 0.1769038225710392 test_loss: 0.16803253889083863
epoch: 87 training_loss 0.1794739631563425 test_loss: 0.1807566523551941
epoch: 88 training_loss 0.17623689360916614 test_loss: 0.1854198694229126
epoch: 89 training_loss 0.18454719327390193 test_loss: 0.18223923444747925
epoch: 90 training_loss 0.17549156427383422 test_loss: 0.18823690414428712
epoch: 91 training_loss 0.17527847893536092 test_loss: 0.1728479266166687
epoch: 92 training_loss 0.17449155017733575 test_loss: 0.17458463907241822
epoch: 93 training_loss 0.18724263116717338 test_loss: 0.19865455627441406
epoch: 94 training_loss 0.174163765758276 test_loss: 0.16627910137176513
epoch: 95 training_loss 0.17546006113290788 test_loss: 0.1817665696144104
epoch: 96 training_loss 0.18114810302853585 test_loss: 0.17186551094055175
epoch: 97 training_loss 0.18607689879834652 test_loss: 0.17703571319580078
epoch: 98 training_loss 0.1837053032964468 test_loss: 0.18883363008499146
epoch: 99 training_loss 0.1753773019462824 test_loss: 0.19138290882110595
epoch: 100 training_loss 0.1837895180284977 test_loss: 0.18851675987243652
epoch: 101 training_loss 0.17492483429610728 test_loss: 0.18010308742523193
epoch: 102 training_loss 0.17327372498810292 test_loss: 0.20450053215026856
epoch: 103 training_loss 0.17944041736423968 test_loss: 0.17955777645111085
epoch: 104 training_loss 0.1772673647105694 test_loss: 0.18411344289779663
epoch: 105 training_loss 0.17482287280261516 test_loss: 0.17792003154754638
epoch: 106 training_loss 0.1763731424510479 test_loss: 0.17764949798583984
epoch: 107 training_loss 0.17411760337650775 test_loss: 0.1732874631881714
epoch: 108 training_loss 0.17732276558876037 test_loss: 0.18498128652572632
epoch: 109 training_loss 0.18067812897264957 test_loss: 0.17800381183624267
epoch: 110 training_loss 0.17921566627919674 test_loss: 0.17891839742660523
epoch: 111 training_loss 0.1820783331245184 test_loss: 0.17277733087539673
epoch: 112 training_loss 0.17759254403412342 test_loss: 0.19443600177764891
epoch: 113 training_loss 0.18060157455503942 test_loss: 0.19197872877120972
epoch: 114 training_loss 0.1772652754932642 test_loss: 0.17095906734466554
epoch: 115 training_loss 0.18212977647781373 test_loss: 0.18698214292526244
epoch: 116 training_loss 0.1769219644367695 test_loss: 0.1766375184059143
epoch: 117 training_loss 0.17709754675626754 test_loss: 0.185647714138031
epoch: 118 training_loss 0.1840285938233137 test_loss: 0.1705930709838867
epoch: 119 training_loss 0.17473817959427834 test_loss: 0.18006449937820435
epoch: 120 training_loss 0.17700655005872248 test_loss: 0.16242833137512208
epoch: 121 training_loss 0.17360300831496717 test_loss: 0.17381991147994996
epoch: 122 training_loss 0.1736793574690819 test_loss: 0.16536060571670533
epoch: 123 training_loss 0.1827584759145975 test_loss: 0.17986130714416504
epoch: 124 training_loss 0.1843565557152033 test_loss: 0.17929872274398803
epoch: 125 training_loss 0.17452354237437248 test_loss: 0.17089221477508545
epoch: 126 training_loss 0.17536553494632245 test_loss: 0.18902988433837892
epoch: 127 training_loss 0.17567179270088673 test_loss: 0.1807962417602539
epoch: 128 training_loss 0.18234604015946387 test_loss: 0.17905343770980836
epoch: 129 training_loss 0.17738658465445042 test_loss: 0.17243539094924926
epoch: 130 training_loss 0.18232290513813496 test_loss: 0.1821741580963135
epoch: 131 training_loss 0.18034375488758086 test_loss: 0.17319189310073851
epoch: 132 training_loss 0.18029399104416372 test_loss: 0.18774690628051757
epoch: 133 training_loss 0.17765402406454087 test_loss: 0.1805402398109436
epoch: 134 training_loss 0.1828991301357746 test_loss: 0.18292804956436157
epoch: 135 training_loss 0.1780705052614212 test_loss: 0.18795132637023926
epoch: 136 training_loss 0.1778783068805933 test_loss: 0.16208412647247314
epoch: 137 training_loss 0.17243008807301521 test_loss: 0.18473774194717407
epoch: 138 training_loss 0.17681357607245446 test_loss: 0.20772912502288818
epoch: 139 training_loss 0.17792508944869043 test_loss: 0.18378952741622925
epoch: 140 training_loss 0.18548202939331532 test_loss: 0.18047804832458497
epoch: 141 training_loss 0.18387903422117233 test_loss: 0.18678215742111207
epoch: 142 training_loss 0.17811030693352223 test_loss: 0.1886606216430664
epoch: 143 training_loss 0.17552160680294038 test_loss: 0.16878092288970947
epoch: 144 training_loss 0.18010092347860338 test_loss: 0.18559638261795045
epoch: 145 training_loss 0.17259406358003615 test_loss: 0.17458276748657225
epoch: 146 training_loss 0.1698595357686281 test_loss: 0.16691555976867675
epoch: 147 training_loss 0.17904859192669392 test_loss: 0.17727771997451783
epoch: 148 training_loss 0.1725770839303732 test_loss: 0.1821999192237854
epoch: 149 training_loss 0.17947252988815307 test_loss: 0.18548640012741088
epoch: 0 training_loss 8.960717253684997 test_loss: 5.172692108154297
epoch: 1 training_loss 4.067068710327148 test_loss: 3.152635955810547
epoch: 2 training_loss 2.650134539604187 test_loss: 2.260669708251953
epoch: 3 training_loss 2.0544344353675843 test_loss: 1.945491409301758
epoch: 4 training_loss 1.804288660287857 test_loss: 1.7036798477172852
epoch: 5 training_loss 1.619603044986725 test_loss: 1.5503294944763184
epoch: 6 training_loss 1.485889574289322 test_loss: 1.471160316467285
epoch: 7 training_loss 1.366510750055313 test_loss: 1.3195388793945313
epoch: 8 training_loss 1.2722650539875031 test_loss: 1.2513227462768555
epoch: 9 training_loss 1.2043744218349457 test_loss: 1.1763212203979492
epoch: 10 training_loss 1.161209796667099 test_loss: 1.1345674514770507
epoch: 11 training_loss 1.1224285763502122 test_loss: 1.0977286338806151
epoch: 12 training_loss 1.0830031031370162 test_loss: 1.046895408630371
epoch: 13 training_loss 1.0251557451486588 test_loss: 0.9851051330566406
epoch: 14 training_loss 0.9644614899158478 test_loss: 0.9632928848266602
epoch: 15 training_loss 0.9712949484586716 test_loss: 0.9166075706481933
epoch: 16 training_loss 0.9208896225690841 test_loss: 0.9270500183105469
epoch: 17 training_loss 0.8899116677045822 test_loss: 0.8752324104309082
epoch: 18 training_loss 0.8773192858695984 test_loss: 0.8650736808776855
epoch: 19 training_loss 0.8715976983308792 test_loss: 0.8575017929077149
epoch: 20 training_loss 0.8473873668909073 test_loss: 0.8227984428405761
epoch: 21 training_loss 0.8246035629510879 test_loss: 0.8174765586853028
epoch: 22 training_loss 0.8188350743055344 test_loss: 0.9395703315734864
epoch: 23 training_loss 0.8087926667928695 test_loss: 0.7717508792877197
epoch: 24 training_loss 0.7732559514045715 test_loss: 0.776055383682251
epoch: 25 training_loss 0.7669238519668579 test_loss: 0.743332052230835
epoch: 26 training_loss 0.7661355185508728 test_loss: 0.7956981658935547
epoch: 27 training_loss 0.7429919737577438 test_loss: 0.7813214302062989
epoch: 28 training_loss 0.7304850769042969 test_loss: 0.7360517501831054
epoch: 29 training_loss 0.7251377540826798 test_loss: 0.7158791065216065
epoch: 30 training_loss 0.7278996986150742 test_loss: 0.7122366428375244
epoch: 31 training_loss 0.716063871383667 test_loss: 0.7380660533905029
epoch: 32 training_loss 0.7092387634515762 test_loss: 0.7021955013275146
epoch: 33 training_loss 0.705017397403717 test_loss: 0.692041015625
epoch: 34 training_loss 0.69196912586689 test_loss: 0.723015308380127
epoch: 35 training_loss 0.6946135449409485 test_loss: 0.678537654876709
epoch: 36 training_loss 0.6933606392145157 test_loss: 0.6624187469482422
epoch: 37 training_loss 0.6866974592208862 test_loss: 0.7149327754974365
epoch: 38 training_loss 0.6650874233245849 test_loss: 0.6517600059509278
epoch: 39 training_loss 0.6614430332183838 test_loss: 0.6577698230743408
epoch: 40 training_loss 0.6684849643707276 test_loss: 0.6408339023590088
epoch: 41 training_loss 0.6489667528867722 test_loss: 0.6645822048187255
epoch: 42 training_loss 0.6558837842941284 test_loss: 0.6601346015930176
epoch: 43 training_loss 0.6624613893032074 test_loss: 0.6697976589202881
epoch: 44 training_loss 0.6582213073968888 test_loss: 0.635270071029663
epoch: 45 training_loss 0.6534934216737747 test_loss: 0.6289556980133056
epoch: 46 training_loss 0.6333822983503342 test_loss: 0.6161988735198974
epoch: 47 training_loss 0.6437017291784286 test_loss: 0.6073781490325928
epoch: 48 training_loss 0.6399391019344329 test_loss: 0.6207997322082519
epoch: 49 training_loss 0.6278922879695892 test_loss: 0.624222707748413
epoch: 50 training_loss 0.6264603871107102 test_loss: 0.6084744453430175
epoch: 51 training_loss 0.6243177324533462 test_loss: 0.6488820075988769
epoch: 52 training_loss 0.6141332989931106 test_loss: 0.6014182090759277
epoch: 53 training_loss 0.608864620923996 test_loss: 0.6117372512817383
epoch: 54 training_loss 0.6168046987056732 test_loss: 0.5913392543792725
epoch: 55 training_loss 0.6145340174436569 test_loss: 0.5884900569915772
epoch: 56 training_loss 0.6015995228290558 test_loss: 0.6011913299560547
epoch: 57 training_loss 0.6044102072715759 test_loss: 0.5844295501708985
epoch: 58 training_loss 0.5848354148864746 test_loss: 0.5746797084808349
epoch: 59 training_loss 0.596085883975029 test_loss: 0.5996719837188721
epoch: 60 training_loss 0.5966957229375839 test_loss: 0.5782723426818848
epoch: 61 training_loss 0.5861938408017159 test_loss: 0.5756910800933838
epoch: 62 training_loss 0.594431112408638 test_loss: 0.5999678134918213
epoch: 63 training_loss 0.5869784933328629 test_loss: 0.5999765396118164
epoch: 64 training_loss 0.5838213545084 test_loss: 0.5783008575439453
epoch: 65 training_loss 0.5763522633910179 test_loss: 0.5995687484741211
epoch: 66 training_loss 0.5878203731775283 test_loss: 0.5940519332885742
epoch: 67 training_loss 0.5676256841421128 test_loss: 0.5792320728302002
epoch: 68 training_loss 0.5788912212848664 test_loss: 0.5722999572753906
epoch: 69 training_loss 0.576474472284317 test_loss: 0.572398328781128
epoch: 70 training_loss 0.5705906361341476 test_loss: 0.5955868244171143
epoch: 71 training_loss 0.570881564617157 test_loss: 0.5866190910339355
epoch: 72 training_loss 0.5749690181016922 test_loss: 0.582210636138916
epoch: 73 training_loss 0.5624842509627342 test_loss: 0.5372464656829834
epoch: 74 training_loss 0.5680377230048179 test_loss: 0.5708922863006591
epoch: 75 training_loss 0.568562051653862 test_loss: 0.5911351680755615
epoch: 76 training_loss 0.5668990132212639 test_loss: 0.577522087097168
epoch: 77 training_loss 0.5698344954848289 test_loss: 0.5502337455749512
epoch: 78 training_loss 0.5524884155392646 test_loss: 0.5392520427703857
epoch: 79 training_loss 0.5679748821258545 test_loss: 0.5429519176483154
epoch: 80 training_loss 0.5436466097831726 test_loss: 0.5476164817810059
epoch: 81 training_loss 0.5444341155886651 test_loss: 0.559180498123169
epoch: 82 training_loss 0.5513281700015068 test_loss: 0.5903566837310791
epoch: 83 training_loss 0.5550242924690246 test_loss: 0.5495793342590332
epoch: 84 training_loss 0.54346759557724 test_loss: 0.5237419128417968
epoch: 85 training_loss 0.5449894434213638 test_loss: 0.5278548240661621
epoch: 86 training_loss 0.5465221998095512 test_loss: 0.5328137874603271
epoch: 87 training_loss 0.5386578726768494 test_loss: 0.5492341041564941
epoch: 88 training_loss 0.5467452216148376 test_loss: 0.5260348796844483
epoch: 89 training_loss 0.5366877910494804 test_loss: 0.5340277671813964
epoch: 90 training_loss 0.5278350055217743 test_loss: 0.5295104026794434
epoch: 91 training_loss 0.5324182590842247 test_loss: 0.5371730327606201
epoch: 92 training_loss 0.5312914901971817 test_loss: 0.5366457462310791
epoch: 93 training_loss 0.5297347509860992 test_loss: 0.5158259868621826
epoch: 94 training_loss 0.5334748291969299 test_loss: 0.5327888011932373
epoch: 95 training_loss 0.5305067190527916 test_loss: 0.532170581817627
epoch: 96 training_loss 0.5300591990351677 test_loss: 0.5339914798736572
epoch: 97 training_loss 0.5330116590857505 test_loss: 0.5154392719268799
epoch: 98 training_loss 0.5371248793601989 test_loss: 0.5321545600891113
epoch: 99 training_loss 0.5356065937876702 test_loss: 0.5147082805633545
epoch: 100 training_loss 0.5252904731035233 test_loss: 0.5069738864898682
epoch: 101 training_loss 0.5213194650411606 test_loss: 0.5293911933898926
epoch: 102 training_loss 0.5316392055153847 test_loss: 0.5110970497131347
epoch: 103 training_loss 0.5249337637424469 test_loss: 0.5178404808044433
epoch: 104 training_loss 0.5153945222496986 test_loss: 0.5069332599639893
epoch: 105 training_loss 0.5159030449390412 test_loss: 0.5253992557525635
epoch: 106 training_loss 0.5215748777985573 test_loss: 0.5219041347503662
epoch: 107 training_loss 0.5149367958307266 test_loss: 0.5095754623413086
epoch: 108 training_loss 0.5083872732520104 test_loss: 0.5226267814636231
epoch: 109 training_loss 0.5145636346936225 test_loss: 0.5106105327606201
epoch: 110 training_loss 0.5119244265556335 test_loss: 0.4879417896270752
epoch: 111 training_loss 0.5070899322628974 test_loss: 0.5207016468048096
epoch: 112 training_loss 0.5220716446638107 test_loss: 0.49175467491149905
epoch: 113 training_loss 0.5103689911961555 test_loss: 0.5516853332519531
epoch: 114 training_loss 0.5076810267567634 test_loss: 0.49693756103515624
epoch: 115 training_loss 0.5096011364459991 test_loss: 0.5082338333129883
epoch: 116 training_loss 0.5110795465111733 test_loss: 0.5138492584228516
epoch: 117 training_loss 0.5043267297744751 test_loss: 0.49458088874816897
epoch: 118 training_loss 0.5044837176799775 test_loss: 0.4975725650787354
epoch: 119 training_loss 0.4977515834569931 test_loss: 0.49686145782470703
epoch: 120 training_loss 0.5170002201199532 test_loss: 0.5255480766296386
epoch: 121 training_loss 0.5031122720241546 test_loss: 0.48224449157714844
epoch: 122 training_loss 0.5001695111393929 test_loss: 0.49175219535827636
epoch: 123 training_loss 0.4995164462924004 test_loss: 0.493761682510376
epoch: 124 training_loss 0.49364953249692917 test_loss: 0.49205656051635743
epoch: 125 training_loss 0.4929425185918808 test_loss: 0.49066600799560545
epoch: 126 training_loss 0.49964668780565263 test_loss: 0.5093912124633789
epoch: 127 training_loss 0.5095803990960122 test_loss: 0.48667545318603517
epoch: 128 training_loss 0.49749779343605044 test_loss: 0.505033302307129
epoch: 129 training_loss 0.493254636824131 test_loss: 0.48018946647644045
epoch: 130 training_loss 0.5090526980161667 test_loss: 0.4804352283477783
epoch: 131 training_loss 0.49311011642217634 test_loss: 0.49190306663513184
epoch: 132 training_loss 0.49033577233552933 test_loss: 0.4975541114807129
epoch: 133 training_loss 0.4992236852645874 test_loss: 0.48604536056518555
epoch: 134 training_loss 0.4839263468980789 test_loss: 0.4720911026000977
epoch: 135 training_loss 0.49019611299037935 test_loss: 0.47901253700256347
epoch: 136 training_loss 0.488905286192894 test_loss: 0.48015727996826174
epoch: 137 training_loss 0.49251687705516817 test_loss: 0.48378920555114746
epoch: 138 training_loss 0.4846159127354622 test_loss: 0.48601694107055665
epoch: 139 training_loss 0.48778145283460617 test_loss: 0.49671359062194825
epoch: 140 training_loss 0.4921660503745079 test_loss: 0.4789266586303711
epoch: 141 training_loss 0.4915656316280365 test_loss: 0.48585052490234376
epoch: 142 training_loss 0.48172662168741226 test_loss: 0.4971482753753662
epoch: 143 training_loss 0.48285480558872224 test_loss: 0.4807115077972412
epoch: 144 training_loss 0.4789240491390228 test_loss: 0.4920823574066162
epoch: 145 training_loss 0.4903487929701805 test_loss: 0.46915383338928224
epoch: 146 training_loss 0.4771058443188667 test_loss: 0.47705793380737305
epoch: 147 training_loss 0.48515299677848817 test_loss: 0.46912841796875
epoch: 148 training_loss 0.48512565463781354 test_loss: 0.4724161624908447
epoch: 149 training_loss 0.47750831216573714 test_loss: 0.48340544700622556
1984.6578319863079
episode: 0 training return: tensor(262.8223, device='cuda:0')
episode: 1 training return: tensor(-327.3478, device='cuda:0')
episode: 2 training return: tensor(-185.3453, device='cuda:0')
episode: 3 training return: tensor(-322.7135, device='cuda:0')
epoch: 1 test_true_pfm: 2388.324838343749 sim_pfm: 83.21234794791478
episode: 4 training return: tensor(-361.3056, device='cuda:0')
episode: 5 training return: tensor(-251.8577, device='cuda:0')
episode: 6 training return: tensor(-410.8112, device='cuda:0')
episode: 7 training return: tensor(-411.4211, device='cuda:0')
epoch: 2 test_true_pfm: 1422.2607558246134 sim_pfm: -247.7297473915775
episode: 8 training return: tensor(-95.1837, device='cuda:0')
episode: 9 training return: tensor(-243.0072, device='cuda:0')
episode: 10 training return: tensor(-407.9463, device='cuda:0')
episode: 11 training return: tensor(149.1572, device='cuda:0')
epoch: 3 test_true_pfm: 1838.548263342871 sim_pfm: -104.73912396582698
episode: 12 training return: tensor(-414.3703, device='cuda:0')
episode: 13 training return: tensor(-285.7301, device='cuda:0')
episode: 14 training return: tensor(-415.3806, device='cuda:0')
episode: 15 training return: tensor(-400.5805, device='cuda:0')
epoch: 4 test_true_pfm: 1215.4951490609744 sim_pfm: -353.5174654670991
episode: 16 training return: tensor(-153.9640, device='cuda:0')
episode: 17 training return: tensor(-153.0741, device='cuda:0')
episode: 18 training return: tensor(-414.9977, device='cuda:0')
episode: 19 training return: tensor(-244.9156, device='cuda:0')
epoch: 5 test_true_pfm: 1693.4472964514478 sim_pfm: -385.32741774556536
episode: 20 training return: tensor(17.4610, device='cuda:0')
episode: 21 training return: tensor(-315.4680, device='cuda:0')
episode: 22 training return: tensor(-242.5571, device='cuda:0')
episode: 23 training return: tensor(-436.5052, device='cuda:0')
epoch: 6 test_true_pfm: 1238.3419976121015 sim_pfm: -428.9442966791491
episode: 24 training return: tensor(-285.4915, device='cuda:0')
episode: 25 training return: tensor(-433.6913, device='cuda:0')
episode: 26 training return: tensor(-329.6172, device='cuda:0')
episode: 27 training return: tensor(-324.1671, device='cuda:0')
epoch: 7 test_true_pfm: 1250.232376415673 sim_pfm: -435.6661212650749
episode: 28 training return: tensor(-183.5975, device='cuda:0')
episode: 29 training return: tensor(-408.3807, device='cuda:0')
episode: 30 training return: tensor(-333.0788, device='cuda:0')
episode: 31 training return: tensor(-238.1689, device='cuda:0')
epoch: 8 test_true_pfm: 1114.4934868882008 sim_pfm: -441.1450543543324
episode: 32 training return: tensor(-342.7841, device='cuda:0')
episode: 33 training return: tensor(20.7161, device='cuda:0')
episode: 34 training return: tensor(-164.9136, device='cuda:0')
episode: 35 training return: tensor(-489.5170, device='cuda:0')
epoch: 9 test_true_pfm: 1785.2035163928606 sim_pfm: -245.73158564046025
episode: 36 training return: tensor(-420.4391, device='cuda:0')
episode: 37 training return: tensor(-490.2959, device='cuda:0')
episode: 38 training return: tensor(-326.6823, device='cuda:0')
episode: 39 training return: tensor(-279.1104, device='cuda:0')
epoch: 10 test_true_pfm: 1859.3075536556269 sim_pfm: -267.8619049534124
episode: 40 training return: tensor(-274.8468, device='cuda:0')
episode: 41 training return: tensor(-371.2368, device='cuda:0')
episode: 42 training return: tensor(-227.8157, device='cuda:0')
episode: 43 training return: tensor(-368.5018, device='cuda:0')
epoch: 11 test_true_pfm: 1574.25384448177 sim_pfm: -304.74269079913694
episode: 44 training return: tensor(-227.0758, device='cuda:0')
episode: 45 training return: tensor(-353.3584, device='cuda:0')
episode: 46 training return: tensor(-340.8762, device='cuda:0')
episode: 47 training return: tensor(-408.0164, device='cuda:0')
epoch: 12 test_true_pfm: 1624.0375119449861 sim_pfm: -162.5001663127138
episode: 48 training return: tensor(-406.1893, device='cuda:0')
episode: 49 training return: tensor(-58.1236, device='cuda:0')
episode: 50 training return: tensor(-360.8078, device='cuda:0')
episode: 51 training return: tensor(-432.0426, device='cuda:0')
epoch: 13 test_true_pfm: 1599.0750776353216 sim_pfm: -299.8352704665934
episode: 52 training return: tensor(-333.2797, device='cuda:0')
episode: 53 training return: tensor(-239.6168, device='cuda:0')
episode: 54 training return: tensor(-408.7984, device='cuda:0')
episode: 55 training return: tensor(-433.7701, device='cuda:0')
epoch: 14 test_true_pfm: 1594.1001917411493 sim_pfm: -196.03965079041276
episode: 56 training return: tensor(-452.8376, device='cuda:0')
episode: 57 training return: tensor(-348.5698, device='cuda:0')
episode: 58 training return: tensor(-66.7567, device='cuda:0')
episode: 59 training return: tensor(-237.8215, device='cuda:0')
epoch: 15 test_true_pfm: 1550.6707348487823 sim_pfm: -313.7892825318656
episode: 60 training return: tensor(-444.6584, device='cuda:0')
episode: 61 training return: tensor(-288.7360, device='cuda:0')
episode: 62 training return: tensor(-314.0106, device='cuda:0')
episode: 63 training return: tensor(-362.3106, device='cuda:0')
epoch: 16 test_true_pfm: 1613.6704569060369 sim_pfm: -117.97806116876502
episode: 64 training return: tensor(-258.7293, device='cuda:0')
episode: 65 training return: tensor(-399.3591, device='cuda:0')
episode: 66 training return: tensor(-288.8205, device='cuda:0')
episode: 67 training return: tensor(42.1508, device='cuda:0')
epoch: 17 test_true_pfm: 1634.353080664543 sim_pfm: -215.931862421489
episode: 68 training return: tensor(-234.1898, device='cuda:0')
episode: 69 training return: tensor(-329.0218, device='cuda:0')
episode: 70 training return: tensor(-300.6600, device='cuda:0')
episode: 71 training return: tensor(-393.8301, device='cuda:0')
epoch: 18 test_true_pfm: 1632.9892825110685 sim_pfm: -281.81961251950514
episode: 72 training return: tensor(-329.6634, device='cuda:0')
episode: 73 training return: tensor(-388.6021, device='cuda:0')
episode: 74 training return: tensor(-421.3192, device='cuda:0')
episode: 75 training return: tensor(-401.0527, device='cuda:0')
epoch: 19 test_true_pfm: 1648.4376833889885 sim_pfm: -287.1855480668407
episode: 76 training return: tensor(-332.6033, device='cuda:0')
episode: 77 training return: tensor(-306.3167, device='cuda:0')
episode: 78 training return: tensor(-392.0951, device='cuda:0')
episode: 79 training return: tensor(-324.7070, device='cuda:0')
epoch: 20 test_true_pfm: 1618.631131044901 sim_pfm: -255.86950202301765
episode: 80 training return: tensor(-284.7089, device='cuda:0')
episode: 81 training return: tensor(-242.6749, device='cuda:0')
episode: 82 training return: tensor(-267.7962, device='cuda:0')
episode: 83 training return: tensor(-267.1744, device='cuda:0')
epoch: 21 test_true_pfm: 1705.2534636292555 sim_pfm: -259.75225308630615
episode: 84 training return: tensor(129.6252, device='cuda:0')
episode: 85 training return: tensor(-313.8256, device='cuda:0')
episode: 86 training return: tensor(-361.4186, device='cuda:0')
episode: 87 training return: tensor(-419.3641, device='cuda:0')
epoch: 22 test_true_pfm: 1777.0308206090288 sim_pfm: -249.1562776369974
episode: 88 training return: tensor(-184.7328, device='cuda:0')
episode: 89 training return: tensor(-52.2487, device='cuda:0')
episode: 90 training return: tensor(-320.3641, device='cuda:0')
episode: 91 training return: tensor(-369.6342, device='cuda:0')
epoch: 23 test_true_pfm: 1838.4872198887062 sim_pfm: -185.09127550168583
episode: 92 training return: tensor(-144.1238, device='cuda:0')
episode: 93 training return: tensor(-231.9982, device='cuda:0')
episode: 94 training return: tensor(-364.4157, device='cuda:0')
episode: 95 training return: tensor(-402.9948, device='cuda:0')
epoch: 24 test_true_pfm: 1863.5778822823033 sim_pfm: -160.87624390935525
episode: 96 training return: tensor(-125.0258, device='cuda:0')
episode: 97 training return: tensor(236.3738, device='cuda:0')
episode: 98 training return: tensor(-254.2712, device='cuda:0')
episode: 99 training return: tensor(-358.7842, device='cuda:0')
epoch: 25 test_true_pfm: 1891.1213503918268 sim_pfm: -182.1844336083935
episode: 100 training return: tensor(-69.6121, device='cuda:0')
episode: 101 training return: tensor(-174.1478, device='cuda:0')
episode: 102 training return: tensor(-318.7213, device='cuda:0')
episode: 103 training return: tensor(-314.3677, device='cuda:0')
epoch: 26 test_true_pfm: 2132.2098686824265 sim_pfm: -172.88500046543777
episode: 104 training return: tensor(-358.2704, device='cuda:0')
episode: 105 training return: tensor(-236.8558, device='cuda:0')
episode: 106 training return: tensor(-409.5623, device='cuda:0')
episode: 107 training return: tensor(-179.7712, device='cuda:0')
epoch: 27 test_true_pfm: 1825.6083949414888 sim_pfm: -182.73821645975113
episode: 108 training return: tensor(-404.1208, device='cuda:0')
episode: 109 training return: tensor(-302.7178, device='cuda:0')
episode: 110 training return: tensor(-220.6799, device='cuda:0')
episode: 111 training return: tensor(-4.2781, device='cuda:0')
epoch: 28 test_true_pfm: 2297.0578200800032 sim_pfm: -232.08619736329032
episode: 112 training return: tensor(-334.1800, device='cuda:0')
episode: 113 training return: tensor(-351.9264, device='cuda:0')
episode: 114 training return: tensor(-413.2519, device='cuda:0')
episode: 115 training return: tensor(109.5949, device='cuda:0')
epoch: 29 test_true_pfm: 1786.407645171458 sim_pfm: -218.1265741313497
episode: 116 training return: tensor(-332.3219, device='cuda:0')
episode: 117 training return: tensor(-318.3349, device='cuda:0')
episode: 118 training return: tensor(-384.8032, device='cuda:0')
episode: 119 training return: tensor(-242.0757, device='cuda:0')
epoch: 30 test_true_pfm: 1828.238086547614 sim_pfm: -204.43508110366142
episode: 120 training return: tensor(-355.2207, device='cuda:0')
episode: 121 training return: tensor(-328.8254, device='cuda:0')
episode: 122 training return: tensor(224.2516, device='cuda:0')
episode: 123 training return: tensor(-55.0295, device='cuda:0')
epoch: 31 test_true_pfm: 2064.542875119589 sim_pfm: -130.42275326978415
episode: 124 training return: tensor(-100.0380, device='cuda:0')
episode: 125 training return: tensor(-245.0764, device='cuda:0')
episode: 126 training return: tensor(-347.3442, device='cuda:0')
episode: 127 training return: tensor(131.5041, device='cuda:0')
epoch: 32 test_true_pfm: 2074.409414713707 sim_pfm: -20.335196405028302
episode: 128 training return: tensor(-406.7094, device='cuda:0')
episode: 129 training return: tensor(120.5757, device='cuda:0')
episode: 130 training return: tensor(-356.6219, device='cuda:0')
episode: 131 training return: tensor(-398.7908, device='cuda:0')
epoch: 33 test_true_pfm: 1663.3115125790791 sim_pfm: -108.09868024437067
episode: 132 training return: tensor(-410.3445, device='cuda:0')
episode: 133 training return: tensor(-260.8798, device='cuda:0')
episode: 134 training return: tensor(-310.6140, device='cuda:0')
episode: 135 training return: tensor(-142.5717, device='cuda:0')
epoch: 34 test_true_pfm: 1889.4548902908036 sim_pfm: -216.53403169537583
episode: 136 training return: tensor(-320.0160, device='cuda:0')
episode: 137 training return: tensor(-361.2207, device='cuda:0')
episode: 138 training return: tensor(-298.6353, device='cuda:0')
episode: 139 training return: tensor(-309.9966, device='cuda:0')
epoch: 35 test_true_pfm: 2520.8208140976694 sim_pfm: 21.011485901932854
episode: 140 training return: tensor(-136.0024, device='cuda:0')
episode: 141 training return: tensor(-210.7647, device='cuda:0')
episode: 142 training return: tensor(-316.7467, device='cuda:0')
episode: 143 training return: tensor(-405.0318, device='cuda:0')
epoch: 36 test_true_pfm: 2338.876900753635 sim_pfm: 206.64527965996726
episode: 144 training return: tensor(-317.9476, device='cuda:0')
episode: 145 training return: tensor(-352.1390, device='cuda:0')
episode: 146 training return: tensor(-42.0003, device='cuda:0')
episode: 147 training return: tensor(-314.2486, device='cuda:0')
epoch: 37 test_true_pfm: 2253.49014240747 sim_pfm: 102.83117403811775
episode: 148 training return: tensor(-48.5713, device='cuda:0')
episode: 149 training return: tensor(-381.2124, device='cuda:0')
episode: 150 training return: tensor(-336.6591, device='cuda:0')
episode: 151 training return: tensor(-271.2122, device='cuda:0')
epoch: 38 test_true_pfm: 3063.064741198479 sim_pfm: -51.44314576499164
episode: 152 training return: tensor(252.2571, device='cuda:0')
episode: 153 training return: tensor(18.2892, device='cuda:0')
episode: 154 training return: tensor(-342.5434, device='cuda:0')
episode: 155 training return: tensor(-398.3581, device='cuda:0')
epoch: 39 test_true_pfm: 2301.5904057312355 sim_pfm: -30.074928786916036
episode: 156 training return: tensor(304.1778, device='cuda:0')
episode: 157 training return: tensor(-117.2928, device='cuda:0')
episode: 158 training return: tensor(-143.3576, device='cuda:0')
episode: 159 training return: tensor(-57.2382, device='cuda:0')
epoch: 40 test_true_pfm: 2779.915139082487 sim_pfm: 44.93387516473498
episode: 160 training return: tensor(-245.1145, device='cuda:0')
episode: 161 training return: tensor(-397.8221, device='cuda:0')
episode: 162 training return: tensor(-76.8439, device='cuda:0')
episode: 163 training return: tensor(261.4779, device='cuda:0')
epoch: 41 test_true_pfm: 2688.8117246415472 sim_pfm: -122.55009722205189
episode: 164 training return: tensor(121.6864, device='cuda:0')
episode: 165 training return: tensor(-324.6127, device='cuda:0')
episode: 166 training return: tensor(-245.4077, device='cuda:0')
episode: 167 training return: tensor(254.4912, device='cuda:0')
epoch: 42 test_true_pfm: 2827.0201034096567 sim_pfm: -2.3963577211640463
episode: 168 training return: tensor(-367.3219, device='cuda:0')
episode: 169 training return: tensor(231.2981, device='cuda:0')
episode: 170 training return: tensor(-350.0627, device='cuda:0')
episode: 171 training return: tensor(-398.6672, device='cuda:0')
epoch: 43 test_true_pfm: 2248.630748870175 sim_pfm: 34.67750755111532
episode: 172 training return: tensor(-57.6311, device='cuda:0')
episode: 173 training return: tensor(-357.5200, device='cuda:0')
episode: 174 training return: tensor(-325.2335, device='cuda:0')
episode: 175 training return: tensor(-362.3041, device='cuda:0')
epoch: 44 test_true_pfm: 1973.7402603357407 sim_pfm: -56.942194133802936
episode: 176 training return: tensor(-32.7371, device='cuda:0')
episode: 177 training return: tensor(-348.1278, device='cuda:0')
episode: 178 training return: tensor(264.9122, device='cuda:0')
episode: 179 training return: tensor(99.3769, device='cuda:0')
epoch: 45 test_true_pfm: 2763.7083383661115 sim_pfm: 88.12760461699993
episode: 180 training return: tensor(-26.6736, device='cuda:0')
episode: 181 training return: tensor(201.3266, device='cuda:0')
episode: 182 training return: tensor(-395.1554, device='cuda:0')
episode: 183 training return: tensor(96.3708, device='cuda:0')
epoch: 46 test_true_pfm: 2368.8840798685555 sim_pfm: -173.65271187045923
episode: 184 training return: tensor(198.3445, device='cuda:0')
episode: 185 training return: tensor(-404.9164, device='cuda:0')
episode: 186 training return: tensor(-320.4077, device='cuda:0')
episode: 187 training return: tensor(-95.7362, device='cuda:0')
epoch: 47 test_true_pfm: 2970.143825663812 sim_pfm: 105.85175134055316
episode: 188 training return: tensor(-228.0518, device='cuda:0')
episode: 189 training return: tensor(-348.8490, device='cuda:0')
episode: 190 training return: tensor(35.6785, device='cuda:0')
episode: 191 training return: tensor(-314.6941, device='cuda:0')
epoch: 48 test_true_pfm: 1776.3447063892872 sim_pfm: -1.8258914546264957
episode: 192 training return: tensor(-123.7304, device='cuda:0')
episode: 193 training return: tensor(76.4450, device='cuda:0')
episode: 194 training return: tensor(-85.4487, device='cuda:0')
episode: 195 training return: tensor(-330.5442, device='cuda:0')
epoch: 49 test_true_pfm: 2381.0684034874935 sim_pfm: -124.47851346324508
episode: 196 training return: tensor(-249.3120, device='cuda:0')
episode: 197 training return: tensor(-321.0989, device='cuda:0')
episode: 198 training return: tensor(-392.1036, device='cuda:0')
episode: 199 training return: tensor(-310.5998, device='cuda:0')
epoch: 50 test_true_pfm: 2421.5207674850435 sim_pfm: 11.849708294573551
episode: 200 training return: tensor(-86.1053, device='cuda:0')
episode: 201 training return: tensor(-394.4676, device='cuda:0')
episode: 202 training return: tensor(-126.5621, device='cuda:0')
episode: 203 training return: tensor(113.2561, device='cuda:0')
epoch: 51 test_true_pfm: 2766.4189271944106 sim_pfm: 166.60163523741844
episode: 204 training return: tensor(297.9724, device='cuda:0')
episode: 205 training return: tensor(-76.2842, device='cuda:0')
episode: 206 training return: tensor(-247.5653, device='cuda:0')
episode: 207 training return: tensor(124.4061, device='cuda:0')
epoch: 52 test_true_pfm: 2440.6573443970105 sim_pfm: -21.21620166829477
episode: 208 training return: tensor(-255.1398, device='cuda:0')
episode: 209 training return: tensor(-246.1992, device='cuda:0')
episode: 210 training return: tensor(-263.0740, device='cuda:0')
episode: 211 training return: tensor(143.2991, device='cuda:0')
epoch: 53 test_true_pfm: 2089.7147768244663 sim_pfm: -83.92064352946666
episode: 212 training return: tensor(-134.4552, device='cuda:0')
episode: 213 training return: tensor(-311.3377, device='cuda:0')
episode: 214 training return: tensor(300.6512, device='cuda:0')
episode: 215 training return: tensor(-364.9944, device='cuda:0')
epoch: 54 test_true_pfm: 2379.308755454257 sim_pfm: -161.2767652263865
episode: 216 training return: tensor(-144.1067, device='cuda:0')
episode: 217 training return: tensor(-6.6909, device='cuda:0')
episode: 218 training return: tensor(41.5461, device='cuda:0')
episode: 219 training return: tensor(-230.3058, device='cuda:0')
epoch: 55 test_true_pfm: 2258.6060390988996 sim_pfm: 268.99192482000217
episode: 220 training return: tensor(-300.9233, device='cuda:0')
episode: 221 training return: tensor(-340.6270, device='cuda:0')
episode: 222 training return: tensor(-220.7720, device='cuda:0')
episode: 223 training return: tensor(-320.3919, device='cuda:0')
epoch: 56 test_true_pfm: 2683.955605131783 sim_pfm: -9.298588590463623
episode: 224 training return: tensor(-149.3269, device='cuda:0')
episode: 225 training return: tensor(-324.8289, device='cuda:0')
episode: 226 training return: tensor(-313.1451, device='cuda:0')
episode: 227 training return: tensor(-314.2761, device='cuda:0')
epoch: 57 test_true_pfm: 2489.935931912462 sim_pfm: 95.20843896343528
episode: 228 training return: tensor(-328.1176, device='cuda:0')
episode: 229 training return: tensor(-108.8030, device='cuda:0')
episode: 230 training return: tensor(-329.4418, device='cuda:0')
episode: 231 training return: tensor(-305.3050, device='cuda:0')
epoch: 58 test_true_pfm: 2813.93447533084 sim_pfm: -19.513355287606828
episode: 232 training return: tensor(-314.1316, device='cuda:0')
episode: 233 training return: tensor(-316.8121, device='cuda:0')
episode: 234 training return: tensor(-236.4950, device='cuda:0')
episode: 235 training return: tensor(-174.0733, device='cuda:0')
epoch: 59 test_true_pfm: 1894.6595974769182 sim_pfm: 4.621959925784419
episode: 236 training return: tensor(-161.3572, device='cuda:0')
episode: 237 training return: tensor(82.6990, device='cuda:0')
episode: 238 training return: tensor(-343.2319, device='cuda:0')
episode: 239 training return: tensor(-357.7059, device='cuda:0')
epoch: 60 test_true_pfm: 2090.466135525385 sim_pfm: -171.42335903898734
episode: 240 training return: tensor(-68.8592, device='cuda:0')
episode: 241 training return: tensor(-353.9345, device='cuda:0')
episode: 242 training return: tensor(-156.9288, device='cuda:0')
episode: 243 training return: tensor(-69.3227, device='cuda:0')
epoch: 61 test_true_pfm: 1932.5027793072652 sim_pfm: -198.41435255108323
episode: 244 training return: tensor(-336.1840, device='cuda:0')
episode: 245 training return: tensor(-281.9341, device='cuda:0')
episode: 246 training return: tensor(-356.2839, device='cuda:0')
episode: 247 training return: tensor(60.3654, device='cuda:0')
epoch: 62 test_true_pfm: 2785.817887375209 sim_pfm: 107.29260893476506
episode: 248 training return: tensor(-322.6197, device='cuda:0')
episode: 249 training return: tensor(-59.7472, device='cuda:0')
episode: 250 training return: tensor(-342.4540, device='cuda:0')
episode: 251 training return: tensor(121.0766, device='cuda:0')
epoch: 63 test_true_pfm: 2694.0387026189596 sim_pfm: -125.50061319636491
episode: 252 training return: tensor(-392.1868, device='cuda:0')
episode: 253 training return: tensor(-61.8103, device='cuda:0')
episode: 254 training return: tensor(-359.6854, device='cuda:0')
episode: 255 training return: tensor(-110.2837, device='cuda:0')
epoch: 64 test_true_pfm: 2621.5419139783685 sim_pfm: 126.30144465481862
episode: 256 training return: tensor(-238.1145, device='cuda:0')
episode: 257 training return: tensor(-402.2026, device='cuda:0')
episode: 258 training return: tensor(-267.8973, device='cuda:0')
episode: 259 training return: tensor(-362.6476, device='cuda:0')
epoch: 65 test_true_pfm: 1962.2306770106024 sim_pfm: -205.92551690576752
episode: 260 training return: tensor(-330.2509, device='cuda:0')
episode: 261 training return: tensor(-366.5695, device='cuda:0')
episode: 262 training return: tensor(-166.3401, device='cuda:0')
episode: 263 training return: tensor(242.7385, device='cuda:0')
epoch: 66 test_true_pfm: 2760.0109910198553 sim_pfm: 271.98295406921534
episode: 264 training return: tensor(-396.2168, device='cuda:0')
episode: 265 training return: tensor(-344.7905, device='cuda:0')
episode: 266 training return: tensor(-237.7515, device='cuda:0')
episode: 267 training return: tensor(-62.5877, device='cuda:0')
epoch: 67 test_true_pfm: 2688.365989386875 sim_pfm: 10.823398964790007
episode: 268 training return: tensor(-409.1998, device='cuda:0')
episode: 269 training return: tensor(-168.5668, device='cuda:0')
episode: 270 training return: tensor(-374.0521, device='cuda:0')
episode: 271 training return: tensor(-421.0408, device='cuda:0')
epoch: 68 test_true_pfm: 1946.2386181820148 sim_pfm: -38.43452587265832
episode: 272 training return: tensor(-171.9880, device='cuda:0')
episode: 273 training return: tensor(-406.7685, device='cuda:0')
episode: 274 training return: tensor(191.2334, device='cuda:0')
episode: 275 training return: tensor(306.1611, device='cuda:0')
epoch: 69 test_true_pfm: 2111.6301497452355 sim_pfm: 178.80634943877035
episode: 276 training return: tensor(45.9540, device='cuda:0')
episode: 277 training return: tensor(-189.8647, device='cuda:0')
episode: 278 training return: tensor(8.9987, device='cuda:0')
episode: 279 training return: tensor(-249.9550, device='cuda:0')
epoch: 70 test_true_pfm: 2524.8311497276186 sim_pfm: 36.435049947933294
episode: 280 training return: tensor(-275.4993, device='cuda:0')
episode: 281 training return: tensor(-234.9146, device='cuda:0')
episode: 282 training return: tensor(-314.3458, device='cuda:0')
episode: 283 training return: tensor(210.7264, device='cuda:0')
epoch: 71 test_true_pfm: 2411.3069523432596 sim_pfm: -204.1712813260965
episode: 284 training return: tensor(-31.6526, device='cuda:0')
episode: 285 training return: tensor(-357.1582, device='cuda:0')
episode: 286 training return: tensor(-230.1496, device='cuda:0')
episode: 287 training return: tensor(-338.3800, device='cuda:0')
epoch: 72 test_true_pfm: 2340.1852130053644 sim_pfm: -12.237527411120633
episode: 288 training return: tensor(-279.8293, device='cuda:0')
episode: 289 training return: tensor(-391.6802, device='cuda:0')
episode: 290 training return: tensor(273.3302, device='cuda:0')
episode: 291 training return: tensor(-333.9677, device='cuda:0')
epoch: 73 test_true_pfm: 2041.7636139116148 sim_pfm: -19.911209089215845
episode: 292 training return: tensor(4.0532, device='cuda:0')
episode: 293 training return: tensor(-328.5854, device='cuda:0')
episode: 294 training return: tensor(-249.1096, device='cuda:0')
episode: 295 training return: tensor(-403.0599, device='cuda:0')
epoch: 74 test_true_pfm: 2467.7176111322283 sim_pfm: -57.408764569476865
episode: 296 training return: tensor(-397.7662, device='cuda:0')
episode: 297 training return: tensor(306.8708, device='cuda:0')
episode: 298 training return: tensor(-311.4267, device='cuda:0')
episode: 299 training return: tensor(-257.6936, device='cuda:0')
epoch: 75 test_true_pfm: 2870.2637232588895 sim_pfm: -86.51699813703696
episode: 300 training return: tensor(-368.1428, device='cuda:0')
episode: 301 training return: tensor(-370.1612, device='cuda:0')
episode: 302 training return: tensor(-201.4434, device='cuda:0')
episode: 303 training return: tensor(-152.1353, device='cuda:0')
epoch: 76 test_true_pfm: 2205.152419561751 sim_pfm: -236.58743483247235
episode: 304 training return: tensor(9.2903, device='cuda:0')
episode: 305 training return: tensor(-310.6490, device='cuda:0')
episode: 306 training return: tensor(-158.4386, device='cuda:0')
episode: 307 training return: tensor(-278.1852, device='cuda:0')
epoch: 77 test_true_pfm: 1795.9793215176007 sim_pfm: 18.094492194223374
episode: 308 training return: tensor(193.1834, device='cuda:0')
episode: 309 training return: tensor(-272.8796, device='cuda:0')
episode: 310 training return: tensor(245.2676, device='cuda:0')
episode: 311 training return: tensor(-353.5638, device='cuda:0')
epoch: 78 test_true_pfm: 2333.7186224002303 sim_pfm: 102.70431407021049
episode: 312 training return: tensor(-153.7130, device='cuda:0')
episode: 313 training return: tensor(-275.1506, device='cuda:0')
episode: 314 training return: tensor(-341.3062, device='cuda:0')
episode: 315 training return: tensor(-353.4054, device='cuda:0')
epoch: 79 test_true_pfm: 2627.0806185029655 sim_pfm: 94.58883016711722
episode: 316 training return: tensor(-278.6947, device='cuda:0')
episode: 317 training return: tensor(-279.5079, device='cuda:0')
episode: 318 training return: tensor(-198.6337, device='cuda:0')
episode: 319 training return: tensor(-108.1914, device='cuda:0')
epoch: 80 test_true_pfm: 1673.6948849404969 sim_pfm: -129.17794533073902
episode: 320 training return: tensor(-326.7517, device='cuda:0')
episode: 321 training return: tensor(302.7096, device='cuda:0')
episode: 322 training return: tensor(262.0657, device='cuda:0')
episode: 323 training return: tensor(257.1788, device='cuda:0')
epoch: 81 test_true_pfm: 2265.5588685188136 sim_pfm: -56.169440132992655
episode: 324 training return: tensor(-397.5058, device='cuda:0')
episode: 325 training return: tensor(-394.0363, device='cuda:0')
episode: 326 training return: tensor(-315.7198, device='cuda:0')
episode: 327 training return: tensor(-351.3096, device='cuda:0')
epoch: 82 test_true_pfm: 2085.223658462692 sim_pfm: 23.72398455142199
episode: 328 training return: tensor(-322.6309, device='cuda:0')
episode: 329 training return: tensor(241.4707, device='cuda:0')
episode: 330 training return: tensor(-282.5031, device='cuda:0')
episode: 331 training return: tensor(-324.0634, device='cuda:0')
epoch: 83 test_true_pfm: 1817.6508846422996 sim_pfm: 123.87469017431916
episode: 332 training return: tensor(-181.5750, device='cuda:0')
episode: 333 training return: tensor(16.6486, device='cuda:0')
episode: 334 training return: tensor(-363.3176, device='cuda:0')
episode: 335 training return: tensor(-342.7455, device='cuda:0')
epoch: 84 test_true_pfm: 1875.102207123926 sim_pfm: 286.28140290236723
episode: 336 training return: tensor(-335.6264, device='cuda:0')
episode: 337 training return: tensor(-96.9082, device='cuda:0')
episode: 338 training return: tensor(-356.4186, device='cuda:0')
episode: 339 training return: tensor(-405.0253, device='cuda:0')
epoch: 85 test_true_pfm: 2161.790748190108 sim_pfm: -208.154376451935
episode: 340 training return: tensor(-90.1933, device='cuda:0')
episode: 341 training return: tensor(248.5237, device='cuda:0')
episode: 342 training return: tensor(-282.7172, device='cuda:0')
episode: 343 training return: tensor(-327.3145, device='cuda:0')
epoch: 86 test_true_pfm: 1761.3471529378496 sim_pfm: -234.34438061434776
episode: 344 training return: tensor(220.3863, device='cuda:0')
episode: 345 training return: tensor(-46.0866, device='cuda:0')
episode: 346 training return: tensor(72.6446, device='cuda:0')
episode: 347 training return: tensor(-47.3963, device='cuda:0')
epoch: 87 test_true_pfm: 1858.224563713807 sim_pfm: 47.27266084123403
episode: 348 training return: tensor(-71.1318, device='cuda:0')
episode: 349 training return: tensor(-342.6265, device='cuda:0')
episode: 350 training return: tensor(-323.4989, device='cuda:0')
episode: 351 training return: tensor(-354.5226, device='cuda:0')
epoch: 88 test_true_pfm: 1955.4173485674528 sim_pfm: -72.3371928142539
episode: 352 training return: tensor(-342.9557, device='cuda:0')
episode: 353 training return: tensor(4.3988, device='cuda:0')
episode: 354 training return: tensor(203.6087, device='cuda:0')
episode: 355 training return: tensor(-258.8359, device='cuda:0')
epoch: 89 test_true_pfm: 1917.8817475516323 sim_pfm: 191.60815051423074
episode: 356 training return: tensor(-249.6733, device='cuda:0')
episode: 357 training return: tensor(-213.2121, device='cuda:0')
episode: 358 training return: tensor(-365.6897, device='cuda:0')
episode: 359 training return: tensor(-254.8715, device='cuda:0')
epoch: 90 test_true_pfm: 2381.706493683912 sim_pfm: 167.85578572334876
episode: 360 training return: tensor(-109.3812, device='cuda:0')
episode: 361 training return: tensor(-338.8257, device='cuda:0')
episode: 362 training return: tensor(284.3856, device='cuda:0')
episode: 363 training return: tensor(-337.1695, device='cuda:0')
epoch: 91 test_true_pfm: 2252.8324320456372 sim_pfm: -63.11035179888131
episode: 364 training return: tensor(-342.6351, device='cuda:0')
episode: 365 training return: tensor(-328.4744, device='cuda:0')
episode: 366 training return: tensor(-341.1082, device='cuda:0')
episode: 367 training return: tensor(-324.9949, device='cuda:0')
epoch: 92 test_true_pfm: 2099.308354307588 sim_pfm: -191.4076025178656
episode: 368 training return: tensor(-283.5112, device='cuda:0')
episode: 369 training return: tensor(-148.0706, device='cuda:0')
episode: 370 training return: tensor(241.2436, device='cuda:0')
episode: 371 training return: tensor(-360.0867, device='cuda:0')
epoch: 93 test_true_pfm: 2254.197321635458 sim_pfm: 118.88097261358052
episode: 372 training return: tensor(-332.3912, device='cuda:0')
episode: 373 training return: tensor(-263.7004, device='cuda:0')
episode: 374 training return: tensor(52.6638, device='cuda:0')
episode: 375 training return: tensor(-101.3530, device='cuda:0')
epoch: 94 test_true_pfm: 2666.209506824714 sim_pfm: -170.9848701748997
episode: 376 training return: tensor(-406.5333, device='cuda:0')
episode: 377 training return: tensor(98.0737, device='cuda:0')
episode: 378 training return: tensor(-230.4266, device='cuda:0')
episode: 379 training return: tensor(164.1878, device='cuda:0')
epoch: 95 test_true_pfm: 2864.680648625535 sim_pfm: -48.94289204689752
episode: 380 training return: tensor(-373.0207, device='cuda:0')
episode: 381 training return: tensor(-382.1592, device='cuda:0')
episode: 382 training return: tensor(115.1225, device='cuda:0')
episode: 383 training return: tensor(-274.1704, device='cuda:0')
epoch: 96 test_true_pfm: 2109.9358357296032 sim_pfm: -104.97853235046689
episode: 384 training return: tensor(34.1728, device='cuda:0')
episode: 385 training return: tensor(265.9044, device='cuda:0')
episode: 386 training return: tensor(145.1823, device='cuda:0')
episode: 387 training return: tensor(-145.8983, device='cuda:0')
epoch: 97 test_true_pfm: 2741.9219174959658 sim_pfm: -260.64513081560534
episode: 388 training return: tensor(-130.5267, device='cuda:0')
episode: 389 training return: tensor(-339.1034, device='cuda:0')
episode: 390 training return: tensor(-362.7913, device='cuda:0')
episode: 391 training return: tensor(-389.6105, device='cuda:0')
epoch: 98 test_true_pfm: 1769.9953754784162 sim_pfm: -127.47390759767343
episode: 392 training return: tensor(-325.8428, device='cuda:0')
episode: 393 training return: tensor(-335.7482, device='cuda:0')
episode: 394 training return: tensor(-350.9325, device='cuda:0')
episode: 395 training return: tensor(-348.3592, device='cuda:0')
epoch: 99 test_true_pfm: 2228.0087502416823 sim_pfm: 79.9141566270652
episode: 396 training return: tensor(-399.1770, device='cuda:0')
episode: 397 training return: tensor(-233.2631, device='cuda:0')
episode: 398 training return: tensor(91.7746, device='cuda:0')
episode: 399 training return: tensor(-177.0102, device='cuda:0')
epoch: 100 test_true_pfm: 2790.3453097250112 sim_pfm: 76.8525028139508
episode: 400 training return: tensor(-240.3968, device='cuda:0')
episode: 401 training return: tensor(-134.2358, device='cuda:0')
episode: 402 training return: tensor(14.6433, device='cuda:0')
episode: 403 training return: tensor(-173.9398, device='cuda:0')
epoch: 101 test_true_pfm: 2538.842284376555 sim_pfm: 235.81112562826215
episode: 404 training return: tensor(219.1929, device='cuda:0')
episode: 405 training return: tensor(-50.9660, device='cuda:0')
episode: 406 training return: tensor(173.7353, device='cuda:0')
episode: 407 training return: tensor(297.5218, device='cuda:0')
epoch: 102 test_true_pfm: 2283.6630562284895 sim_pfm: -157.31394844998917
episode: 408 training return: tensor(-16.2016, device='cuda:0')
episode: 409 training return: tensor(-349.0069, device='cuda:0')
episode: 410 training return: tensor(-255.4229, device='cuda:0')
episode: 411 training return: tensor(-251.1847, device='cuda:0')
epoch: 103 test_true_pfm: 2280.931218598484 sim_pfm: 129.43763454351574
episode: 412 training return: tensor(-357.1519, device='cuda:0')
episode: 413 training return: tensor(-313.0535, device='cuda:0')
episode: 414 training return: tensor(-219.3895, device='cuda:0')
episode: 415 training return: tensor(198.8354, device='cuda:0')
epoch: 104 test_true_pfm: 1769.6698011817778 sim_pfm: -203.2565856159781
episode: 416 training return: tensor(-280.1574, device='cuda:0')
episode: 417 training return: tensor(-401.1928, device='cuda:0')
episode: 418 training return: tensor(-328.8123, device='cuda:0')
episode: 419 training return: tensor(264.8402, device='cuda:0')
epoch: 105 test_true_pfm: 1866.2402696777538 sim_pfm: -196.96656866643266
episode: 420 training return: tensor(189.7497, device='cuda:0')
episode: 421 training return: tensor(-143.4896, device='cuda:0')
episode: 422 training return: tensor(-340.7881, device='cuda:0')
episode: 423 training return: tensor(-239.5480, device='cuda:0')
epoch: 106 test_true_pfm: 1978.0587271285156 sim_pfm: -256.5407084872325
episode: 424 training return: tensor(-147.3101, device='cuda:0')
episode: 425 training return: tensor(-185.9427, device='cuda:0')
episode: 426 training return: tensor(-121.5959, device='cuda:0')
episode: 427 training return: tensor(43.0155, device='cuda:0')
epoch: 107 test_true_pfm: 2126.020570901532 sim_pfm: -49.33594239569114
episode: 428 training return: tensor(-331.7590, device='cuda:0')
episode: 429 training return: tensor(-364.2211, device='cuda:0')
episode: 430 training return: tensor(-332.4220, device='cuda:0')
episode: 431 training return: tensor(-338.8237, device='cuda:0')
epoch: 108 test_true_pfm: 2870.009768653427 sim_pfm: -216.16961417743005
episode: 432 training return: tensor(-233.1394, device='cuda:0')
episode: 433 training return: tensor(113.1317, device='cuda:0')
episode: 434 training return: tensor(-410.4153, device='cuda:0')
episode: 435 training return: tensor(-264.7169, device='cuda:0')
epoch: 109 test_true_pfm: 2222.2302282129326 sim_pfm: -34.366639019475166
episode: 436 training return: tensor(-351.4529, device='cuda:0')
episode: 437 training return: tensor(-343.1416, device='cuda:0')
episode: 438 training return: tensor(-344.6086, device='cuda:0')
episode: 439 training return: tensor(-354.1582, device='cuda:0')
epoch: 110 test_true_pfm: 2808.3816171090853 sim_pfm: -92.09427108666084
episode: 440 training return: tensor(-321.3048, device='cuda:0')
episode: 441 training return: tensor(-353.5514, device='cuda:0')
episode: 442 training return: tensor(-266.6557, device='cuda:0')
episode: 443 training return: tensor(-353.5957, device='cuda:0')
epoch: 111 test_true_pfm: 2392.846746677476 sim_pfm: -247.57637715246528
episode: 444 training return: tensor(-421.8228, device='cuda:0')
episode: 445 training return: tensor(47.8929, device='cuda:0')
episode: 446 training return: tensor(-178.9588, device='cuda:0')
episode: 447 training return: tensor(-170.4047, device='cuda:0')
epoch: 112 test_true_pfm: 2522.27330990527 sim_pfm: -195.84511970340586
episode: 448 training return: tensor(249.1260, device='cuda:0')
episode: 449 training return: tensor(-344.4922, device='cuda:0')
episode: 450 training return: tensor(222.9300, device='cuda:0')
episode: 451 training return: tensor(125.9404, device='cuda:0')
epoch: 113 test_true_pfm: 1776.8342086810378 sim_pfm: -102.74634560228635
episode: 452 training return: tensor(-106.7156, device='cuda:0')
episode: 453 training return: tensor(-28.7966, device='cuda:0')
episode: 454 training return: tensor(-244.7882, device='cuda:0')
episode: 455 training return: tensor(35.1404, device='cuda:0')
epoch: 114 test_true_pfm: 1742.5847440194755 sim_pfm: -129.6531623171953
episode: 456 training return: tensor(-241.3917, device='cuda:0')
episode: 457 training return: tensor(-219.8033, device='cuda:0')
episode: 458 training return: tensor(-146.5433, device='cuda:0')
episode: 459 training return: tensor(-305.9696, device='cuda:0')
epoch: 115 test_true_pfm: 2195.6986172320544 sim_pfm: -250.7330756020577
episode: 460 training return: tensor(-229.2847, device='cuda:0')
episode: 461 training return: tensor(-358.1681, device='cuda:0')
episode: 462 training return: tensor(-406.8942, device='cuda:0')
episode: 463 training return: tensor(-168.7482, device='cuda:0')
epoch: 116 test_true_pfm: 2061.206049899924 sim_pfm: -252.4402231311736
episode: 464 training return: tensor(239.2095, device='cuda:0')
episode: 465 training return: tensor(302.2618, device='cuda:0')
episode: 466 training return: tensor(-247.4103, device='cuda:0')
episode: 467 training return: tensor(127.7524, device='cuda:0')
epoch: 117 test_true_pfm: 2393.430483277593 sim_pfm: -43.16940715404538
episode: 468 training return: tensor(-275.3704, device='cuda:0')
episode: 469 training return: tensor(35.0133, device='cuda:0')
episode: 470 training return: tensor(-241.4920, device='cuda:0')
episode: 471 training return: tensor(274.6510, device='cuda:0')
epoch: 118 test_true_pfm: 2205.547837502076 sim_pfm: -174.68174849621332
episode: 472 training return: tensor(61.3491, device='cuda:0')
episode: 473 training return: tensor(-382.0446, device='cuda:0')
episode: 474 training return: tensor(275.9304, device='cuda:0')
episode: 475 training return: tensor(-132.2362, device='cuda:0')
epoch: 119 test_true_pfm: 1793.6541272908783 sim_pfm: -176.34729082820317
episode: 476 training return: tensor(-194.1091, device='cuda:0')
episode: 477 training return: tensor(-76.4925, device='cuda:0')
episode: 478 training return: tensor(-327.2869, device='cuda:0')
episode: 479 training return: tensor(-123.6071, device='cuda:0')
epoch: 120 test_true_pfm: 1950.1611598161442 sim_pfm: -197.57955194547927
episode: 480 training return: tensor(-49.4636, device='cuda:0')
episode: 481 training return: tensor(0.8166, device='cuda:0')
episode: 482 training return: tensor(-164.8848, device='cuda:0')
episode: 483 training return: tensor(244.1292, device='cuda:0')
epoch: 121 test_true_pfm: 2522.992016089965 sim_pfm: -62.73452244201326
episode: 484 training return: tensor(-282.3611, device='cuda:0')
episode: 485 training return: tensor(-436.5438, device='cuda:0')
episode: 486 training return: tensor(-178.7757, device='cuda:0')
episode: 487 training return: tensor(-138.1605, device='cuda:0')
epoch: 122 test_true_pfm: 2225.807898454814 sim_pfm: -205.25473036104813
episode: 488 training return: tensor(-160.6401, device='cuda:0')
episode: 489 training return: tensor(72.9375, device='cuda:0')
episode: 490 training return: tensor(-324.6186, device='cuda:0')
episode: 491 training return: tensor(-75.4010, device='cuda:0')
epoch: 123 test_true_pfm: 2266.8033736411107 sim_pfm: -191.45659611335336
episode: 492 training return: tensor(-360.6474, device='cuda:0')
episode: 493 training return: tensor(237.5507, device='cuda:0')
episode: 494 training return: tensor(-267.8027, device='cuda:0')
episode: 495 training return: tensor(-176.3956, device='cuda:0')
epoch: 124 test_true_pfm: 1754.0780513914476 sim_pfm: -244.13891476466475
episode: 496 training return: tensor(-229.1848, device='cuda:0')
episode: 497 training return: tensor(-367.5457, device='cuda:0')
episode: 498 training return: tensor(-336.0687, device='cuda:0')
episode: 499 training return: tensor(-286.9397, device='cuda:0')
epoch: 125 test_true_pfm: 1780.2775084959005 sim_pfm: -201.13275259911703
episode: 500 training return: tensor(-319.4422, device='cuda:0')
episode: 501 training return: tensor(-277.9232, device='cuda:0')
episode: 502 training return: tensor(-52.9761, device='cuda:0')
episode: 503 training return: tensor(-275.7113, device='cuda:0')
epoch: 126 test_true_pfm: 2521.6846431413073 sim_pfm: -134.16547361544022
episode: 504 training return: tensor(-69.5994, device='cuda:0')
episode: 505 training return: tensor(-192.9227, device='cuda:0')
episode: 506 training return: tensor(-69.7026, device='cuda:0')
episode: 507 training return: tensor(-194.4453, device='cuda:0')
epoch: 127 test_true_pfm: 1984.438464693753 sim_pfm: -177.32652518836161
episode: 508 training return: tensor(-253.7941, device='cuda:0')
episode: 509 training return: tensor(115.2505, device='cuda:0')
episode: 510 training return: tensor(-272.8920, device='cuda:0')
episode: 511 training return: tensor(-318.8157, device='cuda:0')
epoch: 128 test_true_pfm: 2294.55326267637 sim_pfm: -51.363294673307486
episode: 512 training return: tensor(-249.4830, device='cuda:0')
episode: 513 training return: tensor(-87.0918, device='cuda:0')
episode: 514 training return: tensor(-374.7170, device='cuda:0')
episode: 515 training return: tensor(242.3183, device='cuda:0')
epoch: 129 test_true_pfm: 2371.7396627225808 sim_pfm: -33.809597975574434
episode: 516 training return: tensor(78.9261, device='cuda:0')
episode: 517 training return: tensor(40.5286, device='cuda:0')
episode: 518 training return: tensor(-144.2413, device='cuda:0')
episode: 519 training return: tensor(-84.4816, device='cuda:0')
epoch: 130 test_true_pfm: 2780.892695407463 sim_pfm: -82.69356175217156
episode: 520 training return: tensor(-228.6254, device='cuda:0')
episode: 521 training return: tensor(101.2693, device='cuda:0')
episode: 522 training return: tensor(-351.4925, device='cuda:0')
episode: 523 training return: tensor(-231.6875, device='cuda:0')
epoch: 131 test_true_pfm: 2015.660986852623 sim_pfm: -168.1525843491157
episode: 524 training return: tensor(-62.4806, device='cuda:0')
episode: 525 training return: tensor(-179.6685, device='cuda:0')
episode: 526 training return: tensor(-292.4615, device='cuda:0')
episode: 527 training return: tensor(-330.1172, device='cuda:0')
epoch: 132 test_true_pfm: 1670.2602732592838 sim_pfm: -239.5224427956467
episode: 528 training return: tensor(-141.4422, device='cuda:0')
episode: 529 training return: tensor(-293.0139, device='cuda:0')
episode: 530 training return: tensor(-209.7273, device='cuda:0')
episode: 531 training return: tensor(-355.5241, device='cuda:0')
epoch: 133 test_true_pfm: 2365.4951632045377 sim_pfm: -10.687677831932282
episode: 532 training return: tensor(-98.2702, device='cuda:0')
episode: 533 training return: tensor(-358.1232, device='cuda:0')
episode: 534 training return: tensor(-358.2690, device='cuda:0')
episode: 535 training return: tensor(-272.0795, device='cuda:0')
epoch: 134 test_true_pfm: 1903.5457694865233 sim_pfm: -26.261195042092975
episode: 536 training return: tensor(-210.8774, device='cuda:0')
episode: 537 training return: tensor(-403.9561, device='cuda:0')
episode: 538 training return: tensor(-103.1864, device='cuda:0')
episode: 539 training return: tensor(-152.4855, device='cuda:0')
epoch: 135 test_true_pfm: 1954.2307632992997 sim_pfm: -207.0581533483928
episode: 540 training return: tensor(-55.2268, device='cuda:0')
episode: 541 training return: tensor(-95.6134, device='cuda:0')
episode: 542 training return: tensor(35.7903, device='cuda:0')
episode: 543 training return: tensor(44.0513, device='cuda:0')
epoch: 136 test_true_pfm: 2348.499247338155 sim_pfm: -92.55660554937397
episode: 544 training return: tensor(-404.7898, device='cuda:0')
episode: 545 training return: tensor(-338.5833, device='cuda:0')
episode: 546 training return: tensor(-124.5344, device='cuda:0')
episode: 547 training return: tensor(-238.8184, device='cuda:0')
epoch: 137 test_true_pfm: 2270.02900556833 sim_pfm: -221.31628782954067
episode: 548 training return: tensor(-344.1367, device='cuda:0')
episode: 549 training return: tensor(-352.9316, device='cuda:0')
episode: 550 training return: tensor(254.8996, device='cuda:0')
episode: 551 training return: tensor(-326.3103, device='cuda:0')
epoch: 138 test_true_pfm: 2413.185300043739 sim_pfm: 97.4007435523672
episode: 552 training return: tensor(-128.8282, device='cuda:0')
episode: 553 training return: tensor(-220.7888, device='cuda:0')
episode: 554 training return: tensor(-303.0696, device='cuda:0')
episode: 555 training return: tensor(239.5065, device='cuda:0')
epoch: 139 test_true_pfm: 2221.407969647816 sim_pfm: -210.28091306084147
episode: 556 training return: tensor(193.1594, device='cuda:0')
episode: 557 training return: tensor(-313.0402, device='cuda:0')
episode: 558 training return: tensor(-391.9115, device='cuda:0')
episode: 559 training return: tensor(-355.0865, device='cuda:0')
epoch: 140 test_true_pfm: 1652.3332199250244 sim_pfm: -124.43239424206938
episode: 560 training return: tensor(-327.0110, device='cuda:0')
episode: 561 training return: tensor(111.1446, device='cuda:0')
episode: 562 training return: tensor(96.7265, device='cuda:0')
episode: 563 training return: tensor(-342.6749, device='cuda:0')
epoch: 141 test_true_pfm: 1700.696868214946 sim_pfm: -235.9944962890198
episode: 564 training return: tensor(-152.6051, device='cuda:0')
episode: 565 training return: tensor(-317.7856, device='cuda:0')
episode: 566 training return: tensor(-75.6947, device='cuda:0')
episode: 567 training return: tensor(241.0326, device='cuda:0')
epoch: 142 test_true_pfm: 2728.470595534927 sim_pfm: -78.28095690339494
episode: 568 training return: tensor(-148.6577, device='cuda:0')
episode: 569 training return: tensor(-342.4469, device='cuda:0')
episode: 570 training return: tensor(-267.0876, device='cuda:0')
episode: 571 training return: tensor(-224.7661, device='cuda:0')
epoch: 143 test_true_pfm: 1771.4648568797068 sim_pfm: -162.3656840514935
episode: 572 training return: tensor(-350.0637, device='cuda:0')
episode: 573 training return: tensor(-396.5411, device='cuda:0')
episode: 574 training return: tensor(297.1828, device='cuda:0')
episode: 575 training return: tensor(-281.5381, device='cuda:0')
epoch: 144 test_true_pfm: 1767.413773165741 sim_pfm: -245.4136326632773
episode: 576 training return: tensor(-344.9739, device='cuda:0')
episode: 577 training return: tensor(-328.1586, device='cuda:0')
episode: 578 training return: tensor(-370.0764, device='cuda:0')
episode: 579 training return: tensor(-355.2564, device='cuda:0')
epoch: 145 test_true_pfm: 2295.905657435705 sim_pfm: -178.93956165698668
episode: 580 training return: tensor(-209.7687, device='cuda:0')
episode: 581 training return: tensor(-419.0803, device='cuda:0')
episode: 582 training return: tensor(-339.1604, device='cuda:0')
episode: 583 training return: tensor(153.8771, device='cuda:0')
epoch: 146 test_true_pfm: 2866.662513611825 sim_pfm: -59.18315862926344
episode: 584 training return: tensor(-236.2862, device='cuda:0')
episode: 585 training return: tensor(-275.8018, device='cuda:0')
episode: 586 training return: tensor(-265.5234, device='cuda:0')
episode: 587 training return: tensor(-171.2704, device='cuda:0')
epoch: 147 test_true_pfm: 1809.8618258976833 sim_pfm: -65.11323397121548
episode: 588 training return: tensor(-392.5727, device='cuda:0')
episode: 589 training return: tensor(247.7546, device='cuda:0')
episode: 590 training return: tensor(-100.2283, device='cuda:0')
episode: 591 training return: tensor(-8.3054, device='cuda:0')
epoch: 148 test_true_pfm: 1966.2749056744262 sim_pfm: -68.541828891146
episode: 592 training return: tensor(-251.3831, device='cuda:0')
episode: 593 training return: tensor(-311.7404, device='cuda:0')
episode: 594 training return: tensor(-135.1724, device='cuda:0')
episode: 595 training return: tensor(235.9411, device='cuda:0')
epoch: 149 test_true_pfm: 2770.773182617146 sim_pfm: 128.67065852570036
episode: 596 training return: tensor(-163.0538, device='cuda:0')
episode: 597 training return: tensor(-348.6447, device='cuda:0')
episode: 598 training return: tensor(-192.1800, device='cuda:0')
episode: 599 training return: tensor(-400.4238, device='cuda:0')
epoch: 150 test_true_pfm: 1769.099769358679 sim_pfm: -251.56269274155298
