['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '0']
epoch: 0 training_loss 0.4285514585673809 test_loss: 0.33136835098266604
epoch: 1 training_loss 0.286974625736475 test_loss: 0.29857215881347654
epoch: 2 training_loss 0.25866259723901747 test_loss: 0.2549133777618408
epoch: 3 training_loss 0.24262841284275055 test_loss: 0.22959399223327637
epoch: 4 training_loss 0.22811692230403424 test_loss: 0.23563342094421386
epoch: 5 training_loss 0.22562441118061544 test_loss: 0.22195844650268554
epoch: 6 training_loss 0.251972743421793 test_loss: 0.23391153812408447
epoch: 7 training_loss 0.21658328413963318 test_loss: 0.23141129016876222
epoch: 8 training_loss 0.21401931583881378 test_loss: 0.216493558883667
epoch: 9 training_loss 0.2304594771564007 test_loss: 0.21639673709869384
epoch: 10 training_loss 0.2191057950258255 test_loss: 0.2570145606994629
epoch: 11 training_loss 0.21652714148163796 test_loss: 0.21910219192504882
epoch: 12 training_loss 0.2169893530011177 test_loss: 0.2217254877090454
epoch: 13 training_loss 0.21814423888921738 test_loss: 0.21276466846466063
epoch: 14 training_loss 0.21657841071486472 test_loss: 0.22259213924407958
epoch: 15 training_loss 0.20176280550658704 test_loss: 0.21392340660095216
epoch: 16 training_loss 0.21747937098145484 test_loss: 0.2050320863723755
epoch: 17 training_loss 0.21459711089730263 test_loss: 0.193539035320282
epoch: 18 training_loss 0.21315548725426198 test_loss: 0.19541462659835815
epoch: 19 training_loss 0.20517179638147354 test_loss: 0.21219539642333984
epoch: 20 training_loss 0.20673919275403022 test_loss: 0.2200321912765503
epoch: 21 training_loss 0.2020784854888916 test_loss: 0.21245169639587402
epoch: 22 training_loss 0.20897425279021264 test_loss: 0.20845799446105956
epoch: 23 training_loss 0.21224489465355872 test_loss: 0.21266136169433594
epoch: 24 training_loss 0.21818600237369537 test_loss: 0.2106032133102417
epoch: 25 training_loss 0.20206259675323962 test_loss: 0.19752403497695922
epoch: 26 training_loss 0.21329211309552193 test_loss: 0.22803118228912353
epoch: 27 training_loss 0.20050763458013535 test_loss: 0.19849767684936523
epoch: 28 training_loss 0.1955791898071766 test_loss: 0.20989663600921632
epoch: 29 training_loss 0.19449241429567338 test_loss: 0.20298707485198975
epoch: 30 training_loss 0.20282413929700852 test_loss: 0.1914210319519043
epoch: 31 training_loss 0.20151854909956454 test_loss: 0.21608877182006836
epoch: 32 training_loss 0.196171168833971 test_loss: 0.21797072887420654
epoch: 33 training_loss 0.19603715017437934 test_loss: 0.2014087438583374
epoch: 34 training_loss 0.1984649431705475 test_loss: 0.20512003898620607
epoch: 35 training_loss 0.1943887734413147 test_loss: 0.2055884838104248
epoch: 36 training_loss 0.19768141850829124 test_loss: 0.2410809278488159
epoch: 37 training_loss 0.20705699227750302 test_loss: 0.17586325407028197
epoch: 38 training_loss 0.2008820990473032 test_loss: 0.2006401777267456
epoch: 39 training_loss 0.19342420876026153 test_loss: 0.1885944128036499
epoch: 40 training_loss 0.19368479892611504 test_loss: 0.2083596706390381
epoch: 41 training_loss 0.19676852107048035 test_loss: 0.19770323038101195
epoch: 42 training_loss 0.20237563572824002 test_loss: 0.18482874631881713
epoch: 43 training_loss 0.20160872422158718 test_loss: 0.20502727031707763
epoch: 44 training_loss 0.19797710582613945 test_loss: 0.1848146677017212
epoch: 45 training_loss 0.1875964692234993 test_loss: 0.1879764676094055
epoch: 46 training_loss 0.1929183527827263 test_loss: 0.20784165859222412
epoch: 47 training_loss 0.19337313741445541 test_loss: 0.20885930061340333
epoch: 48 training_loss 0.19737018704414366 test_loss: 0.21245439052581788
epoch: 49 training_loss 0.18600432343780995 test_loss: 0.18794946670532225
epoch: 50 training_loss 0.1983396925777197 test_loss: 0.19055033922195436
epoch: 51 training_loss 0.19314745858311652 test_loss: 0.18457850217819213
epoch: 52 training_loss 0.1978593886643648 test_loss: 0.19497538805007936
epoch: 53 training_loss 0.19741426788270475 test_loss: 0.187510883808136
epoch: 54 training_loss 0.18503215834498404 test_loss: 0.18719438314437867
epoch: 55 training_loss 0.19038952261209488 test_loss: 0.18485870361328124
epoch: 56 training_loss 0.18562964096665382 test_loss: 0.18264588117599487
epoch: 57 training_loss 0.18715810500085353 test_loss: 0.19487749338150023
epoch: 58 training_loss 0.1976106620579958 test_loss: 0.175160014629364
epoch: 59 training_loss 0.2000961972028017 test_loss: 0.19111218452453613
epoch: 60 training_loss 0.19647671535611153 test_loss: 0.20332257747650145
epoch: 61 training_loss 0.18973777644336223 test_loss: 0.19042686223983765
epoch: 62 training_loss 0.19383480817079543 test_loss: 0.18963063955307008
epoch: 63 training_loss 0.187559874355793 test_loss: 0.19734817743301392
epoch: 64 training_loss 0.19452956765890123 test_loss: 0.18827515840530396
epoch: 65 training_loss 0.18235186368227005 test_loss: 0.21854090690612793
epoch: 66 training_loss 0.19660650730133056 test_loss: 0.2139051675796509
epoch: 67 training_loss 0.1888121944665909 test_loss: 0.1796170473098755
epoch: 68 training_loss 0.18723185442388057 test_loss: 0.17616312503814696
epoch: 69 training_loss 0.19064679376780988 test_loss: 0.18076037168502807
epoch: 70 training_loss 0.19117062643170357 test_loss: 0.18853271007537842
epoch: 71 training_loss 0.17884466722607611 test_loss: 0.21227500438690186
epoch: 72 training_loss 0.18582590773701668 test_loss: 0.19287872314453125
epoch: 73 training_loss 0.18721625231206418 test_loss: 0.2077033042907715
epoch: 74 training_loss 0.19110983461141587 test_loss: 0.2026909589767456
epoch: 75 training_loss 0.18923107236623765 test_loss: 0.2032027006149292
epoch: 76 training_loss 0.185814925506711 test_loss: 0.1843735694885254
epoch: 77 training_loss 0.18635723903775214 test_loss: 0.20379228591918946
epoch: 78 training_loss 0.1843114260584116 test_loss: 0.1948699474334717
epoch: 79 training_loss 0.18324627302587032 test_loss: 0.19920718669891357
epoch: 80 training_loss 0.18489879585802554 test_loss: 0.1868203639984131
epoch: 81 training_loss 0.19235675498843194 test_loss: 0.19988813400268554
epoch: 82 training_loss 0.1781619591265917 test_loss: 0.18342479467391967
epoch: 83 training_loss 0.18267045095562934 test_loss: 0.19828007221221924
epoch: 84 training_loss 0.1886708477884531 test_loss: 0.1932311773300171
epoch: 85 training_loss 0.18653536386787892 test_loss: 0.19008338451385498
epoch: 86 training_loss 0.18901769809424876 test_loss: 0.1826043486595154
epoch: 87 training_loss 0.18926573626697063 test_loss: 0.17401812076568604
epoch: 88 training_loss 0.18726722180843353 test_loss: 0.1902977705001831
epoch: 89 training_loss 0.18023825511336328 test_loss: 0.19754565954208375
epoch: 90 training_loss 0.18858224220573902 test_loss: 0.18306039571762084
epoch: 91 training_loss 0.1891034848242998 test_loss: 0.18694381713867186
epoch: 92 training_loss 0.1805031581968069 test_loss: 0.19331873655319215
epoch: 93 training_loss 0.18095988921821118 test_loss: 0.17708754539489746
epoch: 94 training_loss 0.17848931185901165 test_loss: 0.18282160758972169
epoch: 95 training_loss 0.19242766812443735 test_loss: 0.18229525089263915
epoch: 96 training_loss 0.1829338487237692 test_loss: 0.1753036379814148
epoch: 97 training_loss 0.180933353677392 test_loss: 0.17525871992111205
epoch: 98 training_loss 0.1977995177358389 test_loss: 0.1872578740119934
epoch: 99 training_loss 0.1823104800283909 test_loss: 0.20059025287628174
epoch: 100 training_loss 0.18538894735276698 test_loss: 0.20686964988708495
epoch: 101 training_loss 0.18758455060422422 test_loss: 0.1861305356025696
epoch: 102 training_loss 0.1882692576944828 test_loss: 0.1645285964012146
epoch: 103 training_loss 0.18810466788709163 test_loss: 0.2129150629043579
epoch: 104 training_loss 0.1876722140610218 test_loss: 0.18465452194213866
epoch: 105 training_loss 0.1859716759622097 test_loss: 0.18669562339782714
epoch: 106 training_loss 0.19166383460164071 test_loss: 0.1866668939590454
epoch: 107 training_loss 0.19200625956058504 test_loss: 0.18997451066970825
epoch: 108 training_loss 0.18206585116684437 test_loss: 0.18163293600082397
epoch: 109 training_loss 0.17345372200012207 test_loss: 0.1732447028160095
epoch: 110 training_loss 0.18126798003911973 test_loss: 0.19681414365768432
epoch: 111 training_loss 0.1863806702941656 test_loss: 0.19521574974060057
epoch: 112 training_loss 0.1924789323657751 test_loss: 0.18176251649856567
epoch: 113 training_loss 0.18784051664173604 test_loss: 0.19203712940216064
epoch: 114 training_loss 0.18452961191534997 test_loss: 0.18482850790023803
epoch: 115 training_loss 0.1887716145813465 test_loss: 0.212060809135437
epoch: 116 training_loss 0.18008801095187665 test_loss: 0.19115697145462035
epoch: 117 training_loss 0.18475525505840779 test_loss: 0.20160412788391113
epoch: 118 training_loss 0.186648925319314 test_loss: 0.1865386962890625
epoch: 119 training_loss 0.18544775739312172 test_loss: 0.1821411967277527
epoch: 120 training_loss 0.18604742906987667 test_loss: 0.20238239765167237
epoch: 121 training_loss 0.1768597251921892 test_loss: 0.19162313938140868
epoch: 122 training_loss 0.18727741487324237 test_loss: 0.18781001567840577
epoch: 123 training_loss 0.18666967660188674 test_loss: 0.18654651641845704
epoch: 124 training_loss 0.17257486909627914 test_loss: 0.17692574262619018
epoch: 125 training_loss 0.18015628211200238 test_loss: 0.18864940404891967
epoch: 126 training_loss 0.1818623860925436 test_loss: 0.17909005880355836
epoch: 127 training_loss 0.17970726549625396 test_loss: 0.17648627758026122
epoch: 128 training_loss 0.17431986674666405 test_loss: 0.19329297542572021
epoch: 129 training_loss 0.18228900782763957 test_loss: 0.1816152811050415
epoch: 130 training_loss 0.18368959940969945 test_loss: 0.19088737964630126
epoch: 131 training_loss 0.18491938903927804 test_loss: 0.18892792463302613
epoch: 132 training_loss 0.17824781984090804 test_loss: 0.19015222787857056
epoch: 133 training_loss 0.1809368873387575 test_loss: 0.19272249937057495
epoch: 134 training_loss 0.1852591608464718 test_loss: 0.17706388235092163
epoch: 135 training_loss 0.183737068772316 test_loss: 0.20089001655578614
epoch: 136 training_loss 0.19058763951063157 test_loss: 0.1991802453994751
epoch: 137 training_loss 0.178897500410676 test_loss: 0.2015754461288452
epoch: 138 training_loss 0.18338959977030755 test_loss: 0.1855007529258728
epoch: 139 training_loss 0.17831403322517872 test_loss: 0.19083354473114014
epoch: 140 training_loss 0.1787873362749815 test_loss: 0.1962185502052307
epoch: 141 training_loss 0.18269998528063297 test_loss: 0.18920062780380248
epoch: 142 training_loss 0.18363302506506443 test_loss: 0.17623865604400635
epoch: 143 training_loss 0.1811497589945793 test_loss: 0.17510535717010497
epoch: 144 training_loss 0.183797123208642 test_loss: 0.17934658527374267
epoch: 145 training_loss 0.17597992420196534 test_loss: 0.1786176323890686
epoch: 146 training_loss 0.18501423366367817 test_loss: 0.17560372352600098
epoch: 147 training_loss 0.1864117692410946 test_loss: 0.18440667390823365
epoch: 148 training_loss 0.18358300790190696 test_loss: 0.17080734968185424
epoch: 149 training_loss 0.18089979477226734 test_loss: 0.16759494543075562
epoch: 0 training_loss 45.149374122619626 test_loss: 25.481230163574217
epoch: 1 training_loss 20.82218179702759 test_loss: 18.44042053222656
epoch: 2 training_loss 16.000634174346924 test_loss: 14.417701721191406
epoch: 3 training_loss 13.263960466384887 test_loss: 12.238169860839843
epoch: 4 training_loss 11.775014677047729 test_loss: 11.532479095458985
epoch: 5 training_loss 10.739803142547608 test_loss: 10.193273162841797
epoch: 6 training_loss 9.990141830444337 test_loss: 9.528179931640626
epoch: 7 training_loss 9.357623052597045 test_loss: 8.79595718383789
epoch: 8 training_loss 8.749785118103027 test_loss: 8.474566650390624
epoch: 9 training_loss 8.203773612976073 test_loss: 7.857286071777343
epoch: 10 training_loss 7.6759274244308475 test_loss: 8.317819213867187
epoch: 11 training_loss 7.494771165847778 test_loss: 7.409650421142578
epoch: 12 training_loss 7.112482924461364 test_loss: 6.8336036682128904
epoch: 13 training_loss 6.799576997756958 test_loss: 6.5698486328125
epoch: 14 training_loss 6.727537565231323 test_loss: 6.860198211669922
epoch: 15 training_loss 6.622581825256348 test_loss: 6.692668914794922
epoch: 16 training_loss 6.416117372512818 test_loss: 6.377113723754883
epoch: 17 training_loss 6.23384051322937 test_loss: 6.032803344726562
epoch: 18 training_loss 6.087094206809997 test_loss: 5.664521789550781
epoch: 19 training_loss 5.790074510574341 test_loss: 5.987050628662109
epoch: 20 training_loss 5.819301862716674 test_loss: 5.687782669067383
epoch: 21 training_loss 5.613546710014344 test_loss: 5.485298156738281
epoch: 22 training_loss 5.461204137802124 test_loss: 5.808420944213867
epoch: 23 training_loss 5.375138745307923 test_loss: 5.286316680908203
epoch: 24 training_loss 5.360000667572021 test_loss: 5.611063003540039
epoch: 25 training_loss 5.213064067363739 test_loss: 5.062903213500976
epoch: 26 training_loss 5.066794476509094 test_loss: 5.041679382324219
epoch: 27 training_loss 5.0259946656227115 test_loss: 4.852180480957031
epoch: 28 training_loss 5.020571553707123 test_loss: 4.863388442993164
epoch: 29 training_loss 4.836858422756195 test_loss: 4.853759384155273
epoch: 30 training_loss 4.858177585601807 test_loss: 4.648006820678711
epoch: 31 training_loss 4.9962217807769775 test_loss: 4.886434555053711
epoch: 32 training_loss 4.76593600988388 test_loss: 4.556626892089843
epoch: 33 training_loss 4.703957993984222 test_loss: 4.543957138061524
epoch: 34 training_loss 4.642228393554688 test_loss: 4.621430206298828
epoch: 35 training_loss 4.5685253739356995 test_loss: 4.283039855957031
epoch: 36 training_loss 4.3627601552009585 test_loss: 4.267828369140625
epoch: 37 training_loss 4.301512217521667 test_loss: 4.71021614074707
epoch: 38 training_loss 4.303702640533447 test_loss: 4.163651657104492
epoch: 39 training_loss 4.424328238964081 test_loss: 4.7265270233154295
epoch: 40 training_loss 4.192238101959228 test_loss: 4.087973785400391
epoch: 41 training_loss 4.089308347702026 test_loss: 4.021517562866211
epoch: 42 training_loss 4.076295545101166 test_loss: 4.198930740356445
epoch: 43 training_loss 4.096303238868713 test_loss: 3.946067047119141
epoch: 44 training_loss 4.12804360628128 test_loss: 3.9023475646972656
epoch: 45 training_loss 4.170513224601746 test_loss: 3.796731948852539
epoch: 46 training_loss 3.9508374094963075 test_loss: 4.095607757568359
epoch: 47 training_loss 3.9909628438949585 test_loss: 3.751593017578125
epoch: 48 training_loss 3.9454950666427613 test_loss: 3.818528747558594
epoch: 49 training_loss 3.8711870098114014 test_loss: 3.619739532470703
epoch: 50 training_loss 3.667987549304962 test_loss: 4.126092910766602
epoch: 51 training_loss 3.8161388397216798 test_loss: 3.6665287017822266
epoch: 52 training_loss 3.8255899786949157 test_loss: 3.648681640625
epoch: 53 training_loss 3.631110632419586 test_loss: 3.6117183685302736
epoch: 54 training_loss 3.7025301027297974 test_loss: 3.568703842163086
epoch: 55 training_loss 3.627016320228577 test_loss: 3.4699356079101564
epoch: 56 training_loss 3.7583859181404113 test_loss: 3.7476222991943358
epoch: 57 training_loss 3.5201394176483154 test_loss: 3.4187637329101563
epoch: 58 training_loss 3.6007270574569703 test_loss: 3.5542831420898438
epoch: 59 training_loss 3.4522218918800354 test_loss: 3.425587844848633
epoch: 60 training_loss 3.3951104879379272 test_loss: 3.540562057495117
epoch: 61 training_loss 3.360784845352173 test_loss: 3.470253753662109
epoch: 62 training_loss 3.3393323493003844 test_loss: 3.2549381256103516
epoch: 63 training_loss 3.3476497864723207 test_loss: 3.8161216735839845
epoch: 64 training_loss 3.425724949836731 test_loss: 3.279506301879883
epoch: 65 training_loss 3.2498285293579103 test_loss: 3.3313972473144533
epoch: 66 training_loss 3.217760536670685 test_loss: 3.1323619842529298
epoch: 67 training_loss 3.1058254432678223 test_loss: 3.063519096374512
epoch: 68 training_loss 3.05108571767807 test_loss: 2.930950164794922
epoch: 69 training_loss 3.122277042865753 test_loss: 3.0646570205688475
epoch: 70 training_loss 3.034791626930237 test_loss: 3.1848289489746096
epoch: 71 training_loss 3.0349872171878816 test_loss: 2.964018630981445
epoch: 72 training_loss 3.0680281376838683 test_loss: 3.1695125579833983
epoch: 73 training_loss 3.13051518201828 test_loss: 3.1275022506713865
epoch: 74 training_loss 2.8510324931144715 test_loss: 2.9305370330810545
epoch: 75 training_loss 2.9870057034492494 test_loss: 3.0281240463256838
epoch: 76 training_loss 3.052815840244293 test_loss: 3.0886058807373047
epoch: 77 training_loss 2.97491916179657 test_loss: 3.198501968383789
epoch: 78 training_loss 2.825548506975174 test_loss: 2.62158088684082
epoch: 79 training_loss 2.7944851541519165 test_loss: 2.955356788635254
epoch: 80 training_loss 2.8168190014362335 test_loss: 2.801082420349121
epoch: 81 training_loss 3.0316726088523867 test_loss: 3.4005252838134767
epoch: 82 training_loss 2.833793225288391 test_loss: 2.872454833984375
epoch: 83 training_loss 2.806765434741974 test_loss: 2.919361686706543
epoch: 84 training_loss 2.745634274482727 test_loss: 3.5913311004638673
epoch: 85 training_loss 2.8529136562347412 test_loss: 2.783173179626465
epoch: 86 training_loss 2.7129277324676515 test_loss: 2.588962364196777
epoch: 87 training_loss 2.8180682611465455 test_loss: 2.7260568618774412
epoch: 88 training_loss 2.756211495399475 test_loss: 2.979702949523926
epoch: 89 training_loss 2.760055572986603 test_loss: 2.751745414733887
epoch: 90 training_loss 2.746168451309204 test_loss: 3.020206642150879
epoch: 91 training_loss 2.7285406720638274 test_loss: 2.78171329498291
epoch: 92 training_loss 2.6860339546203615 test_loss: 2.6197383880615233
epoch: 93 training_loss 2.8043360102176664 test_loss: 3.35565299987793
epoch: 94 training_loss 2.8280892038345335 test_loss: 2.5557430267333983
epoch: 95 training_loss 2.596154571771622 test_loss: 2.6909103393554688
epoch: 96 training_loss 2.7096033668518067 test_loss: 2.493740272521973
epoch: 97 training_loss 2.535910999774933 test_loss: 2.7192476272583006
epoch: 98 training_loss 2.5249354600906373 test_loss: 3.062565040588379
epoch: 99 training_loss 2.6010723340511324 test_loss: 2.9145572662353514
epoch: 100 training_loss 2.5861260056495667 test_loss: 2.433490753173828
epoch: 101 training_loss 2.522526661157608 test_loss: 2.788121223449707
epoch: 102 training_loss 2.5515909135341643 test_loss: 2.402780532836914
epoch: 103 training_loss 2.5305464935302733 test_loss: 2.4658260345458984
epoch: 104 training_loss 2.444876333475113 test_loss: 2.3454355239868163
epoch: 105 training_loss 2.528516322374344 test_loss: 2.6457141876220702
epoch: 106 training_loss 2.6865316104888914 test_loss: 2.6093509674072264
epoch: 107 training_loss 2.4719849288463593 test_loss: 2.4797975540161135
epoch: 108 training_loss 2.5606713342666625 test_loss: 2.5957040786743164
epoch: 109 training_loss 2.3978776752948763 test_loss: 2.3013755798339846
epoch: 110 training_loss 2.5821457147598266 test_loss: 2.2236474990844726
epoch: 111 training_loss 2.5638573431968688 test_loss: 2.6671892166137696
epoch: 112 training_loss 2.5537311208248137 test_loss: 2.4295679092407227
epoch: 113 training_loss 2.3647778832912447 test_loss: 2.6171123504638674
epoch: 114 training_loss 2.4311911141872407 test_loss: 2.3986509323120115
epoch: 115 training_loss 2.41760964512825 test_loss: 2.544436454772949
epoch: 116 training_loss 2.7437088775634764 test_loss: 2.3708160400390623
epoch: 117 training_loss 2.477523707151413 test_loss: 2.5807966232299804
epoch: 118 training_loss 2.4036465096473694 test_loss: 2.3695985794067385
epoch: 119 training_loss 2.3610109758377074 test_loss: 2.3446155548095704
epoch: 120 training_loss 2.306344573497772 test_loss: 2.314894676208496
epoch: 121 training_loss 2.2928872108459473 test_loss: 2.1886611938476563
epoch: 122 training_loss 2.372982325553894 test_loss: 2.342243957519531
epoch: 123 training_loss 2.263455810546875 test_loss: 2.2723012924194337
epoch: 124 training_loss 2.298462530374527 test_loss: 2.2922849655151367
epoch: 125 training_loss 2.3014236164093016 test_loss: 2.760148620605469
epoch: 126 training_loss 2.32281334400177 test_loss: 2.5922609329223634
epoch: 127 training_loss 2.3576098895072937 test_loss: 2.6583559036254885
epoch: 128 training_loss 2.341423316001892 test_loss: 2.4238277435302735
epoch: 129 training_loss 2.261068230867386 test_loss: 2.3043107986450195
epoch: 130 training_loss 2.173332016468048 test_loss: 2.030767250061035
epoch: 131 training_loss 2.301919673681259 test_loss: 1.9914318084716798
epoch: 132 training_loss 2.2841466426849366 test_loss: 2.242156219482422
epoch: 133 training_loss 2.321244715452194 test_loss: 2.607578659057617
epoch: 134 training_loss 2.393430424928665 test_loss: 2.1258098602294924
epoch: 135 training_loss 2.1714883136749266 test_loss: 2.2236419677734376
epoch: 136 training_loss 2.190748049020767 test_loss: 1.993635368347168
epoch: 137 training_loss 2.1866328752040864 test_loss: 2.020615005493164
epoch: 138 training_loss 2.0709228253364564 test_loss: 2.161427307128906
epoch: 139 training_loss 2.2207035517692564 test_loss: 1.8520509719848632
epoch: 140 training_loss 2.309717940092087 test_loss: 2.1583379745483398
epoch: 141 training_loss 2.0925354301929473 test_loss: 2.4312417984008787
epoch: 142 training_loss 2.0536025547981263 test_loss: 2.069313812255859
epoch: 143 training_loss 2.1582057797908782 test_loss: 2.396780776977539
epoch: 144 training_loss 2.0322718584537505 test_loss: 1.9456398010253906
epoch: 145 training_loss 2.0261188447475433 test_loss: 2.0955785751342773
epoch: 146 training_loss 1.9891673350334167 test_loss: 1.9072004318237306
epoch: 147 training_loss 2.0781565535068514 test_loss: 1.9481225967407227
epoch: 148 training_loss 1.9570892119407655 test_loss: 1.8589555740356445
epoch: 149 training_loss 2.0780232203006745 test_loss: 1.9560731887817382
62.17876540985903
episode: 0 training return: tensor(-178.6770, device='cuda:0')
episode: 1 training return: tensor(-213.5605, device='cuda:0')
episode: 2 training return: tensor(-214.0567, device='cuda:0')
episode: 3 training return: tensor(-59.2079, device='cuda:0')
epoch: 1 test_true_pfm: 65.99351813294656 sim_pfm: -164.28546968880693
episode: 4 training return: tensor(-77.0053, device='cuda:0')
episode: 5 training return: tensor(-85.5301, device='cuda:0')
episode: 6 training return: tensor(-216.1318, device='cuda:0')
episode: 7 training return: tensor(-147.2540, device='cuda:0')
epoch: 2 test_true_pfm: 65.92401462946495 sim_pfm: -145.61374776493759
episode: 8 training return: tensor(-218.5287, device='cuda:0')
episode: 9 training return: tensor(-207.6755, device='cuda:0')
episode: 10 training return: tensor(-76.5197, device='cuda:0')
episode: 11 training return: tensor(-202.3687, device='cuda:0')
epoch: 3 test_true_pfm: 67.12816910254742 sim_pfm: -113.38597901195753
episode: 12 training return: tensor(-41.3309, device='cuda:0')
episode: 13 training return: tensor(-124.2825, device='cuda:0')
episode: 14 training return: tensor(-217.0335, device='cuda:0')
episode: 15 training return: tensor(-170.2270, device='cuda:0')
epoch: 4 test_true_pfm: 54.28785353982361 sim_pfm: -148.50321657141905
episode: 16 training return: tensor(-144.9538, device='cuda:0')
episode: 17 training return: tensor(-194.5243, device='cuda:0')
episode: 18 training return: tensor(-92.8154, device='cuda:0')
episode: 19 training return: tensor(-198.7779, device='cuda:0')
epoch: 5 test_true_pfm: 55.78468640568336 sim_pfm: -150.4204647430917
episode: 20 training return: tensor(-61.3465, device='cuda:0')
episode: 21 training return: tensor(-147.0766, device='cuda:0')
episode: 22 training return: tensor(-65.5935, device='cuda:0')
episode: 23 training return: tensor(-185.7265, device='cuda:0')
epoch: 6 test_true_pfm: 54.49292805783513 sim_pfm: -134.7663716002018
episode: 24 training return: tensor(-117.1807, device='cuda:0')
episode: 25 training return: tensor(-165.6421, device='cuda:0')
episode: 26 training return: tensor(-142.6660, device='cuda:0')
episode: 27 training return: tensor(-124.6797, device='cuda:0')
epoch: 7 test_true_pfm: 58.54279034429885 sim_pfm: -125.91493926291005
episode: 28 training return: tensor(-210.8623, device='cuda:0')
episode: 29 training return: tensor(-141.9287, device='cuda:0')
episode: 30 training return: tensor(-221.0638, device='cuda:0')
episode: 31 training return: tensor(-198.5573, device='cuda:0')
epoch: 8 test_true_pfm: 55.59752609781276 sim_pfm: -183.07585746722762
episode: 32 training return: tensor(-201.4429, device='cuda:0')
episode: 33 training return: tensor(-201.1957, device='cuda:0')
episode: 34 training return: tensor(-145.7194, device='cuda:0')
episode: 35 training return: tensor(-153.2699, device='cuda:0')
epoch: 9 test_true_pfm: 59.778026022095744 sim_pfm: -149.0584104233072
episode: 36 training return: tensor(-193.2576, device='cuda:0')
episode: 37 training return: tensor(-109.2916, device='cuda:0')
episode: 38 training return: tensor(-98.5542, device='cuda:0')
episode: 39 training return: tensor(-129.6395, device='cuda:0')
epoch: 10 test_true_pfm: 53.82422267952537 sim_pfm: -178.33996691869106
episode: 40 training return: tensor(-190.8412, device='cuda:0')
episode: 41 training return: tensor(-159.2398, device='cuda:0')
episode: 42 training return: tensor(-182.5538, device='cuda:0')
episode: 43 training return: tensor(-153.4685, device='cuda:0')
epoch: 11 test_true_pfm: 52.28130004120394 sim_pfm: -172.1327256756136
episode: 44 training return: tensor(-191.5425, device='cuda:0')
episode: 45 training return: tensor(-199.6588, device='cuda:0')
episode: 46 training return: tensor(-219.3454, device='cuda:0')
episode: 47 training return: tensor(-214.2650, device='cuda:0')
epoch: 12 test_true_pfm: 60.431994110694724 sim_pfm: -151.276158564887
episode: 48 training return: tensor(-72.2034, device='cuda:0')
episode: 49 training return: tensor(-146.0018, device='cuda:0')
episode: 50 training return: tensor(-188.6233, device='cuda:0')
episode: 51 training return: tensor(-114.3343, device='cuda:0')
epoch: 13 test_true_pfm: 56.3216312518981 sim_pfm: -173.60424645629246
episode: 52 training return: tensor(-182.9390, device='cuda:0')
episode: 53 training return: tensor(-59.9137, device='cuda:0')
episode: 54 training return: tensor(-191.7437, device='cuda:0')
episode: 55 training return: tensor(-216.2251, device='cuda:0')
epoch: 14 test_true_pfm: 54.24914872805283 sim_pfm: -104.87868074386498
episode: 56 training return: tensor(-73.8734, device='cuda:0')
episode: 57 training return: tensor(-85.3873, device='cuda:0')
episode: 58 training return: tensor(-70.1060, device='cuda:0')
episode: 59 training return: tensor(-144.2167, device='cuda:0')
epoch: 15 test_true_pfm: 54.05901158510035 sim_pfm: -73.45344413425774
episode: 60 training return: tensor(-78.3664, device='cuda:0')
episode: 61 training return: tensor(-57.8050, device='cuda:0')
episode: 62 training return: tensor(-197.6268, device='cuda:0')
episode: 63 training return: tensor(-141.5263, device='cuda:0')
epoch: 16 test_true_pfm: 62.189705348516 sim_pfm: -109.62020262172446
episode: 64 training return: tensor(-68.5666, device='cuda:0')
episode: 65 training return: tensor(-67.0618, device='cuda:0')
episode: 66 training return: tensor(-182.8228, device='cuda:0')
episode: 67 training return: tensor(-186.5117, device='cuda:0')
epoch: 17 test_true_pfm: 53.613986241078194 sim_pfm: -165.91013243478955
episode: 68 training return: tensor(-132.2711, device='cuda:0')
episode: 69 training return: tensor(-132.3553, device='cuda:0')
episode: 70 training return: tensor(-146.0016, device='cuda:0')
episode: 71 training return: tensor(-190.1612, device='cuda:0')
epoch: 18 test_true_pfm: 65.62473690792629 sim_pfm: -106.50074531540159
episode: 72 training return: tensor(-106.4937, device='cuda:0')
episode: 73 training return: tensor(-189.8034, device='cuda:0')
episode: 74 training return: tensor(-61.6670, device='cuda:0')
episode: 75 training return: tensor(-138.2288, device='cuda:0')
epoch: 19 test_true_pfm: 50.37245523287874 sim_pfm: -167.18640477659648
episode: 76 training return: tensor(-185.9913, device='cuda:0')
episode: 77 training return: tensor(-102.6855, device='cuda:0')
episode: 78 training return: tensor(-86.5580, device='cuda:0')
episode: 79 training return: tensor(-125.3359, device='cuda:0')
epoch: 20 test_true_pfm: 49.21351666240219 sim_pfm: -130.13271704200307
episode: 80 training return: tensor(-214.2164, device='cuda:0')
episode: 81 training return: tensor(-121.8605, device='cuda:0')
episode: 82 training return: tensor(-171.9953, device='cuda:0')
episode: 83 training return: tensor(-175.7810, device='cuda:0')
epoch: 21 test_true_pfm: 56.40983695802953 sim_pfm: -162.63364517242297
episode: 84 training return: tensor(-203.4529, device='cuda:0')
episode: 85 training return: tensor(-211.3724, device='cuda:0')
episode: 86 training return: tensor(-172.7071, device='cuda:0')
episode: 87 training return: tensor(-211.0786, device='cuda:0')
epoch: 22 test_true_pfm: 55.3628668481599 sim_pfm: -196.805706846877
episode: 88 training return: tensor(-204.3083, device='cuda:0')
episode: 89 training return: tensor(-139.1185, device='cuda:0')
episode: 90 training return: tensor(-64.0500, device='cuda:0')
episode: 91 training return: tensor(-79.8761, device='cuda:0')
epoch: 23 test_true_pfm: 60.449497412099866 sim_pfm: -113.79487107532331
episode: 92 training return: tensor(-16.4419, device='cuda:0')
episode: 93 training return: tensor(-182.8341, device='cuda:0')
episode: 94 training return: tensor(-9.5445, device='cuda:0')
episode: 95 training return: tensor(-89.1638, device='cuda:0')
epoch: 24 test_true_pfm: 56.9452170318991 sim_pfm: -147.935167020245
episode: 96 training return: tensor(-162.9266, device='cuda:0')
episode: 97 training return: tensor(-142.3839, device='cuda:0')
episode: 98 training return: tensor(-185.1001, device='cuda:0')
episode: 99 training return: tensor(-75.4555, device='cuda:0')
epoch: 25 test_true_pfm: 73.06158188373048 sim_pfm: -103.84187959763804
episode: 100 training return: tensor(-139.8125, device='cuda:0')
episode: 101 training return: tensor(-137.1133, device='cuda:0')
episode: 102 training return: tensor(-50.7339, device='cuda:0')
episode: 103 training return: tensor(-57.2298, device='cuda:0')
epoch: 26 test_true_pfm: 70.06133075632263 sim_pfm: -151.43392045818035
episode: 104 training return: tensor(-139.2975, device='cuda:0')
episode: 105 training return: tensor(-149.0436, device='cuda:0')
episode: 106 training return: tensor(-139.7657, device='cuda:0')
episode: 107 training return: tensor(-212.0029, device='cuda:0')
epoch: 27 test_true_pfm: 58.43149688153865 sim_pfm: -148.8720192792942
episode: 108 training return: tensor(-103.7873, device='cuda:0')
episode: 109 training return: tensor(-206.2711, device='cuda:0')
episode: 110 training return: tensor(-143.1363, device='cuda:0')
episode: 111 training return: tensor(-207.3585, device='cuda:0')
epoch: 28 test_true_pfm: 63.299186498222966 sim_pfm: -126.72025753755005
episode: 112 training return: tensor(-69.1485, device='cuda:0')
episode: 113 training return: tensor(-208.7597, device='cuda:0')
episode: 114 training return: tensor(-72.5649, device='cuda:0')
episode: 115 training return: tensor(-69.7073, device='cuda:0')
epoch: 29 test_true_pfm: 65.09454274461616 sim_pfm: -160.6121380659053
episode: 116 training return: tensor(-209.7790, device='cuda:0')
episode: 117 training return: tensor(-140.0605, device='cuda:0')
episode: 118 training return: tensor(-137.7130, device='cuda:0')
episode: 119 training return: tensor(-77.1499, device='cuda:0')
epoch: 30 test_true_pfm: 61.996180855184036 sim_pfm: -177.26194796044146
episode: 120 training return: tensor(-195.3079, device='cuda:0')
episode: 121 training return: tensor(-97.6988, device='cuda:0')
episode: 122 training return: tensor(-106.8534, device='cuda:0')
episode: 123 training return: tensor(-199.4390, device='cuda:0')
epoch: 31 test_true_pfm: 64.94160626916218 sim_pfm: -149.08029012774932
episode: 124 training return: tensor(-219.6410, device='cuda:0')
episode: 125 training return: tensor(-79.5044, device='cuda:0')
episode: 126 training return: tensor(-162.9035, device='cuda:0')
episode: 127 training return: tensor(-197.7276, device='cuda:0')
epoch: 32 test_true_pfm: 58.40799770362764 sim_pfm: -128.50130424696835
episode: 128 training return: tensor(-58.4548, device='cuda:0')
episode: 129 training return: tensor(-146.4967, device='cuda:0')
episode: 130 training return: tensor(-202.6531, device='cuda:0')
episode: 131 training return: tensor(-187.9356, device='cuda:0')
epoch: 33 test_true_pfm: 64.32723021370084 sim_pfm: -132.44278019273187
episode: 132 training return: tensor(-67.0248, device='cuda:0')
episode: 133 training return: tensor(-209.5753, device='cuda:0')
episode: 134 training return: tensor(-169.0037, device='cuda:0')
episode: 135 training return: tensor(-191.2975, device='cuda:0')
epoch: 34 test_true_pfm: 61.34166881161839 sim_pfm: -137.89569918587804
episode: 136 training return: tensor(-189.4758, device='cuda:0')
episode: 137 training return: tensor(-136.6073, device='cuda:0')
episode: 138 training return: tensor(-142.1061, device='cuda:0')
episode: 139 training return: tensor(-78.6721, device='cuda:0')
epoch: 35 test_true_pfm: 58.06813355922397 sim_pfm: -114.3598585907137
episode: 140 training return: tensor(-59.4024, device='cuda:0')
episode: 141 training return: tensor(-63.7064, device='cuda:0')
episode: 142 training return: tensor(-140.0753, device='cuda:0')
episode: 143 training return: tensor(-148.1659, device='cuda:0')
epoch: 36 test_true_pfm: 65.74101747901565 sim_pfm: -173.35975574156038
episode: 144 training return: tensor(-188.8168, device='cuda:0')
episode: 145 training return: tensor(-140.4804, device='cuda:0')
episode: 146 training return: tensor(-193.4772, device='cuda:0')
episode: 147 training return: tensor(-66.2681, device='cuda:0')
epoch: 37 test_true_pfm: 59.72761734556216 sim_pfm: -104.98240796988247
episode: 148 training return: tensor(-82.5572, device='cuda:0')
episode: 149 training return: tensor(-179.0850, device='cuda:0')
episode: 150 training return: tensor(-134.8618, device='cuda:0')
episode: 151 training return: tensor(-208.2337, device='cuda:0')
epoch: 38 test_true_pfm: 61.0378703763658 sim_pfm: -147.23595383348874
episode: 152 training return: tensor(-194.4243, device='cuda:0')
episode: 153 training return: tensor(-162.7719, device='cuda:0')
episode: 154 training return: tensor(-70.0224, device='cuda:0')
episode: 155 training return: tensor(-77.6132, device='cuda:0')
epoch: 39 test_true_pfm: 51.139466568361776 sim_pfm: -162.7104228737997
episode: 156 training return: tensor(-70.1147, device='cuda:0')
episode: 157 training return: tensor(-60.9646, device='cuda:0')
episode: 158 training return: tensor(-196.0972, device='cuda:0')
episode: 159 training return: tensor(-47.4898, device='cuda:0')
epoch: 40 test_true_pfm: 66.39355764870541 sim_pfm: -92.50262682250468
episode: 160 training return: tensor(-185.1687, device='cuda:0')
episode: 161 training return: tensor(-146.9866, device='cuda:0')
episode: 162 training return: tensor(-204.9805, device='cuda:0')
episode: 163 training return: tensor(-144.2247, device='cuda:0')
epoch: 41 test_true_pfm: 54.050331888425674 sim_pfm: -141.28258537338115
episode: 164 training return: tensor(-151.6734, device='cuda:0')
episode: 165 training return: tensor(-188.7105, device='cuda:0')
episode: 166 training return: tensor(-189.3311, device='cuda:0')
episode: 167 training return: tensor(-186.0106, device='cuda:0')
epoch: 42 test_true_pfm: 59.985180696358135 sim_pfm: -159.70823075009974
episode: 168 training return: tensor(-45.7406, device='cuda:0')
episode: 169 training return: tensor(-101.7118, device='cuda:0')
episode: 170 training return: tensor(-103.2084, device='cuda:0')
episode: 171 training return: tensor(-188.0839, device='cuda:0')
epoch: 43 test_true_pfm: 62.075994796501654 sim_pfm: -171.3374191900948
episode: 172 training return: tensor(-171.1662, device='cuda:0')
episode: 173 training return: tensor(-142.7705, device='cuda:0')
episode: 174 training return: tensor(-187.1901, device='cuda:0')
episode: 175 training return: tensor(-148.2548, device='cuda:0')
epoch: 44 test_true_pfm: 75.76827429953747 sim_pfm: -84.66233128104359
episode: 176 training return: tensor(-184.2159, device='cuda:0')
episode: 177 training return: tensor(-189.6803, device='cuda:0')
episode: 178 training return: tensor(-184.5412, device='cuda:0')
episode: 179 training return: tensor(-47.7959, device='cuda:0')
epoch: 45 test_true_pfm: 54.8748344771376 sim_pfm: -115.01657225826057
episode: 180 training return: tensor(-108.1142, device='cuda:0')
episode: 181 training return: tensor(-147.6734, device='cuda:0')
episode: 182 training return: tensor(-180.4676, device='cuda:0')
episode: 183 training return: tensor(-98.9901, device='cuda:0')
epoch: 46 test_true_pfm: 65.85957593358431 sim_pfm: -106.34919794181478
episode: 184 training return: tensor(-141.6063, device='cuda:0')
episode: 185 training return: tensor(-87.5227, device='cuda:0')
episode: 186 training return: tensor(-69.5494, device='cuda:0')
episode: 187 training return: tensor(-183.0370, device='cuda:0')
epoch: 47 test_true_pfm: 65.78723466253324 sim_pfm: -127.00473770264652
episode: 188 training return: tensor(-183.6220, device='cuda:0')
episode: 189 training return: tensor(-140.6693, device='cuda:0')
episode: 190 training return: tensor(-114.7822, device='cuda:0')
episode: 191 training return: tensor(-74.1348, device='cuda:0')
epoch: 48 test_true_pfm: 64.31839669632463 sim_pfm: -98.93606376347597
episode: 192 training return: tensor(-79.7976, device='cuda:0')
episode: 193 training return: tensor(-184.7254, device='cuda:0')
episode: 194 training return: tensor(-86.3236, device='cuda:0')
episode: 195 training return: tensor(-188.3166, device='cuda:0')
epoch: 49 test_true_pfm: 64.28901409010018 sim_pfm: -173.23750261517125
episode: 196 training return: tensor(-142.5882, device='cuda:0')
episode: 197 training return: tensor(-199.6776, device='cuda:0')
episode: 198 training return: tensor(-201.9735, device='cuda:0')
episode: 199 training return: tensor(-139.2635, device='cuda:0')
epoch: 50 test_true_pfm: 65.23387969970221 sim_pfm: -158.0926409029693
episode: 200 training return: tensor(-192.4119, device='cuda:0')
episode: 201 training return: tensor(-146.2839, device='cuda:0')
episode: 202 training return: tensor(-55.2545, device='cuda:0')
episode: 203 training return: tensor(-183.3474, device='cuda:0')
epoch: 51 test_true_pfm: 62.33432997538536 sim_pfm: -111.80593208944192
episode: 204 training return: tensor(-63.7883, device='cuda:0')
episode: 205 training return: tensor(-104.3248, device='cuda:0')
episode: 206 training return: tensor(-114.2787, device='cuda:0')
episode: 207 training return: tensor(-60.4843, device='cuda:0')
epoch: 52 test_true_pfm: 52.313720458023546 sim_pfm: -129.75205880581635
episode: 208 training return: tensor(-152.5022, device='cuda:0')
episode: 209 training return: tensor(-52.9345, device='cuda:0')
episode: 210 training return: tensor(-59.1061, device='cuda:0')
episode: 211 training return: tensor(-67.6261, device='cuda:0')
epoch: 53 test_true_pfm: 64.45085249876394 sim_pfm: -95.56986599374795
episode: 212 training return: tensor(-63.6308, device='cuda:0')
episode: 213 training return: tensor(-83.2058, device='cuda:0')
episode: 214 training return: tensor(-111.8053, device='cuda:0')
episode: 215 training return: tensor(-77.2415, device='cuda:0')
epoch: 54 test_true_pfm: 62.91300115326676 sim_pfm: -120.00074843470938
episode: 216 training return: tensor(-139.5751, device='cuda:0')
episode: 217 training return: tensor(-189.8036, device='cuda:0')
episode: 218 training return: tensor(-169.1501, device='cuda:0')
episode: 219 training return: tensor(-46.3719, device='cuda:0')
epoch: 55 test_true_pfm: 67.11035049400091 sim_pfm: -107.7811850121594
episode: 220 training return: tensor(-74.2559, device='cuda:0')
episode: 221 training return: tensor(-134.8431, device='cuda:0')
episode: 222 training return: tensor(-155.4168, device='cuda:0')
episode: 223 training return: tensor(-135.0431, device='cuda:0')
epoch: 56 test_true_pfm: 59.50566165528939 sim_pfm: -155.20804278071736
episode: 224 training return: tensor(-49.0997, device='cuda:0')
episode: 225 training return: tensor(-141.8326, device='cuda:0')
episode: 226 training return: tensor(-205.9778, device='cuda:0')
episode: 227 training return: tensor(-51.5480, device='cuda:0')
epoch: 57 test_true_pfm: 68.51909631940808 sim_pfm: -138.344418805826
episode: 228 training return: tensor(-58.4853, device='cuda:0')
episode: 229 training return: tensor(-103.7298, device='cuda:0')
episode: 230 training return: tensor(-55.0967, device='cuda:0')
episode: 231 training return: tensor(-179.0573, device='cuda:0')
epoch: 58 test_true_pfm: 47.82369994072191 sim_pfm: -160.61301729106344
episode: 232 training return: tensor(-51.0019, device='cuda:0')
episode: 233 training return: tensor(-148.3564, device='cuda:0')
episode: 234 training return: tensor(-107.9332, device='cuda:0')
episode: 235 training return: tensor(-40.6151, device='cuda:0')
epoch: 59 test_true_pfm: 57.54162852538307 sim_pfm: -130.69450688388898
episode: 236 training return: tensor(21.3727, device='cuda:0')
episode: 237 training return: tensor(-196.0105, device='cuda:0')
episode: 238 training return: tensor(-80.5824, device='cuda:0')
episode: 239 training return: tensor(-63.7564, device='cuda:0')
epoch: 60 test_true_pfm: 55.87438211795988 sim_pfm: -169.92713942116825
episode: 240 training return: tensor(-83.3858, device='cuda:0')
episode: 241 training return: tensor(-190.3994, device='cuda:0')
episode: 242 training return: tensor(-167.6332, device='cuda:0')
episode: 243 training return: tensor(-77.8935, device='cuda:0')
epoch: 61 test_true_pfm: 57.53841224608614 sim_pfm: -103.80070938693825
episode: 244 training return: tensor(-163.1771, device='cuda:0')
episode: 245 training return: tensor(-56.9243, device='cuda:0')
episode: 246 training return: tensor(-151.2611, device='cuda:0')
episode: 247 training return: tensor(-75.2784, device='cuda:0')
epoch: 62 test_true_pfm: 75.4582634432027 sim_pfm: -102.35211947046336
episode: 248 training return: tensor(-47.6470, device='cuda:0')
episode: 249 training return: tensor(-17.5414, device='cuda:0')
episode: 250 training return: tensor(-56.0877, device='cuda:0')
episode: 251 training return: tensor(-61.9061, device='cuda:0')
epoch: 63 test_true_pfm: 61.41763932093011 sim_pfm: -146.6952334314119
episode: 252 training return: tensor(-156.2857, device='cuda:0')
episode: 253 training return: tensor(-188.4856, device='cuda:0')
episode: 254 training return: tensor(-12.7386, device='cuda:0')
episode: 255 training return: tensor(-138.4978, device='cuda:0')
epoch: 64 test_true_pfm: 68.80329923633977 sim_pfm: -135.34745111413068
episode: 256 training return: tensor(-48.2430, device='cuda:0')
episode: 257 training return: tensor(-57.9942, device='cuda:0')
episode: 258 training return: tensor(-138.4683, device='cuda:0')
episode: 259 training return: tensor(-66.5891, device='cuda:0')
epoch: 65 test_true_pfm: 65.98596892128162 sim_pfm: -126.44088451493299
episode: 260 training return: tensor(-68.0494, device='cuda:0')
episode: 261 training return: tensor(-88.2632, device='cuda:0')
episode: 262 training return: tensor(-191.3071, device='cuda:0')
episode: 263 training return: tensor(-185.1941, device='cuda:0')
epoch: 66 test_true_pfm: 68.43928086681748 sim_pfm: -138.63230650767218
episode: 264 training return: tensor(-58.4067, device='cuda:0')
episode: 265 training return: tensor(-67.0774, device='cuda:0')
episode: 266 training return: tensor(-53.1509, device='cuda:0')
episode: 267 training return: tensor(-186.6006, device='cuda:0')
epoch: 67 test_true_pfm: 79.06388620960259 sim_pfm: -90.69881624673727
episode: 268 training return: tensor(-62.2809, device='cuda:0')
episode: 269 training return: tensor(-13.5742, device='cuda:0')
episode: 270 training return: tensor(-45.7168, device='cuda:0')
episode: 271 training return: tensor(-62.9506, device='cuda:0')
epoch: 68 test_true_pfm: 71.59975678389785 sim_pfm: -74.55805487555335
episode: 272 training return: tensor(-141.4278, device='cuda:0')
episode: 273 training return: tensor(-183.1880, device='cuda:0')
episode: 274 training return: tensor(-57.7026, device='cuda:0')
episode: 275 training return: tensor(-138.6738, device='cuda:0')
epoch: 69 test_true_pfm: 62.72753823828091 sim_pfm: -128.7824452897301
episode: 276 training return: tensor(-116.9199, device='cuda:0')
episode: 277 training return: tensor(-187.7633, device='cuda:0')
episode: 278 training return: tensor(-66.3942, device='cuda:0')
episode: 279 training return: tensor(-45.5106, device='cuda:0')
epoch: 70 test_true_pfm: 63.11597544142923 sim_pfm: -123.0667570043006
episode: 280 training return: tensor(-7.9180, device='cuda:0')
episode: 281 training return: tensor(-51.3962, device='cuda:0')
episode: 282 training return: tensor(-143.2944, device='cuda:0')
episode: 283 training return: tensor(-149.9140, device='cuda:0')
epoch: 71 test_true_pfm: 63.19420087068941 sim_pfm: -107.19454214633443
episode: 284 training return: tensor(-186.6568, device='cuda:0')
episode: 285 training return: tensor(-158.0221, device='cuda:0')
episode: 286 training return: tensor(-139.3504, device='cuda:0')
episode: 287 training return: tensor(-74.5284, device='cuda:0')
epoch: 72 test_true_pfm: 47.61401037779795 sim_pfm: -129.80428028544992
episode: 288 training return: tensor(-56.0481, device='cuda:0')
episode: 289 training return: tensor(-143.4958, device='cuda:0')
episode: 290 training return: tensor(-132.5696, device='cuda:0')
episode: 291 training return: tensor(-36.1509, device='cuda:0')
epoch: 73 test_true_pfm: 62.80382795553055 sim_pfm: -82.34677632517415
episode: 292 training return: tensor(-104.8205, device='cuda:0')
episode: 293 training return: tensor(-84.7312, device='cuda:0')
episode: 294 training return: tensor(-193.4986, device='cuda:0')
episode: 295 training return: tensor(-72.5073, device='cuda:0')
epoch: 74 test_true_pfm: 76.26123712539291 sim_pfm: -91.08338537945528
episode: 296 training return: tensor(-88.2543, device='cuda:0')
episode: 297 training return: tensor(-59.2498, device='cuda:0')
episode: 298 training return: tensor(-50.0674, device='cuda:0')
episode: 299 training return: tensor(-139.5042, device='cuda:0')
epoch: 75 test_true_pfm: 52.67498734305718 sim_pfm: -178.1980147583992
episode: 300 training return: tensor(-192.6849, device='cuda:0')
episode: 301 training return: tensor(-70.7696, device='cuda:0')
episode: 302 training return: tensor(-52.9010, device='cuda:0')
episode: 303 training return: tensor(-146.1537, device='cuda:0')
epoch: 76 test_true_pfm: 78.32903681539078 sim_pfm: -128.26781159627717
episode: 304 training return: tensor(-139.0211, device='cuda:0')
episode: 305 training return: tensor(-170.1684, device='cuda:0')
episode: 306 training return: tensor(-30.1068, device='cuda:0')
episode: 307 training return: tensor(-186.4183, device='cuda:0')
epoch: 77 test_true_pfm: 49.67145611742495 sim_pfm: -126.2658874571207
episode: 308 training return: tensor(47.2087, device='cuda:0')
episode: 309 training return: tensor(-141.8586, device='cuda:0')
episode: 310 training return: tensor(-77.8681, device='cuda:0')
episode: 311 training return: tensor(-136.4998, device='cuda:0')
epoch: 78 test_true_pfm: 65.21160529140975 sim_pfm: -108.29271198357455
episode: 312 training return: tensor(-32.8733, device='cuda:0')
episode: 313 training return: tensor(-190.8828, device='cuda:0')
episode: 314 training return: tensor(-187.5591, device='cuda:0')
episode: 315 training return: tensor(-184.0276, device='cuda:0')
epoch: 79 test_true_pfm: 83.49330021481656 sim_pfm: -78.09139094658894
episode: 316 training return: tensor(-139.7927, device='cuda:0')
episode: 317 training return: tensor(-91.3618, device='cuda:0')
episode: 318 training return: tensor(-184.6399, device='cuda:0')
episode: 319 training return: tensor(64.1678, device='cuda:0')
epoch: 80 test_true_pfm: 59.08511949726564 sim_pfm: -67.6576455555216
episode: 320 training return: tensor(-103.8584, device='cuda:0')
episode: 321 training return: tensor(-187.0117, device='cuda:0')
episode: 322 training return: tensor(-43.6072, device='cuda:0')
episode: 323 training return: tensor(-95.3485, device='cuda:0')
epoch: 81 test_true_pfm: 67.18174458980529 sim_pfm: -82.16790191702312
episode: 324 training return: tensor(-112.6937, device='cuda:0')
episode: 325 training return: tensor(-48.5775, device='cuda:0')
episode: 326 training return: tensor(-114.0426, device='cuda:0')
episode: 327 training return: tensor(-45.6001, device='cuda:0')
epoch: 82 test_true_pfm: 83.28606693677575 sim_pfm: -71.76775827571983
episode: 328 training return: tensor(-57.3544, device='cuda:0')
episode: 329 training return: tensor(-41.7674, device='cuda:0')
episode: 330 training return: tensor(-61.1681, device='cuda:0')
episode: 331 training return: tensor(-60.4632, device='cuda:0')
epoch: 83 test_true_pfm: 72.21724980352732 sim_pfm: -41.072137906122954
episode: 332 training return: tensor(-105.3171, device='cuda:0')
episode: 333 training return: tensor(-141.9900, device='cuda:0')
episode: 334 training return: tensor(-179.6768, device='cuda:0')
episode: 335 training return: tensor(-84.6635, device='cuda:0')
epoch: 84 test_true_pfm: 62.12839276165941 sim_pfm: -92.11141021806979
episode: 336 training return: tensor(-50.1684, device='cuda:0')
episode: 337 training return: tensor(-51.5987, device='cuda:0')
episode: 338 training return: tensor(-53.4361, device='cuda:0')
episode: 339 training return: tensor(-106.9803, device='cuda:0')
epoch: 85 test_true_pfm: 61.69574722594613 sim_pfm: -47.046988617052556
episode: 340 training return: tensor(-51.0441, device='cuda:0')
episode: 341 training return: tensor(-96.8600, device='cuda:0')
episode: 342 training return: tensor(-52.4508, device='cuda:0')
episode: 343 training return: tensor(-68.4977, device='cuda:0')
epoch: 86 test_true_pfm: 79.9808838200717 sim_pfm: -69.01717917447677
episode: 344 training return: tensor(-201.4761, device='cuda:0')
episode: 345 training return: tensor(-189.5877, device='cuda:0')
episode: 346 training return: tensor(-115.8442, device='cuda:0')
episode: 347 training return: tensor(-189.9846, device='cuda:0')
epoch: 87 test_true_pfm: 71.72597802205172 sim_pfm: -81.64118801177247
episode: 348 training return: tensor(-87.5134, device='cuda:0')
episode: 349 training return: tensor(-195.4549, device='cuda:0')
episode: 350 training return: tensor(-182.5842, device='cuda:0')
episode: 351 training return: tensor(-116.4789, device='cuda:0')
epoch: 88 test_true_pfm: 65.99567710135837 sim_pfm: -56.36881552776322
episode: 352 training return: tensor(-45.9245, device='cuda:0')
episode: 353 training return: tensor(-145.9444, device='cuda:0')
episode: 354 training return: tensor(-175.6680, device='cuda:0')
episode: 355 training return: tensor(-188.2930, device='cuda:0')
epoch: 89 test_true_pfm: 65.29295326370575 sim_pfm: -122.528969856404
episode: 356 training return: tensor(-76.2975, device='cuda:0')
episode: 357 training return: tensor(-138.8327, device='cuda:0')
episode: 358 training return: tensor(-48.2321, device='cuda:0')
episode: 359 training return: tensor(-37.2833, device='cuda:0')
epoch: 90 test_true_pfm: 73.52773248217741 sim_pfm: -143.51668908692082
episode: 360 training return: tensor(-187.3875, device='cuda:0')
episode: 361 training return: tensor(68.2778, device='cuda:0')
episode: 362 training return: tensor(-188.5631, device='cuda:0')
episode: 363 training return: tensor(-185.8843, device='cuda:0')
epoch: 91 test_true_pfm: 62.63859187315152 sim_pfm: -84.53677603979595
episode: 364 training return: tensor(-63.2393, device='cuda:0')
episode: 365 training return: tensor(-203.7705, device='cuda:0')
episode: 366 training return: tensor(-190.9911, device='cuda:0')
episode: 367 training return: tensor(-188.4026, device='cuda:0')
epoch: 92 test_true_pfm: 58.9258529184929 sim_pfm: -148.8043751280464
episode: 368 training return: tensor(-9.0337, device='cuda:0')
episode: 369 training return: tensor(4.9197, device='cuda:0')
episode: 370 training return: tensor(-46.7050, device='cuda:0')
episode: 371 training return: tensor(-68.6586, device='cuda:0')
epoch: 93 test_true_pfm: 64.02562121646879 sim_pfm: -71.78133401387022
episode: 372 training return: tensor(-177.6275, device='cuda:0')
episode: 373 training return: tensor(-27.7901, device='cuda:0')
episode: 374 training return: tensor(-61.8632, device='cuda:0')
episode: 375 training return: tensor(-140.2776, device='cuda:0')
epoch: 94 test_true_pfm: 67.56328193389575 sim_pfm: -57.40988801121712
episode: 376 training return: tensor(-35.0397, device='cuda:0')
episode: 377 training return: tensor(-50.8025, device='cuda:0')
episode: 378 training return: tensor(-54.6774, device='cuda:0')
episode: 379 training return: tensor(-67.0214, device='cuda:0')
epoch: 95 test_true_pfm: 64.95970224989516 sim_pfm: -142.6194828435313
episode: 380 training return: tensor(-80.0221, device='cuda:0')
episode: 381 training return: tensor(-105.8394, device='cuda:0')
episode: 382 training return: tensor(-118.3426, device='cuda:0')
episode: 383 training return: tensor(-60.0204, device='cuda:0')
epoch: 96 test_true_pfm: 66.41420158700592 sim_pfm: -87.9018346567289
episode: 384 training return: tensor(-67.6977, device='cuda:0')
episode: 385 training return: tensor(-29.3799, device='cuda:0')
episode: 386 training return: tensor(-87.4150, device='cuda:0')
episode: 387 training return: tensor(-73.2433, device='cuda:0')
epoch: 97 test_true_pfm: 66.55940312791606 sim_pfm: -107.42482015252463
episode: 388 training return: tensor(-184.9246, device='cuda:0')
episode: 389 training return: tensor(-163.2153, device='cuda:0')
episode: 390 training return: tensor(-73.4934, device='cuda:0')
episode: 391 training return: tensor(-55.3171, device='cuda:0')
epoch: 98 test_true_pfm: 71.24805051129104 sim_pfm: -72.718524538778
episode: 392 training return: tensor(-148.0700, device='cuda:0')
episode: 393 training return: tensor(-191.2512, device='cuda:0')
episode: 394 training return: tensor(-47.2146, device='cuda:0')
episode: 395 training return: tensor(-153.2811, device='cuda:0')
epoch: 99 test_true_pfm: 66.24222216909418 sim_pfm: -124.2063321743044
episode: 396 training return: tensor(-49.0965, device='cuda:0')
episode: 397 training return: tensor(-201.1983, device='cuda:0')
episode: 398 training return: tensor(-95.5562, device='cuda:0')
episode: 399 training return: tensor(-88.1633, device='cuda:0')
epoch: 100 test_true_pfm: 66.67806530738265 sim_pfm: -118.40410573980189
episode: 400 training return: tensor(-1.6162, device='cuda:0')
episode: 401 training return: tensor(-50.5501, device='cuda:0')
episode: 402 training return: tensor(-95.1230, device='cuda:0')
episode: 403 training return: tensor(-70.7752, device='cuda:0')
epoch: 101 test_true_pfm: 76.81095427317908 sim_pfm: -54.71647405878757
episode: 404 training return: tensor(-49.5863, device='cuda:0')
episode: 405 training return: tensor(-196.5251, device='cuda:0')
episode: 406 training return: tensor(-41.3009, device='cuda:0')
episode: 407 training return: tensor(-141.5361, device='cuda:0')
epoch: 102 test_true_pfm: 76.55297867914916 sim_pfm: -94.30066811932484
episode: 408 training return: tensor(-105.8098, device='cuda:0')
episode: 409 training return: tensor(-97.1300, device='cuda:0')
episode: 410 training return: tensor(-191.2716, device='cuda:0')
episode: 411 training return: tensor(-69.8723, device='cuda:0')
epoch: 103 test_true_pfm: 70.05508777860139 sim_pfm: -82.9296618402237
episode: 412 training return: tensor(-68.2221, device='cuda:0')
episode: 413 training return: tensor(-67.3017, device='cuda:0')
episode: 414 training return: tensor(-133.9115, device='cuda:0')
episode: 415 training return: tensor(-59.7096, device='cuda:0')
epoch: 104 test_true_pfm: 60.31431993187948 sim_pfm: -55.67555751744658
episode: 416 training return: tensor(-6.0643, device='cuda:0')
episode: 417 training return: tensor(-118.3292, device='cuda:0')
episode: 418 training return: tensor(-161.4488, device='cuda:0')
episode: 419 training return: tensor(-111.5360, device='cuda:0')
epoch: 105 test_true_pfm: 64.27055969481921 sim_pfm: -123.15142229285557
episode: 420 training return: tensor(-145.7497, device='cuda:0')
episode: 421 training return: tensor(-67.6883, device='cuda:0')
episode: 422 training return: tensor(-121.2154, device='cuda:0')
episode: 423 training return: tensor(-205.1866, device='cuda:0')
epoch: 106 test_true_pfm: 59.43039965964708 sim_pfm: -129.47047468527452
episode: 424 training return: tensor(-122.1489, device='cuda:0')
episode: 425 training return: tensor(-42.8500, device='cuda:0')
episode: 426 training return: tensor(-187.1922, device='cuda:0')
episode: 427 training return: tensor(-76.8635, device='cuda:0')
epoch: 107 test_true_pfm: 54.83856574300613 sim_pfm: -138.79570796240586
episode: 428 training return: tensor(-150.5890, device='cuda:0')
episode: 429 training return: tensor(-154.0023, device='cuda:0')
episode: 430 training return: tensor(-146.7768, device='cuda:0')
episode: 431 training return: tensor(-68.6489, device='cuda:0')
epoch: 108 test_true_pfm: 75.081104522069 sim_pfm: -89.16609073659056
episode: 432 training return: tensor(-72.2205, device='cuda:0')
episode: 433 training return: tensor(-60.1165, device='cuda:0')
episode: 434 training return: tensor(-190.5733, device='cuda:0')
episode: 435 training return: tensor(-69.8358, device='cuda:0')
epoch: 109 test_true_pfm: 73.3960758745233 sim_pfm: -61.483519897784575
episode: 436 training return: tensor(1.3875, device='cuda:0')
episode: 437 training return: tensor(-31.7065, device='cuda:0')
episode: 438 training return: tensor(-20.8789, device='cuda:0')
episode: 439 training return: tensor(-7.8374, device='cuda:0')
epoch: 110 test_true_pfm: 79.66218981847867 sim_pfm: -111.67434884327231
episode: 440 training return: tensor(-52.1547, device='cuda:0')
episode: 441 training return: tensor(-194.2795, device='cuda:0')
episode: 442 training return: tensor(-63.1250, device='cuda:0')
episode: 443 training return: tensor(-68.8223, device='cuda:0')
epoch: 111 test_true_pfm: 80.54519610642404 sim_pfm: -59.88190736159449
episode: 444 training return: tensor(-16.5226, device='cuda:0')
episode: 445 training return: tensor(-34.3649, device='cuda:0')
episode: 446 training return: tensor(-79.7007, device='cuda:0')
episode: 447 training return: tensor(-97.6287, device='cuda:0')
epoch: 112 test_true_pfm: 69.84715034429959 sim_pfm: -118.0889286622114
episode: 448 training return: tensor(-65.7276, device='cuda:0')
episode: 449 training return: tensor(-61.3363, device='cuda:0')
episode: 450 training return: tensor(-203.7446, device='cuda:0')
episode: 451 training return: tensor(-153.7485, device='cuda:0')
epoch: 113 test_true_pfm: 77.76027820554263 sim_pfm: -112.67170235415688
episode: 452 training return: tensor(-192.0391, device='cuda:0')
episode: 453 training return: tensor(-152.7885, device='cuda:0')
episode: 454 training return: tensor(-138.2054, device='cuda:0')
episode: 455 training return: tensor(-50.5726, device='cuda:0')
epoch: 114 test_true_pfm: 65.14714362772362 sim_pfm: -113.36969487832394
episode: 456 training return: tensor(-94.4382, device='cuda:0')
episode: 457 training return: tensor(-53.4666, device='cuda:0')
episode: 458 training return: tensor(-52.9655, device='cuda:0')
episode: 459 training return: tensor(-65.8019, device='cuda:0')
epoch: 115 test_true_pfm: 76.24924237025704 sim_pfm: -105.39851433069562
episode: 460 training return: tensor(-59.1422, device='cuda:0')
episode: 461 training return: tensor(-42.2528, device='cuda:0')
episode: 462 training return: tensor(-180.0249, device='cuda:0')
episode: 463 training return: tensor(-59.9985, device='cuda:0')
epoch: 116 test_true_pfm: 67.0314500435032 sim_pfm: -106.09154251040891
episode: 464 training return: tensor(-56.8520, device='cuda:0')
episode: 465 training return: tensor(-77.5383, device='cuda:0')
episode: 466 training return: tensor(-48.5459, device='cuda:0')
episode: 467 training return: tensor(-119.4004, device='cuda:0')
epoch: 117 test_true_pfm: 75.91445474160912 sim_pfm: -51.36517217323417
episode: 468 training return: tensor(-147.8625, device='cuda:0')
episode: 469 training return: tensor(-56.6719, device='cuda:0')
episode: 470 training return: tensor(-151.7753, device='cuda:0')
episode: 471 training return: tensor(-48.7302, device='cuda:0')
epoch: 118 test_true_pfm: 69.50969046000257 sim_pfm: -123.88027303733398
episode: 472 training return: tensor(-81.4502, device='cuda:0')
episode: 473 training return: tensor(-85.4760, device='cuda:0')
episode: 474 training return: tensor(-145.0037, device='cuda:0')
episode: 475 training return: tensor(-194.8756, device='cuda:0')
epoch: 119 test_true_pfm: 57.280651513269405 sim_pfm: -77.59810230113217
episode: 476 training return: tensor(-54.0124, device='cuda:0')
episode: 477 training return: tensor(-37.3897, device='cuda:0')
episode: 478 training return: tensor(-64.1548, device='cuda:0')
episode: 479 training return: tensor(-187.2405, device='cuda:0')
epoch: 120 test_true_pfm: 58.64090861971549 sim_pfm: -67.76229121804936
episode: 480 training return: tensor(-128.5784, device='cuda:0')
episode: 481 training return: tensor(-56.6119, device='cuda:0')
episode: 482 training return: tensor(-92.8481, device='cuda:0')
episode: 483 training return: tensor(-83.8176, device='cuda:0')
epoch: 121 test_true_pfm: 72.8443965065188 sim_pfm: -149.7995694216399
episode: 484 training return: tensor(-57.1461, device='cuda:0')
episode: 485 training return: tensor(-53.6045, device='cuda:0')
episode: 486 training return: tensor(-77.0671, device='cuda:0')
episode: 487 training return: tensor(-153.6048, device='cuda:0')
epoch: 122 test_true_pfm: 62.18240468629084 sim_pfm: -97.96747336476692
episode: 488 training return: tensor(-39.1549, device='cuda:0')
episode: 489 training return: tensor(-56.0383, device='cuda:0')
episode: 490 training return: tensor(-44.1194, device='cuda:0')
episode: 491 training return: tensor(-58.9733, device='cuda:0')
epoch: 123 test_true_pfm: 73.71428273240845 sim_pfm: -98.1567326053977
episode: 492 training return: tensor(-64.9522, device='cuda:0')
episode: 493 training return: tensor(-115.1354, device='cuda:0')
episode: 494 training return: tensor(-53.1568, device='cuda:0')
episode: 495 training return: tensor(-58.6039, device='cuda:0')
epoch: 124 test_true_pfm: 78.76264959269564 sim_pfm: -113.4543218935607
episode: 496 training return: tensor(-41.2157, device='cuda:0')
episode: 497 training return: tensor(-50.2624, device='cuda:0')
episode: 498 training return: tensor(-42.8773, device='cuda:0')
episode: 499 training return: tensor(-55.9956, device='cuda:0')
epoch: 125 test_true_pfm: 68.2873680142346 sim_pfm: -96.2008898482076
episode: 500 training return: tensor(-108.6511, device='cuda:0')
episode: 501 training return: tensor(-27.7726, device='cuda:0')
episode: 502 training return: tensor(-135.7010, device='cuda:0')
episode: 503 training return: tensor(-39.0452, device='cuda:0')
epoch: 126 test_true_pfm: 79.04674532508244 sim_pfm: -91.84197364655556
episode: 504 training return: tensor(-76.4215, device='cuda:0')
episode: 505 training return: tensor(-58.2640, device='cuda:0')
episode: 506 training return: tensor(-42.0227, device='cuda:0')
episode: 507 training return: tensor(-66.1371, device='cuda:0')
epoch: 127 test_true_pfm: 68.3958969668411 sim_pfm: -130.153374078474
episode: 508 training return: tensor(-60.6918, device='cuda:0')
episode: 509 training return: tensor(-112.2519, device='cuda:0')
episode: 510 training return: tensor(-49.2278, device='cuda:0')
episode: 511 training return: tensor(-19.2963, device='cuda:0')
epoch: 128 test_true_pfm: 65.97106770750688 sim_pfm: -120.57057639023405
episode: 512 training return: tensor(-162.3719, device='cuda:0')
episode: 513 training return: tensor(-192.7892, device='cuda:0')
episode: 514 training return: tensor(-49.3186, device='cuda:0')
episode: 515 training return: tensor(6.5174, device='cuda:0')
epoch: 129 test_true_pfm: 74.9106337187662 sim_pfm: -85.02022388306213
episode: 516 training return: tensor(-146.2775, device='cuda:0')
episode: 517 training return: tensor(-38.6130, device='cuda:0')
episode: 518 training return: tensor(-59.5853, device='cuda:0')
episode: 519 training return: tensor(-2.3621, device='cuda:0')
epoch: 130 test_true_pfm: 75.40347306411422 sim_pfm: -53.859446508286055
episode: 520 training return: tensor(-152.0346, device='cuda:0')
episode: 521 training return: tensor(43.3611, device='cuda:0')
episode: 522 training return: tensor(-105.4037, device='cuda:0')
episode: 523 training return: tensor(-52.8831, device='cuda:0')
epoch: 131 test_true_pfm: 75.55763640414156 sim_pfm: -96.28452447357704
episode: 524 training return: tensor(-183.2744, device='cuda:0')
episode: 525 training return: tensor(48.9327, device='cuda:0')
episode: 526 training return: tensor(-180.6611, device='cuda:0')
episode: 527 training return: tensor(-189.9985, device='cuda:0')
epoch: 132 test_true_pfm: 77.47243461063138 sim_pfm: -153.17784030511976
episode: 528 training return: tensor(-195.4177, device='cuda:0')
episode: 529 training return: tensor(-67.8079, device='cuda:0')
episode: 530 training return: tensor(-124.2988, device='cuda:0')
episode: 531 training return: tensor(-76.1268, device='cuda:0')
epoch: 133 test_true_pfm: 66.26214692463627 sim_pfm: -111.79066496139858
episode: 532 training return: tensor(-67.4232, device='cuda:0')
episode: 533 training return: tensor(-67.0105, device='cuda:0')
episode: 534 training return: tensor(-55.9646, device='cuda:0')
episode: 535 training return: tensor(-205.9702, device='cuda:0')
epoch: 134 test_true_pfm: 64.73021643556663 sim_pfm: -175.71472698961733
episode: 536 training return: tensor(-48.0739, device='cuda:0')
episode: 537 training return: tensor(-49.8062, device='cuda:0')
episode: 538 training return: tensor(-96.3535, device='cuda:0')
episode: 539 training return: tensor(-67.8157, device='cuda:0')
epoch: 135 test_true_pfm: 63.15486434850586 sim_pfm: -155.55318351571913
episode: 540 training return: tensor(-144.8001, device='cuda:0')
episode: 541 training return: tensor(-55.1449, device='cuda:0')
episode: 542 training return: tensor(-60.1607, device='cuda:0')
episode: 543 training return: tensor(-134.9758, device='cuda:0')
epoch: 136 test_true_pfm: 69.96659801140625 sim_pfm: -71.94204851349932
episode: 544 training return: tensor(5.8884, device='cuda:0')
episode: 545 training return: tensor(-51.8454, device='cuda:0')
episode: 546 training return: tensor(-35.2086, device='cuda:0')
episode: 547 training return: tensor(-67.4021, device='cuda:0')
epoch: 137 test_true_pfm: 87.03074779330089 sim_pfm: -74.76807821563561
episode: 548 training return: tensor(-155.4404, device='cuda:0')
episode: 549 training return: tensor(-31.3444, device='cuda:0')
episode: 550 training return: tensor(-100.1655, device='cuda:0')
episode: 551 training return: tensor(-102.9944, device='cuda:0')
epoch: 138 test_true_pfm: 70.72822946072695 sim_pfm: -147.9781776970427
episode: 552 training return: tensor(-189.4628, device='cuda:0')
episode: 553 training return: tensor(-103.6665, device='cuda:0')
episode: 554 training return: tensor(-69.6308, device='cuda:0')
episode: 555 training return: tensor(-59.3115, device='cuda:0')
epoch: 139 test_true_pfm: 86.98307248830486 sim_pfm: -51.79099455118994
episode: 556 training return: tensor(13.5815, device='cuda:0')
episode: 557 training return: tensor(-183.7302, device='cuda:0')
episode: 558 training return: tensor(-156.7167, device='cuda:0')
episode: 559 training return: tensor(-69.1300, device='cuda:0')
epoch: 140 test_true_pfm: 88.10904384925686 sim_pfm: -71.40779932074365
episode: 560 training return: tensor(20.7160, device='cuda:0')
episode: 561 training return: tensor(-63.9541, device='cuda:0')
episode: 562 training return: tensor(-197.0985, device='cuda:0')
episode: 563 training return: tensor(-216.4451, device='cuda:0')
epoch: 141 test_true_pfm: 73.77963028559273 sim_pfm: -123.8168378888513
episode: 564 training return: tensor(-6.4676, device='cuda:0')
episode: 565 training return: tensor(-58.8079, device='cuda:0')
episode: 566 training return: tensor(-59.9444, device='cuda:0')
episode: 567 training return: tensor(-121.6334, device='cuda:0')
epoch: 142 test_true_pfm: 74.15063924726917 sim_pfm: -60.00751289694453
episode: 568 training return: tensor(-166.0200, device='cuda:0')
episode: 569 training return: tensor(-69.3921, device='cuda:0')
episode: 570 training return: tensor(-51.8368, device='cuda:0')
episode: 571 training return: tensor(-190.3174, device='cuda:0')
epoch: 143 test_true_pfm: 81.69639741451529 sim_pfm: -48.893258943071125
episode: 572 training return: tensor(-45.2134, device='cuda:0')
episode: 573 training return: tensor(-48.4296, device='cuda:0')
episode: 574 training return: tensor(-139.2965, device='cuda:0')
episode: 575 training return: tensor(-141.9899, device='cuda:0')
epoch: 144 test_true_pfm: 75.81124411277713 sim_pfm: -63.390654445375546
episode: 576 training return: tensor(-136.2887, device='cuda:0')
episode: 577 training return: tensor(-106.3347, device='cuda:0')
episode: 578 training return: tensor(-29.4620, device='cuda:0')
episode: 579 training return: tensor(-28.7889, device='cuda:0')
epoch: 145 test_true_pfm: 72.00861213437466 sim_pfm: -45.61139731940348
episode: 580 training return: tensor(-54.1180, device='cuda:0')
episode: 581 training return: tensor(-49.6352, device='cuda:0')
episode: 582 training return: tensor(-92.9754, device='cuda:0')
episode: 583 training return: tensor(-51.5574, device='cuda:0')
epoch: 146 test_true_pfm: 72.95154576099257 sim_pfm: -91.3461293475877
episode: 584 training return: tensor(-70.2601, device='cuda:0')
episode: 585 training return: tensor(-64.6924, device='cuda:0')
episode: 586 training return: tensor(-45.5710, device='cuda:0')
episode: 587 training return: tensor(-40.3476, device='cuda:0')
epoch: 147 test_true_pfm: 58.70690925765605 sim_pfm: -63.571328484476545
episode: 588 training return: tensor(-95.8032, device='cuda:0')
episode: 589 training return: tensor(-46.9108, device='cuda:0')
episode: 590 training return: tensor(-191.1051, device='cuda:0')
episode: 591 training return: tensor(-178.1240, device='cuda:0')
epoch: 148 test_true_pfm: 65.28798519319824 sim_pfm: -74.07954701353447
episode: 592 training return: tensor(-15.5326, device='cuda:0')
episode: 593 training return: tensor(-29.3423, device='cuda:0')
episode: 594 training return: tensor(-178.5469, device='cuda:0')
episode: 595 training return: tensor(-40.1414, device='cuda:0')
epoch: 149 test_true_pfm: 74.6126752586276 sim_pfm: -66.34356192575652
episode: 596 training return: tensor(-167.1400, device='cuda:0')
episode: 597 training return: tensor(-26.7774, device='cuda:0')
episode: 598 training return: tensor(-147.1728, device='cuda:0')
episode: 599 training return: tensor(-63.5718, device='cuda:0')
epoch: 150 test_true_pfm: 73.17564968322388 sim_pfm: -108.50418029999128
