['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '2', '--data', '10000', '--sub']
epoch: 0 training_loss 0.2611157727241516 test_loss: 0.20551056861877443
epoch: 1 training_loss 0.14927511885762215 test_loss: 0.13565318584442138
epoch: 2 training_loss 0.12556131590157749 test_loss: 0.13501148223876952
epoch: 3 training_loss 0.11047540836036206 test_loss: 0.1358637571334839
epoch: 4 training_loss 0.1275112572312355 test_loss: 0.13956801891326903
epoch: 5 training_loss 0.11830721750855445 test_loss: 0.12301725149154663
epoch: 6 training_loss 0.11437781576067209 test_loss: 0.13156179189682007
epoch: 7 training_loss 0.10535286415368318 test_loss: 0.12260078191757202
epoch: 8 training_loss 0.11801050778478384 test_loss: 0.1343657851219177
epoch: 9 training_loss 0.10073991671204567 test_loss: 0.12173330783843994
epoch: 10 training_loss 0.10312833677977323 test_loss: 0.11801948547363281
epoch: 11 training_loss 0.10300709027796984 test_loss: 0.1202702522277832
epoch: 12 training_loss 0.10033773951232433 test_loss: 0.11531950235366821
epoch: 13 training_loss 0.10164952645078301 test_loss: 0.13111900091171264
epoch: 14 training_loss 0.09684782400727272 test_loss: 0.11709178686141967
epoch: 15 training_loss 0.10167833551764488 test_loss: 0.11393027305603028
epoch: 16 training_loss 0.0975792177580297 test_loss: 0.12392933368682861
epoch: 17 training_loss 0.09574912450276316 test_loss: 0.11822680234909058
epoch: 18 training_loss 0.09614061038941145 test_loss: 0.13335270881652833
epoch: 19 training_loss 0.09745068944990636 test_loss: 0.11389518976211548
epoch: 20 training_loss 0.09286882495507598 test_loss: 0.12558064460754395
epoch: 21 training_loss 0.09532129531726241 test_loss: 0.10750633478164673
epoch: 22 training_loss 0.08882209358736873 test_loss: 0.12049124240875245
epoch: 23 training_loss 0.0901284421607852 test_loss: 0.11378651857376099
epoch: 24 training_loss 0.09479651086032391 test_loss: 0.11651155948638917
epoch: 25 training_loss 0.08778259778395295 test_loss: 0.1293626308441162
epoch: 26 training_loss 0.08951328577473759 test_loss: 0.1164892554283142
epoch: 27 training_loss 0.09268067186698317 test_loss: 0.11692514419555664
epoch: 28 training_loss 0.08997536048293114 test_loss: 0.1126327633857727
epoch: 29 training_loss 0.08845571745187045 test_loss: 0.13151209354400634
epoch: 30 training_loss 0.09659110827371478 test_loss: 0.12269068956375122
epoch: 31 training_loss 0.09095545114949345 test_loss: 0.12686173915863036
epoch: 32 training_loss 0.09074063191190362 test_loss: 0.11780823469161987
epoch: 33 training_loss 0.08724730551242828 test_loss: 0.10668412446975709
epoch: 34 training_loss 0.08552194211632014 test_loss: 0.12434812784194946
epoch: 35 training_loss 0.0766900029592216 test_loss: 0.13408674001693727
epoch: 36 training_loss 0.08311487028375268 test_loss: 0.11353367567062378
epoch: 37 training_loss 0.09187546318396926 test_loss: 0.14826629161834717
epoch: 38 training_loss 0.09527879217639565 test_loss: 0.11365512609481812
epoch: 39 training_loss 0.09035510826855898 test_loss: 0.12307610511779785
epoch: 40 training_loss 0.08592192355543375 test_loss: 0.12418248653411865
epoch: 41 training_loss 0.08453926522284747 test_loss: 0.11965428590774536
epoch: 42 training_loss 0.08147824238985776 test_loss: 0.12899066209793092
epoch: 43 training_loss 0.08366112654097378 test_loss: 0.12879809141159057
epoch: 44 training_loss 0.09299493357539176 test_loss: 0.13424445390701295
epoch: 45 training_loss 0.09239825481548905 test_loss: 0.11138092279434204
epoch: 46 training_loss 0.08310516776517034 test_loss: 0.1106607437133789
epoch: 47 training_loss 0.07618759458884597 test_loss: 0.12975465059280394
epoch: 48 training_loss 0.08315782747231423 test_loss: 0.11558409929275512
epoch: 49 training_loss 0.08460831871256232 test_loss: 0.12262861728668213
epoch: 50 training_loss 0.07661960527300835 test_loss: 0.13095479011535643
epoch: 51 training_loss 0.08821886079385877 test_loss: 0.12691465616226197
epoch: 52 training_loss 0.07313511397689582 test_loss: 0.10995179414749146
epoch: 53 training_loss 0.07706174306571484 test_loss: 0.1370608925819397
epoch: 54 training_loss 0.07746730683371425 test_loss: 0.12907532453536988
epoch: 55 training_loss 0.073041632194072 test_loss: 0.11688742637634278
epoch: 56 training_loss 0.08416827037930488 test_loss: 0.10830227136611939
epoch: 57 training_loss 0.08153321154415608 test_loss: 0.12445597648620606
epoch: 58 training_loss 0.07488599488511681 test_loss: 0.10385143756866455
epoch: 59 training_loss 0.07912415176630021 test_loss: 0.11051563024520875
epoch: 60 training_loss 0.08147674014791846 test_loss: 0.12463277578353882
epoch: 61 training_loss 0.08280421508476138 test_loss: 0.14714441299438477
epoch: 62 training_loss 0.08348666133359074 test_loss: 0.11358860731124878
epoch: 63 training_loss 0.07076087065041065 test_loss: 0.11339141130447387
epoch: 64 training_loss 0.0742890116199851 test_loss: 0.1340009570121765
epoch: 65 training_loss 0.07694624625146389 test_loss: 0.11547560691833496
epoch: 66 training_loss 0.07945027757436036 test_loss: 0.13510648012161255
epoch: 67 training_loss 0.07625369166955352 test_loss: 0.13545650243759155
epoch: 68 training_loss 0.08132127307355404 test_loss: 0.11860990524291992
epoch: 69 training_loss 0.07149729086086154 test_loss: 0.12956297397613525
epoch: 70 training_loss 0.07501618364825845 test_loss: 0.12205421924591064
epoch: 71 training_loss 0.0806724221073091 test_loss: 0.14034132957458495
epoch: 72 training_loss 0.07382785690948367 test_loss: 0.1163218379020691
epoch: 73 training_loss 0.07782301073893905 test_loss: 0.12430324554443359
epoch: 74 training_loss 0.07919376917183399 test_loss: 0.13804826736450196
epoch: 75 training_loss 0.07299346972256898 test_loss: 0.12190062999725342
epoch: 76 training_loss 0.08696749209426344 test_loss: 0.13395098447799683
epoch: 77 training_loss 0.07493301229551434 test_loss: 0.14263383150100709
epoch: 78 training_loss 0.07903310761786997 test_loss: 0.14076319932937623
epoch: 79 training_loss 0.07385361947119236 test_loss: 0.14316810369491578
epoch: 80 training_loss 0.07468992886133492 test_loss: 0.12858883142471314
epoch: 81 training_loss 0.07474448751658201 test_loss: 0.12975015640258789
epoch: 82 training_loss 0.06672564070671796 test_loss: 0.12778388261795043
epoch: 83 training_loss 0.07838189497590065 test_loss: 0.12540149688720703
epoch: 84 training_loss 0.07240218497812748 test_loss: 0.12540801763534545
epoch: 85 training_loss 0.07269396100193262 test_loss: 0.13242818117141725
epoch: 86 training_loss 0.07429060079157353 test_loss: 0.13514573574066163
epoch: 87 training_loss 0.07742789706215263 test_loss: 0.12967523336410522
epoch: 88 training_loss 0.07700296951457858 test_loss: 0.13700050115585327
epoch: 89 training_loss 0.0755149974860251 test_loss: 0.1298075795173645
epoch: 90 training_loss 0.0683248751424253 test_loss: 0.1253206968307495
epoch: 91 training_loss 0.07549709660932422 test_loss: 0.13961220979690553
epoch: 92 training_loss 0.0673218546435237 test_loss: 0.14586222171783447
epoch: 93 training_loss 0.07211343036964536 test_loss: 0.12751702070236207
epoch: 94 training_loss 0.07363663740456104 test_loss: 0.14561829566955567
epoch: 95 training_loss 0.07513857573270798 test_loss: 0.13399348258972169
epoch: 96 training_loss 0.07411516889929771 test_loss: 0.12553589344024657
epoch: 97 training_loss 0.06622945070266724 test_loss: 0.14407864809036255
epoch: 98 training_loss 0.07038574052043259 test_loss: 0.14556357860565186
epoch: 99 training_loss 0.06601418249309063 test_loss: 0.13689278364181517
epoch: 100 training_loss 0.07595748379826546 test_loss: 0.13285006284713746
epoch: 101 training_loss 0.07813163198530675 test_loss: 0.14681540727615355
epoch: 102 training_loss 0.06814193788915873 test_loss: 0.11813915967941284
epoch: 103 training_loss 0.06554139348678291 test_loss: 0.12133899927139283
epoch: 104 training_loss 0.06880557974800468 test_loss: 0.15140854120254515
epoch: 105 training_loss 0.06522179573774338 test_loss: 0.1354411482810974
epoch: 106 training_loss 0.0722710875235498 test_loss: 0.1395735502243042
epoch: 107 training_loss 0.0704117432422936 test_loss: 0.1233522891998291
epoch: 108 training_loss 0.06391214916482568 test_loss: 0.13825592994689942
epoch: 109 training_loss 0.061900613615289334 test_loss: 0.13269916772842408
epoch: 110 training_loss 0.07074900014325976 test_loss: 0.13808050155639648
epoch: 111 training_loss 0.07005661435425281 test_loss: 0.1422513484954834
epoch: 112 training_loss 0.06203683760948479 test_loss: 0.13507426977157594
epoch: 113 training_loss 0.06721498617902398 test_loss: 0.13613402843475342
epoch: 114 training_loss 0.06701221106573939 test_loss: 0.14109203815460206
epoch: 115 training_loss 0.0656959082186222 test_loss: 0.14324555397033692
epoch: 116 training_loss 0.07699717847630382 test_loss: 0.1489649534225464
epoch: 117 training_loss 0.06611503578722477 test_loss: 0.15146769285202027
epoch: 118 training_loss 0.06825739479623735 test_loss: 0.14229313135147095
epoch: 119 training_loss 0.06293131127953529 test_loss: 0.1267695903778076
epoch: 120 training_loss 0.06812725430354476 test_loss: 0.13823636770248413
epoch: 121 training_loss 0.06840925108641387 test_loss: 0.1607852101325989
epoch: 122 training_loss 0.07020842398516834 test_loss: 0.15692540407180786
epoch: 123 training_loss 0.06512502940371633 test_loss: 0.13849821090698242
epoch: 124 training_loss 0.07036999966949224 test_loss: 0.14506335258483888
epoch: 125 training_loss 0.06538106871768833 test_loss: 0.1361644983291626
epoch: 126 training_loss 0.07074542250484228 test_loss: 0.14819028377532958
epoch: 127 training_loss 0.06713766407221555 test_loss: 0.14638086557388305
epoch: 128 training_loss 0.06438381758518517 test_loss: 0.1534245491027832
epoch: 129 training_loss 0.061812015101313594 test_loss: 0.14837743043899537
epoch: 130 training_loss 0.06664270775392651 test_loss: 0.15167529582977296
epoch: 131 training_loss 0.06646054776385427 test_loss: 0.1579250454902649
epoch: 132 training_loss 0.05691574536263943 test_loss: 0.1333686351776123
epoch: 133 training_loss 0.06624928353354335 test_loss: 0.1556578040122986
epoch: 134 training_loss 0.06444788599386811 test_loss: 0.14984560012817383
epoch: 135 training_loss 0.06394924043677747 test_loss: 0.15728896856307983
epoch: 136 training_loss 0.06331695523113012 test_loss: 0.13471610546112062
epoch: 137 training_loss 0.06288874143734574 test_loss: 0.14653401374816893
epoch: 138 training_loss 0.06709482381120324 test_loss: 0.13250505924224854
epoch: 139 training_loss 0.0641315488005057 test_loss: 0.17593444585800172
epoch: 140 training_loss 0.07101824387907982 test_loss: 0.13785221576690673
epoch: 141 training_loss 0.0615186994895339 test_loss: 0.1548161029815674
epoch: 142 training_loss 0.057330097262747584 test_loss: 0.14625484943389894
epoch: 143 training_loss 0.05575415920466185 test_loss: 0.13015683889389038
epoch: 144 training_loss 0.06891061810776591 test_loss: 0.15381962060928345
epoch: 145 training_loss 0.06112164915539324 test_loss: 0.13101418018341066
epoch: 146 training_loss 0.06812428439036011 test_loss: 0.13880683183670045
epoch: 147 training_loss 0.06271604170091451 test_loss: 0.1342815041542053
epoch: 148 training_loss 0.06221716799773276 test_loss: 0.12897435426712037
epoch: 149 training_loss 0.06152124555781484 test_loss: 0.12864208221435547
epoch: 0 training_loss 38.07379093170166 test_loss: 20.00726318359375
epoch: 1 training_loss 15.91819535255432 test_loss: 13.999478149414063
epoch: 2 training_loss 12.183051109313965 test_loss: 11.569146728515625
epoch: 3 training_loss 10.718433074951172 test_loss: 9.977821350097656
epoch: 4 training_loss 9.284505796432494 test_loss: 8.931448364257813
epoch: 5 training_loss 8.30732545375824 test_loss: 8.290370178222656
epoch: 6 training_loss 7.787637348175049 test_loss: 7.791860198974609
epoch: 7 training_loss 7.364401121139526 test_loss: 7.320005798339844
epoch: 8 training_loss 7.033472456932068 test_loss: 6.893739318847656
epoch: 9 training_loss 6.687062621116638 test_loss: 6.549919891357422
epoch: 10 training_loss 6.4537688159942626 test_loss: 6.522234344482422
epoch: 11 training_loss 6.075522413253784 test_loss: 6.203667068481446
epoch: 12 training_loss 5.877839241027832 test_loss: 5.9549560546875
epoch: 13 training_loss 5.5920150136947635 test_loss: 5.835766983032227
epoch: 14 training_loss 5.637823934555054 test_loss: 5.772428512573242
epoch: 15 training_loss 5.308171901702881 test_loss: 5.366206359863281
epoch: 16 training_loss 5.190517985820771 test_loss: 5.0936840057373045
epoch: 17 training_loss 5.018239064216614 test_loss: 5.067392730712891
epoch: 18 training_loss 4.912350924015045 test_loss: 4.910156631469727
epoch: 19 training_loss 4.777359125614166 test_loss: 4.876150894165039
epoch: 20 training_loss 4.698039116859436 test_loss: 4.658723831176758
epoch: 21 training_loss 4.498523812294007 test_loss: 4.538730621337891
epoch: 22 training_loss 4.483816432952881 test_loss: 4.71723747253418
epoch: 23 training_loss 4.405230431556702 test_loss: 4.537635421752929
epoch: 24 training_loss 4.215685248374939 test_loss: 4.4088691711425785
epoch: 25 training_loss 4.24940051317215 test_loss: 4.311393737792969
epoch: 26 training_loss 4.197080748081207 test_loss: 4.320379638671875
epoch: 27 training_loss 4.065902824401856 test_loss: 4.366613388061523
epoch: 28 training_loss 4.055190477371216 test_loss: 4.290270233154297
epoch: 29 training_loss 3.8777382493019106 test_loss: 4.188178253173828
epoch: 30 training_loss 3.891515061855316 test_loss: 3.9916526794433596
epoch: 31 training_loss 3.869648487567902 test_loss: 4.042398071289062
epoch: 32 training_loss 3.82307804107666 test_loss: 3.9747951507568358
epoch: 33 training_loss 3.789111409187317 test_loss: 3.933113861083984
epoch: 34 training_loss 3.6165256977081297 test_loss: 3.8315677642822266
epoch: 35 training_loss 3.6714877915382385 test_loss: 3.749928665161133
epoch: 36 training_loss 3.63355003118515 test_loss: 3.8729305267333984
epoch: 37 training_loss 3.544496603012085 test_loss: 3.7707042694091797
epoch: 38 training_loss 3.5478194165229797 test_loss: 3.746540069580078
epoch: 39 training_loss 3.4971089005470275 test_loss: 3.5906261444091796
epoch: 40 training_loss 3.4020540618896487 test_loss: 3.7690059661865236
epoch: 41 training_loss 3.3938841795921326 test_loss: 3.6877456665039063
epoch: 42 training_loss 3.4626702284812927 test_loss: 3.4839366912841796
epoch: 43 training_loss 3.361258451938629 test_loss: 3.5002605438232424
epoch: 44 training_loss 3.328904094696045 test_loss: 3.451414108276367
epoch: 45 training_loss 3.3040799140930175 test_loss: 3.4089801788330076
epoch: 46 training_loss 3.26646427154541 test_loss: 3.496484375
epoch: 47 training_loss 3.3377907252311707 test_loss: 3.523639678955078
epoch: 48 training_loss 3.145111315250397 test_loss: 3.3951431274414063
epoch: 49 training_loss 3.1971256017684935 test_loss: 3.373224639892578
epoch: 50 training_loss 3.168515832424164 test_loss: 3.224211883544922
epoch: 51 training_loss 3.1837192583084106 test_loss: 3.1895683288574217
epoch: 52 training_loss 3.0839043498039245 test_loss: 3.246129608154297
epoch: 53 training_loss 3.107917308807373 test_loss: 3.2184810638427734
epoch: 54 training_loss 3.005720953941345 test_loss: 3.179062271118164
epoch: 55 training_loss 3.0069450306892396 test_loss: 3.227645492553711
epoch: 56 training_loss 3.0281264185905457 test_loss: 3.2199329376220702
epoch: 57 training_loss 2.9947510504722596 test_loss: 3.1017478942871093
epoch: 58 training_loss 2.9957349157333373 test_loss: 3.192062568664551
epoch: 59 training_loss 2.9568911957740784 test_loss: 3.1533300399780275
epoch: 60 training_loss 3.0255078434944154 test_loss: 3.1210968017578127
epoch: 61 training_loss 2.905262644290924 test_loss: 3.1093156814575194
epoch: 62 training_loss 2.869288363456726 test_loss: 3.1557851791381837
epoch: 63 training_loss 2.87305296421051 test_loss: 3.116088104248047
epoch: 64 training_loss 2.8584784126281737 test_loss: 2.994587707519531
epoch: 65 training_loss 2.8882946705818178 test_loss: 3.0590782165527344
epoch: 66 training_loss 2.829744613170624 test_loss: 3.0695688247680666
epoch: 67 training_loss 2.817983250617981 test_loss: 2.9569583892822267
epoch: 68 training_loss 2.8075451636314392 test_loss: 2.9596124649047852
epoch: 69 training_loss 2.799670352935791 test_loss: 3.062391471862793
epoch: 70 training_loss 2.7536386251449585 test_loss: 2.9288835525512695
epoch: 71 training_loss 2.8077566480636595 test_loss: 2.994984436035156
epoch: 72 training_loss 2.718319447040558 test_loss: 3.022511863708496
epoch: 73 training_loss 2.7850789523124693 test_loss: 3.04898624420166
epoch: 74 training_loss 2.7836972951889036 test_loss: 2.9291236877441404
epoch: 75 training_loss 2.6910942935943605 test_loss: 2.8047082901000975
epoch: 76 training_loss 2.672774910926819 test_loss: 2.846540641784668
epoch: 77 training_loss 2.703802764415741 test_loss: 2.8476404190063476
epoch: 78 training_loss 2.7825465559959413 test_loss: 2.915509033203125
epoch: 79 training_loss 2.6390256416797637 test_loss: 2.9251628875732423
epoch: 80 training_loss 2.666610012054443 test_loss: 2.8510894775390625
epoch: 81 training_loss 2.6516426217556 test_loss: 2.8851362228393556
epoch: 82 training_loss 2.6501127529144286 test_loss: 2.8657958984375
epoch: 83 training_loss 2.6299158787727355 test_loss: 2.856936454772949
epoch: 84 training_loss 2.6235326409339903 test_loss: 2.759654235839844
epoch: 85 training_loss 2.5944329488277433 test_loss: 2.7817150115966798
epoch: 86 training_loss 2.5871011304855345 test_loss: 2.804508018493652
epoch: 87 training_loss 2.567924656867981 test_loss: 2.7149585723876952
epoch: 88 training_loss 2.5633635902404786 test_loss: 2.711919403076172
epoch: 89 training_loss 2.5468624889850617 test_loss: 2.7796451568603517
epoch: 90 training_loss 2.5787969040870666 test_loss: 2.7484798431396484
epoch: 91 training_loss 2.547315104007721 test_loss: 2.7550064086914063
epoch: 92 training_loss 2.5354868149757386 test_loss: 2.7277284622192384
epoch: 93 training_loss 2.5462457478046416 test_loss: 2.7053619384765626
epoch: 94 training_loss 2.5621481847763063 test_loss: 2.762710762023926
epoch: 95 training_loss 2.529768190383911 test_loss: 2.7286203384399412
epoch: 96 training_loss 2.4711661779880525 test_loss: 2.7920459747314452
epoch: 97 training_loss 2.520175111293793 test_loss: 2.6943943023681642
epoch: 98 training_loss 2.485402283668518 test_loss: 2.718499946594238
epoch: 99 training_loss 2.5052898490428923 test_loss: 2.6728702545166017
epoch: 100 training_loss 2.452792056798935 test_loss: 2.6839345932006835
epoch: 101 training_loss 2.482802838087082 test_loss: 2.739109992980957
epoch: 102 training_loss 2.4340776062011718 test_loss: 2.692581367492676
epoch: 103 training_loss 2.429366083145142 test_loss: 2.7781404495239257
epoch: 104 training_loss 2.4707919609546662 test_loss: 2.6284862518310548
epoch: 105 training_loss 2.476957939863205 test_loss: 2.563701629638672
epoch: 106 training_loss 2.4687394273281096 test_loss: 2.777490234375
epoch: 107 training_loss 2.424305363893509 test_loss: 2.5748611450195313
epoch: 108 training_loss 2.374119964838028 test_loss: 2.54024715423584
epoch: 109 training_loss 2.4118271148204804 test_loss: 2.694392776489258
epoch: 110 training_loss 2.369235812425613 test_loss: 2.5547462463378907
epoch: 111 training_loss 2.3823074758052827 test_loss: 2.5948001861572267
epoch: 112 training_loss 2.38664103269577 test_loss: 2.621128273010254
epoch: 113 training_loss 2.3344311368465425 test_loss: 2.5264213562011717
epoch: 114 training_loss 2.344175387620926 test_loss: 2.5949100494384765
epoch: 115 training_loss 2.332547639608383 test_loss: 2.541399383544922
epoch: 116 training_loss 2.3878707337379455 test_loss: 2.6522945404052733
epoch: 117 training_loss 2.343343862295151 test_loss: 2.5428857803344727
epoch: 118 training_loss 2.350582778453827 test_loss: 2.5536376953125
epoch: 119 training_loss 2.310956423282623 test_loss: 2.5774850845336914
epoch: 120 training_loss 2.327638580799103 test_loss: 2.5510879516601563
epoch: 121 training_loss 2.340358270406723 test_loss: 2.5253875732421873
epoch: 122 training_loss 2.322133861780167 test_loss: 2.605082130432129
epoch: 123 training_loss 2.3380003225803376 test_loss: 2.523898124694824
epoch: 124 training_loss 2.301180113554001 test_loss: 2.5385225296020506
epoch: 125 training_loss 2.3238854920864105 test_loss: 2.4427223205566406
epoch: 126 training_loss 2.276442495584488 test_loss: 2.5486942291259767
epoch: 127 training_loss 2.297216168642044 test_loss: 2.5474384307861326
epoch: 128 training_loss 2.295709707736969 test_loss: 2.4458282470703123
epoch: 129 training_loss 2.2590717935562132 test_loss: 2.4809688568115233
epoch: 130 training_loss 2.2489115822315218 test_loss: 2.5591127395629885
epoch: 131 training_loss 2.2850410711765288 test_loss: 2.4495988845825196
epoch: 132 training_loss 2.291616721153259 test_loss: 2.4020605087280273
epoch: 133 training_loss 2.3002404916286467 test_loss: 2.4970186233520506
epoch: 134 training_loss 2.228254590034485 test_loss: 2.4916635513305665
epoch: 135 training_loss 2.2965630435943605 test_loss: 2.4231231689453123
epoch: 136 training_loss 2.260692800283432 test_loss: 2.4294870376586912
epoch: 137 training_loss 2.2344790172576903 test_loss: 2.4925947189331055
epoch: 138 training_loss 2.209261370897293 test_loss: 2.5528059005737305
epoch: 139 training_loss 2.2674199295043946 test_loss: 2.6167470932006838
epoch: 140 training_loss 2.2217599785327913 test_loss: 2.4565408706665037
epoch: 141 training_loss 2.277663817405701 test_loss: 2.5344623565673827
epoch: 142 training_loss 2.2569580709934236 test_loss: 2.495865058898926
epoch: 143 training_loss 2.2677328848838805 test_loss: 2.47015380859375
epoch: 144 training_loss 2.1981381011009216 test_loss: 2.428630256652832
epoch: 145 training_loss 2.189986164569855 test_loss: 2.4069793701171873
epoch: 146 training_loss 2.240161910057068 test_loss: 2.505868339538574
epoch: 147 training_loss 2.1863097393512727 test_loss: 2.4162355422973634
epoch: 148 training_loss 2.2154810214042664 test_loss: 2.389113998413086
epoch: 149 training_loss 2.2279777586460114 test_loss: 2.4653724670410155
3034.550170625819
episode: 0 training return: tensor(197.6808, device='cuda:0')
episode: 1 training return: tensor(151.8788, device='cuda:0')
episode: 2 training return: tensor(225.3005, device='cuda:0')
episode: 3 training return: tensor(235.7911, device='cuda:0')
epoch: 1 test_true_pfm: 2038.1996687410701 sim_pfm: -43.48821500209548
episode: 4 training return: tensor(174.3540, device='cuda:0')
episode: 5 training return: tensor(228.7263, device='cuda:0')
episode: 6 training return: tensor(115.2746, device='cuda:0')
episode: 7 training return: tensor(320.3535, device='cuda:0')
epoch: 2 test_true_pfm: 3383.05580504894 sim_pfm: 254.04180600864734
episode: 8 training return: tensor(201.9891, device='cuda:0')
episode: 9 training return: tensor(167.7587, device='cuda:0')
episode: 10 training return: tensor(244.7358, device='cuda:0')
episode: 11 training return: tensor(250.5581, device='cuda:0')
epoch: 3 test_true_pfm: 3388.7299326459856 sim_pfm: 260.20159066927346
episode: 12 training return: tensor(203.8810, device='cuda:0')
episode: 13 training return: tensor(284.1736, device='cuda:0')
episode: 14 training return: tensor(286.9966, device='cuda:0')
episode: 15 training return: tensor(321.4385, device='cuda:0')
epoch: 4 test_true_pfm: 3416.848577150027 sim_pfm: 242.21696581045398
episode: 16 training return: tensor(264.6533, device='cuda:0')
episode: 17 training return: tensor(316.7119, device='cuda:0')
episode: 18 training return: tensor(204.2166, device='cuda:0')
episode: 19 training return: tensor(192.8082, device='cuda:0')
epoch: 5 test_true_pfm: 3387.9644303391374 sim_pfm: 218.32565569053986
episode: 20 training return: tensor(371.8969, device='cuda:0')
episode: 21 training return: tensor(327.5473, device='cuda:0')
episode: 22 training return: tensor(245.4602, device='cuda:0')
episode: 23 training return: tensor(-89.4821, device='cuda:0')
epoch: 6 test_true_pfm: 3102.4012744676234 sim_pfm: 301.2435230037372
episode: 24 training return: tensor(175.4413, device='cuda:0')
episode: 25 training return: tensor(188.7623, device='cuda:0')
episode: 26 training return: tensor(-201.6817, device='cuda:0')
episode: 27 training return: tensor(376.4380, device='cuda:0')
epoch: 7 test_true_pfm: 3392.3387064894773 sim_pfm: 228.92712698304481
episode: 28 training return: tensor(313.2120, device='cuda:0')
episode: 29 training return: tensor(193.6360, device='cuda:0')
episode: 30 training return: tensor(286.8929, device='cuda:0')
episode: 31 training return: tensor(258.7975, device='cuda:0')
epoch: 8 test_true_pfm: 3233.1307252857055 sim_pfm: -19.421427708410192
episode: 32 training return: tensor(245.9402, device='cuda:0')
episode: 33 training return: tensor(231.5029, device='cuda:0')
episode: 34 training return: tensor(196.0202, device='cuda:0')
episode: 35 training return: tensor(211.2003, device='cuda:0')
epoch: 9 test_true_pfm: 3388.4678998784657 sim_pfm: 238.6825738164674
episode: 36 training return: tensor(163.8350, device='cuda:0')
episode: 37 training return: tensor(218.0278, device='cuda:0')
episode: 38 training return: tensor(299.8637, device='cuda:0')
episode: 39 training return: tensor(294.5647, device='cuda:0')
epoch: 10 test_true_pfm: 3502.0823094109837 sim_pfm: 210.91634803026682
episode: 40 training return: tensor(-57.2216, device='cuda:0')
episode: 41 training return: tensor(276.0300, device='cuda:0')
episode: 42 training return: tensor(236.4194, device='cuda:0')
episode: 43 training return: tensor(-95.5649, device='cuda:0')
epoch: 11 test_true_pfm: 2333.2681370258624 sim_pfm: 48.04866164798538
episode: 44 training return: tensor(296.4160, device='cuda:0')
episode: 45 training return: tensor(238.9391, device='cuda:0')
episode: 46 training return: tensor(233.6687, device='cuda:0')
episode: 47 training return: tensor(188.5571, device='cuda:0')
epoch: 12 test_true_pfm: 3207.7266304195473 sim_pfm: 202.80207861121744
episode: 48 training return: tensor(263.5997, device='cuda:0')
episode: 49 training return: tensor(153.6273, device='cuda:0')
episode: 50 training return: tensor(349.9239, device='cuda:0')
episode: 51 training return: tensor(225.5123, device='cuda:0')
epoch: 13 test_true_pfm: 3467.9799997729533 sim_pfm: 284.84927771106595
episode: 52 training return: tensor(155.0576, device='cuda:0')
episode: 53 training return: tensor(385.5125, device='cuda:0')
episode: 54 training return: tensor(299.2691, device='cuda:0')
episode: 55 training return: tensor(208.8668, device='cuda:0')
epoch: 14 test_true_pfm: 2919.254287749804 sim_pfm: 256.1511384295397
episode: 56 training return: tensor(-377.6716, device='cuda:0')
episode: 57 training return: tensor(327.0694, device='cuda:0')
episode: 58 training return: tensor(-178.0460, device='cuda:0')
episode: 59 training return: tensor(181.4808, device='cuda:0')
epoch: 15 test_true_pfm: 2893.334806312812 sim_pfm: 107.49890664757307
episode: 60 training return: tensor(228.8087, device='cuda:0')
episode: 61 training return: tensor(201.8812, device='cuda:0')
episode: 62 training return: tensor(60.0800, device='cuda:0')
episode: 63 training return: tensor(216.8948, device='cuda:0')
epoch: 16 test_true_pfm: 3442.6372619244416 sim_pfm: 19.74045778120247
episode: 64 training return: tensor(186.6445, device='cuda:0')
episode: 65 training return: tensor(-124.9149, device='cuda:0')
episode: 66 training return: tensor(-210.4659, device='cuda:0')
episode: 67 training return: tensor(-273.7356, device='cuda:0')
epoch: 17 test_true_pfm: 3394.0045554928797 sim_pfm: 147.52028117660666
episode: 68 training return: tensor(289.0578, device='cuda:0')
episode: 69 training return: tensor(-163.0282, device='cuda:0')
episode: 70 training return: tensor(235.3292, device='cuda:0')
episode: 71 training return: tensor(205.1788, device='cuda:0')
epoch: 18 test_true_pfm: 3408.530267107093 sim_pfm: 69.14684997287502
episode: 72 training return: tensor(215.0934, device='cuda:0')
episode: 73 training return: tensor(263.7388, device='cuda:0')
episode: 74 training return: tensor(230.3313, device='cuda:0')
episode: 75 training return: tensor(214.3233, device='cuda:0')
epoch: 19 test_true_pfm: 2932.5028756679785 sim_pfm: -110.83038388246011
episode: 76 training return: tensor(153.6939, device='cuda:0')
episode: 77 training return: tensor(296.5983, device='cuda:0')
episode: 78 training return: tensor(189.6903, device='cuda:0')
episode: 79 training return: tensor(-0.0617, device='cuda:0')
epoch: 20 test_true_pfm: 3402.752029403324 sim_pfm: 231.8103158689822
episode: 80 training return: tensor(249.9136, device='cuda:0')
episode: 81 training return: tensor(-1.8862, device='cuda:0')
episode: 82 training return: tensor(123.5956, device='cuda:0')
episode: 83 training return: tensor(247.2372, device='cuda:0')
epoch: 21 test_true_pfm: 3408.988151509034 sim_pfm: 67.39626606794384
episode: 84 training return: tensor(150.1020, device='cuda:0')
episode: 85 training return: tensor(-391.1400, device='cuda:0')
episode: 86 training return: tensor(83.6255, device='cuda:0')
episode: 87 training return: tensor(-67.3804, device='cuda:0')
epoch: 22 test_true_pfm: 2634.1513246268655 sim_pfm: -14.532596536077714
episode: 88 training return: tensor(-169.8176, device='cuda:0')
episode: 89 training return: tensor(163.8300, device='cuda:0')
episode: 90 training return: tensor(109.4648, device='cuda:0')
episode: 91 training return: tensor(-135.3201, device='cuda:0')
epoch: 23 test_true_pfm: 2625.2295244774855 sim_pfm: -226.12522898497022
episode: 92 training return: tensor(160.3265, device='cuda:0')
episode: 93 training return: tensor(312.9578, device='cuda:0')
episode: 94 training return: tensor(135.8630, device='cuda:0')
episode: 95 training return: tensor(330.4485, device='cuda:0')
epoch: 24 test_true_pfm: 3223.141608785383 sim_pfm: 38.60171413652521
episode: 96 training return: tensor(-134.7222, device='cuda:0')
episode: 97 training return: tensor(303.9337, device='cuda:0')
episode: 98 training return: tensor(241.3414, device='cuda:0')
episode: 99 training return: tensor(-113.6409, device='cuda:0')
epoch: 25 test_true_pfm: 2692.070585457754 sim_pfm: 150.01485015068707
episode: 100 training return: tensor(249.6184, device='cuda:0')
episode: 101 training return: tensor(-35.8003, device='cuda:0')
episode: 102 training return: tensor(260.1982, device='cuda:0')
episode: 103 training return: tensor(203.2829, device='cuda:0')
epoch: 26 test_true_pfm: 3527.646867941956 sim_pfm: 250.25000905646206
episode: 104 training return: tensor(179.2931, device='cuda:0')
episode: 105 training return: tensor(269.5409, device='cuda:0')
episode: 106 training return: tensor(262.3915, device='cuda:0')
episode: 107 training return: tensor(-28.7494, device='cuda:0')
epoch: 27 test_true_pfm: 2899.22440341016 sim_pfm: 192.77198361358023
episode: 108 training return: tensor(298.1632, device='cuda:0')
episode: 109 training return: tensor(176.9702, device='cuda:0')
episode: 110 training return: tensor(308.6005, device='cuda:0')
episode: 111 training return: tensor(276.5358, device='cuda:0')
epoch: 28 test_true_pfm: 3441.963882660142 sim_pfm: 283.0642122694214
episode: 112 training return: tensor(328.0689, device='cuda:0')
episode: 113 training return: tensor(293.6713, device='cuda:0')
episode: 114 training return: tensor(157.6211, device='cuda:0')
episode: 115 training return: tensor(272.4956, device='cuda:0')
epoch: 29 test_true_pfm: 3399.936864689876 sim_pfm: 268.07505600702524
episode: 116 training return: tensor(258.0252, device='cuda:0')
episode: 117 training return: tensor(141.4905, device='cuda:0')
episode: 118 training return: tensor(238.2753, device='cuda:0')
episode: 119 training return: tensor(-167.2549, device='cuda:0')
epoch: 30 test_true_pfm: 3446.8501980298574 sim_pfm: 267.05194895212964
episode: 120 training return: tensor(-287.8012, device='cuda:0')
episode: 121 training return: tensor(188.4417, device='cuda:0')
episode: 122 training return: tensor(198.8707, device='cuda:0')
episode: 123 training return: tensor(209.8118, device='cuda:0')
epoch: 31 test_true_pfm: 3441.0852782563447 sim_pfm: 306.38334579420433
episode: 124 training return: tensor(252.2739, device='cuda:0')
episode: 125 training return: tensor(279.3800, device='cuda:0')
episode: 126 training return: tensor(210.7530, device='cuda:0')
episode: 127 training return: tensor(212.7338, device='cuda:0')
epoch: 32 test_true_pfm: 3449.155570498014 sim_pfm: 250.43949206796242
episode: 128 training return: tensor(297.8520, device='cuda:0')
episode: 129 training return: tensor(315.1740, device='cuda:0')
episode: 130 training return: tensor(160.3810, device='cuda:0')
episode: 131 training return: tensor(230.1391, device='cuda:0')
epoch: 33 test_true_pfm: 3277.7360986763397 sim_pfm: 303.840994688042
episode: 132 training return: tensor(256.6379, device='cuda:0')
episode: 133 training return: tensor(237.2204, device='cuda:0')
episode: 134 training return: tensor(226.3243, device='cuda:0')
episode: 135 training return: tensor(251.2603, device='cuda:0')
epoch: 34 test_true_pfm: 3398.751024960807 sim_pfm: 294.9807742000169
episode: 136 training return: tensor(250.0386, device='cuda:0')
episode: 137 training return: tensor(320.8484, device='cuda:0')
episode: 138 training return: tensor(233.0355, device='cuda:0')
episode: 139 training return: tensor(233.0726, device='cuda:0')
epoch: 35 test_true_pfm: 3426.689901892647 sim_pfm: 203.14823611881002
episode: 140 training return: tensor(169.6181, device='cuda:0')
episode: 141 training return: tensor(237.1200, device='cuda:0')
episode: 142 training return: tensor(286.8455, device='cuda:0')
episode: 143 training return: tensor(244.1234, device='cuda:0')
epoch: 36 test_true_pfm: 3372.5443101171513 sim_pfm: 268.3135600545211
episode: 144 training return: tensor(152.0355, device='cuda:0')
episode: 145 training return: tensor(231.4968, device='cuda:0')
episode: 146 training return: tensor(366.7248, device='cuda:0')
episode: 147 training return: tensor(219.5556, device='cuda:0')
epoch: 37 test_true_pfm: 2894.2392548554176 sim_pfm: 160.02086779477153
episode: 148 training return: tensor(-13.7808, device='cuda:0')
episode: 149 training return: tensor(-103.3767, device='cuda:0')
episode: 150 training return: tensor(254.8440, device='cuda:0')
episode: 151 training return: tensor(248.0205, device='cuda:0')
epoch: 38 test_true_pfm: 3410.4514972236916 sim_pfm: 186.947999020922
episode: 152 training return: tensor(250.7225, device='cuda:0')
episode: 153 training return: tensor(217.9883, device='cuda:0')
episode: 154 training return: tensor(-66.9103, device='cuda:0')
episode: 155 training return: tensor(227.7466, device='cuda:0')
epoch: 39 test_true_pfm: 3483.4486027450125 sim_pfm: 251.85383303789422
episode: 156 training return: tensor(-201.8293, device='cuda:0')
episode: 157 training return: tensor(310.1841, device='cuda:0')
episode: 158 training return: tensor(213.5872, device='cuda:0')
episode: 159 training return: tensor(286.2786, device='cuda:0')
epoch: 40 test_true_pfm: 3417.9045247390254 sim_pfm: 234.72880362624224
episode: 160 training return: tensor(225.7387, device='cuda:0')
episode: 161 training return: tensor(145.3180, device='cuda:0')
episode: 162 training return: tensor(349.4201, device='cuda:0')
episode: 163 training return: tensor(212.2072, device='cuda:0')
epoch: 41 test_true_pfm: 3449.9651566873104 sim_pfm: 234.9270243537127
episode: 164 training return: tensor(277.5672, device='cuda:0')
episode: 165 training return: tensor(266.9990, device='cuda:0')
episode: 166 training return: tensor(-254.4835, device='cuda:0')
episode: 167 training return: tensor(195.6451, device='cuda:0')
epoch: 42 test_true_pfm: 3392.419374806295 sim_pfm: 271.4862944014021
episode: 168 training return: tensor(251.9897, device='cuda:0')
episode: 169 training return: tensor(265.1006, device='cuda:0')
episode: 170 training return: tensor(232.1115, device='cuda:0')
episode: 171 training return: tensor(260.9575, device='cuda:0')
epoch: 43 test_true_pfm: 3449.3791423381103 sim_pfm: 333.5428185213047
episode: 172 training return: tensor(345.8923, device='cuda:0')
episode: 173 training return: tensor(350.7002, device='cuda:0')
episode: 174 training return: tensor(256.0832, device='cuda:0')
episode: 175 training return: tensor(276.3488, device='cuda:0')
epoch: 44 test_true_pfm: 3487.710816759924 sim_pfm: 289.7598329524917
episode: 176 training return: tensor(209.0458, device='cuda:0')
episode: 177 training return: tensor(303.6725, device='cuda:0')
episode: 178 training return: tensor(272.6195, device='cuda:0')
episode: 179 training return: tensor(308.9203, device='cuda:0')
epoch: 45 test_true_pfm: 3455.436178219135 sim_pfm: 265.43756929416367
episode: 180 training return: tensor(196.4893, device='cuda:0')
episode: 181 training return: tensor(261.9504, device='cuda:0')
episode: 182 training return: tensor(171.7780, device='cuda:0')
episode: 183 training return: tensor(-53.7401, device='cuda:0')
epoch: 46 test_true_pfm: 3503.731986563685 sim_pfm: 290.0064690222983
episode: 184 training return: tensor(286.2041, device='cuda:0')
episode: 185 training return: tensor(244.3498, device='cuda:0')
episode: 186 training return: tensor(293.2346, device='cuda:0')
episode: 187 training return: tensor(198.4952, device='cuda:0')
epoch: 47 test_true_pfm: 3446.5008541818474 sim_pfm: 237.24724597197687
episode: 188 training return: tensor(258.4752, device='cuda:0')
episode: 189 training return: tensor(226.9877, device='cuda:0')
episode: 190 training return: tensor(266.2791, device='cuda:0')
episode: 191 training return: tensor(248.9435, device='cuda:0')
epoch: 48 test_true_pfm: 3473.1138540225215 sim_pfm: 273.0093962366033
episode: 192 training return: tensor(-45.3534, device='cuda:0')
episode: 193 training return: tensor(253.7573, device='cuda:0')
episode: 194 training return: tensor(274.2923, device='cuda:0')
episode: 195 training return: tensor(357.5361, device='cuda:0')
epoch: 49 test_true_pfm: 2909.080749589239 sim_pfm: 302.208323615933
episode: 196 training return: tensor(125.8353, device='cuda:0')
episode: 197 training return: tensor(218.4598, device='cuda:0')
episode: 198 training return: tensor(96.0650, device='cuda:0')
episode: 199 training return: tensor(228.4316, device='cuda:0')
epoch: 50 test_true_pfm: 3503.3908912225907 sim_pfm: 236.56051437038695
episode: 200 training return: tensor(206.7101, device='cuda:0')
episode: 201 training return: tensor(251.5226, device='cuda:0')
episode: 202 training return: tensor(192.4330, device='cuda:0')
episode: 203 training return: tensor(209.9238, device='cuda:0')
epoch: 51 test_true_pfm: 3463.176936484297 sim_pfm: 331.9452099048261
episode: 204 training return: tensor(290.5836, device='cuda:0')
episode: 205 training return: tensor(308.0576, device='cuda:0')
episode: 206 training return: tensor(287.0348, device='cuda:0')
episode: 207 training return: tensor(292.3803, device='cuda:0')
epoch: 52 test_true_pfm: 3482.6977992125735 sim_pfm: 302.7685345233961
episode: 208 training return: tensor(176.1796, device='cuda:0')
episode: 209 training return: tensor(293.1041, device='cuda:0')
episode: 210 training return: tensor(224.6319, device='cuda:0')
episode: 211 training return: tensor(214.7948, device='cuda:0')
epoch: 53 test_true_pfm: 3438.707372911562 sim_pfm: 228.22463983349735
episode: 212 training return: tensor(233.1443, device='cuda:0')
episode: 213 training return: tensor(320.9795, device='cuda:0')
episode: 214 training return: tensor(154.4168, device='cuda:0')
episode: 215 training return: tensor(146.1582, device='cuda:0')
epoch: 54 test_true_pfm: 3468.3825844257185 sim_pfm: 249.40823909206665
episode: 216 training return: tensor(288.9698, device='cuda:0')
episode: 217 training return: tensor(-314.7453, device='cuda:0')
episode: 218 training return: tensor(-122.8978, device='cuda:0')
episode: 219 training return: tensor(175.3235, device='cuda:0')
epoch: 55 test_true_pfm: 3435.467542073619 sim_pfm: 270.30341445596423
episode: 220 training return: tensor(238.8535, device='cuda:0')
episode: 221 training return: tensor(228.9042, device='cuda:0')
episode: 222 training return: tensor(322.7236, device='cuda:0')
episode: 223 training return: tensor(233.4437, device='cuda:0')
epoch: 56 test_true_pfm: 3425.0323499758674 sim_pfm: 172.25454408233054
episode: 224 training return: tensor(219.2840, device='cuda:0')
episode: 225 training return: tensor(245.6521, device='cuda:0')
episode: 226 training return: tensor(250.5939, device='cuda:0')
episode: 227 training return: tensor(307.3779, device='cuda:0')
epoch: 57 test_true_pfm: 3450.957004775452 sim_pfm: 255.17312679828805
episode: 228 training return: tensor(240.2538, device='cuda:0')
episode: 229 training return: tensor(257.7878, device='cuda:0')
episode: 230 training return: tensor(278.1761, device='cuda:0')
episode: 231 training return: tensor(285.4069, device='cuda:0')
epoch: 58 test_true_pfm: 3422.2973474515056 sim_pfm: 277.62905382871395
episode: 232 training return: tensor(286.2405, device='cuda:0')
episode: 233 training return: tensor(228.8210, device='cuda:0')
episode: 234 training return: tensor(227.4343, device='cuda:0')
episode: 235 training return: tensor(244.1260, device='cuda:0')
epoch: 59 test_true_pfm: 3513.8546727621433 sim_pfm: 262.1067102965996
episode: 236 training return: tensor(246.3178, device='cuda:0')
episode: 237 training return: tensor(145.3762, device='cuda:0')
episode: 238 training return: tensor(362.2556, device='cuda:0')
episode: 239 training return: tensor(263.3769, device='cuda:0')
epoch: 60 test_true_pfm: 3420.5708343177807 sim_pfm: 315.58503723804216
episode: 240 training return: tensor(303.4649, device='cuda:0')
episode: 241 training return: tensor(257.2852, device='cuda:0')
episode: 242 training return: tensor(119.4871, device='cuda:0')
episode: 243 training return: tensor(225.1852, device='cuda:0')
epoch: 61 test_true_pfm: 3427.50085006536 sim_pfm: 322.28075654112035
episode: 244 training return: tensor(229.7889, device='cuda:0')
episode: 245 training return: tensor(282.6793, device='cuda:0')
episode: 246 training return: tensor(219.1164, device='cuda:0')
episode: 247 training return: tensor(-182.4694, device='cuda:0')
epoch: 62 test_true_pfm: 3433.4104836527536 sim_pfm: 270.25347319668316
episode: 248 training return: tensor(322.6351, device='cuda:0')
episode: 249 training return: tensor(305.0115, device='cuda:0')
episode: 250 training return: tensor(248.2135, device='cuda:0')
episode: 251 training return: tensor(248.5361, device='cuda:0')
epoch: 63 test_true_pfm: 3426.258428210203 sim_pfm: 183.4293288026723
episode: 252 training return: tensor(287.7870, device='cuda:0')
episode: 253 training return: tensor(143.5259, device='cuda:0')
episode: 254 training return: tensor(130.8745, device='cuda:0')
episode: 255 training return: tensor(238.0626, device='cuda:0')
epoch: 64 test_true_pfm: 3383.990397286963 sim_pfm: 270.79593196535524
episode: 256 training return: tensor(190.9714, device='cuda:0')
episode: 257 training return: tensor(224.7083, device='cuda:0')
episode: 258 training return: tensor(-56.3851, device='cuda:0')
episode: 259 training return: tensor(123.3375, device='cuda:0')
epoch: 65 test_true_pfm: 3482.9430432466725 sim_pfm: 245.7328188000538
episode: 260 training return: tensor(224.7744, device='cuda:0')
episode: 261 training return: tensor(292.5320, device='cuda:0')
episode: 262 training return: tensor(304.9147, device='cuda:0')
episode: 263 training return: tensor(206.1013, device='cuda:0')
epoch: 66 test_true_pfm: 3466.2331845741214 sim_pfm: 205.12481769757383
episode: 264 training return: tensor(280.2851, device='cuda:0')
episode: 265 training return: tensor(324.2333, device='cuda:0')
episode: 266 training return: tensor(265.6965, device='cuda:0')
episode: 267 training return: tensor(303.4285, device='cuda:0')
epoch: 67 test_true_pfm: 3364.725982069244 sim_pfm: 282.91436980105937
episode: 268 training return: tensor(285.8027, device='cuda:0')
episode: 269 training return: tensor(257.3739, device='cuda:0')
episode: 270 training return: tensor(297.3298, device='cuda:0')
episode: 271 training return: tensor(241.7780, device='cuda:0')
epoch: 68 test_true_pfm: 3483.2782319244197 sim_pfm: 280.23273622613243
episode: 272 training return: tensor(347.7964, device='cuda:0')
episode: 273 training return: tensor(247.2784, device='cuda:0')
episode: 274 training return: tensor(276.3341, device='cuda:0')
episode: 275 training return: tensor(259.0888, device='cuda:0')
epoch: 69 test_true_pfm: 3479.500253225117 sim_pfm: 244.71189657360082
episode: 276 training return: tensor(267.7719, device='cuda:0')
episode: 277 training return: tensor(244.3064, device='cuda:0')
episode: 278 training return: tensor(223.0128, device='cuda:0')
episode: 279 training return: tensor(225.0905, device='cuda:0')
epoch: 70 test_true_pfm: 3085.823542523672 sim_pfm: 282.6782396153042
episode: 280 training return: tensor(225.9888, device='cuda:0')
episode: 281 training return: tensor(292.5481, device='cuda:0')
episode: 282 training return: tensor(251.0438, device='cuda:0')
episode: 283 training return: tensor(257.9731, device='cuda:0')
epoch: 71 test_true_pfm: 3429.8844390423233 sim_pfm: 252.9683498358548
episode: 284 training return: tensor(310.4470, device='cuda:0')
episode: 285 training return: tensor(256.9006, device='cuda:0')
episode: 286 training return: tensor(277.4467, device='cuda:0')
episode: 287 training return: tensor(237.7232, device='cuda:0')
epoch: 72 test_true_pfm: 3382.6723946161474 sim_pfm: 267.20750614972593
episode: 288 training return: tensor(377.8154, device='cuda:0')
episode: 289 training return: tensor(304.3931, device='cuda:0')
episode: 290 training return: tensor(283.0618, device='cuda:0')
episode: 291 training return: tensor(289.4921, device='cuda:0')
epoch: 73 test_true_pfm: 3453.128524637603 sim_pfm: 294.71001439921866
episode: 292 training return: tensor(262.7846, device='cuda:0')
episode: 293 training return: tensor(271.5529, device='cuda:0')
episode: 294 training return: tensor(18.1849, device='cuda:0')
episode: 295 training return: tensor(341.3094, device='cuda:0')
epoch: 74 test_true_pfm: 3452.448048468443 sim_pfm: 218.99391331873872
episode: 296 training return: tensor(243.1144, device='cuda:0')
episode: 297 training return: tensor(-319.6720, device='cuda:0')
episode: 298 training return: tensor(294.8792, device='cuda:0')
episode: 299 training return: tensor(315.1765, device='cuda:0')
epoch: 75 test_true_pfm: 3431.4250346820318 sim_pfm: 204.15982597550223
episode: 300 training return: tensor(233.3749, device='cuda:0')
episode: 301 training return: tensor(295.7344, device='cuda:0')
episode: 302 training return: tensor(277.1881, device='cuda:0')
episode: 303 training return: tensor(344.3392, device='cuda:0')
epoch: 76 test_true_pfm: 3400.8558345417023 sim_pfm: 142.1960283363393
episode: 304 training return: tensor(294.8784, device='cuda:0')
episode: 305 training return: tensor(256.6810, device='cuda:0')
episode: 306 training return: tensor(257.6394, device='cuda:0')
episode: 307 training return: tensor(241.0589, device='cuda:0')
epoch: 77 test_true_pfm: 3414.4816992998585 sim_pfm: 237.60645318008028
episode: 308 training return: tensor(189.0407, device='cuda:0')
episode: 309 training return: tensor(275.7996, device='cuda:0')
episode: 310 training return: tensor(351.2421, device='cuda:0')
episode: 311 training return: tensor(276.8444, device='cuda:0')
epoch: 78 test_true_pfm: 3430.549955809153 sim_pfm: 257.45375786796404
episode: 312 training return: tensor(292.1950, device='cuda:0')
episode: 313 training return: tensor(317.4850, device='cuda:0')
episode: 314 training return: tensor(303.3958, device='cuda:0')
episode: 315 training return: tensor(244.9319, device='cuda:0')
epoch: 79 test_true_pfm: 3446.8353606053383 sim_pfm: 262.06910595794517
episode: 316 training return: tensor(285.0169, device='cuda:0')
episode: 317 training return: tensor(-305.7270, device='cuda:0')
episode: 318 training return: tensor(318.2079, device='cuda:0')
episode: 319 training return: tensor(287.2577, device='cuda:0')
epoch: 80 test_true_pfm: 3469.8928567959815 sim_pfm: 293.50343707127223
episode: 320 training return: tensor(248.5979, device='cuda:0')
episode: 321 training return: tensor(222.5883, device='cuda:0')
episode: 322 training return: tensor(215.1381, device='cuda:0')
episode: 323 training return: tensor(352.8480, device='cuda:0')
epoch: 81 test_true_pfm: 3458.616445265785 sim_pfm: 260.724373545779
episode: 324 training return: tensor(313.3603, device='cuda:0')
episode: 325 training return: tensor(219.1532, device='cuda:0')
episode: 326 training return: tensor(272.9325, device='cuda:0')
episode: 327 training return: tensor(361.7387, device='cuda:0')
epoch: 82 test_true_pfm: 3430.8750432590973 sim_pfm: 208.0487280986466
episode: 328 training return: tensor(255.6172, device='cuda:0')
episode: 329 training return: tensor(314.8173, device='cuda:0')
episode: 330 training return: tensor(187.5150, device='cuda:0')
episode: 331 training return: tensor(284.9213, device='cuda:0')
epoch: 83 test_true_pfm: 3293.862840491582 sim_pfm: 304.09491313128575
episode: 332 training return: tensor(335.0667, device='cuda:0')
episode: 333 training return: tensor(216.3614, device='cuda:0')
episode: 334 training return: tensor(280.1976, device='cuda:0')
episode: 335 training return: tensor(262.3660, device='cuda:0')
epoch: 84 test_true_pfm: 3475.2183869559135 sim_pfm: 275.8304768404535
episode: 336 training return: tensor(268.9875, device='cuda:0')
episode: 337 training return: tensor(266.4073, device='cuda:0')
episode: 338 training return: tensor(234.2184, device='cuda:0')
episode: 339 training return: tensor(253.8575, device='cuda:0')
epoch: 85 test_true_pfm: 3472.432375286287 sim_pfm: 268.30889691352303
episode: 340 training return: tensor(210.9974, device='cuda:0')
episode: 341 training return: tensor(389.3763, device='cuda:0')
episode: 342 training return: tensor(261.6078, device='cuda:0')
episode: 343 training return: tensor(297.9153, device='cuda:0')
epoch: 86 test_true_pfm: 3524.278041641856 sim_pfm: 234.67677317800312
episode: 344 training return: tensor(215.2564, device='cuda:0')
episode: 345 training return: tensor(285.8221, device='cuda:0')
episode: 346 training return: tensor(272.5255, device='cuda:0')
episode: 347 training return: tensor(247.8688, device='cuda:0')
epoch: 87 test_true_pfm: 3477.2112862763 sim_pfm: 341.2223887699268
episode: 348 training return: tensor(203.6539, device='cuda:0')
episode: 349 training return: tensor(305.3311, device='cuda:0')
episode: 350 training return: tensor(270.2031, device='cuda:0')
episode: 351 training return: tensor(238.9942, device='cuda:0')
epoch: 88 test_true_pfm: 3479.2567866638224 sim_pfm: 305.33821848680964
episode: 352 training return: tensor(258.2298, device='cuda:0')
episode: 353 training return: tensor(359.2444, device='cuda:0')
episode: 354 training return: tensor(290.1476, device='cuda:0')
episode: 355 training return: tensor(243.2879, device='cuda:0')
epoch: 89 test_true_pfm: 3496.7303673335005 sim_pfm: 365.0486221872852
episode: 356 training return: tensor(272.5994, device='cuda:0')
episode: 357 training return: tensor(293.3486, device='cuda:0')
episode: 358 training return: tensor(187.7157, device='cuda:0')
episode: 359 training return: tensor(118.7570, device='cuda:0')
epoch: 90 test_true_pfm: 3510.2697667285124 sim_pfm: 216.52487955838055
episode: 360 training return: tensor(211.4074, device='cuda:0')
episode: 361 training return: tensor(267.6428, device='cuda:0')
episode: 362 training return: tensor(277.8542, device='cuda:0')
episode: 363 training return: tensor(238.4350, device='cuda:0')
epoch: 91 test_true_pfm: 3277.8403185407465 sim_pfm: 259.48167512664804
episode: 364 training return: tensor(178.7209, device='cuda:0')
episode: 365 training return: tensor(254.7456, device='cuda:0')
episode: 366 training return: tensor(158.4396, device='cuda:0')
episode: 367 training return: tensor(281.1794, device='cuda:0')
epoch: 92 test_true_pfm: 3396.9909246792017 sim_pfm: 223.97175220169206
episode: 368 training return: tensor(33.1089, device='cuda:0')
episode: 369 training return: tensor(301.9514, device='cuda:0')
episode: 370 training return: tensor(283.2747, device='cuda:0')
episode: 371 training return: tensor(277.3674, device='cuda:0')
epoch: 93 test_true_pfm: 3418.7593519115167 sim_pfm: 208.15939930245318
episode: 372 training return: tensor(298.7882, device='cuda:0')
episode: 373 training return: tensor(290.4436, device='cuda:0')
episode: 374 training return: tensor(243.1043, device='cuda:0')
episode: 375 training return: tensor(201.9585, device='cuda:0')
epoch: 94 test_true_pfm: 3519.5348707173725 sim_pfm: 273.78341218685574
episode: 376 training return: tensor(304.0302, device='cuda:0')
episode: 377 training return: tensor(190.1804, device='cuda:0')
episode: 378 training return: tensor(299.3664, device='cuda:0')
episode: 379 training return: tensor(324.0363, device='cuda:0')
epoch: 95 test_true_pfm: 3490.0974258393035 sim_pfm: 277.3412436464375
episode: 380 training return: tensor(262.5860, device='cuda:0')
episode: 381 training return: tensor(213.2447, device='cuda:0')
episode: 382 training return: tensor(324.8376, device='cuda:0')
episode: 383 training return: tensor(-46.5307, device='cuda:0')
epoch: 96 test_true_pfm: 3439.339796298633 sim_pfm: 237.46749845255786
episode: 384 training return: tensor(268.8603, device='cuda:0')
episode: 385 training return: tensor(318.9421, device='cuda:0')
episode: 386 training return: tensor(127.9203, device='cuda:0')
episode: 387 training return: tensor(232.7260, device='cuda:0')
epoch: 97 test_true_pfm: 3503.858569647546 sim_pfm: 244.36659262125613
episode: 388 training return: tensor(337.2368, device='cuda:0')
episode: 389 training return: tensor(256.0797, device='cuda:0')
episode: 390 training return: tensor(-18.5363, device='cuda:0')
episode: 391 training return: tensor(269.0678, device='cuda:0')
epoch: 98 test_true_pfm: 3499.7538265393887 sim_pfm: 310.8408256749196
episode: 392 training return: tensor(269.4546, device='cuda:0')
episode: 393 training return: tensor(216.3200, device='cuda:0')
episode: 394 training return: tensor(146.8228, device='cuda:0')
episode: 395 training return: tensor(124.2718, device='cuda:0')
epoch: 99 test_true_pfm: 3424.5915590793793 sim_pfm: 343.5192082040788
episode: 396 training return: tensor(257.6157, device='cuda:0')
episode: 397 training return: tensor(278.2946, device='cuda:0')
episode: 398 training return: tensor(234.4818, device='cuda:0')
episode: 399 training return: tensor(276.3114, device='cuda:0')
epoch: 100 test_true_pfm: 3532.753381243212 sim_pfm: 284.44735305424547
episode: 400 training return: tensor(316.2095, device='cuda:0')
episode: 401 training return: tensor(304.9211, device='cuda:0')
episode: 402 training return: tensor(111.8817, device='cuda:0')
episode: 403 training return: tensor(263.5135, device='cuda:0')
epoch: 101 test_true_pfm: 3505.6993727508147 sim_pfm: 295.46301873963483
episode: 404 training return: tensor(268.8413, device='cuda:0')
episode: 405 training return: tensor(230.4070, device='cuda:0')
episode: 406 training return: tensor(-337.2260, device='cuda:0')
episode: 407 training return: tensor(249.1197, device='cuda:0')
epoch: 102 test_true_pfm: 3485.2921620038255 sim_pfm: 263.75624993018573
episode: 408 training return: tensor(196.0915, device='cuda:0')
episode: 409 training return: tensor(288.7679, device='cuda:0')
episode: 410 training return: tensor(290.8179, device='cuda:0')
episode: 411 training return: tensor(319.6521, device='cuda:0')
epoch: 103 test_true_pfm: 3442.5477209236196 sim_pfm: 304.03555084367207
episode: 412 training return: tensor(347.5180, device='cuda:0')
episode: 413 training return: tensor(351.0083, device='cuda:0')
episode: 414 training return: tensor(253.9453, device='cuda:0')
episode: 415 training return: tensor(215.0946, device='cuda:0')
epoch: 104 test_true_pfm: 3395.824047806369 sim_pfm: 247.38271474220286
episode: 416 training return: tensor(-283.9776, device='cuda:0')
episode: 417 training return: tensor(268.9860, device='cuda:0')
episode: 418 training return: tensor(219.3327, device='cuda:0')
episode: 419 training return: tensor(233.8943, device='cuda:0')
epoch: 105 test_true_pfm: 3434.4364744695417 sim_pfm: 281.7560493854592
episode: 420 training return: tensor(145.3869, device='cuda:0')
episode: 421 training return: tensor(211.2997, device='cuda:0')
episode: 422 training return: tensor(240.1210, device='cuda:0')
episode: 423 training return: tensor(223.7437, device='cuda:0')
epoch: 106 test_true_pfm: 3434.8420433998654 sim_pfm: 266.83367689690203
episode: 424 training return: tensor(263.9991, device='cuda:0')
episode: 425 training return: tensor(166.7531, device='cuda:0')
episode: 426 training return: tensor(275.6201, device='cuda:0')
episode: 427 training return: tensor(243.2485, device='cuda:0')
epoch: 107 test_true_pfm: 3512.9655017935597 sim_pfm: 285.1565010912309
episode: 428 training return: tensor(318.0827, device='cuda:0')
episode: 429 training return: tensor(175.2998, device='cuda:0')
episode: 430 training return: tensor(278.7993, device='cuda:0')
episode: 431 training return: tensor(288.2551, device='cuda:0')
epoch: 108 test_true_pfm: 2875.5571349473626 sim_pfm: 319.51388731993694
episode: 432 training return: tensor(285.9203, device='cuda:0')
episode: 433 training return: tensor(239.3942, device='cuda:0')
episode: 434 training return: tensor(260.8828, device='cuda:0')
episode: 435 training return: tensor(210.9337, device='cuda:0')
epoch: 109 test_true_pfm: 3486.672162481551 sim_pfm: 290.3281454181221
episode: 436 training return: tensor(270.2933, device='cuda:0')
episode: 437 training return: tensor(353.5645, device='cuda:0')
episode: 438 training return: tensor(16.7773, device='cuda:0')
episode: 439 training return: tensor(307.0226, device='cuda:0')
epoch: 110 test_true_pfm: 3475.7615296430436 sim_pfm: 280.92271051074687
episode: 440 training return: tensor(335.1288, device='cuda:0')
episode: 441 training return: tensor(339.6570, device='cuda:0')
episode: 442 training return: tensor(275.0167, device='cuda:0')
episode: 443 training return: tensor(191.2137, device='cuda:0')
epoch: 111 test_true_pfm: 3437.4744348672443 sim_pfm: 250.77930368242474
episode: 444 training return: tensor(263.9454, device='cuda:0')
episode: 445 training return: tensor(222.4647, device='cuda:0')
episode: 446 training return: tensor(287.9906, device='cuda:0')
episode: 447 training return: tensor(260.5089, device='cuda:0')
epoch: 112 test_true_pfm: 3458.797997891473 sim_pfm: 314.6654662235621
episode: 448 training return: tensor(262.7615, device='cuda:0')
episode: 449 training return: tensor(287.5593, device='cuda:0')
episode: 450 training return: tensor(203.3275, device='cuda:0')
episode: 451 training return: tensor(302.1966, device='cuda:0')
epoch: 113 test_true_pfm: 3390.373373440465 sim_pfm: 305.6919431770705
episode: 452 training return: tensor(250.2107, device='cuda:0')
episode: 453 training return: tensor(294.0758, device='cuda:0')
episode: 454 training return: tensor(327.6104, device='cuda:0')
episode: 455 training return: tensor(256.6914, device='cuda:0')
epoch: 114 test_true_pfm: 3448.874602001219 sim_pfm: 266.5134021509245
episode: 456 training return: tensor(348.0187, device='cuda:0')
episode: 457 training return: tensor(249.5548, device='cuda:0')
episode: 458 training return: tensor(242.2370, device='cuda:0')
episode: 459 training return: tensor(260.5273, device='cuda:0')
epoch: 115 test_true_pfm: 3495.4377504490476 sim_pfm: 258.83685900261236
episode: 460 training return: tensor(246.9653, device='cuda:0')
episode: 461 training return: tensor(232.6102, device='cuda:0')
episode: 462 training return: tensor(309.6375, device='cuda:0')
episode: 463 training return: tensor(192.1654, device='cuda:0')
epoch: 116 test_true_pfm: 3474.989379432364 sim_pfm: 239.74537747504655
episode: 464 training return: tensor(216.3384, device='cuda:0')
episode: 465 training return: tensor(196.8665, device='cuda:0')
episode: 466 training return: tensor(180.1693, device='cuda:0')
episode: 467 training return: tensor(324.7256, device='cuda:0')
epoch: 117 test_true_pfm: 3437.6686377832107 sim_pfm: 268.3465876631478
episode: 468 training return: tensor(325.4149, device='cuda:0')
episode: 469 training return: tensor(198.9654, device='cuda:0')
episode: 470 training return: tensor(250.1229, device='cuda:0')
episode: 471 training return: tensor(304.0329, device='cuda:0')
epoch: 118 test_true_pfm: 3397.0894382984734 sim_pfm: 292.6847113600331
episode: 472 training return: tensor(65.4404, device='cuda:0')
episode: 473 training return: tensor(269.8046, device='cuda:0')
episode: 474 training return: tensor(329.8212, device='cuda:0')
episode: 475 training return: tensor(205.2178, device='cuda:0')
epoch: 119 test_true_pfm: 3231.078486404563 sim_pfm: 266.54355505675386
episode: 476 training return: tensor(246.1503, device='cuda:0')
episode: 477 training return: tensor(244.2002, device='cuda:0')
episode: 478 training return: tensor(246.6795, device='cuda:0')
episode: 479 training return: tensor(298.3147, device='cuda:0')
epoch: 120 test_true_pfm: 3458.3243003117113 sim_pfm: 271.13946142127196
episode: 480 training return: tensor(225.4674, device='cuda:0')
episode: 481 training return: tensor(350.3350, device='cuda:0')
episode: 482 training return: tensor(219.7032, device='cuda:0')
episode: 483 training return: tensor(208.5305, device='cuda:0')
epoch: 121 test_true_pfm: 3490.1008161557515 sim_pfm: 297.1067725362761
episode: 484 training return: tensor(-8.0370, device='cuda:0')
episode: 485 training return: tensor(160.9109, device='cuda:0')
episode: 486 training return: tensor(216.9807, device='cuda:0')
episode: 487 training return: tensor(62.2906, device='cuda:0')
epoch: 122 test_true_pfm: 3455.452073787681 sim_pfm: 298.50260627816897
episode: 488 training return: tensor(196.9318, device='cuda:0')
episode: 489 training return: tensor(168.6066, device='cuda:0')
episode: 490 training return: tensor(275.4433, device='cuda:0')
episode: 491 training return: tensor(228.6539, device='cuda:0')
epoch: 123 test_true_pfm: 3054.864574627251 sim_pfm: 220.33032520848792
episode: 492 training return: tensor(241.7132, device='cuda:0')
episode: 493 training return: tensor(246.2388, device='cuda:0')
episode: 494 training return: tensor(272.9274, device='cuda:0')
episode: 495 training return: tensor(165.6559, device='cuda:0')
epoch: 124 test_true_pfm: 3460.9580259430863 sim_pfm: 185.3723167050921
episode: 496 training return: tensor(217.4509, device='cuda:0')
episode: 497 training return: tensor(208.1837, device='cuda:0')
episode: 498 training return: tensor(269.2700, device='cuda:0')
episode: 499 training return: tensor(221.9106, device='cuda:0')
epoch: 125 test_true_pfm: 3459.003670127069 sim_pfm: 281.0564105558151
episode: 500 training return: tensor(230.9718, device='cuda:0')
episode: 501 training return: tensor(317.1124, device='cuda:0')
episode: 502 training return: tensor(315.7530, device='cuda:0')
episode: 503 training return: tensor(-165.7828, device='cuda:0')
epoch: 126 test_true_pfm: 3493.253236410661 sim_pfm: 227.27601098103332
episode: 504 training return: tensor(248.9662, device='cuda:0')
episode: 505 training return: tensor(197.5237, device='cuda:0')
episode: 506 training return: tensor(226.1047, device='cuda:0')
episode: 507 training return: tensor(263.8948, device='cuda:0')
epoch: 127 test_true_pfm: 3459.1623450207476 sim_pfm: 299.0913876870375
episode: 508 training return: tensor(-35.8341, device='cuda:0')
episode: 509 training return: tensor(338.4803, device='cuda:0')
episode: 510 training return: tensor(215.6570, device='cuda:0')
episode: 511 training return: tensor(78.0449, device='cuda:0')
epoch: 128 test_true_pfm: 3445.2653927461124 sim_pfm: 274.704276370001
episode: 512 training return: tensor(281.7941, device='cuda:0')
episode: 513 training return: tensor(220.8679, device='cuda:0')
episode: 514 training return: tensor(333.1072, device='cuda:0')
episode: 515 training return: tensor(265.0899, device='cuda:0')
epoch: 129 test_true_pfm: 3374.469187071642 sim_pfm: 193.00996976485476
episode: 516 training return: tensor(305.8912, device='cuda:0')
episode: 517 training return: tensor(295.1658, device='cuda:0')
episode: 518 training return: tensor(117.7765, device='cuda:0')
episode: 519 training return: tensor(267.1647, device='cuda:0')
epoch: 130 test_true_pfm: 3466.792922801004 sim_pfm: 314.3827379696692
episode: 520 training return: tensor(275.2190, device='cuda:0')
episode: 521 training return: tensor(224.4079, device='cuda:0')
episode: 522 training return: tensor(164.0143, device='cuda:0')
episode: 523 training return: tensor(306.0291, device='cuda:0')
epoch: 131 test_true_pfm: 3368.6555046423314 sim_pfm: 213.34352788120546
episode: 524 training return: tensor(263.2037, device='cuda:0')
episode: 525 training return: tensor(262.6249, device='cuda:0')
episode: 526 training return: tensor(269.4125, device='cuda:0')
episode: 527 training return: tensor(275.8353, device='cuda:0')
epoch: 132 test_true_pfm: 3423.901095432778 sim_pfm: 225.45821485103806
episode: 528 training return: tensor(-284.5205, device='cuda:0')
episode: 529 training return: tensor(265.0222, device='cuda:0')
episode: 530 training return: tensor(188.5965, device='cuda:0')
episode: 531 training return: tensor(333.9472, device='cuda:0')
epoch: 133 test_true_pfm: 3516.2219350941227 sim_pfm: 251.4204825258736
episode: 532 training return: tensor(226.4306, device='cuda:0')
episode: 533 training return: tensor(169.5972, device='cuda:0')
episode: 534 training return: tensor(255.3918, device='cuda:0')
episode: 535 training return: tensor(193.7035, device='cuda:0')
epoch: 134 test_true_pfm: 3474.507246585707 sim_pfm: 225.04168432948063
episode: 536 training return: tensor(265.0609, device='cuda:0')
episode: 537 training return: tensor(278.3849, device='cuda:0')
episode: 538 training return: tensor(322.0567, device='cuda:0')
episode: 539 training return: tensor(202.3854, device='cuda:0')
epoch: 135 test_true_pfm: 3181.500972911362 sim_pfm: 126.83222077938262
episode: 540 training return: tensor(267.9317, device='cuda:0')
episode: 541 training return: tensor(-4.9243, device='cuda:0')
episode: 542 training return: tensor(241.4168, device='cuda:0')
episode: 543 training return: tensor(279.3090, device='cuda:0')
epoch: 136 test_true_pfm: 3071.787878737552 sim_pfm: 207.32388862987864
episode: 544 training return: tensor(211.1231, device='cuda:0')
episode: 545 training return: tensor(15.1349, device='cuda:0')
episode: 546 training return: tensor(243.2524, device='cuda:0')
episode: 547 training return: tensor(218.3040, device='cuda:0')
epoch: 137 test_true_pfm: 3456.343614365156 sim_pfm: 267.67712123094435
episode: 548 training return: tensor(163.1612, device='cuda:0')
episode: 549 training return: tensor(280.4652, device='cuda:0')
episode: 550 training return: tensor(262.3890, device='cuda:0')
episode: 551 training return: tensor(326.5950, device='cuda:0')
epoch: 138 test_true_pfm: 3344.6013221948256 sim_pfm: 240.3098304455149
episode: 552 training return: tensor(199.7677, device='cuda:0')
episode: 553 training return: tensor(244.3398, device='cuda:0')
episode: 554 training return: tensor(235.4175, device='cuda:0')
episode: 555 training return: tensor(314.4475, device='cuda:0')
epoch: 139 test_true_pfm: 3484.4185457252865 sim_pfm: 305.37576337570016
episode: 556 training return: tensor(239.7854, device='cuda:0')
episode: 557 training return: tensor(306.9562, device='cuda:0')
episode: 558 training return: tensor(279.1408, device='cuda:0')
episode: 559 training return: tensor(253.8916, device='cuda:0')
epoch: 140 test_true_pfm: 3513.868207419881 sim_pfm: 236.25741950415735
episode: 560 training return: tensor(264.2100, device='cuda:0')
episode: 561 training return: tensor(350.1203, device='cuda:0')
episode: 562 training return: tensor(246.7557, device='cuda:0')
episode: 563 training return: tensor(385.6052, device='cuda:0')
epoch: 141 test_true_pfm: 3460.8902140984046 sim_pfm: 273.83248281055904
episode: 564 training return: tensor(116.6409, device='cuda:0')
episode: 565 training return: tensor(340.0182, device='cuda:0')
episode: 566 training return: tensor(202.4631, device='cuda:0')
episode: 567 training return: tensor(163.8901, device='cuda:0')
epoch: 142 test_true_pfm: 3455.5073465930714 sim_pfm: 281.7742723588405
episode: 568 training return: tensor(287.5792, device='cuda:0')
episode: 569 training return: tensor(243.4929, device='cuda:0')
episode: 570 training return: tensor(267.5900, device='cuda:0')
episode: 571 training return: tensor(326.6514, device='cuda:0')
epoch: 143 test_true_pfm: 3459.7865810477015 sim_pfm: 312.08716359109775
episode: 572 training return: tensor(193.1048, device='cuda:0')
episode: 573 training return: tensor(268.1314, device='cuda:0')
episode: 574 training return: tensor(11.9082, device='cuda:0')
episode: 575 training return: tensor(-108.5475, device='cuda:0')
epoch: 144 test_true_pfm: 3462.0938743791557 sim_pfm: 251.63788969345237
episode: 576 training return: tensor(261.0510, device='cuda:0')
episode: 577 training return: tensor(211.9805, device='cuda:0')
episode: 578 training return: tensor(222.6204, device='cuda:0')
episode: 579 training return: tensor(163.8522, device='cuda:0')
epoch: 145 test_true_pfm: 3435.9103729978765 sim_pfm: 242.550659371928
episode: 580 training return: tensor(240.2758, device='cuda:0')
episode: 581 training return: tensor(196.8537, device='cuda:0')
episode: 582 training return: tensor(311.8679, device='cuda:0')
episode: 583 training return: tensor(255.1797, device='cuda:0')
epoch: 146 test_true_pfm: 3386.7857534226914 sim_pfm: 308.56354462368955
episode: 584 training return: tensor(254.0049, device='cuda:0')
episode: 585 training return: tensor(174.5400, device='cuda:0')
episode: 586 training return: tensor(335.9848, device='cuda:0')
episode: 587 training return: tensor(261.7224, device='cuda:0')
epoch: 147 test_true_pfm: 3458.3650383905333 sim_pfm: 259.95015166477725
episode: 588 training return: tensor(190.1691, device='cuda:0')
episode: 589 training return: tensor(183.2661, device='cuda:0')
episode: 590 training return: tensor(242.0676, device='cuda:0')
episode: 591 training return: tensor(205.3459, device='cuda:0')
epoch: 148 test_true_pfm: 3365.757995880274 sim_pfm: 215.95211935895108
episode: 592 training return: tensor(274.9852, device='cuda:0')
episode: 593 training return: tensor(190.8364, device='cuda:0')
episode: 594 training return: tensor(149.4374, device='cuda:0')
episode: 595 training return: tensor(279.0547, device='cuda:0')
epoch: 149 test_true_pfm: 3398.8819034558796 sim_pfm: 234.27849830140863
episode: 596 training return: tensor(239.3657, device='cuda:0')
episode: 597 training return: tensor(291.2854, device='cuda:0')
episode: 598 training return: tensor(203.5149, device='cuda:0')
episode: 599 training return: tensor(196.8995, device='cuda:0')
epoch: 150 test_true_pfm: 3458.990891576422 sim_pfm: 265.83185768369003
