['--env', 'Hopper-v3']
epoch: 0 training_loss 0.21700624048709868 test_loss: 0.1541895866394043
epoch: 1 training_loss 0.13375997010618448 test_loss: 0.19898428916931152
epoch: 2 training_loss 0.1462124658934772 test_loss: 0.1536577820777893
epoch: 3 training_loss 0.12258364982903004 test_loss: 0.16898560523986816
epoch: 4 training_loss 0.12519833374768496 test_loss: 0.137709903717041
epoch: 5 training_loss 0.12261076338589191 test_loss: 0.0978829324245453
epoch: 6 training_loss 0.12525700110942126 test_loss: 0.11747524738311768
epoch: 7 training_loss 0.12608420591801406 test_loss: 0.10696154832839966
epoch: 8 training_loss 0.11958971194922924 test_loss: 0.11337392330169678
epoch: 9 training_loss 0.12232679870910942 test_loss: 0.11565653085708619
epoch: 10 training_loss 0.12300557786598802 test_loss: 0.1446205973625183
epoch: 11 training_loss 0.12390425715595484 test_loss: 0.12112349271774292
epoch: 12 training_loss 0.12338467543944716 test_loss: 0.11605894565582275
epoch: 13 training_loss 0.1121549267694354 test_loss: 0.1099930763244629
epoch: 14 training_loss 0.10681459065526724 test_loss: 0.11945401430130005
epoch: 15 training_loss 0.12785546008497475 test_loss: 0.10820654630661011
epoch: 16 training_loss 0.11789300322532653 test_loss: 0.1266251802444458
epoch: 17 training_loss 0.1135365603864193 test_loss: 0.10979859828948975
epoch: 18 training_loss 0.11827773701399565 test_loss: 0.10965243577957154
epoch: 19 training_loss 0.11283288087695836 test_loss: 0.11439352035522461
epoch: 20 training_loss 0.11252702370285989 test_loss: 0.12409694194793701
epoch: 21 training_loss 0.12097386686131358 test_loss: 0.10736119747161865
epoch: 22 training_loss 0.1100967837125063 test_loss: 0.10385531187057495
epoch: 23 training_loss 0.11467426566407085 test_loss: 0.12078566551208496
epoch: 24 training_loss 0.1128409218788147 test_loss: 0.11365875005722045
epoch: 25 training_loss 0.10803547941148281 test_loss: 0.10310685634613037
epoch: 26 training_loss 0.11172680888324976 test_loss: 0.11909832954406738
epoch: 27 training_loss 0.10896425221115351 test_loss: 0.14012354612350464
epoch: 28 training_loss 0.11071206819266081 test_loss: 0.10992090702056885
epoch: 29 training_loss 0.10579610269516707 test_loss: 0.0834525167942047
epoch: 30 training_loss 0.10899069931358099 test_loss: 0.1029820203781128
epoch: 31 training_loss 0.10875922989100217 test_loss: 0.1092065453529358
epoch: 32 training_loss 0.10339189492166043 test_loss: 0.10206189155578613
epoch: 33 training_loss 0.10620966367423534 test_loss: 0.1157957911491394
epoch: 34 training_loss 0.10770849445834756 test_loss: 0.09991548657417297
epoch: 35 training_loss 0.11238607984036207 test_loss: 0.124534010887146
epoch: 36 training_loss 0.11305229002609848 test_loss: 0.13957557678222657
epoch: 37 training_loss 0.10895286463201045 test_loss: 0.12414754629135132
epoch: 38 training_loss 0.11361925637349486 test_loss: 0.10114862918853759
epoch: 39 training_loss 0.10772211795672774 test_loss: 0.13476159572601318
epoch: 40 training_loss 0.11333826083689928 test_loss: 0.09758124947547912
epoch: 41 training_loss 0.11502011340111494 test_loss: 0.09138768315315246
epoch: 42 training_loss 0.10713951114565134 test_loss: 0.10347310304641724
epoch: 43 training_loss 0.10536080250516534 test_loss: 0.09188385605812073
epoch: 44 training_loss 0.10931030374020338 test_loss: 0.13218443393707274
epoch: 45 training_loss 0.10497167788445949 test_loss: 0.11192607879638672
epoch: 46 training_loss 0.10513735985383392 test_loss: 0.11251128911972046
epoch: 47 training_loss 0.11239769212901592 test_loss: 0.11487362384796143
epoch: 48 training_loss 0.10978053890168667 test_loss: 0.09978867769241333
epoch: 49 training_loss 0.11387106366455554 test_loss: 0.11384454965591431
epoch: 50 training_loss 0.11434374995529652 test_loss: 0.11836620569229125
epoch: 51 training_loss 0.1052386300638318 test_loss: 0.10219149589538574
epoch: 52 training_loss 0.10984237499535084 test_loss: 0.11547377109527587
epoch: 53 training_loss 0.11298054296523333 test_loss: 0.10895735025405884
epoch: 54 training_loss 0.1067794989142567 test_loss: 0.10348067283630372
epoch: 55 training_loss 0.11223533872514962 test_loss: 0.09412211775779725
epoch: 56 training_loss 0.10755791287869215 test_loss: 0.10563961267471314
epoch: 57 training_loss 0.10935820018872619 test_loss: 0.10816125869750977
epoch: 58 training_loss 0.10597649445757269 test_loss: 0.13415738344192504
epoch: 59 training_loss 0.11189224652945995 test_loss: 0.12002289295196533
epoch: 60 training_loss 0.1045189144462347 test_loss: 0.11432547569274902
epoch: 61 training_loss 0.11746276888996363 test_loss: 0.13301197290420533
epoch: 62 training_loss 0.11684466168284416 test_loss: 0.10554535388946533
epoch: 63 training_loss 0.10754890725016594 test_loss: 0.09000973701477051
epoch: 64 training_loss 0.10638424295932054 test_loss: 0.11420456171035767
epoch: 65 training_loss 0.1042614401318133 test_loss: 0.10835901498794556
epoch: 66 training_loss 0.10695771258324385 test_loss: 0.14382601976394654
epoch: 67 training_loss 0.11078612070530652 test_loss: 0.10698565244674682
epoch: 68 training_loss 0.107184954918921 test_loss: 0.12891089916229248
epoch: 69 training_loss 0.1074753313139081 test_loss: 0.10177030563354492
epoch: 70 training_loss 0.10983666874468327 test_loss: 0.1134062647819519
epoch: 71 training_loss 0.10847321044653654 test_loss: 0.09531348347663879
epoch: 72 training_loss 0.11099343301728368 test_loss: 0.12470886707305909
epoch: 73 training_loss 0.10541670860722661 test_loss: 0.0924821674823761
epoch: 74 training_loss 0.10874843137338758 test_loss: 0.09721988439559937
epoch: 75 training_loss 0.11173623811453581 test_loss: 0.07938860058784485
epoch: 76 training_loss 0.1076547572016716 test_loss: 0.11177705526351929
epoch: 77 training_loss 0.11518886931240559 test_loss: 0.11372013092041015
epoch: 78 training_loss 0.11162894155830144 test_loss: 0.10965607166290284
epoch: 79 training_loss 0.10965955909341574 test_loss: 0.11185740232467652
epoch: 80 training_loss 0.10479575244709849 test_loss: 0.10055160522460938
epoch: 81 training_loss 0.11113348096609116 test_loss: 0.09564926624298095
epoch: 82 training_loss 0.1085422870516777 test_loss: 0.10242174863815308
epoch: 83 training_loss 0.10821079220622778 test_loss: 0.11118544340133667
epoch: 84 training_loss 0.10967213530093431 test_loss: 0.12208075523376465
epoch: 85 training_loss 0.10537457706406712 test_loss: 0.12065695524215699
epoch: 86 training_loss 0.11438198706135154 test_loss: 0.10501114130020142
epoch: 87 training_loss 0.10791519431397319 test_loss: 0.10311990976333618
epoch: 88 training_loss 0.11575771261006594 test_loss: 0.12294161319732666
epoch: 89 training_loss 0.10590347964316607 test_loss: 0.08873748183250427
epoch: 90 training_loss 0.10064657799899578 test_loss: 0.10405842065811158
epoch: 91 training_loss 0.11112739354372024 test_loss: 0.11306387186050415
epoch: 92 training_loss 0.10818693727254867 test_loss: 0.09195762276649475
epoch: 93 training_loss 0.11284636832773685 test_loss: 0.10480526685714722
epoch: 94 training_loss 0.1028563218191266 test_loss: 0.123393714427948
epoch: 95 training_loss 0.11477962171658873 test_loss: 0.10175213813781739
epoch: 96 training_loss 0.11538363948464393 test_loss: 0.11291465759277344
epoch: 97 training_loss 0.1143932906165719 test_loss: 0.09495532512664795
epoch: 98 training_loss 0.10359583295881748 test_loss: 0.11071302890777587
epoch: 99 training_loss 0.10845785327255726 test_loss: 0.09051716327667236
epoch: 100 training_loss 0.10077707298099994 test_loss: 0.0940260648727417
epoch: 101 training_loss 0.10376241751015186 test_loss: 0.11708362102508545
epoch: 102 training_loss 0.09749378174543381 test_loss: 0.1067762017250061
epoch: 103 training_loss 0.11401011925190688 test_loss: 0.10696418285369873
epoch: 104 training_loss 0.10365177854895592 test_loss: 0.09215962886810303
epoch: 105 training_loss 0.10617103698663413 test_loss: 0.1156764030456543
epoch: 106 training_loss 0.10236058453097939 test_loss: 0.1179443359375
epoch: 107 training_loss 0.10110427457839251 test_loss: 0.10629446506500244
epoch: 108 training_loss 0.10574640106409788 test_loss: 0.10576233863830567
epoch: 109 training_loss 0.11119861382991075 test_loss: 0.10573277473449708
epoch: 110 training_loss 0.10873881831765175 test_loss: 0.08810316324234009
epoch: 111 training_loss 0.1021559396199882 test_loss: 0.10644506216049195
epoch: 112 training_loss 0.11571534145623445 test_loss: 0.11906492710113525
epoch: 113 training_loss 0.10426613917574286 test_loss: 0.10934369564056397
epoch: 114 training_loss 0.10608591962605715 test_loss: 0.08712579607963562
epoch: 115 training_loss 0.1003199241310358 test_loss: 0.0973394513130188
epoch: 116 training_loss 0.10772553438320756 test_loss: 0.11455632448196411
epoch: 117 training_loss 0.10104988064616918 test_loss: 0.11741102933883667
epoch: 118 training_loss 0.10286337111145258 test_loss: 0.10661580562591552
epoch: 119 training_loss 0.10237473670393228 test_loss: 0.1371434211730957
epoch: 120 training_loss 0.11609036184847354 test_loss: 0.12006703615188599
epoch: 121 training_loss 0.10254878029227257 test_loss: 0.11259503364562988
epoch: 122 training_loss 0.10471248015761375 test_loss: 0.10717531442642211
epoch: 123 training_loss 0.10649701371788979 test_loss: 0.10367382764816284
epoch: 124 training_loss 0.10890668749809265 test_loss: 0.09104576110839843
epoch: 125 training_loss 0.10571132231503726 test_loss: 0.10942953824996948
epoch: 126 training_loss 0.11010523926466703 test_loss: 0.12420748472213745
epoch: 127 training_loss 0.10406010994687677 test_loss: 0.09005363583564759
epoch: 128 training_loss 0.10057568687945605 test_loss: 0.10258364677429199
epoch: 129 training_loss 0.10309876387938857 test_loss: 0.12116260528564453
epoch: 130 training_loss 0.10608502658084035 test_loss: 0.09623590111732483
epoch: 131 training_loss 0.10882110038772225 test_loss: 0.10503045320510865
epoch: 132 training_loss 0.10907557126134634 test_loss: 0.09602552652359009
epoch: 133 training_loss 0.10610722053796053 test_loss: 0.07926027774810791
epoch: 134 training_loss 0.10832313166931272 test_loss: 0.10406500101089478
epoch: 135 training_loss 0.10700041579082609 test_loss: 0.11235077381134033
epoch: 136 training_loss 0.10896379552781582 test_loss: 0.12077763080596923
epoch: 137 training_loss 0.10235971458256245 test_loss: 0.13004305362701415
epoch: 138 training_loss 0.10364921484142542 test_loss: 0.08878217339515686
epoch: 139 training_loss 0.10754831280559302 test_loss: 0.09783531427383423
epoch: 140 training_loss 0.10378992680460214 test_loss: 0.10744334459304809
epoch: 141 training_loss 0.10443713979795576 test_loss: 0.08824090957641602
epoch: 142 training_loss 0.10680153435096144 test_loss: 0.10183988809585572
epoch: 143 training_loss 0.10108187150210142 test_loss: 0.1028165340423584
epoch: 144 training_loss 0.10513746451586485 test_loss: 0.10384751558303833
epoch: 145 training_loss 0.10011493321508169 test_loss: 0.08918174505233764
epoch: 146 training_loss 0.10153574764728546 test_loss: 0.10196853876113891
epoch: 147 training_loss 0.10719603978097439 test_loss: 0.10410219430923462
epoch: 148 training_loss 0.10535415902733802 test_loss: 0.102576744556427
epoch: 149 training_loss 0.0995789792574942 test_loss: 0.10071980953216553
epoch: 0 training_loss 7.122846081256866 test_loss: 4.206619644165039
epoch: 1 training_loss 3.3624834704399107 test_loss: 2.774052619934082
epoch: 2 training_loss 2.4242889308929443 test_loss: 2.113652992248535
epoch: 3 training_loss 1.9461713540554046 test_loss: 1.6906398773193358
epoch: 4 training_loss 1.6456959009170533 test_loss: 1.4823407173156737
epoch: 5 training_loss 1.4726318228244781 test_loss: 1.3603655815124511
epoch: 6 training_loss 1.35556445479393 test_loss: 1.266962432861328
epoch: 7 training_loss 1.2463332402706147 test_loss: 1.2300653457641602
epoch: 8 training_loss 1.1704386967420577 test_loss: 1.1679605484008788
epoch: 9 training_loss 1.1181533491611482 test_loss: 1.1184109687805175
epoch: 10 training_loss 1.0582455545663834 test_loss: 1.0237936973571777
epoch: 11 training_loss 1.0083745127916337 test_loss: 0.953583812713623
epoch: 12 training_loss 0.9659976089000701 test_loss: 0.97822265625
epoch: 13 training_loss 0.9441151344776153 test_loss: 0.9079333305358886
epoch: 14 training_loss 0.9030992102622986 test_loss: 0.8602272987365722
epoch: 15 training_loss 0.877485328912735 test_loss: 0.8499067306518555
epoch: 16 training_loss 0.860631855726242 test_loss: 0.8267609596252441
epoch: 17 training_loss 0.8179834771156311 test_loss: 0.8339978218078613
epoch: 18 training_loss 0.7977934265136719 test_loss: 0.7878672122955322
epoch: 19 training_loss 0.7995740669965744 test_loss: 0.7880018711090088
epoch: 20 training_loss 0.7691311770677567 test_loss: 0.7449727058410645
epoch: 21 training_loss 0.7699982970952988 test_loss: 0.7795423984527587
epoch: 22 training_loss 0.7440537422895431 test_loss: 0.7177260875701904
epoch: 23 training_loss 0.7311614954471588 test_loss: 0.733648681640625
epoch: 24 training_loss 0.7318551343679428 test_loss: 0.7101438522338868
epoch: 25 training_loss 0.7189934891462326 test_loss: 0.7263562202453613
epoch: 26 training_loss 0.698648836016655 test_loss: 0.6663485050201416
epoch: 27 training_loss 0.6955038344860077 test_loss: 0.6849260807037354
epoch: 28 training_loss 0.6997478306293488 test_loss: 0.6939713954925537
epoch: 29 training_loss 0.6831845092773438 test_loss: 0.741865873336792
epoch: 30 training_loss 0.6742362129688263 test_loss: 0.7315410137176513
epoch: 31 training_loss 0.668308989405632 test_loss: 0.6866796970367431
epoch: 32 training_loss 0.6516071456670761 test_loss: 0.6440112590789795
epoch: 33 training_loss 0.6639261704683304 test_loss: 0.6618178844451904
epoch: 34 training_loss 0.6548273551464081 test_loss: 0.6463615894317627
epoch: 35 training_loss 0.6478788733482361 test_loss: 0.6271618366241455
epoch: 36 training_loss 0.6297536283731461 test_loss: 0.6232701301574707
epoch: 37 training_loss 0.6366712921857833 test_loss: 0.614643144607544
epoch: 38 training_loss 0.6310566657781601 test_loss: 0.5911108016967773
epoch: 39 training_loss 0.6270993882417679 test_loss: 0.648590087890625
epoch: 40 training_loss 0.6093917310237884 test_loss: 0.589368486404419
epoch: 41 training_loss 0.6311293190717697 test_loss: 0.6276754379272461
epoch: 42 training_loss 0.5963684344291686 test_loss: 0.621222972869873
epoch: 43 training_loss 0.6021152621507645 test_loss: 0.5974312782287597
epoch: 44 training_loss 0.6049788200855255 test_loss: 0.5907838344573975
epoch: 45 training_loss 0.5966997390985489 test_loss: 0.601688003540039
epoch: 46 training_loss 0.598299970626831 test_loss: 0.6001122951507568
epoch: 47 training_loss 0.5914467340707779 test_loss: 0.5872303485870362
epoch: 48 training_loss 0.5820052513480186 test_loss: 0.5728599548339843
epoch: 49 training_loss 0.593933094739914 test_loss: 0.5961884975433349
epoch: 50 training_loss 0.580930061340332 test_loss: 0.5659012794494629
epoch: 51 training_loss 0.5859801018238068 test_loss: 0.581539535522461
epoch: 52 training_loss 0.5735548207163811 test_loss: 0.5763925075531006
epoch: 53 training_loss 0.5699594634771347 test_loss: 0.5789347171783448
epoch: 54 training_loss 0.5646885737776757 test_loss: 0.5666388988494873
epoch: 55 training_loss 0.5805044615268707 test_loss: 0.5581286430358887
epoch: 56 training_loss 0.5713305053114891 test_loss: 0.5728681564331055
epoch: 57 training_loss 0.5656270098686218 test_loss: 0.5617459297180176
epoch: 58 training_loss 0.5618013596534729 test_loss: 0.5396414279937745
epoch: 59 training_loss 0.5514321759343147 test_loss: 0.5814149379730225
epoch: 60 training_loss 0.5559725087881088 test_loss: 0.5707149028778076
epoch: 61 training_loss 0.5623230928182602 test_loss: 0.545424222946167
epoch: 62 training_loss 0.5704749915003776 test_loss: 0.5619414806365967
epoch: 63 training_loss 0.5493815118074417 test_loss: 0.5564890384674073
epoch: 64 training_loss 0.5365101724863053 test_loss: 0.5588031768798828
epoch: 65 training_loss 0.5527271783351898 test_loss: 0.565649938583374
epoch: 66 training_loss 0.5519573256373406 test_loss: 0.5466413021087646
epoch: 67 training_loss 0.5336051592230797 test_loss: 0.5501824855804444
epoch: 68 training_loss 0.5426858860254288 test_loss: 0.5374995231628418
epoch: 69 training_loss 0.5375460687279702 test_loss: 0.5509184837341309
epoch: 70 training_loss 0.5342997154593467 test_loss: 0.5279618263244629
epoch: 71 training_loss 0.5292704811692238 test_loss: 0.5296952724456787
epoch: 72 training_loss 0.5288888505101204 test_loss: 0.5502851963043213
epoch: 73 training_loss 0.536182872056961 test_loss: 0.5283112525939941
epoch: 74 training_loss 0.5281948992609977 test_loss: 0.5482744693756103
epoch: 75 training_loss 0.5220731976628303 test_loss: 0.55580735206604
epoch: 76 training_loss 0.5288651984930038 test_loss: 0.5385428428649902
epoch: 77 training_loss 0.516632733643055 test_loss: 0.538743257522583
epoch: 78 training_loss 0.5302955636382103 test_loss: 0.5160886764526367
epoch: 79 training_loss 0.5094263291358948 test_loss: 0.5267186164855957
epoch: 80 training_loss 0.5080395862460136 test_loss: 0.5127610206604004
epoch: 81 training_loss 0.5152185925841332 test_loss: 0.5181248188018799
epoch: 82 training_loss 0.5183273130655288 test_loss: 0.49568843841552734
epoch: 83 training_loss 0.512508000433445 test_loss: 0.5193932056427002
epoch: 84 training_loss 0.5197158941626548 test_loss: 0.5097676277160644
epoch: 85 training_loss 0.5216058158874511 test_loss: 0.5290135860443115
epoch: 86 training_loss 0.5091748112440109 test_loss: 0.5072082996368408
epoch: 87 training_loss 0.509592653810978 test_loss: 0.49970192909240724
epoch: 88 training_loss 0.5064682060480118 test_loss: 0.5131211757659913
epoch: 89 training_loss 0.5212765273451805 test_loss: 0.49635939598083495
epoch: 90 training_loss 0.50661222666502 test_loss: 0.48790774345397947
epoch: 91 training_loss 0.5101002958416939 test_loss: 0.5546030521392822
epoch: 92 training_loss 0.5015000328421593 test_loss: 0.5170595169067382
epoch: 93 training_loss 0.5046268126368523 test_loss: 0.492193603515625
epoch: 94 training_loss 0.50026625841856 test_loss: 0.5112007617950439
epoch: 95 training_loss 0.49969677805900575 test_loss: 0.5043635845184327
epoch: 96 training_loss 0.5093009114265442 test_loss: 0.5043339729309082
epoch: 97 training_loss 0.49353672564029694 test_loss: 0.4873321533203125
epoch: 98 training_loss 0.4978751939535141 test_loss: 0.49979710578918457
epoch: 99 training_loss 0.4983742779493332 test_loss: 0.5158114433288574
epoch: 100 training_loss 0.5000589975714683 test_loss: 0.4873493671417236
epoch: 101 training_loss 0.4978375706076622 test_loss: 0.5114959716796875
epoch: 102 training_loss 0.5096099951863289 test_loss: 0.4873445987701416
epoch: 103 training_loss 0.4915150597691536 test_loss: 0.48465585708618164
epoch: 104 training_loss 0.4913692635297775 test_loss: 0.49300947189331057
epoch: 105 training_loss 0.4966421982645988 test_loss: 0.47803583145141604
epoch: 106 training_loss 0.4894095340371132 test_loss: 0.5068495750427247
epoch: 107 training_loss 0.49816503256559375 test_loss: 0.49304928779602053
epoch: 108 training_loss 0.49800735801458357 test_loss: 0.4787442207336426
epoch: 109 training_loss 0.4784137934446335 test_loss: 0.47673544883728025
epoch: 110 training_loss 0.4958552473783493 test_loss: 0.5356922626495362
epoch: 111 training_loss 0.49683981388807297 test_loss: 0.47650561332702634
epoch: 112 training_loss 0.48633849203586577 test_loss: 0.48810343742370604
epoch: 113 training_loss 0.4821106845140457 test_loss: 0.47414546012878417
epoch: 114 training_loss 0.48863928228616715 test_loss: 0.4877253532409668
epoch: 115 training_loss 0.4818465366959572 test_loss: 0.483733606338501
epoch: 116 training_loss 0.4943189179897308 test_loss: 0.4825392723083496
epoch: 117 training_loss 0.4878291642665863 test_loss: 0.47800469398498535
epoch: 118 training_loss 0.4772135093808174 test_loss: 0.47965497970581056
epoch: 119 training_loss 0.48370633244514466 test_loss: 0.4661149024963379
epoch: 120 training_loss 0.476535821557045 test_loss: 0.4734926700592041
epoch: 121 training_loss 0.4753922778367996 test_loss: 0.46588616371154784
epoch: 122 training_loss 0.49039784431457517 test_loss: 0.4773758888244629
epoch: 123 training_loss 0.4758343070745468 test_loss: 0.4763631343841553
epoch: 124 training_loss 0.4827354660630226 test_loss: 0.4668839931488037
epoch: 125 training_loss 0.4769762742519379 test_loss: 0.4883110046386719
epoch: 126 training_loss 0.4728442633152008 test_loss: 0.4529515266418457
epoch: 127 training_loss 0.47490849763154985 test_loss: 0.49861655235290525
epoch: 128 training_loss 0.48229109823703764 test_loss: 0.46394619941711424
epoch: 129 training_loss 0.47025234073400496 test_loss: 0.46428308486938474
epoch: 130 training_loss 0.47463640362024306 test_loss: 0.47470808029174805
epoch: 131 training_loss 0.46795260310173037 test_loss: 0.4733403205871582
epoch: 132 training_loss 0.46829900741577146 test_loss: 0.46430058479309083
epoch: 133 training_loss 0.470713275372982 test_loss: 0.5064535140991211
epoch: 134 training_loss 0.47593718141317365 test_loss: 0.4805344581604004
epoch: 135 training_loss 0.4673763447999954 test_loss: 0.48307437896728517
epoch: 136 training_loss 0.473888179063797 test_loss: 0.4620186805725098
epoch: 137 training_loss 0.47660517901182176 test_loss: 0.4852336883544922
epoch: 138 training_loss 0.4651179438829422 test_loss: 0.48866047859191897
epoch: 139 training_loss 0.4713132604956627 test_loss: 0.46079044342041015
epoch: 140 training_loss 0.4624097779393196 test_loss: 0.45196852684020994
epoch: 141 training_loss 0.47053576707839967 test_loss: 0.4758583068847656
epoch: 142 training_loss 0.46707931578159334 test_loss: 0.4575922966003418
epoch: 143 training_loss 0.4634620302915573 test_loss: 0.4648033618927002
epoch: 144 training_loss 0.466967296898365 test_loss: 0.46083498001098633
epoch: 145 training_loss 0.4599613335728645 test_loss: 0.4787386417388916
epoch: 146 training_loss 0.4671801182627678 test_loss: 0.47780799865722656
epoch: 147 training_loss 0.4676891592144966 test_loss: 0.4524630069732666
epoch: 148 training_loss 0.46844732135534284 test_loss: 0.47873702049255373
epoch: 149 training_loss 0.45765591859817506 test_loss: 0.4658979892730713
3181.9041887138746
episode: 0 training return: tensor(93.6541, device='cuda:0')
episode: 1 training return: tensor(47.7449, device='cuda:0')
episode: 2 training return: tensor(9.6808, device='cuda:0')
episode: 3 training return: tensor(-91.1555, device='cuda:0')
epoch: 1 test_true_pfm: 2939.48717329244 sim_pfm: 65.864283569031
episode: 4 training return: tensor(-242.8385, device='cuda:0')
episode: 5 training return: tensor(-3.0384, device='cuda:0')
episode: 6 training return: tensor(64.0830, device='cuda:0')
episode: 7 training return: tensor(65.2326, device='cuda:0')
epoch: 2 test_true_pfm: 2048.1828789982155 sim_pfm: -314.60416482734337
episode: 8 training return: tensor(48.4713, device='cuda:0')
episode: 9 training return: tensor(147.3313, device='cuda:0')
episode: 10 training return: tensor(24.9520, device='cuda:0')
episode: 11 training return: tensor(-194.5779, device='cuda:0')
epoch: 3 test_true_pfm: 1870.3212074623773 sim_pfm: -55.60787661016608
episode: 12 training return: tensor(-243.3453, device='cuda:0')
episode: 13 training return: tensor(-206.7380, device='cuda:0')
episode: 14 training return: tensor(-446.9471, device='cuda:0')
episode: 15 training return: tensor(-128.8522, device='cuda:0')
epoch: 4 test_true_pfm: 2503.975620680873 sim_pfm: -174.66268763889093
episode: 16 training return: tensor(44.7105, device='cuda:0')
episode: 17 training return: tensor(86.7420, device='cuda:0')
episode: 18 training return: tensor(-104.5937, device='cuda:0')
episode: 19 training return: tensor(-358.8590, device='cuda:0')
epoch: 5 test_true_pfm: 2162.105825921601 sim_pfm: -296.945251260205
episode: 20 training return: tensor(-296.3701, device='cuda:0')
episode: 21 training return: tensor(54.8559, device='cuda:0')
episode: 22 training return: tensor(-457.2133, device='cuda:0')
episode: 23 training return: tensor(-253.3188, device='cuda:0')
epoch: 6 test_true_pfm: 2763.5074069388224 sim_pfm: -18.436869908541365
episode: 24 training return: tensor(-443.2019, device='cuda:0')
episode: 25 training return: tensor(-19.2921, device='cuda:0')
episode: 26 training return: tensor(-446.7332, device='cuda:0')
episode: 27 training return: tensor(-171.7294, device='cuda:0')
epoch: 7 test_true_pfm: 2231.461696013805 sim_pfm: -7.2689804492789944
episode: 28 training return: tensor(-441.9903, device='cuda:0')
episode: 29 training return: tensor(29.4546, device='cuda:0')
episode: 30 training return: tensor(-293.3201, device='cuda:0')
episode: 31 training return: tensor(153.5878, device='cuda:0')
epoch: 8 test_true_pfm: 2521.2204526061323 sim_pfm: 92.63137577752543
episode: 32 training return: tensor(55.0309, device='cuda:0')
episode: 33 training return: tensor(-473.8923, device='cuda:0')
episode: 34 training return: tensor(-334.3715, device='cuda:0')
episode: 35 training return: tensor(-135.9717, device='cuda:0')
epoch: 9 test_true_pfm: 3026.3198117745137 sim_pfm: -51.79301428620238
episode: 36 training return: tensor(-290.2126, device='cuda:0')
episode: 37 training return: tensor(-482.8184, device='cuda:0')
episode: 38 training return: tensor(-483.1852, device='cuda:0')
episode: 39 training return: tensor(-444.4679, device='cuda:0')
epoch: 10 test_true_pfm: 2600.674872434028 sim_pfm: -179.60304539183076
episode: 40 training return: tensor(-335.1926, device='cuda:0')
episode: 41 training return: tensor(-481.1712, device='cuda:0')
episode: 42 training return: tensor(45.5051, device='cuda:0')
episode: 43 training return: tensor(-146.0092, device='cuda:0')
epoch: 11 test_true_pfm: 3247.792903577021 sim_pfm: 80.6268072684373
episode: 44 training return: tensor(-162.1739, device='cuda:0')
episode: 45 training return: tensor(-170.8843, device='cuda:0')
episode: 46 training return: tensor(61.7903, device='cuda:0')
episode: 47 training return: tensor(6.1131, device='cuda:0')
epoch: 12 test_true_pfm: 2908.697779805944 sim_pfm: -72.79064200177284
episode: 48 training return: tensor(65.6649, device='cuda:0')
episode: 49 training return: tensor(81.8305, device='cuda:0')
episode: 50 training return: tensor(-95.7110, device='cuda:0')
episode: 51 training return: tensor(103.7130, device='cuda:0')
epoch: 13 test_true_pfm: 2633.7256986800226 sim_pfm: 71.18141748437968
episode: 52 training return: tensor(57.0213, device='cuda:0')
episode: 53 training return: tensor(-130.5996, device='cuda:0')
episode: 54 training return: tensor(56.1168, device='cuda:0')
episode: 55 training return: tensor(51.7064, device='cuda:0')
epoch: 14 test_true_pfm: 3244.654291815796 sim_pfm: 68.81452505128497
episode: 56 training return: tensor(-208.0993, device='cuda:0')
episode: 57 training return: tensor(-7.6094, device='cuda:0')
episode: 58 training return: tensor(44.0766, device='cuda:0')
episode: 59 training return: tensor(113.2548, device='cuda:0')
epoch: 15 test_true_pfm: 3233.7740622554156 sim_pfm: 57.207155754954634
episode: 60 training return: tensor(47.0188, device='cuda:0')
episode: 61 training return: tensor(11.1015, device='cuda:0')
episode: 62 training return: tensor(-415.4078, device='cuda:0')
episode: 63 training return: tensor(-199.3754, device='cuda:0')
epoch: 16 test_true_pfm: 2425.450117366598 sim_pfm: -52.98564909384004
episode: 64 training return: tensor(36.3334, device='cuda:0')
episode: 65 training return: tensor(-309.7829, device='cuda:0')
episode: 66 training return: tensor(-251.8519, device='cuda:0')
episode: 67 training return: tensor(57.2617, device='cuda:0')
epoch: 17 test_true_pfm: 2556.265953740889 sim_pfm: -220.0282675650475
episode: 68 training return: tensor(-310.7357, device='cuda:0')
episode: 69 training return: tensor(33.9738, device='cuda:0')
episode: 70 training return: tensor(-407.5759, device='cuda:0')
episode: 71 training return: tensor(-79.0492, device='cuda:0')
epoch: 18 test_true_pfm: 2228.864463240296 sim_pfm: -304.0444370780897
episode: 72 training return: tensor(-86.3747, device='cuda:0')
episode: 73 training return: tensor(101.2636, device='cuda:0')
episode: 74 training return: tensor(57.1185, device='cuda:0')
episode: 75 training return: tensor(68.7895, device='cuda:0')
epoch: 19 test_true_pfm: 2072.707587204872 sim_pfm: -34.6087275792573
episode: 76 training return: tensor(56.6272, device='cuda:0')
episode: 77 training return: tensor(55.5350, device='cuda:0')
episode: 78 training return: tensor(-360.7308, device='cuda:0')
episode: 79 training return: tensor(-369.2106, device='cuda:0')
epoch: 20 test_true_pfm: 2596.1942631783736 sim_pfm: -262.66540901208646
episode: 80 training return: tensor(-242.5936, device='cuda:0')
episode: 81 training return: tensor(-406.2517, device='cuda:0')
episode: 82 training return: tensor(-160.1010, device='cuda:0')
episode: 83 training return: tensor(68.1487, device='cuda:0')
epoch: 21 test_true_pfm: 1943.6790134686928 sim_pfm: -203.55035911744926
episode: 84 training return: tensor(-433.1154, device='cuda:0')
episode: 85 training return: tensor(44.5391, device='cuda:0')
episode: 86 training return: tensor(-279.2624, device='cuda:0')
episode: 87 training return: tensor(-175.4958, device='cuda:0')
epoch: 22 test_true_pfm: 2818.142206011203 sim_pfm: 94.02459677610507
episode: 88 training return: tensor(42.6180, device='cuda:0')
episode: 89 training return: tensor(-456.1426, device='cuda:0')
episode: 90 training return: tensor(133.7041, device='cuda:0')
episode: 91 training return: tensor(-24.8307, device='cuda:0')
epoch: 23 test_true_pfm: 2739.6227549217233 sim_pfm: -81.47966580141413
episode: 92 training return: tensor(59.1011, device='cuda:0')
episode: 93 training return: tensor(-288.7088, device='cuda:0')
episode: 94 training return: tensor(-146.0373, device='cuda:0')
episode: 95 training return: tensor(-322.9100, device='cuda:0')
epoch: 24 test_true_pfm: 3259.8889305271255 sim_pfm: 61.750122397148516
episode: 96 training return: tensor(81.3535, device='cuda:0')
episode: 97 training return: tensor(-380.0692, device='cuda:0')
episode: 98 training return: tensor(49.2595, device='cuda:0')
episode: 99 training return: tensor(85.7503, device='cuda:0')
epoch: 25 test_true_pfm: 2750.464382059639 sim_pfm: -212.94009645963283
episode: 100 training return: tensor(-146.6420, device='cuda:0')
episode: 101 training return: tensor(-358.6472, device='cuda:0')
episode: 102 training return: tensor(58.6620, device='cuda:0')
episode: 103 training return: tensor(53.5148, device='cuda:0')
epoch: 26 test_true_pfm: 1909.7139685072716 sim_pfm: -371.06614040995674
episode: 104 training return: tensor(19.2387, device='cuda:0')
episode: 105 training return: tensor(37.8066, device='cuda:0')
episode: 106 training return: tensor(-31.6414, device='cuda:0')
episode: 107 training return: tensor(-494.5584, device='cuda:0')
epoch: 27 test_true_pfm: 1892.9960063108767 sim_pfm: -216.78086762110857
episode: 108 training return: tensor(44.2453, device='cuda:0')
episode: 109 training return: tensor(102.4371, device='cuda:0')
episode: 110 training return: tensor(-434.9437, device='cuda:0')
episode: 111 training return: tensor(52.2350, device='cuda:0')
epoch: 28 test_true_pfm: 1766.7742159921875 sim_pfm: -320.9471518970968
episode: 112 training return: tensor(-388.1705, device='cuda:0')
episode: 113 training return: tensor(-158.0346, device='cuda:0')
episode: 114 training return: tensor(71.0012, device='cuda:0')
episode: 115 training return: tensor(35.0765, device='cuda:0')
epoch: 29 test_true_pfm: 1828.9595390499414 sim_pfm: -358.88790110939107
episode: 116 training return: tensor(-226.4550, device='cuda:0')
episode: 117 training return: tensor(-188.5980, device='cuda:0')
episode: 118 training return: tensor(-402.5778, device='cuda:0')
episode: 119 training return: tensor(-230.9429, device='cuda:0')
epoch: 30 test_true_pfm: 1910.7783491277708 sim_pfm: -241.8139198292823
episode: 120 training return: tensor(-16.7781, device='cuda:0')
episode: 121 training return: tensor(-307.0674, device='cuda:0')
episode: 122 training return: tensor(140.4391, device='cuda:0')
episode: 123 training return: tensor(84.5838, device='cuda:0')
epoch: 31 test_true_pfm: 2484.7389666914255 sim_pfm: -195.60911957800272
episode: 124 training return: tensor(64.3492, device='cuda:0')
episode: 125 training return: tensor(-86.7003, device='cuda:0')
episode: 126 training return: tensor(140.8763, device='cuda:0')
episode: 127 training return: tensor(-369.9333, device='cuda:0')
epoch: 32 test_true_pfm: 2254.434543667359 sim_pfm: -350.35666407834896
episode: 128 training return: tensor(-25.6517, device='cuda:0')
episode: 129 training return: tensor(123.7948, device='cuda:0')
episode: 130 training return: tensor(-300.5993, device='cuda:0')
episode: 131 training return: tensor(-184.2898, device='cuda:0')
epoch: 33 test_true_pfm: 2328.5897524496518 sim_pfm: -35.75181786751879
episode: 132 training return: tensor(-128.4242, device='cuda:0')
episode: 133 training return: tensor(-356.2807, device='cuda:0')
episode: 134 training return: tensor(-293.6726, device='cuda:0')
episode: 135 training return: tensor(-180.2542, device='cuda:0')
epoch: 34 test_true_pfm: 2272.94310650933 sim_pfm: -228.9276041398601
episode: 136 training return: tensor(47.3267, device='cuda:0')
episode: 137 training return: tensor(-317.0782, device='cuda:0')
episode: 138 training return: tensor(78.0768, device='cuda:0')
episode: 139 training return: tensor(57.9164, device='cuda:0')
epoch: 35 test_true_pfm: 1715.2778844173542 sim_pfm: -247.078612678044
episode: 140 training return: tensor(-157.4833, device='cuda:0')
episode: 141 training return: tensor(62.6306, device='cuda:0')
episode: 142 training return: tensor(-417.2995, device='cuda:0')
episode: 143 training return: tensor(-424.4279, device='cuda:0')
epoch: 36 test_true_pfm: 1854.2002580849567 sim_pfm: -393.82258521421073
episode: 144 training return: tensor(38.8504, device='cuda:0')
episode: 145 training return: tensor(-332.5227, device='cuda:0')
episode: 146 training return: tensor(151.5292, device='cuda:0')
episode: 147 training return: tensor(-68.7624, device='cuda:0')
epoch: 37 test_true_pfm: 2825.954827271818 sim_pfm: -346.2271907309575
episode: 148 training return: tensor(-302.8416, device='cuda:0')
episode: 149 training return: tensor(47.7051, device='cuda:0')
episode: 150 training return: tensor(-112.2022, device='cuda:0')
episode: 151 training return: tensor(-188.0941, device='cuda:0')
epoch: 38 test_true_pfm: 2072.5828374645894 sim_pfm: -144.77494641266335
episode: 152 training return: tensor(70.3422, device='cuda:0')
episode: 153 training return: tensor(-355.8672, device='cuda:0')
episode: 154 training return: tensor(29.6970, device='cuda:0')
episode: 155 training return: tensor(-132.2875, device='cuda:0')
epoch: 39 test_true_pfm: 1751.448627718312 sim_pfm: -142.45450236992716
episode: 156 training return: tensor(-185.7619, device='cuda:0')
episode: 157 training return: tensor(-95.4894, device='cuda:0')
episode: 158 training return: tensor(-524.0474, device='cuda:0')
episode: 159 training return: tensor(-215.3966, device='cuda:0')
epoch: 40 test_true_pfm: 2613.844907921434 sim_pfm: -209.16877157839676
episode: 160 training return: tensor(-130.7200, device='cuda:0')
episode: 161 training return: tensor(-370.6837, device='cuda:0')
episode: 162 training return: tensor(-331.4651, device='cuda:0')
episode: 163 training return: tensor(51.1465, device='cuda:0')
epoch: 41 test_true_pfm: 2388.7342998858803 sim_pfm: -263.10270824242616
episode: 164 training return: tensor(-343.3031, device='cuda:0')
episode: 165 training return: tensor(-122.9299, device='cuda:0')
episode: 166 training return: tensor(-465.8066, device='cuda:0')
episode: 167 training return: tensor(145.5698, device='cuda:0')
epoch: 42 test_true_pfm: 2059.9969584423493 sim_pfm: -174.1885899422147
episode: 168 training return: tensor(122.8973, device='cuda:0')
episode: 169 training return: tensor(37.2499, device='cuda:0')
episode: 170 training return: tensor(-470.6859, device='cuda:0')
episode: 171 training return: tensor(-171.5998, device='cuda:0')
epoch: 43 test_true_pfm: 2576.3162390860766 sim_pfm: -238.86262750571282
episode: 172 training return: tensor(-299.2799, device='cuda:0')
episode: 173 training return: tensor(-211.8109, device='cuda:0')
episode: 174 training return: tensor(-15.8645, device='cuda:0')
episode: 175 training return: tensor(-521.1140, device='cuda:0')
epoch: 44 test_true_pfm: 2080.7423774985605 sim_pfm: -194.65250168067482
episode: 176 training return: tensor(43.1635, device='cuda:0')
episode: 177 training return: tensor(144.9482, device='cuda:0')
episode: 178 training return: tensor(-479.3802, device='cuda:0')
episode: 179 training return: tensor(87.6849, device='cuda:0')
epoch: 45 test_true_pfm: 2491.2691128363863 sim_pfm: -283.8950008826602
episode: 180 training return: tensor(73.9285, device='cuda:0')
episode: 181 training return: tensor(-349.6797, device='cuda:0')
episode: 182 training return: tensor(-281.9881, device='cuda:0')
episode: 183 training return: tensor(-198.9429, device='cuda:0')
epoch: 46 test_true_pfm: 2272.3589543354933 sim_pfm: -3.4010549996358654
episode: 184 training return: tensor(68.1550, device='cuda:0')
episode: 185 training return: tensor(-439.4308, device='cuda:0')
episode: 186 training return: tensor(-322.1291, device='cuda:0')
episode: 187 training return: tensor(-425.7326, device='cuda:0')
epoch: 47 test_true_pfm: 1756.825387845606 sim_pfm: -333.7798090999713
episode: 188 training return: tensor(-367.9972, device='cuda:0')
episode: 189 training return: tensor(128.7475, device='cuda:0')
episode: 190 training return: tensor(-507.5901, device='cuda:0')
episode: 191 training return: tensor(-279.6317, device='cuda:0')
epoch: 48 test_true_pfm: 2633.9602534950786 sim_pfm: -63.73595972927675
episode: 192 training return: tensor(-266.2765, device='cuda:0')
episode: 193 training return: tensor(-414.3048, device='cuda:0')
episode: 194 training return: tensor(-458.2874, device='cuda:0')
episode: 195 training return: tensor(-419.1571, device='cuda:0')
epoch: 49 test_true_pfm: 2181.078416401723 sim_pfm: -371.8879259926907
episode: 196 training return: tensor(42.6719, device='cuda:0')
episode: 197 training return: tensor(-124.4553, device='cuda:0')
episode: 198 training return: tensor(-447.5555, device='cuda:0')
episode: 199 training return: tensor(-355.3564, device='cuda:0')
epoch: 50 test_true_pfm: 1970.0755433490579 sim_pfm: -321.93490047191153
episode: 200 training return: tensor(-369.6244, device='cuda:0')
episode: 201 training return: tensor(-314.1164, device='cuda:0')
episode: 202 training return: tensor(-393.0464, device='cuda:0')
episode: 203 training return: tensor(58.5532, device='cuda:0')
epoch: 51 test_true_pfm: 1997.7561034282983 sim_pfm: -259.5390153170253
episode: 204 training return: tensor(7.2291, device='cuda:0')
episode: 205 training return: tensor(126.1641, device='cuda:0')
episode: 206 training return: tensor(-63.6120, device='cuda:0')
episode: 207 training return: tensor(80.5075, device='cuda:0')
epoch: 52 test_true_pfm: 2508.4581628272554 sim_pfm: -152.24246971601193
episode: 208 training return: tensor(-446.6856, device='cuda:0')
episode: 209 training return: tensor(-297.4431, device='cuda:0')
episode: 210 training return: tensor(52.5051, device='cuda:0')
episode: 211 training return: tensor(-205.4929, device='cuda:0')
epoch: 53 test_true_pfm: 2215.397782525316 sim_pfm: -290.5580097190007
episode: 212 training return: tensor(-9.5701, device='cuda:0')
episode: 213 training return: tensor(-16.2456, device='cuda:0')
episode: 214 training return: tensor(-478.0471, device='cuda:0')
episode: 215 training return: tensor(65.1215, device='cuda:0')
epoch: 54 test_true_pfm: 2459.671384190065 sim_pfm: -383.867610494354
episode: 216 training return: tensor(26.6195, device='cuda:0')
episode: 217 training return: tensor(54.8323, device='cuda:0')
episode: 218 training return: tensor(80.6495, device='cuda:0')
episode: 219 training return: tensor(-275.3813, device='cuda:0')
epoch: 55 test_true_pfm: 2339.6482271963687 sim_pfm: -371.4501819756697
episode: 220 training return: tensor(-193.2768, device='cuda:0')
episode: 221 training return: tensor(-29.8595, device='cuda:0')
episode: 222 training return: tensor(-232.3260, device='cuda:0')
episode: 223 training return: tensor(-125.1528, device='cuda:0')
epoch: 56 test_true_pfm: 1955.4008362123548 sim_pfm: -350.1501640870022
episode: 224 training return: tensor(65.1417, device='cuda:0')
episode: 225 training return: tensor(-446.6161, device='cuda:0')
episode: 226 training return: tensor(-457.0573, device='cuda:0')
episode: 227 training return: tensor(-39.6778, device='cuda:0')
epoch: 57 test_true_pfm: 2533.1563570920575 sim_pfm: -143.57015166203686
episode: 228 training return: tensor(-474.5306, device='cuda:0')
episode: 229 training return: tensor(5.3201, device='cuda:0')
episode: 230 training return: tensor(46.9293, device='cuda:0')
episode: 231 training return: tensor(156.2856, device='cuda:0')
epoch: 58 test_true_pfm: 2181.0602056015464 sim_pfm: -39.37710278490946
episode: 232 training return: tensor(36.7956, device='cuda:0')
episode: 233 training return: tensor(-442.0803, device='cuda:0')
episode: 234 training return: tensor(-272.6942, device='cuda:0')
episode: 235 training return: tensor(-500.8150, device='cuda:0')
epoch: 59 test_true_pfm: 2263.8521761550733 sim_pfm: -258.477559246084
episode: 236 training return: tensor(-69.6404, device='cuda:0')
episode: 237 training return: tensor(-317.7296, device='cuda:0')
episode: 238 training return: tensor(-233.6448, device='cuda:0')
episode: 239 training return: tensor(-173.4301, device='cuda:0')
epoch: 60 test_true_pfm: 2258.4405389005447 sim_pfm: -118.20581010414753
episode: 240 training return: tensor(-97.2521, device='cuda:0')
episode: 241 training return: tensor(-2.5946, device='cuda:0')
episode: 242 training return: tensor(-453.7111, device='cuda:0')
episode: 243 training return: tensor(65.5398, device='cuda:0')
epoch: 61 test_true_pfm: 2347.771976591944 sim_pfm: -56.392787404794944
episode: 244 training return: tensor(-66.8404, device='cuda:0')
episode: 245 training return: tensor(-344.4863, device='cuda:0')
episode: 246 training return: tensor(123.6710, device='cuda:0')
episode: 247 training return: tensor(-212.3008, device='cuda:0')
epoch: 62 test_true_pfm: 2386.346763182999 sim_pfm: -296.2680863049366
episode: 248 training return: tensor(53.2069, device='cuda:0')
episode: 249 training return: tensor(-153.7360, device='cuda:0')
episode: 250 training return: tensor(112.1342, device='cuda:0')
episode: 251 training return: tensor(-106.5459, device='cuda:0')
epoch: 63 test_true_pfm: 1985.878727728902 sim_pfm: -333.9682313435478
episode: 252 training return: tensor(-355.4018, device='cuda:0')
episode: 253 training return: tensor(-274.4290, device='cuda:0')
episode: 254 training return: tensor(107.5400, device='cuda:0')
episode: 255 training return: tensor(72.9213, device='cuda:0')
epoch: 64 test_true_pfm: 1659.8460613176703 sim_pfm: -238.14482587767029
episode: 256 training return: tensor(-241.0596, device='cuda:0')
episode: 257 training return: tensor(-57.3189, device='cuda:0')
episode: 258 training return: tensor(-230.6006, device='cuda:0')
episode: 259 training return: tensor(-242.6175, device='cuda:0')
epoch: 65 test_true_pfm: 2341.966770805317 sim_pfm: -321.70680539595196
episode: 260 training return: tensor(-11.5769, device='cuda:0')
episode: 261 training return: tensor(-526.9937, device='cuda:0')
episode: 262 training return: tensor(-369.1733, device='cuda:0')
episode: 263 training return: tensor(-174.4744, device='cuda:0')
epoch: 66 test_true_pfm: 2330.878498575809 sim_pfm: -270.850143227668
episode: 264 training return: tensor(10.0327, device='cuda:0')
episode: 265 training return: tensor(-32.9092, device='cuda:0')
episode: 266 training return: tensor(-385.6309, device='cuda:0')
episode: 267 training return: tensor(-442.2078, device='cuda:0')
epoch: 67 test_true_pfm: 2236.2166559073607 sim_pfm: -55.06502396261203
episode: 268 training return: tensor(-359.2640, device='cuda:0')
episode: 269 training return: tensor(-55.2462, device='cuda:0')
episode: 270 training return: tensor(-213.3383, device='cuda:0')
episode: 271 training return: tensor(-364.9289, device='cuda:0')
epoch: 68 test_true_pfm: 1753.1565533543844 sim_pfm: -305.01811528607504
episode: 272 training return: tensor(-505.4657, device='cuda:0')
episode: 273 training return: tensor(-237.5124, device='cuda:0')
episode: 274 training return: tensor(-310.7163, device='cuda:0')
episode: 275 training return: tensor(-197.2187, device='cuda:0')
epoch: 69 test_true_pfm: 2248.7673813791375 sim_pfm: -223.61728391370465
episode: 276 training return: tensor(-444.8484, device='cuda:0')
episode: 277 training return: tensor(108.0968, device='cuda:0')
episode: 278 training return: tensor(-462.7295, device='cuda:0')
episode: 279 training return: tensor(-197.6644, device='cuda:0')
epoch: 70 test_true_pfm: 1995.3657283351124 sim_pfm: -368.24596510979853
episode: 280 training return: tensor(9.3285, device='cuda:0')
episode: 281 training return: tensor(29.2848, device='cuda:0')
episode: 282 training return: tensor(-466.0373, device='cuda:0')
episode: 283 training return: tensor(-195.0359, device='cuda:0')
epoch: 71 test_true_pfm: 1988.3256195737758 sim_pfm: -130.48101767760818
episode: 284 training return: tensor(-132.4521, device='cuda:0')
episode: 285 training return: tensor(-236.6221, device='cuda:0')
episode: 286 training return: tensor(-512.3449, device='cuda:0')
episode: 287 training return: tensor(-476.0096, device='cuda:0')
epoch: 72 test_true_pfm: 2089.172111775562 sim_pfm: -244.5488876248031
episode: 288 training return: tensor(-473.9177, device='cuda:0')
episode: 289 training return: tensor(58.5173, device='cuda:0')
episode: 290 training return: tensor(-90.7976, device='cuda:0')
episode: 291 training return: tensor(-229.9722, device='cuda:0')
epoch: 73 test_true_pfm: 1901.487066140763 sim_pfm: -326.50585576966597
episode: 292 training return: tensor(-200.5056, device='cuda:0')
episode: 293 training return: tensor(-185.1912, device='cuda:0')
episode: 294 training return: tensor(-295.9346, device='cuda:0')
episode: 295 training return: tensor(-282.8701, device='cuda:0')
epoch: 74 test_true_pfm: 2606.438639169062 sim_pfm: -52.125647076735426
episode: 296 training return: tensor(-471.5011, device='cuda:0')
episode: 297 training return: tensor(-293.1425, device='cuda:0')
episode: 298 training return: tensor(77.7634, device='cuda:0')
episode: 299 training return: tensor(46.6464, device='cuda:0')
epoch: 75 test_true_pfm: 2499.9551822298336 sim_pfm: -376.7751204021333
episode: 300 training return: tensor(-218.7099, device='cuda:0')
episode: 301 training return: tensor(-36.6651, device='cuda:0')
episode: 302 training return: tensor(-7.4067, device='cuda:0')
episode: 303 training return: tensor(-445.3797, device='cuda:0')
epoch: 76 test_true_pfm: 2168.7795239820503 sim_pfm: 15.684572940226644
episode: 304 training return: tensor(-55.0410, device='cuda:0')
episode: 305 training return: tensor(-148.5287, device='cuda:0')
episode: 306 training return: tensor(-485.8926, device='cuda:0')
episode: 307 training return: tensor(-193.7547, device='cuda:0')
epoch: 77 test_true_pfm: 3061.254145498515 sim_pfm: -292.1108428875353
episode: 308 training return: tensor(-362.1349, device='cuda:0')
episode: 309 training return: tensor(-443.0864, device='cuda:0')
episode: 310 training return: tensor(-445.0216, device='cuda:0')
episode: 311 training return: tensor(-331.8711, device='cuda:0')
epoch: 78 test_true_pfm: 2240.420589045405 sim_pfm: -312.8599802554624
episode: 312 training return: tensor(106.7388, device='cuda:0')
episode: 313 training return: tensor(-68.4293, device='cuda:0')
episode: 314 training return: tensor(-501.2576, device='cuda:0')
episode: 315 training return: tensor(10.2191, device='cuda:0')
epoch: 79 test_true_pfm: 2462.853458382512 sim_pfm: -167.38265819542963
episode: 316 training return: tensor(-491.9056, device='cuda:0')
episode: 317 training return: tensor(-447.9107, device='cuda:0')
episode: 318 training return: tensor(60.8422, device='cuda:0')
episode: 319 training return: tensor(81.5790, device='cuda:0')
epoch: 80 test_true_pfm: 1905.784741929001 sim_pfm: -359.6597981088562
episode: 320 training return: tensor(-298.8983, device='cuda:0')
episode: 321 training return: tensor(69.2410, device='cuda:0')
episode: 322 training return: tensor(-146.0516, device='cuda:0')
episode: 323 training return: tensor(-308.3194, device='cuda:0')
epoch: 81 test_true_pfm: 1690.4586861638982 sim_pfm: -345.29440066740307
episode: 324 training return: tensor(-289.9961, device='cuda:0')
episode: 325 training return: tensor(-258.8111, device='cuda:0')
episode: 326 training return: tensor(93.2520, device='cuda:0')
episode: 327 training return: tensor(-210.0493, device='cuda:0')
epoch: 82 test_true_pfm: 1755.0966852879853 sim_pfm: -395.8059210552504
episode: 328 training return: tensor(-189.1483, device='cuda:0')
episode: 329 training return: tensor(-441.3242, device='cuda:0')
episode: 330 training return: tensor(50.4447, device='cuda:0')
episode: 331 training return: tensor(102.4763, device='cuda:0')
epoch: 83 test_true_pfm: 2235.2948273957522 sim_pfm: -318.6216924632511
episode: 332 training return: tensor(53.1060, device='cuda:0')
episode: 333 training return: tensor(-238.3899, device='cuda:0')
episode: 334 training return: tensor(74.4810, device='cuda:0')
episode: 335 training return: tensor(-443.1223, device='cuda:0')
epoch: 84 test_true_pfm: 2819.584276524099 sim_pfm: -314.86155805558275
episode: 336 training return: tensor(-195.2010, device='cuda:0')
episode: 337 training return: tensor(69.5884, device='cuda:0')
episode: 338 training return: tensor(-445.4711, device='cuda:0')
episode: 339 training return: tensor(86.7112, device='cuda:0')
epoch: 85 test_true_pfm: 2233.615220205503 sim_pfm: -269.6563281646813
episode: 340 training return: tensor(40.6443, device='cuda:0')
episode: 341 training return: tensor(19.4533, device='cuda:0')
episode: 342 training return: tensor(-252.8510, device='cuda:0')
episode: 343 training return: tensor(-97.8715, device='cuda:0')
epoch: 86 test_true_pfm: 1695.3908120868045 sim_pfm: -309.8286800820303
episode: 344 training return: tensor(-117.9323, device='cuda:0')
episode: 345 training return: tensor(-439.5390, device='cuda:0')
episode: 346 training return: tensor(88.9961, device='cuda:0')
episode: 347 training return: tensor(-462.0842, device='cuda:0')
epoch: 87 test_true_pfm: 2518.0419135111124 sim_pfm: -333.4752439088382
episode: 348 training return: tensor(-182.5666, device='cuda:0')
episode: 349 training return: tensor(-110.3957, device='cuda:0')
episode: 350 training return: tensor(-315.1089, device='cuda:0')
episode: 351 training return: tensor(68.8100, device='cuda:0')
epoch: 88 test_true_pfm: 1893.5606974024001 sim_pfm: -345.67573395720683
episode: 352 training return: tensor(-343.2727, device='cuda:0')
episode: 353 training return: tensor(89.5825, device='cuda:0')
episode: 354 training return: tensor(44.9575, device='cuda:0')
episode: 355 training return: tensor(-383.0633, device='cuda:0')
epoch: 89 test_true_pfm: 1962.5277389809246 sim_pfm: -402.43626658661134
episode: 356 training return: tensor(-326.7403, device='cuda:0')
episode: 357 training return: tensor(-263.6686, device='cuda:0')
episode: 358 training return: tensor(-45.2121, device='cuda:0')
episode: 359 training return: tensor(-338.6951, device='cuda:0')
epoch: 90 test_true_pfm: 1913.9672491233614 sim_pfm: -387.32729976718355
episode: 360 training return: tensor(-193.4445, device='cuda:0')
episode: 361 training return: tensor(-282.5890, device='cuda:0')
episode: 362 training return: tensor(64.1148, device='cuda:0')
episode: 363 training return: tensor(-437.1503, device='cuda:0')
epoch: 91 test_true_pfm: 1691.598673520585 sim_pfm: -373.4381784096865
episode: 364 training return: tensor(-25.4277, device='cuda:0')
episode: 365 training return: tensor(-355.9818, device='cuda:0')
episode: 366 training return: tensor(-445.9956, device='cuda:0')
episode: 367 training return: tensor(-484.7790, device='cuda:0')
epoch: 92 test_true_pfm: 2372.43286816311 sim_pfm: -330.4920562240101
episode: 368 training return: tensor(-460.7690, device='cuda:0')
episode: 369 training return: tensor(-213.0383, device='cuda:0')
episode: 370 training return: tensor(-348.5351, device='cuda:0')
episode: 371 training return: tensor(-206.9602, device='cuda:0')
epoch: 93 test_true_pfm: 2303.4842972023357 sim_pfm: -300.9599887607231
episode: 372 training return: tensor(-156.1945, device='cuda:0')
episode: 373 training return: tensor(-181.4367, device='cuda:0')
episode: 374 training return: tensor(-436.7034, device='cuda:0')
episode: 375 training return: tensor(68.3356, device='cuda:0')
epoch: 94 test_true_pfm: 2701.7263597201313 sim_pfm: -297.4491362750802
episode: 376 training return: tensor(-195.2716, device='cuda:0')
episode: 377 training return: tensor(133.8917, device='cuda:0')
episode: 378 training return: tensor(-276.5541, device='cuda:0')
episode: 379 training return: tensor(-184.3450, device='cuda:0')
epoch: 95 test_true_pfm: 2142.1226467774563 sim_pfm: -322.58437029097695
episode: 380 training return: tensor(-158.1257, device='cuda:0')
episode: 381 training return: tensor(-205.3719, device='cuda:0')
episode: 382 training return: tensor(-273.5304, device='cuda:0')
episode: 383 training return: tensor(-203.9856, device='cuda:0')
epoch: 96 test_true_pfm: 2405.251120817828 sim_pfm: -157.06028152781073
episode: 384 training return: tensor(-117.3068, device='cuda:0')
episode: 385 training return: tensor(-181.6981, device='cuda:0')
episode: 386 training return: tensor(80.3652, device='cuda:0')
episode: 387 training return: tensor(67.8855, device='cuda:0')
epoch: 97 test_true_pfm: 1995.4167279960004 sim_pfm: -300.1728843021653
episode: 388 training return: tensor(127.0168, device='cuda:0')
episode: 389 training return: tensor(-403.1336, device='cuda:0')
episode: 390 training return: tensor(-365.4409, device='cuda:0')
episode: 391 training return: tensor(-273.9736, device='cuda:0')
epoch: 98 test_true_pfm: 2950.9777620350123 sim_pfm: 84.50914652224553
episode: 392 training return: tensor(-400.0760, device='cuda:0')
episode: 393 training return: tensor(-306.4207, device='cuda:0')
episode: 394 training return: tensor(-430.4801, device='cuda:0')
episode: 395 training return: tensor(-341.1691, device='cuda:0')
epoch: 99 test_true_pfm: 1675.3435512755916 sim_pfm: -336.55920618163265
episode: 396 training return: tensor(-290.8111, device='cuda:0')
episode: 397 training return: tensor(-459.1490, device='cuda:0')
episode: 398 training return: tensor(-310.8774, device='cuda:0')
episode: 399 training return: tensor(-444.5413, device='cuda:0')
epoch: 100 test_true_pfm: 1712.1180361237948 sim_pfm: -364.3248342517957
episode: 400 training return: tensor(-94.9783, device='cuda:0')
episode: 401 training return: tensor(60.0944, device='cuda:0')
episode: 402 training return: tensor(98.4866, device='cuda:0')
episode: 403 training return: tensor(-181.8259, device='cuda:0')
epoch: 101 test_true_pfm: 1815.9199216419618 sim_pfm: -309.52272515411215
episode: 404 training return: tensor(-445.1162, device='cuda:0')
episode: 405 training return: tensor(-228.2984, device='cuda:0')
episode: 406 training return: tensor(-128.8906, device='cuda:0')
episode: 407 training return: tensor(-437.5545, device='cuda:0')
epoch: 102 test_true_pfm: 1907.5453942943388 sim_pfm: -343.1553579260556
episode: 408 training return: tensor(-351.4536, device='cuda:0')
episode: 409 training return: tensor(-185.5814, device='cuda:0')
episode: 410 training return: tensor(-473.2530, device='cuda:0')
episode: 411 training return: tensor(67.4233, device='cuda:0')
epoch: 103 test_true_pfm: 1581.636596736959 sim_pfm: -114.68227359593341
episode: 412 training return: tensor(58.3075, device='cuda:0')
episode: 413 training return: tensor(-400.2508, device='cuda:0')
episode: 414 training return: tensor(-445.9404, device='cuda:0')
episode: 415 training return: tensor(-122.7310, device='cuda:0')
epoch: 104 test_true_pfm: 2134.992642868411 sim_pfm: -372.6224188559766
episode: 416 training return: tensor(-442.1853, device='cuda:0')
episode: 417 training return: tensor(44.6448, device='cuda:0')
episode: 418 training return: tensor(-432.0486, device='cuda:0')
episode: 419 training return: tensor(-187.2752, device='cuda:0')
epoch: 105 test_true_pfm: 2051.5518771394763 sim_pfm: -393.88428006559843
episode: 420 training return: tensor(-3.1162, device='cuda:0')
episode: 421 training return: tensor(-91.9626, device='cuda:0')
episode: 422 training return: tensor(-277.3724, device='cuda:0')
episode: 423 training return: tensor(-186.7330, device='cuda:0')
epoch: 106 test_true_pfm: 1835.3130391055508 sim_pfm: -379.3584408283156
episode: 424 training return: tensor(54.0933, device='cuda:0')
episode: 425 training return: tensor(3.7927, device='cuda:0')
episode: 426 training return: tensor(-505.6744, device='cuda:0')
episode: 427 training return: tensor(-93.2395, device='cuda:0')
epoch: 107 test_true_pfm: 1587.3980775184534 sim_pfm: -410.9686279866728
episode: 428 training return: tensor(-524.5411, device='cuda:0')
episode: 429 training return: tensor(-160.9399, device='cuda:0')
episode: 430 training return: tensor(-445.2020, device='cuda:0')
episode: 431 training return: tensor(53.3664, device='cuda:0')
epoch: 108 test_true_pfm: 1913.5387506093468 sim_pfm: -349.27590338720864
episode: 432 training return: tensor(-464.9005, device='cuda:0')
episode: 433 training return: tensor(51.4802, device='cuda:0')
episode: 434 training return: tensor(-451.0410, device='cuda:0')
episode: 435 training return: tensor(-438.9934, device='cuda:0')
epoch: 109 test_true_pfm: 2559.038331651651 sim_pfm: -278.0700661250157
episode: 436 training return: tensor(-440.4418, device='cuda:0')
episode: 437 training return: tensor(68.8664, device='cuda:0')
episode: 438 training return: tensor(-320.7601, device='cuda:0')
episode: 439 training return: tensor(-347.1421, device='cuda:0')
epoch: 110 test_true_pfm: 1944.8141824584638 sim_pfm: -265.22829684312455
episode: 440 training return: tensor(-316.8466, device='cuda:0')
episode: 441 training return: tensor(-147.4356, device='cuda:0')
episode: 442 training return: tensor(-322.9763, device='cuda:0')
episode: 443 training return: tensor(25.7682, device='cuda:0')
epoch: 111 test_true_pfm: 2051.289982906828 sim_pfm: -11.368071537474558
episode: 444 training return: tensor(-472.1284, device='cuda:0')
episode: 445 training return: tensor(-442.9931, device='cuda:0')
episode: 446 training return: tensor(-122.8673, device='cuda:0')
episode: 447 training return: tensor(-327.7689, device='cuda:0')
epoch: 112 test_true_pfm: 1997.6223991633312 sim_pfm: -326.66431076502585
episode: 448 training return: tensor(-103.7871, device='cuda:0')
episode: 449 training return: tensor(-350.0272, device='cuda:0')
episode: 450 training return: tensor(-289.7888, device='cuda:0')
episode: 451 training return: tensor(-438.7080, device='cuda:0')
epoch: 113 test_true_pfm: 1941.1724797995173 sim_pfm: -360.9012061604881
episode: 452 training return: tensor(59.4092, device='cuda:0')
episode: 453 training return: tensor(-331.4446, device='cuda:0')
episode: 454 training return: tensor(-353.1573, device='cuda:0')
episode: 455 training return: tensor(-475.5683, device='cuda:0')
epoch: 114 test_true_pfm: 2137.2129927619103 sim_pfm: -146.38313837445457
episode: 456 training return: tensor(-398.1091, device='cuda:0')
episode: 457 training return: tensor(-346.2242, device='cuda:0')
episode: 458 training return: tensor(151.9503, device='cuda:0')
episode: 459 training return: tensor(-330.6226, device='cuda:0')
epoch: 115 test_true_pfm: 1865.7048517097144 sim_pfm: -349.0724110475858
episode: 460 training return: tensor(-357.7835, device='cuda:0')
episode: 461 training return: tensor(-472.9077, device='cuda:0')
episode: 462 training return: tensor(-481.8199, device='cuda:0')
episode: 463 training return: tensor(-365.8646, device='cuda:0')
epoch: 116 test_true_pfm: 2234.3722468937863 sim_pfm: -136.87956418554919
episode: 464 training return: tensor(104.2428, device='cuda:0')
episode: 465 training return: tensor(110.1534, device='cuda:0')
episode: 466 training return: tensor(-174.8226, device='cuda:0')
episode: 467 training return: tensor(-287.7625, device='cuda:0')
epoch: 117 test_true_pfm: 2030.6463621230896 sim_pfm: -215.4658918082035
episode: 468 training return: tensor(-445.2191, device='cuda:0')
episode: 469 training return: tensor(-42.4808, device='cuda:0')
episode: 470 training return: tensor(-362.7023, device='cuda:0')
episode: 471 training return: tensor(-390.7833, device='cuda:0')
epoch: 118 test_true_pfm: 2466.0327220066333 sim_pfm: -165.66357557539595
episode: 472 training return: tensor(-29.9697, device='cuda:0')
episode: 473 training return: tensor(49.0937, device='cuda:0')
episode: 474 training return: tensor(-469.4220, device='cuda:0')
episode: 475 training return: tensor(-367.8435, device='cuda:0')
epoch: 119 test_true_pfm: 2064.482922756151 sim_pfm: -230.32950115471613
episode: 476 training return: tensor(-113.2240, device='cuda:0')
episode: 477 training return: tensor(-440.6969, device='cuda:0')
episode: 478 training return: tensor(112.7856, device='cuda:0')
episode: 479 training return: tensor(-179.0331, device='cuda:0')
epoch: 120 test_true_pfm: 2041.6382495661746 sim_pfm: -122.14263014791261
episode: 480 training return: tensor(-345.9714, device='cuda:0')
episode: 481 training return: tensor(-219.3593, device='cuda:0')
episode: 482 training return: tensor(116.9571, device='cuda:0')
episode: 483 training return: tensor(-224.2142, device='cuda:0')
epoch: 121 test_true_pfm: 2219.45303632819 sim_pfm: -258.1757859402569
episode: 484 training return: tensor(-470.2329, device='cuda:0')
episode: 485 training return: tensor(-342.2218, device='cuda:0')
episode: 486 training return: tensor(-392.8135, device='cuda:0')
episode: 487 training return: tensor(-444.3830, device='cuda:0')
epoch: 122 test_true_pfm: 1652.4129146567614 sim_pfm: -323.9058625259592
episode: 488 training return: tensor(-450.2209, device='cuda:0')
episode: 489 training return: tensor(-344.6566, device='cuda:0')
episode: 490 training return: tensor(-275.5451, device='cuda:0')
episode: 491 training return: tensor(-456.6296, device='cuda:0')
epoch: 123 test_true_pfm: 2075.1755773038853 sim_pfm: -291.09894136543153
episode: 492 training return: tensor(-419.2601, device='cuda:0')
episode: 493 training return: tensor(-463.2055, device='cuda:0')
episode: 494 training return: tensor(-464.7295, device='cuda:0')
episode: 495 training return: tensor(-126.6890, device='cuda:0')
epoch: 124 test_true_pfm: 1830.8264263609608 sim_pfm: -283.33774812472984
episode: 496 training return: tensor(-452.3055, device='cuda:0')
episode: 497 training return: tensor(-102.3565, device='cuda:0')
episode: 498 training return: tensor(-384.2745, device='cuda:0')
episode: 499 training return: tensor(-189.7245, device='cuda:0')
epoch: 125 test_true_pfm: 1718.038660175366 sim_pfm: -355.312785557607
episode: 500 training return: tensor(-342.2617, device='cuda:0')
episode: 501 training return: tensor(-199.1690, device='cuda:0')
episode: 502 training return: tensor(-229.8339, device='cuda:0')
episode: 503 training return: tensor(-42.8687, device='cuda:0')
epoch: 126 test_true_pfm: 1758.4096890897993 sim_pfm: -281.926881863367
episode: 504 training return: tensor(-341.0477, device='cuda:0')
episode: 505 training return: tensor(-340.3606, device='cuda:0')
episode: 506 training return: tensor(-243.5759, device='cuda:0')
episode: 507 training return: tensor(29.5864, device='cuda:0')
epoch: 127 test_true_pfm: 1950.502383770376 sim_pfm: -346.3147918735631
episode: 508 training return: tensor(-466.6414, device='cuda:0')
episode: 509 training return: tensor(-362.3077, device='cuda:0')
episode: 510 training return: tensor(-257.1593, device='cuda:0')
episode: 511 training return: tensor(-266.9393, device='cuda:0')
epoch: 128 test_true_pfm: 2023.4097625978186 sim_pfm: -126.5912852921562
episode: 512 training return: tensor(-179.6317, device='cuda:0')
episode: 513 training return: tensor(-339.3253, device='cuda:0')
episode: 514 training return: tensor(-270.2847, device='cuda:0')
episode: 515 training return: tensor(14.2326, device='cuda:0')
epoch: 129 test_true_pfm: 1967.5407454766134 sim_pfm: -277.630185708714
episode: 516 training return: tensor(-417.6747, device='cuda:0')
episode: 517 training return: tensor(-381.2450, device='cuda:0')
episode: 518 training return: tensor(-276.3919, device='cuda:0')
episode: 519 training return: tensor(-435.4087, device='cuda:0')
epoch: 130 test_true_pfm: 2158.861445741599 sim_pfm: -282.0374164359916
episode: 520 training return: tensor(-446.6173, device='cuda:0')
episode: 521 training return: tensor(44.2402, device='cuda:0')
episode: 522 training return: tensor(-360.0809, device='cuda:0')
episode: 523 training return: tensor(-411.0393, device='cuda:0')
epoch: 131 test_true_pfm: 2034.696338647623 sim_pfm: -159.51601670432137
episode: 524 training return: tensor(131.8760, device='cuda:0')
episode: 525 training return: tensor(-205.8469, device='cuda:0')
episode: 526 training return: tensor(-356.6347, device='cuda:0')
episode: 527 training return: tensor(-404.4286, device='cuda:0')
epoch: 132 test_true_pfm: 1946.4429544161867 sim_pfm: -181.28719863925167
episode: 528 training return: tensor(-381.4917, device='cuda:0')
episode: 529 training return: tensor(-317.2989, device='cuda:0')
episode: 530 training return: tensor(-383.6913, device='cuda:0')
episode: 531 training return: tensor(-436.4106, device='cuda:0')
epoch: 133 test_true_pfm: 2140.916357603794 sim_pfm: -187.113249048222
episode: 532 training return: tensor(-434.8315, device='cuda:0')
episode: 533 training return: tensor(-354.0118, device='cuda:0')
episode: 534 training return: tensor(-441.1339, device='cuda:0')
episode: 535 training return: tensor(131.1031, device='cuda:0')
epoch: 134 test_true_pfm: 2462.0784414006857 sim_pfm: -264.75857250551536
episode: 536 training return: tensor(-465.4300, device='cuda:0')
episode: 537 training return: tensor(-440.6137, device='cuda:0')
episode: 538 training return: tensor(58.1987, device='cuda:0')
episode: 539 training return: tensor(-430.6757, device='cuda:0')
epoch: 135 test_true_pfm: 1886.4778579775648 sim_pfm: -144.86408552343104
episode: 540 training return: tensor(-21.0646, device='cuda:0')
episode: 541 training return: tensor(-337.1791, device='cuda:0')
episode: 542 training return: tensor(-87.6299, device='cuda:0')
episode: 543 training return: tensor(55.1741, device='cuda:0')
epoch: 136 test_true_pfm: 2367.111085413243 sim_pfm: -238.65584269124278
episode: 544 training return: tensor(141.0889, device='cuda:0')
episode: 545 training return: tensor(-367.1952, device='cuda:0')
episode: 546 training return: tensor(-432.9297, device='cuda:0')
episode: 547 training return: tensor(21.2817, device='cuda:0')
epoch: 137 test_true_pfm: 2141.683586935673 sim_pfm: -415.2009813647407
episode: 548 training return: tensor(-388.8706, device='cuda:0')
episode: 549 training return: tensor(-380.7480, device='cuda:0')
episode: 550 training return: tensor(-372.0438, device='cuda:0')
episode: 551 training return: tensor(-305.7630, device='cuda:0')
epoch: 138 test_true_pfm: 1927.939990460259 sim_pfm: -305.06260536674137
episode: 552 training return: tensor(-401.8234, device='cuda:0')
episode: 553 training return: tensor(-287.5200, device='cuda:0')
episode: 554 training return: tensor(67.8899, device='cuda:0')
episode: 555 training return: tensor(130.4865, device='cuda:0')
epoch: 139 test_true_pfm: 1733.2972247291543 sim_pfm: -402.43328043156845
episode: 556 training return: tensor(-466.4656, device='cuda:0')
episode: 557 training return: tensor(-439.2132, device='cuda:0')
episode: 558 training return: tensor(-434.6214, device='cuda:0')
episode: 559 training return: tensor(-358.8831, device='cuda:0')
epoch: 140 test_true_pfm: 1947.5020753607457 sim_pfm: -363.2800989231716
episode: 560 training return: tensor(-280.0126, device='cuda:0')
episode: 561 training return: tensor(-396.4180, device='cuda:0')
episode: 562 training return: tensor(-520.5273, device='cuda:0')
episode: 563 training return: tensor(108.7225, device='cuda:0')
epoch: 141 test_true_pfm: 2412.0342231374893 sim_pfm: -199.9011920922785
episode: 564 training return: tensor(53.1971, device='cuda:0')
episode: 565 training return: tensor(-245.0903, device='cuda:0')
episode: 566 training return: tensor(-513.2321, device='cuda:0')
episode: 567 training return: tensor(-301.5887, device='cuda:0')
epoch: 142 test_true_pfm: 2034.964472590765 sim_pfm: -298.4918480172831
episode: 568 training return: tensor(-41.4135, device='cuda:0')
episode: 569 training return: tensor(-276.3734, device='cuda:0')
episode: 570 training return: tensor(-441.2655, device='cuda:0')
episode: 571 training return: tensor(-493.2369, device='cuda:0')
epoch: 143 test_true_pfm: 2138.4387074520587 sim_pfm: -159.38844321249053
episode: 572 training return: tensor(-201.9653, device='cuda:0')
episode: 573 training return: tensor(-227.2478, device='cuda:0')
episode: 574 training return: tensor(-503.6461, device='cuda:0')
episode: 575 training return: tensor(-382.9816, device='cuda:0')
epoch: 144 test_true_pfm: 2264.7898946048426 sim_pfm: -262.61257824330823
episode: 576 training return: tensor(-131.2722, device='cuda:0')
episode: 577 training return: tensor(-149.3398, device='cuda:0')
episode: 578 training return: tensor(-227.2416, device='cuda:0')
episode: 579 training return: tensor(-55.5219, device='cuda:0')
epoch: 145 test_true_pfm: 2511.3562280713336 sim_pfm: -124.80547294179753
episode: 580 training return: tensor(-237.2826, device='cuda:0')
episode: 581 training return: tensor(-445.2992, device='cuda:0')
episode: 582 training return: tensor(69.8749, device='cuda:0')
episode: 583 training return: tensor(-465.1092, device='cuda:0')
epoch: 146 test_true_pfm: 2252.9663642867886 sim_pfm: -224.9083207503621
episode: 584 training return: tensor(-85.2686, device='cuda:0')
episode: 585 training return: tensor(-193.2309, device='cuda:0')
episode: 586 training return: tensor(-422.0956, device='cuda:0')
episode: 587 training return: tensor(-529.3553, device='cuda:0')
epoch: 147 test_true_pfm: 2432.401708970024 sim_pfm: -188.43205775530078
episode: 588 training return: tensor(-418.0951, device='cuda:0')
episode: 589 training return: tensor(-492.2278, device='cuda:0')
episode: 590 training return: tensor(-213.1621, device='cuda:0')
episode: 591 training return: tensor(-191.8795, device='cuda:0')
epoch: 148 test_true_pfm: 1711.835737682032 sim_pfm: -406.2861623870267
episode: 592 training return: tensor(-438.3367, device='cuda:0')
episode: 593 training return: tensor(-435.7643, device='cuda:0')
episode: 594 training return: tensor(-440.2693, device='cuda:0')
episode: 595 training return: tensor(-353.8629, device='cuda:0')
epoch: 149 test_true_pfm: 1950.180878323422 sim_pfm: -138.1924131909036
episode: 596 training return: tensor(-516.8677, device='cuda:0')
episode: 597 training return: tensor(-357.3411, device='cuda:0')
episode: 598 training return: tensor(-417.1335, device='cuda:0')
episode: 599 training return: tensor(-424.5145, device='cuda:0')
epoch: 150 test_true_pfm: 2098.3175638935118 sim_pfm: -352.929217183361
