['--env', 'Hopper-v3', '--seed', '1']
epoch: 0 training_loss 0.24029827527701855 test_loss: 0.1607495903968811
epoch: 1 training_loss 0.15253314726054668 test_loss: 0.109206223487854
epoch: 2 training_loss 0.14429080955684184 test_loss: 0.12306773662567139
epoch: 3 training_loss 0.13607342161238192 test_loss: 0.147105872631073
epoch: 4 training_loss 0.13540711801499128 test_loss: 0.12610849142074584
epoch: 5 training_loss 0.12493115577846765 test_loss: 0.12088533639907836
epoch: 6 training_loss 0.14389302492141723 test_loss: 0.12839484214782715
epoch: 7 training_loss 0.12056859254837037 test_loss: 0.12336505651473999
epoch: 8 training_loss 0.11726061787456274 test_loss: 0.11350393295288086
epoch: 9 training_loss 0.11884076986461878 test_loss: 0.10225517749786377
epoch: 10 training_loss 0.12294443208724261 test_loss: 0.11123976707458497
epoch: 11 training_loss 0.11747479978948831 test_loss: 0.11931253671646118
epoch: 12 training_loss 0.11905783362686634 test_loss: 0.11006923913955688
epoch: 13 training_loss 0.11662756007164717 test_loss: 0.10715558528900146
epoch: 14 training_loss 0.11760359514504672 test_loss: 0.117523455619812
epoch: 15 training_loss 0.11659796293824912 test_loss: 0.135703182220459
epoch: 16 training_loss 0.10982518881559372 test_loss: 0.13590670824050904
epoch: 17 training_loss 0.11391751995310187 test_loss: 0.10425838232040405
epoch: 18 training_loss 0.11295132972300052 test_loss: 0.10781261920928956
epoch: 19 training_loss 0.11892631789669394 test_loss: 0.11230714321136474
epoch: 20 training_loss 0.11707771625369787 test_loss: 0.09813499450683594
epoch: 21 training_loss 0.12351248826831579 test_loss: 0.1151931881904602
epoch: 22 training_loss 0.11438064314424992 test_loss: 0.11212162971496582
epoch: 23 training_loss 0.112620940906927 test_loss: 0.11451406478881836
epoch: 24 training_loss 0.11439250987023115 test_loss: 0.11765114068984986
epoch: 25 training_loss 0.11787372570484876 test_loss: 0.12826694250106813
epoch: 26 training_loss 0.11388520378619432 test_loss: 0.10056054592132568
epoch: 27 training_loss 0.10842257294803857 test_loss: 0.12145360708236694
epoch: 28 training_loss 0.11295384055003524 test_loss: 0.13612473011016846
epoch: 29 training_loss 0.11439860582351685 test_loss: 0.10530498027801513
epoch: 30 training_loss 0.11132226675748826 test_loss: 0.09121493101119996
epoch: 31 training_loss 0.1018480259925127 test_loss: 0.11776752471923828
epoch: 32 training_loss 0.11036095086485148 test_loss: 0.10210018157958985
epoch: 33 training_loss 0.10979048872366548 test_loss: 0.10970290899276733
epoch: 34 training_loss 0.1193670105189085 test_loss: 0.13236525058746337
epoch: 35 training_loss 0.11815960839390754 test_loss: 0.11913045644760131
epoch: 36 training_loss 0.109391281735152 test_loss: 0.0990373432636261
epoch: 37 training_loss 0.12019518200308084 test_loss: 0.10162079334259033
epoch: 38 training_loss 0.11775809489190578 test_loss: 0.12178547382354736
epoch: 39 training_loss 0.11947975762188434 test_loss: 0.11214911937713623
epoch: 40 training_loss 0.11200484782457351 test_loss: 0.1231500506401062
epoch: 41 training_loss 0.10848539635539055 test_loss: 0.09680721163749695
epoch: 42 training_loss 0.11122814014554024 test_loss: 0.12316139936447143
epoch: 43 training_loss 0.1132904790341854 test_loss: 0.0921702265739441
epoch: 44 training_loss 0.11090540790930391 test_loss: 0.12652029991149902
epoch: 45 training_loss 0.10720388934016228 test_loss: 0.1114043116569519
epoch: 46 training_loss 0.11084007982164622 test_loss: 0.09389317631721497
epoch: 47 training_loss 0.11605656411498785 test_loss: 0.11269578933715821
epoch: 48 training_loss 0.11602448493242264 test_loss: 0.0969285249710083
epoch: 49 training_loss 0.11121934145689011 test_loss: 0.08568859100341797
epoch: 50 training_loss 0.11162739764899016 test_loss: 0.0975016176700592
epoch: 51 training_loss 0.10514896526932717 test_loss: 0.11828097105026245
epoch: 52 training_loss 0.1130124139226973 test_loss: 0.08433983325958253
epoch: 53 training_loss 0.10661256235092878 test_loss: 0.11836831569671631
epoch: 54 training_loss 0.1074418542534113 test_loss: 0.10481098890304566
epoch: 55 training_loss 0.10863570829853415 test_loss: 0.10277462005615234
epoch: 56 training_loss 0.10654321376234294 test_loss: 0.11515055894851685
epoch: 57 training_loss 0.10747818684205412 test_loss: 0.09463911652565002
epoch: 58 training_loss 0.10798729941248894 test_loss: 0.09249811768531799
epoch: 59 training_loss 0.10988523764535785 test_loss: 0.1008840799331665
epoch: 60 training_loss 0.10801514528691769 test_loss: 0.1003648042678833
epoch: 61 training_loss 0.10790686309337616 test_loss: 0.08811954855918884
epoch: 62 training_loss 0.11104873519390822 test_loss: 0.12463407516479492
epoch: 63 training_loss 0.11514227442443371 test_loss: 0.1112450122833252
epoch: 64 training_loss 0.11558298606425524 test_loss: 0.09284833669662476
epoch: 65 training_loss 0.11162021640688181 test_loss: 0.0919909954071045
epoch: 66 training_loss 0.11232194058597088 test_loss: 0.10054622888565064
epoch: 67 training_loss 0.11432726629078388 test_loss: 0.11399592161178589
epoch: 68 training_loss 0.11118270847946406 test_loss: 0.09902248978614807
epoch: 69 training_loss 0.11239241283386946 test_loss: 0.11678069829940796
epoch: 70 training_loss 0.10710151471197606 test_loss: 0.12417246103286743
epoch: 71 training_loss 0.1043106333911419 test_loss: 0.11343463659286498
epoch: 72 training_loss 0.10465656945481897 test_loss: 0.10782079696655274
epoch: 73 training_loss 0.10799299288541078 test_loss: 0.09261592626571655
epoch: 74 training_loss 0.10718789961189032 test_loss: 0.1132997989654541
epoch: 75 training_loss 0.10919400207698345 test_loss: 0.10679877996444702
epoch: 76 training_loss 0.11187866935506463 test_loss: 0.08543989658355713
epoch: 77 training_loss 0.11474302381277085 test_loss: 0.08977360129356385
epoch: 78 training_loss 0.11349594488739967 test_loss: 0.09922050833702087
epoch: 79 training_loss 0.10673651248216628 test_loss: 0.11268242597579955
epoch: 80 training_loss 0.1127869300544262 test_loss: 0.08962730169296265
epoch: 81 training_loss 0.11363213147968054 test_loss: 0.08962799310684204
epoch: 82 training_loss 0.10276070408523083 test_loss: 0.10594009160995484
epoch: 83 training_loss 0.1144248415157199 test_loss: 0.12180491685867309
epoch: 84 training_loss 0.11183438770473003 test_loss: 0.11979423761367798
epoch: 85 training_loss 0.10777163498103619 test_loss: 0.08651782274246216
epoch: 86 training_loss 0.11229974810034037 test_loss: 0.13525452613830566
epoch: 87 training_loss 0.11312024243175983 test_loss: 0.11535465717315674
epoch: 88 training_loss 0.11268337849527597 test_loss: 0.12356197834014893
epoch: 89 training_loss 0.11209485828876495 test_loss: 0.11445624828338623
epoch: 90 training_loss 0.11229222388938069 test_loss: 0.09076969027519226
epoch: 91 training_loss 0.11106119025498629 test_loss: 0.10949351787567138
epoch: 92 training_loss 0.10515917293727398 test_loss: 0.09658569097518921
epoch: 93 training_loss 0.10683511370792985 test_loss: 0.14580001831054687
epoch: 94 training_loss 0.10888999842107296 test_loss: 0.09443215131759644
epoch: 95 training_loss 0.10766862016171216 test_loss: 0.11026525497436523
epoch: 96 training_loss 0.10582301132380963 test_loss: 0.09350928664207458
epoch: 97 training_loss 0.11226831628009677 test_loss: 0.09954832196235656
epoch: 98 training_loss 0.10899782124906779 test_loss: 0.10789282321929931
epoch: 99 training_loss 0.10859410785138607 test_loss: 0.09401506185531616
epoch: 100 training_loss 0.10985022079199552 test_loss: 0.12086269855499268
epoch: 101 training_loss 0.1053121792152524 test_loss: 0.1164986252784729
epoch: 102 training_loss 0.10844205625355244 test_loss: 0.1364891767501831
epoch: 103 training_loss 0.11539107050746679 test_loss: 0.10418663024902344
epoch: 104 training_loss 0.10394377646967769 test_loss: 0.09298130869865417
epoch: 105 training_loss 0.1048456747084856 test_loss: 0.09344509840011597
epoch: 106 training_loss 0.10962672255933285 test_loss: 0.09214386343955994
epoch: 107 training_loss 0.10889586448669433 test_loss: 0.10214837789535522
epoch: 108 training_loss 0.11032393952831626 test_loss: 0.11156998872756958
epoch: 109 training_loss 0.10929445914924145 test_loss: 0.10297280550003052
epoch: 110 training_loss 0.10534372426569462 test_loss: 0.11047658920288086
epoch: 111 training_loss 0.11081370122730733 test_loss: 0.12891981601715088
epoch: 112 training_loss 0.11034112583845854 test_loss: 0.09155142307281494
epoch: 113 training_loss 0.10506524324417114 test_loss: 0.10549707412719726
epoch: 114 training_loss 0.1080759995058179 test_loss: 0.09034807682037353
epoch: 115 training_loss 0.1048756068944931 test_loss: 0.0982937753200531
epoch: 116 training_loss 0.10767008164897561 test_loss: 0.10745375156402588
epoch: 117 training_loss 0.10947813365608454 test_loss: 0.09572517275810241
epoch: 118 training_loss 0.1077308227494359 test_loss: 0.09918898940086365
epoch: 119 training_loss 0.11339113740250469 test_loss: 0.09245149493217468
epoch: 120 training_loss 0.10533277010545135 test_loss: 0.10392676591873169
epoch: 121 training_loss 0.11306210678070784 test_loss: 0.10567941665649414
epoch: 122 training_loss 0.10947449676692486 test_loss: 0.11599793434143066
epoch: 123 training_loss 0.10729535091668367 test_loss: 0.08407301306724549
epoch: 124 training_loss 0.10330073639750481 test_loss: 0.09130810499191284
epoch: 125 training_loss 0.1094065973162651 test_loss: 0.0864956557750702
epoch: 126 training_loss 0.10394907550886273 test_loss: 0.09792713522911071
epoch: 127 training_loss 0.10420948445796967 test_loss: 0.1038981556892395
epoch: 128 training_loss 0.1060750726237893 test_loss: 0.11502338647842407
epoch: 129 training_loss 0.11025435574352742 test_loss: 0.10029524564743042
epoch: 130 training_loss 0.1044167360663414 test_loss: 0.09835804104804993
epoch: 131 training_loss 0.11979361452162265 test_loss: 0.09992960095405579
epoch: 132 training_loss 0.10361300773918629 test_loss: 0.09888715147972107
epoch: 133 training_loss 0.10727394310757518 test_loss: 0.09181976318359375
epoch: 134 training_loss 0.11021074656397105 test_loss: 0.11069999933242798
epoch: 135 training_loss 0.11469472859054804 test_loss: 0.0878551721572876
epoch: 136 training_loss 0.10831075921654701 test_loss: 0.1022071361541748
epoch: 137 training_loss 0.10454389706254005 test_loss: 0.08275552988052368
epoch: 138 training_loss 0.10237659336999058 test_loss: 0.12785810232162476
epoch: 139 training_loss 0.09731754764914513 test_loss: 0.10842890739440918
epoch: 140 training_loss 0.1054492942430079 test_loss: 0.11364792585372925
epoch: 141 training_loss 0.10339462738484144 test_loss: 0.09880465269088745
epoch: 142 training_loss 0.11621442824602127 test_loss: 0.08550126552581787
epoch: 143 training_loss 0.10463892292231321 test_loss: 0.09535327553749084
epoch: 144 training_loss 0.10920849539339543 test_loss: 0.09726684689521789
epoch: 145 training_loss 0.10487111236900092 test_loss: 0.08696249723434449
epoch: 146 training_loss 0.10804588388651609 test_loss: 0.1019474744796753
epoch: 147 training_loss 0.11153955060988664 test_loss: 0.10415799617767334
epoch: 148 training_loss 0.1119352425262332 test_loss: 0.0915380597114563
epoch: 149 training_loss 0.0995664275623858 test_loss: 0.10681838989257812
epoch: 0 training_loss 7.1659712314605715 test_loss: 4.04090690612793
epoch: 1 training_loss 3.4027990174293516 test_loss: 2.851777267456055
epoch: 2 training_loss 2.2928255224227905 test_loss: 1.99182071685791
epoch: 3 training_loss 1.8470160615444184 test_loss: 1.633066749572754
epoch: 4 training_loss 1.5756497776508331 test_loss: 1.3926112174987793
epoch: 5 training_loss 1.416322239637375 test_loss: 1.3558375358581543
epoch: 6 training_loss 1.270084046125412 test_loss: 1.2185155868530273
epoch: 7 training_loss 1.1732079207897186 test_loss: 1.141476535797119
epoch: 8 training_loss 1.088773644566536 test_loss: 1.0744182586669921
epoch: 9 training_loss 1.0428377830982207 test_loss: 1.0598939895629882
epoch: 10 training_loss 0.9679483956098557 test_loss: 0.9821521759033203
epoch: 11 training_loss 0.9558761620521545 test_loss: 0.9513269424438476
epoch: 12 training_loss 0.9056365394592285 test_loss: 0.9135407447814942
epoch: 13 training_loss 0.86950124502182 test_loss: 0.8747323036193848
epoch: 14 training_loss 0.8547762048244476 test_loss: 0.8523694038391113
epoch: 15 training_loss 0.8191428369283676 test_loss: 0.8111510276794434
epoch: 16 training_loss 0.819069293141365 test_loss: 0.8026373863220215
epoch: 17 training_loss 0.789698234796524 test_loss: 0.7698407173156738
epoch: 18 training_loss 0.7739292550086975 test_loss: 0.7684805393218994
epoch: 19 training_loss 0.7526981574296951 test_loss: 0.7478718280792236
epoch: 20 training_loss 0.740937362909317 test_loss: 0.7516085624694824
epoch: 21 training_loss 0.729153002500534 test_loss: 0.707796287536621
epoch: 22 training_loss 0.7265684700012207 test_loss: 0.6788157463073731
epoch: 23 training_loss 0.7201715457439423 test_loss: 0.6982584953308105
epoch: 24 training_loss 0.7014128118753433 test_loss: 0.6511927604675293
epoch: 25 training_loss 0.6910327017307282 test_loss: 0.7147510528564454
epoch: 26 training_loss 0.6898991239070892 test_loss: 0.6559667110443115
epoch: 27 training_loss 0.680337290763855 test_loss: 0.6817698001861572
epoch: 28 training_loss 0.6654884082078933 test_loss: 0.6867920875549316
epoch: 29 training_loss 0.6655908751487732 test_loss: 0.6567657470703125
epoch: 30 training_loss 0.646869124174118 test_loss: 0.6381312847137451
epoch: 31 training_loss 0.6431962466239929 test_loss: 0.6303836345672608
epoch: 32 training_loss 0.6489897406101227 test_loss: 0.6057422637939454
epoch: 33 training_loss 0.6486005467176438 test_loss: 0.6235085487365722
epoch: 34 training_loss 0.6244684416055679 test_loss: 0.6288733005523681
epoch: 35 training_loss 0.6319975036382676 test_loss: 0.5980556011199951
epoch: 36 training_loss 0.6232420235872269 test_loss: 0.6379292488098145
epoch: 37 training_loss 0.6212898761034011 test_loss: 0.5886705875396728
epoch: 38 training_loss 0.6103117525577545 test_loss: 0.6232747077941895
epoch: 39 training_loss 0.6105261653661728 test_loss: 0.5793952465057373
epoch: 40 training_loss 0.6118923723697662 test_loss: 0.5709389686584473
epoch: 41 training_loss 0.6068691551685333 test_loss: 0.5977352142333985
epoch: 42 training_loss 0.6041906344890594 test_loss: 0.5885592937469483
epoch: 43 training_loss 0.598785434961319 test_loss: 0.5903673648834229
epoch: 44 training_loss 0.6056311500072479 test_loss: 0.6005287170410156
epoch: 45 training_loss 0.5967801243066788 test_loss: 0.6177292346954346
epoch: 46 training_loss 0.5853505849838256 test_loss: 0.5672868251800537
epoch: 47 training_loss 0.5803299069404602 test_loss: 0.5489941120147706
epoch: 48 training_loss 0.5833657705783843 test_loss: 0.6204163074493408
epoch: 49 training_loss 0.5835481649637222 test_loss: 0.5831707000732422
epoch: 50 training_loss 0.5736117690801621 test_loss: 0.5680286884307861
epoch: 51 training_loss 0.5760783636569977 test_loss: 0.5925997257232666
epoch: 52 training_loss 0.5683874601125717 test_loss: 0.585545825958252
epoch: 53 training_loss 0.5738937199115753 test_loss: 0.5744168758392334
epoch: 54 training_loss 0.5702321714162827 test_loss: 0.5605811595916748
epoch: 55 training_loss 0.553366992175579 test_loss: 0.5446112155914307
epoch: 56 training_loss 0.5658258295059204 test_loss: 0.5522253036499023
epoch: 57 training_loss 0.548695108294487 test_loss: 0.5412827491760254
epoch: 58 training_loss 0.5668429401516915 test_loss: 0.5872434616088867
epoch: 59 training_loss 0.5661595922708511 test_loss: 0.5541584968566895
epoch: 60 training_loss 0.5616203594207764 test_loss: 0.5380806922912598
epoch: 61 training_loss 0.5484627375006675 test_loss: 0.5277067661285401
epoch: 62 training_loss 0.5503497824072838 test_loss: 0.5323570251464844
epoch: 63 training_loss 0.5466618275642395 test_loss: 0.568229866027832
epoch: 64 training_loss 0.5428542086482048 test_loss: 0.5441184997558594
epoch: 65 training_loss 0.539475599527359 test_loss: 0.5544608116149903
epoch: 66 training_loss 0.5459911462664604 test_loss: 0.5295973777770996
epoch: 67 training_loss 0.5335504099726677 test_loss: 0.5277658462524414
epoch: 68 training_loss 0.5318360185623169 test_loss: 0.5396523475646973
epoch: 69 training_loss 0.5317924469709396 test_loss: 0.5190531730651855
epoch: 70 training_loss 0.5305840760469437 test_loss: 0.5326854228973389
epoch: 71 training_loss 0.5258178594708443 test_loss: 0.5589046001434326
epoch: 72 training_loss 0.5347309428453445 test_loss: 0.5303092479705811
epoch: 73 training_loss 0.5353480851650239 test_loss: 0.5190833568572998
epoch: 74 training_loss 0.5303021916747093 test_loss: 0.5181067943572998
epoch: 75 training_loss 0.529033795595169 test_loss: 0.5295603275299072
epoch: 76 training_loss 0.5289865699410439 test_loss: 0.5349726676940918
epoch: 77 training_loss 0.5245241320133209 test_loss: 0.5049107551574707
epoch: 78 training_loss 0.5257721441984177 test_loss: 0.5337644577026367
epoch: 79 training_loss 0.5256166023015976 test_loss: 0.5174819469451905
epoch: 80 training_loss 0.5169102096557617 test_loss: 0.5267380714416504
epoch: 81 training_loss 0.5206440663337708 test_loss: 0.5190322399139404
epoch: 82 training_loss 0.5155509075522423 test_loss: 0.51937894821167
epoch: 83 training_loss 0.5077495023608207 test_loss: 0.4959970474243164
epoch: 84 training_loss 0.5098333889245987 test_loss: 0.5123436927795411
epoch: 85 training_loss 0.5099531868100167 test_loss: 0.5564270496368409
epoch: 86 training_loss 0.5171419489383697 test_loss: 0.5165485382080078
epoch: 87 training_loss 0.5177075654268265 test_loss: 0.5102193832397461
epoch: 88 training_loss 0.5083031636476517 test_loss: 0.5028590202331543
epoch: 89 training_loss 0.5139692991971969 test_loss: 0.5148914337158204
epoch: 90 training_loss 0.5049321722984313 test_loss: 0.49721126556396483
epoch: 91 training_loss 0.5015163397789002 test_loss: 0.5051683902740478
epoch: 92 training_loss 0.5015916553139687 test_loss: 0.5077988624572753
epoch: 93 training_loss 0.5100733748078347 test_loss: 0.4983823299407959
epoch: 94 training_loss 0.5039151161909103 test_loss: 0.5212530612945556
epoch: 95 training_loss 0.4985294169187546 test_loss: 0.5251815319061279
epoch: 96 training_loss 0.5047521084547043 test_loss: 0.4832610607147217
epoch: 97 training_loss 0.5090490442514419 test_loss: 0.49401054382324217
epoch: 98 training_loss 0.4920873808860779 test_loss: 0.4897041320800781
epoch: 99 training_loss 0.497420029938221 test_loss: 0.505282974243164
epoch: 100 training_loss 0.497875748872757 test_loss: 0.5032151222229004
epoch: 101 training_loss 0.49486758023500443 test_loss: 0.4792947769165039
epoch: 102 training_loss 0.4950388842821121 test_loss: 0.49166064262390136
epoch: 103 training_loss 0.49845732003450394 test_loss: 0.5101353645324707
epoch: 104 training_loss 0.4968916884064674 test_loss: 0.48593363761901853
epoch: 105 training_loss 0.49171909034252165 test_loss: 0.47714409828186033
epoch: 106 training_loss 0.4904167914390564 test_loss: 0.5064664840698242
epoch: 107 training_loss 0.492950873374939 test_loss: 0.4916077136993408
epoch: 108 training_loss 0.48272341579198835 test_loss: 0.48459596633911134
epoch: 109 training_loss 0.4947188752889633 test_loss: 0.5030128002166748
epoch: 110 training_loss 0.4869601032137871 test_loss: 0.47974982261657717
epoch: 111 training_loss 0.48765269011259077 test_loss: 0.5286450386047363
epoch: 112 training_loss 0.4885842275619507 test_loss: 0.4891964912414551
epoch: 113 training_loss 0.4850051897764206 test_loss: 0.48842735290527345
epoch: 114 training_loss 0.4856812909245491 test_loss: 0.48575873374938966
epoch: 115 training_loss 0.4931747296452522 test_loss: 0.47304821014404297
epoch: 116 training_loss 0.4843928310275078 test_loss: 0.4819183826446533
epoch: 117 training_loss 0.49536084294319155 test_loss: 0.4838000774383545
epoch: 118 training_loss 0.4851324501633644 test_loss: 0.49959568977355956
epoch: 119 training_loss 0.48078927904367447 test_loss: 0.4664489269256592
epoch: 120 training_loss 0.4859617608785629 test_loss: 0.4696162700653076
epoch: 121 training_loss 0.48605659753084185 test_loss: 0.48377671241760256
epoch: 122 training_loss 0.4824342566728592 test_loss: 0.47029595375061034
epoch: 123 training_loss 0.48358475148677826 test_loss: 0.4917441844940186
epoch: 124 training_loss 0.47781474322080614 test_loss: 0.465546178817749
epoch: 125 training_loss 0.4707967704534531 test_loss: 0.4663863658905029
epoch: 126 training_loss 0.4778717413544655 test_loss: 0.47107367515563964
epoch: 127 training_loss 0.4733513668179512 test_loss: 0.4603241443634033
epoch: 128 training_loss 0.47981239229440686 test_loss: 0.46498122215271
epoch: 129 training_loss 0.47841193854808806 test_loss: 0.47196283340454104
epoch: 130 training_loss 0.4729361724853516 test_loss: 0.515669870376587
epoch: 131 training_loss 0.4883914041519165 test_loss: 0.4688426494598389
epoch: 132 training_loss 0.47450603008270265 test_loss: 0.46073427200317385
epoch: 133 training_loss 0.47090096682310106 test_loss: 0.46849870681762695
epoch: 134 training_loss 0.46975923240184786 test_loss: 0.49738302230834963
epoch: 135 training_loss 0.4715253204107285 test_loss: 0.4785182952880859
epoch: 136 training_loss 0.46886167764663694 test_loss: 0.4647552013397217
epoch: 137 training_loss 0.47041988670825957 test_loss: 0.4676187515258789
epoch: 138 training_loss 0.4670166632533073 test_loss: 0.4761763095855713
epoch: 139 training_loss 0.4760004621744156 test_loss: 0.47887630462646485
epoch: 140 training_loss 0.47094025582075116 test_loss: 0.476169490814209
epoch: 141 training_loss 0.4789027205109596 test_loss: 0.4923232078552246
epoch: 142 training_loss 0.472970050573349 test_loss: 0.45904173851013186
epoch: 143 training_loss 0.46239883184432984 test_loss: 0.4636413097381592
epoch: 144 training_loss 0.4748247495293617 test_loss: 0.49452505111694334
epoch: 145 training_loss 0.47509181797504424 test_loss: 0.47605161666870116
epoch: 146 training_loss 0.46411420494318006 test_loss: 0.46360301971435547
epoch: 147 training_loss 0.4658442631363869 test_loss: 0.46343560218811036
epoch: 148 training_loss 0.4634162214398384 test_loss: 0.4570119857788086
epoch: 149 training_loss 0.46201488226652143 test_loss: 0.4803311824798584
3117.2551302224665
episode: 0 training return: tensor(-210.7839, device='cuda:0')
episode: 1 training return: tensor(19.2435, device='cuda:0')
episode: 2 training return: tensor(-396.4330, device='cuda:0')
episode: 3 training return: tensor(62.4825, device='cuda:0')
epoch: 1 test_true_pfm: 2558.932584271869 sim_pfm: -227.4718223923313
episode: 4 training return: tensor(25.1928, device='cuda:0')
episode: 5 training return: tensor(-458.1939, device='cuda:0')
episode: 6 training return: tensor(-315.0992, device='cuda:0')
episode: 7 training return: tensor(-374.4557, device='cuda:0')
epoch: 2 test_true_pfm: 1678.819418712068 sim_pfm: -285.3996014666627
episode: 8 training return: tensor(38.2017, device='cuda:0')
episode: 9 training return: tensor(-37.9165, device='cuda:0')
episode: 10 training return: tensor(-420.8497, device='cuda:0')
episode: 11 training return: tensor(-398.6095, device='cuda:0')
epoch: 3 test_true_pfm: 2564.628884545961 sim_pfm: -327.12919721817406
episode: 12 training return: tensor(-400.2981, device='cuda:0')
episode: 13 training return: tensor(15.7940, device='cuda:0')
episode: 14 training return: tensor(-255.7233, device='cuda:0')
episode: 15 training return: tensor(-528.2408, device='cuda:0')
epoch: 4 test_true_pfm: 1482.7359607279498 sim_pfm: -468.4304061253497
episode: 16 training return: tensor(-505.8763, device='cuda:0')
episode: 17 training return: tensor(4.4415, device='cuda:0')
episode: 18 training return: tensor(-448.9124, device='cuda:0')
episode: 19 training return: tensor(-382.0986, device='cuda:0')
epoch: 5 test_true_pfm: 2200.6721340353247 sim_pfm: -306.49383558090386
episode: 20 training return: tensor(-486.9763, device='cuda:0')
episode: 21 training return: tensor(-128.8810, device='cuda:0')
episode: 22 training return: tensor(58.3130, device='cuda:0')
episode: 23 training return: tensor(-451.0077, device='cuda:0')
epoch: 6 test_true_pfm: 1509.2538821452774 sim_pfm: -357.9574417275726
episode: 24 training return: tensor(-424.2929, device='cuda:0')
episode: 25 training return: tensor(26.3756, device='cuda:0')
episode: 26 training return: tensor(6.4317, device='cuda:0')
episode: 27 training return: tensor(-474.4393, device='cuda:0')
epoch: 7 test_true_pfm: 3218.728389283851 sim_pfm: -90.37283846143207
episode: 28 training return: tensor(19.0875, device='cuda:0')
episode: 29 training return: tensor(-161.5597, device='cuda:0')
episode: 30 training return: tensor(-163.9010, device='cuda:0')
episode: 31 training return: tensor(-373.7258, device='cuda:0')
epoch: 8 test_true_pfm: 1758.1914083531062 sim_pfm: -90.94560539295587
episode: 32 training return: tensor(-453.2289, device='cuda:0')
episode: 33 training return: tensor(-196.2728, device='cuda:0')
episode: 34 training return: tensor(81.6453, device='cuda:0')
episode: 35 training return: tensor(-322.9891, device='cuda:0')
epoch: 9 test_true_pfm: 2651.271389277725 sim_pfm: -384.3571904778558
episode: 36 training return: tensor(-375.7677, device='cuda:0')
episode: 37 training return: tensor(-95.3536, device='cuda:0')
episode: 38 training return: tensor(124.1001, device='cuda:0')
episode: 39 training return: tensor(-293.2155, device='cuda:0')
epoch: 10 test_true_pfm: 3172.927550422019 sim_pfm: 53.16047374450136
episode: 40 training return: tensor(-471.2567, device='cuda:0')
episode: 41 training return: tensor(-408.5085, device='cuda:0')
episode: 42 training return: tensor(-202.0154, device='cuda:0')
episode: 43 training return: tensor(78.8325, device='cuda:0')
epoch: 11 test_true_pfm: 2798.35438067353 sim_pfm: -193.7740841974155
episode: 44 training return: tensor(5.5391, device='cuda:0')
episode: 45 training return: tensor(-480.4198, device='cuda:0')
episode: 46 training return: tensor(-212.8695, device='cuda:0')
episode: 47 training return: tensor(22.9510, device='cuda:0')
epoch: 12 test_true_pfm: 2718.754143488876 sim_pfm: -218.52383163621803
episode: 48 training return: tensor(15.3711, device='cuda:0')
episode: 49 training return: tensor(-45.4583, device='cuda:0')
episode: 50 training return: tensor(108.7340, device='cuda:0')
episode: 51 training return: tensor(-499.5751, device='cuda:0')
epoch: 13 test_true_pfm: 2257.14471435642 sim_pfm: -89.12123271947105
episode: 52 training return: tensor(-205.4402, device='cuda:0')
episode: 53 training return: tensor(-448.7737, device='cuda:0')
episode: 54 training return: tensor(-290.6866, device='cuda:0')
episode: 55 training return: tensor(88.6968, device='cuda:0')
epoch: 14 test_true_pfm: 3141.6491711546296 sim_pfm: 56.42710931028705
episode: 56 training return: tensor(-372.0786, device='cuda:0')
episode: 57 training return: tensor(76.5481, device='cuda:0')
episode: 58 training return: tensor(-285.3197, device='cuda:0')
episode: 59 training return: tensor(-393.5108, device='cuda:0')
epoch: 15 test_true_pfm: 3289.7278700779207 sim_pfm: -62.97060593141941
episode: 60 training return: tensor(67.7592, device='cuda:0')
episode: 61 training return: tensor(22.3995, device='cuda:0')
episode: 62 training return: tensor(-531.4735, device='cuda:0')
episode: 63 training return: tensor(88.0610, device='cuda:0')
epoch: 16 test_true_pfm: 2799.9814153733664 sim_pfm: 35.40720151030109
episode: 64 training return: tensor(-116.6626, device='cuda:0')
episode: 65 training return: tensor(102.7050, device='cuda:0')
episode: 66 training return: tensor(1.4736, device='cuda:0')
episode: 67 training return: tensor(-29.2029, device='cuda:0')
epoch: 17 test_true_pfm: 3238.6544276437066 sim_pfm: -30.390570594017237
episode: 68 training return: tensor(96.0776, device='cuda:0')
episode: 69 training return: tensor(22.5596, device='cuda:0')
episode: 70 training return: tensor(-56.2442, device='cuda:0')
episode: 71 training return: tensor(30.3317, device='cuda:0')
epoch: 18 test_true_pfm: 1621.09720274998 sim_pfm: -124.09571003659705
episode: 72 training return: tensor(-447.9206, device='cuda:0')
episode: 73 training return: tensor(56.3439, device='cuda:0')
episode: 74 training return: tensor(37.9523, device='cuda:0')
episode: 75 training return: tensor(-18.0481, device='cuda:0')
epoch: 19 test_true_pfm: 1764.0383047382074 sim_pfm: -90.00941559721832
episode: 76 training return: tensor(-63.2804, device='cuda:0')
episode: 77 training return: tensor(-196.8623, device='cuda:0')
episode: 78 training return: tensor(9.3933, device='cuda:0')
episode: 79 training return: tensor(-16.8054, device='cuda:0')
epoch: 20 test_true_pfm: 3029.387182531842 sim_pfm: -31.58934818779623
episode: 80 training return: tensor(82.8301, device='cuda:0')
episode: 81 training return: tensor(39.1948, device='cuda:0')
episode: 82 training return: tensor(24.0203, device='cuda:0')
episode: 83 training return: tensor(40.5134, device='cuda:0')
epoch: 21 test_true_pfm: 3247.6789258040276 sim_pfm: -104.12373041267467
episode: 84 training return: tensor(45.0152, device='cuda:0')
episode: 85 training return: tensor(24.2988, device='cuda:0')
episode: 86 training return: tensor(-451.6971, device='cuda:0')
episode: 87 training return: tensor(12.2971, device='cuda:0')
epoch: 22 test_true_pfm: 2299.1317938800066 sim_pfm: 54.87155139896398
episode: 88 training return: tensor(-471.6480, device='cuda:0')
episode: 89 training return: tensor(105.5075, device='cuda:0')
episode: 90 training return: tensor(-481.2623, device='cuda:0')
episode: 91 training return: tensor(61.2674, device='cuda:0')
epoch: 23 test_true_pfm: 3236.6550109373766 sim_pfm: 61.82443159885588
episode: 92 training return: tensor(83.7366, device='cuda:0')
episode: 93 training return: tensor(-102.0977, device='cuda:0')
episode: 94 training return: tensor(-470.3960, device='cuda:0')
episode: 95 training return: tensor(-415.0392, device='cuda:0')
epoch: 24 test_true_pfm: 3262.372152652703 sim_pfm: 64.48648050151921
episode: 96 training return: tensor(42.7210, device='cuda:0')
episode: 97 training return: tensor(-433.0889, device='cuda:0')
episode: 98 training return: tensor(37.2791, device='cuda:0')
episode: 99 training return: tensor(-411.2802, device='cuda:0')
epoch: 25 test_true_pfm: 2693.008654179743 sim_pfm: -208.96923242736375
episode: 100 training return: tensor(31.9062, device='cuda:0')
episode: 101 training return: tensor(-104.4096, device='cuda:0')
episode: 102 training return: tensor(-419.7175, device='cuda:0')
episode: 103 training return: tensor(-437.3199, device='cuda:0')
epoch: 26 test_true_pfm: 2806.7251057079025 sim_pfm: 41.81337070085768
episode: 104 training return: tensor(62.3613, device='cuda:0')
episode: 105 training return: tensor(-73.9562, device='cuda:0')
episode: 106 training return: tensor(129.2670, device='cuda:0')
episode: 107 training return: tensor(-451.5478, device='cuda:0')
epoch: 27 test_true_pfm: 2993.445763628908 sim_pfm: 36.3579469541437
episode: 108 training return: tensor(-370.8044, device='cuda:0')
episode: 109 training return: tensor(-501.7724, device='cuda:0')
episode: 110 training return: tensor(73.5570, device='cuda:0')
episode: 111 training return: tensor(-469.9594, device='cuda:0')
epoch: 28 test_true_pfm: 3263.644624004743 sim_pfm: -132.70004913757052
episode: 112 training return: tensor(-500.6928, device='cuda:0')
episode: 113 training return: tensor(-455.1130, device='cuda:0')
episode: 114 training return: tensor(-445.2808, device='cuda:0')
episode: 115 training return: tensor(11.9124, device='cuda:0')
epoch: 29 test_true_pfm: 2739.3523894572663 sim_pfm: 72.29356587317307
episode: 116 training return: tensor(128.8280, device='cuda:0')
episode: 117 training return: tensor(50.4418, device='cuda:0')
episode: 118 training return: tensor(-287.2338, device='cuda:0')
episode: 119 training return: tensor(-359.9200, device='cuda:0')
epoch: 30 test_true_pfm: 2435.5698403006822 sim_pfm: -174.02350408839993
episode: 120 training return: tensor(-398.9845, device='cuda:0')
episode: 121 training return: tensor(35.7076, device='cuda:0')
episode: 122 training return: tensor(-470.7681, device='cuda:0')
episode: 123 training return: tensor(60.5020, device='cuda:0')
epoch: 31 test_true_pfm: 2721.229507545528 sim_pfm: 6.036312894071064
episode: 124 training return: tensor(36.2500, device='cuda:0')
episode: 125 training return: tensor(-448.5669, device='cuda:0')
episode: 126 training return: tensor(33.3376, device='cuda:0')
episode: 127 training return: tensor(-389.6489, device='cuda:0')
epoch: 32 test_true_pfm: 2926.6005077815403 sim_pfm: -78.30104241658894
episode: 128 training return: tensor(40.2916, device='cuda:0')
episode: 129 training return: tensor(58.7801, device='cuda:0')
episode: 130 training return: tensor(38.7627, device='cuda:0')
episode: 131 training return: tensor(-449.1353, device='cuda:0')
epoch: 33 test_true_pfm: 2921.224452814051 sim_pfm: -37.15231743068822
episode: 132 training return: tensor(-506.8306, device='cuda:0')
episode: 133 training return: tensor(27.6921, device='cuda:0')
episode: 134 training return: tensor(132.9564, device='cuda:0')
episode: 135 training return: tensor(-67.1045, device='cuda:0')
epoch: 34 test_true_pfm: 2682.7183884697038 sim_pfm: -272.90727695659734
episode: 136 training return: tensor(-182.0572, device='cuda:0')
episode: 137 training return: tensor(-199.1620, device='cuda:0')
episode: 138 training return: tensor(68.1630, device='cuda:0')
episode: 139 training return: tensor(7.2798, device='cuda:0')
epoch: 35 test_true_pfm: 2727.57265341442 sim_pfm: -16.51063084419002
episode: 140 training return: tensor(45.9493, device='cuda:0')
episode: 141 training return: tensor(102.1402, device='cuda:0')
episode: 142 training return: tensor(-461.5142, device='cuda:0')
episode: 143 training return: tensor(29.9607, device='cuda:0')
epoch: 36 test_true_pfm: 2215.7025793575594 sim_pfm: -270.6634032387519
episode: 144 training return: tensor(21.4879, device='cuda:0')
episode: 145 training return: tensor(59.3292, device='cuda:0')
episode: 146 training return: tensor(43.3219, device='cuda:0')
episode: 147 training return: tensor(-510.4617, device='cuda:0')
epoch: 37 test_true_pfm: 2014.1441714946523 sim_pfm: -372.027365265094
episode: 148 training return: tensor(74.9748, device='cuda:0')
episode: 149 training return: tensor(-462.9714, device='cuda:0')
episode: 150 training return: tensor(74.4258, device='cuda:0')
episode: 151 training return: tensor(55.5528, device='cuda:0')
epoch: 38 test_true_pfm: 2628.509559125472 sim_pfm: -243.2676788645816
episode: 152 training return: tensor(-442.5892, device='cuda:0')
episode: 153 training return: tensor(-384.3309, device='cuda:0')
episode: 154 training return: tensor(66.5779, device='cuda:0')
episode: 155 training return: tensor(-256.0791, device='cuda:0')
epoch: 39 test_true_pfm: 2936.9430532012534 sim_pfm: 73.9062293561874
episode: 156 training return: tensor(51.2240, device='cuda:0')
episode: 157 training return: tensor(33.3574, device='cuda:0')
episode: 158 training return: tensor(49.0968, device='cuda:0')
episode: 159 training return: tensor(-107.1327, device='cuda:0')
epoch: 40 test_true_pfm: 3032.5906126231107 sim_pfm: 83.86259766289731
episode: 160 training return: tensor(66.4816, device='cuda:0')
episode: 161 training return: tensor(12.2168, device='cuda:0')
episode: 162 training return: tensor(16.9005, device='cuda:0')
episode: 163 training return: tensor(88.6957, device='cuda:0')
epoch: 41 test_true_pfm: 1981.1068386924856 sim_pfm: -254.13429140211278
episode: 164 training return: tensor(-394.6695, device='cuda:0')
episode: 165 training return: tensor(-505.8387, device='cuda:0')
episode: 166 training return: tensor(12.8851, device='cuda:0')
episode: 167 training return: tensor(-487.3696, device='cuda:0')
epoch: 42 test_true_pfm: 1843.7042277338462 sim_pfm: -275.11892688030883
episode: 168 training return: tensor(-443.1040, device='cuda:0')
episode: 169 training return: tensor(-105.6758, device='cuda:0')
episode: 170 training return: tensor(-367.9575, device='cuda:0')
episode: 171 training return: tensor(-362.7058, device='cuda:0')
epoch: 43 test_true_pfm: 2160.608672555291 sim_pfm: -16.59868095384445
episode: 172 training return: tensor(75.7528, device='cuda:0')
episode: 173 training return: tensor(-487.4469, device='cuda:0')
episode: 174 training return: tensor(57.8294, device='cuda:0')
episode: 175 training return: tensor(52.4471, device='cuda:0')
epoch: 44 test_true_pfm: 3115.3046410987595 sim_pfm: -211.54527825677846
episode: 176 training return: tensor(6.4558, device='cuda:0')
episode: 177 training return: tensor(-197.5025, device='cuda:0')
episode: 178 training return: tensor(-105.1954, device='cuda:0')
episode: 179 training return: tensor(33.1369, device='cuda:0')
epoch: 45 test_true_pfm: 2494.355053081947 sim_pfm: -42.87882037628636
episode: 180 training return: tensor(-272.6929, device='cuda:0')
episode: 181 training return: tensor(42.5306, device='cuda:0')
episode: 182 training return: tensor(-29.8569, device='cuda:0')
episode: 183 training return: tensor(-478.6322, device='cuda:0')
epoch: 46 test_true_pfm: 3213.337173842951 sim_pfm: -151.90258732565175
episode: 184 training return: tensor(-366.0927, device='cuda:0')
episode: 185 training return: tensor(-383.8344, device='cuda:0')
episode: 186 training return: tensor(-500.1052, device='cuda:0')
episode: 187 training return: tensor(-455.4355, device='cuda:0')
epoch: 47 test_true_pfm: 2054.076546538493 sim_pfm: -446.221873612764
episode: 188 training return: tensor(-361.4853, device='cuda:0')
episode: 189 training return: tensor(-448.1035, device='cuda:0')
episode: 190 training return: tensor(33.5499, device='cuda:0')
episode: 191 training return: tensor(-200.0443, device='cuda:0')
epoch: 48 test_true_pfm: 3081.1509564380235 sim_pfm: -246.62808337826087
episode: 192 training return: tensor(-532.2232, device='cuda:0')
episode: 193 training return: tensor(22.1753, device='cuda:0')
episode: 194 training return: tensor(-427.0917, device='cuda:0')
episode: 195 training return: tensor(-451.9937, device='cuda:0')
epoch: 49 test_true_pfm: 2759.5930749678123 sim_pfm: -252.94440652109915
episode: 196 training return: tensor(-215.0254, device='cuda:0')
episode: 197 training return: tensor(-285.4316, device='cuda:0')
episode: 198 training return: tensor(-87.4005, device='cuda:0')
episode: 199 training return: tensor(-192.3421, device='cuda:0')
epoch: 50 test_true_pfm: 2641.8909284263045 sim_pfm: -220.18510209662296
episode: 200 training return: tensor(-239.7882, device='cuda:0')
episode: 201 training return: tensor(25.1095, device='cuda:0')
episode: 202 training return: tensor(-434.9932, device='cuda:0')
episode: 203 training return: tensor(-453.3267, device='cuda:0')
epoch: 51 test_true_pfm: 2867.916539715552 sim_pfm: -172.3179635788159
episode: 204 training return: tensor(-369.9516, device='cuda:0')
episode: 205 training return: tensor(-426.2551, device='cuda:0')
episode: 206 training return: tensor(-448.6794, device='cuda:0')
episode: 207 training return: tensor(-449.8502, device='cuda:0')
epoch: 52 test_true_pfm: 2427.3690256886434 sim_pfm: -83.26863451009073
episode: 208 training return: tensor(-473.6481, device='cuda:0')
episode: 209 training return: tensor(-437.0873, device='cuda:0')
episode: 210 training return: tensor(-441.5308, device='cuda:0')
episode: 211 training return: tensor(39.5298, device='cuda:0')
epoch: 53 test_true_pfm: 3263.0949552579746 sim_pfm: 62.05412686569616
episode: 212 training return: tensor(-453.0423, device='cuda:0')
episode: 213 training return: tensor(-194.7860, device='cuda:0')
episode: 214 training return: tensor(-211.8915, device='cuda:0')
episode: 215 training return: tensor(72.3241, device='cuda:0')
epoch: 54 test_true_pfm: 2971.2988695355125 sim_pfm: 2.6207562415899397
episode: 216 training return: tensor(53.1766, device='cuda:0')
episode: 217 training return: tensor(-421.6398, device='cuda:0')
episode: 218 training return: tensor(-372.4362, device='cuda:0')
episode: 219 training return: tensor(-514.2114, device='cuda:0')
epoch: 55 test_true_pfm: 3115.760776493708 sim_pfm: -33.38794246067604
episode: 220 training return: tensor(82.0344, device='cuda:0')
episode: 221 training return: tensor(-457.6186, device='cuda:0')
episode: 222 training return: tensor(-483.9559, device='cuda:0')
episode: 223 training return: tensor(-443.4538, device='cuda:0')
epoch: 56 test_true_pfm: 2368.2901667436986 sim_pfm: -15.526549964755153
episode: 224 training return: tensor(99.1824, device='cuda:0')
episode: 225 training return: tensor(49.7063, device='cuda:0')
episode: 226 training return: tensor(-449.7321, device='cuda:0')
episode: 227 training return: tensor(-66.8169, device='cuda:0')
epoch: 57 test_true_pfm: 2614.931242798293 sim_pfm: -107.74728445277044
episode: 228 training return: tensor(-545.9187, device='cuda:0')
episode: 229 training return: tensor(-497.2821, device='cuda:0')
episode: 230 training return: tensor(-237.4859, device='cuda:0')
episode: 231 training return: tensor(-425.2134, device='cuda:0')
epoch: 58 test_true_pfm: 3255.340353753789 sim_pfm: 43.65080023931417
episode: 232 training return: tensor(-374.3099, device='cuda:0')
episode: 233 training return: tensor(-454.3777, device='cuda:0')
episode: 234 training return: tensor(-451.9475, device='cuda:0')
episode: 235 training return: tensor(-515.3583, device='cuda:0')
epoch: 59 test_true_pfm: 2861.9273930217655 sim_pfm: -60.393423629458994
episode: 236 training return: tensor(-136.3898, device='cuda:0')
episode: 237 training return: tensor(-407.1796, device='cuda:0')
episode: 238 training return: tensor(-453.0386, device='cuda:0')
episode: 239 training return: tensor(40.6614, device='cuda:0')
epoch: 60 test_true_pfm: 2090.6580926293263 sim_pfm: -329.2660132625412
episode: 240 training return: tensor(-447.3747, device='cuda:0')
episode: 241 training return: tensor(-456.7986, device='cuda:0')
episode: 242 training return: tensor(-463.6191, device='cuda:0')
episode: 243 training return: tensor(-456.4054, device='cuda:0')
epoch: 61 test_true_pfm: 3103.9072207805125 sim_pfm: 7.435939697286813
episode: 244 training return: tensor(20.0731, device='cuda:0')
episode: 245 training return: tensor(-296.9671, device='cuda:0')
episode: 246 training return: tensor(-481.6971, device='cuda:0')
episode: 247 training return: tensor(-548.9869, device='cuda:0')
epoch: 62 test_true_pfm: 2680.088553140578 sim_pfm: -41.694213501381455
episode: 248 training return: tensor(-452.9063, device='cuda:0')
episode: 249 training return: tensor(-300.0162, device='cuda:0')
episode: 250 training return: tensor(36.4153, device='cuda:0')
episode: 251 training return: tensor(-398.9717, device='cuda:0')
epoch: 63 test_true_pfm: 1590.437539225786 sim_pfm: -437.20570121348527
episode: 252 training return: tensor(-452.2681, device='cuda:0')
episode: 253 training return: tensor(-159.8276, device='cuda:0')
episode: 254 training return: tensor(-427.5831, device='cuda:0')
episode: 255 training return: tensor(-451.7199, device='cuda:0')
epoch: 64 test_true_pfm: 1593.3865831639298 sim_pfm: -217.94323949235454
episode: 256 training return: tensor(-454.2920, device='cuda:0')
episode: 257 training return: tensor(-505.4809, device='cuda:0')
episode: 258 training return: tensor(-476.1039, device='cuda:0')
episode: 259 training return: tensor(-459.7736, device='cuda:0')
epoch: 65 test_true_pfm: 2311.5661521523984 sim_pfm: -374.53852373253903
episode: 260 training return: tensor(31.1740, device='cuda:0')
episode: 261 training return: tensor(-455.8138, device='cuda:0')
episode: 262 training return: tensor(-492.2057, device='cuda:0')
episode: 263 training return: tensor(-512.0059, device='cuda:0')
epoch: 66 test_true_pfm: 1943.6674328998606 sim_pfm: -165.71392841102593
episode: 264 training return: tensor(-442.7545, device='cuda:0')
episode: 265 training return: tensor(-486.9933, device='cuda:0')
episode: 266 training return: tensor(-446.0376, device='cuda:0')
episode: 267 training return: tensor(71.3141, device='cuda:0')
epoch: 67 test_true_pfm: 3256.5863458617127 sim_pfm: -276.0009418934448
episode: 268 training return: tensor(97.1168, device='cuda:0')
episode: 269 training return: tensor(-454.3108, device='cuda:0')
episode: 270 training return: tensor(139.9779, device='cuda:0')
episode: 271 training return: tensor(-363.9490, device='cuda:0')
epoch: 68 test_true_pfm: 2039.6057124452782 sim_pfm: -389.2270873694797
episode: 272 training return: tensor(-502.2610, device='cuda:0')
episode: 273 training return: tensor(-197.9404, device='cuda:0')
episode: 274 training return: tensor(-394.5541, device='cuda:0')
episode: 275 training return: tensor(-385.3662, device='cuda:0')
epoch: 69 test_true_pfm: 2523.288415700603 sim_pfm: -66.07007151852788
episode: 276 training return: tensor(-503.6272, device='cuda:0')
episode: 277 training return: tensor(-430.2562, device='cuda:0')
episode: 278 training return: tensor(-243.2057, device='cuda:0')
episode: 279 training return: tensor(-403.7191, device='cuda:0')
epoch: 70 test_true_pfm: 2024.4610620379274 sim_pfm: -354.4052028456451
episode: 280 training return: tensor(-451.5019, device='cuda:0')
episode: 281 training return: tensor(-417.1774, device='cuda:0')
episode: 282 training return: tensor(-449.9525, device='cuda:0')
episode: 283 training return: tensor(-109.0439, device='cuda:0')
epoch: 71 test_true_pfm: 2300.1631620161693 sim_pfm: -229.46622239732338
episode: 284 training return: tensor(-20.6580, device='cuda:0')
episode: 285 training return: tensor(-469.2238, device='cuda:0')
episode: 286 training return: tensor(-213.5033, device='cuda:0')
episode: 287 training return: tensor(-474.5845, device='cuda:0')
epoch: 72 test_true_pfm: 1703.7755212041823 sim_pfm: -368.7871328598121
episode: 288 training return: tensor(-484.5963, device='cuda:0')
episode: 289 training return: tensor(-137.9179, device='cuda:0')
episode: 290 training return: tensor(-449.9012, device='cuda:0')
episode: 291 training return: tensor(-508.9229, device='cuda:0')
epoch: 73 test_true_pfm: 2193.559764611178 sim_pfm: -57.98151594395555
episode: 292 training return: tensor(-447.4235, device='cuda:0')
episode: 293 training return: tensor(-204.5541, device='cuda:0')
episode: 294 training return: tensor(-477.8907, device='cuda:0')
episode: 295 training return: tensor(-200.8016, device='cuda:0')
epoch: 74 test_true_pfm: 1574.0055151852775 sim_pfm: -348.574935343815
episode: 296 training return: tensor(-287.2763, device='cuda:0')
episode: 297 training return: tensor(-236.9444, device='cuda:0')
episode: 298 training return: tensor(-547.2657, device='cuda:0')
episode: 299 training return: tensor(-471.7079, device='cuda:0')
