['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'behavior', '--traj', 'medium', '--seed', '6', '--data', '100000', '--regu', '0.2']
epoch: 0 training_loss 0.26430252104997637 test_loss: 0.21254749298095704
epoch: 1 training_loss 0.21050513342022895 test_loss: 0.2032160520553589
epoch: 2 training_loss 0.1931356555223465 test_loss: 0.21167006492614746
epoch: 3 training_loss 0.19479215130209923 test_loss: 0.20920794010162352
epoch: 4 training_loss 0.19222417138516903 test_loss: 0.18336825370788573
epoch: 5 training_loss 0.18978051900863646 test_loss: 0.1881019353866577
epoch: 6 training_loss 0.2000520393997431 test_loss: 0.19144750833511354
epoch: 7 training_loss 0.19153621166944504 test_loss: 0.19558755159378052
epoch: 8 training_loss 0.19629304952919482 test_loss: 0.20503151416778564
epoch: 9 training_loss 0.18735776759684086 test_loss: 0.20343809127807616
epoch: 10 training_loss 0.18995495527982711 test_loss: 0.2008214473724365
epoch: 11 training_loss 0.18454160936176778 test_loss: 0.19018087387084961
epoch: 12 training_loss 0.18907307766377926 test_loss: 0.17746015787124633
epoch: 13 training_loss 0.18576423570513725 test_loss: 0.19121620655059815
epoch: 14 training_loss 0.18248607642948628 test_loss: 0.19905922412872315
epoch: 15 training_loss 0.1807768975198269 test_loss: 0.20391604900360108
epoch: 16 training_loss 0.17656303532421588 test_loss: 0.1907094359397888
epoch: 17 training_loss 0.18703000098466874 test_loss: 0.19630836248397826
epoch: 18 training_loss 0.17845645643770694 test_loss: 0.18336554765701293
epoch: 19 training_loss 0.18313043408095836 test_loss: 0.17627559900283812
epoch: 20 training_loss 0.1841978683322668 test_loss: 0.18715720176696776
epoch: 21 training_loss 0.1906028287112713 test_loss: 0.19185372591018676
epoch: 22 training_loss 0.18684464320540428 test_loss: 0.18007487058639526
epoch: 23 training_loss 0.18813891775906086 test_loss: 0.16345016956329345
epoch: 24 training_loss 0.1754497680068016 test_loss: 0.1768072724342346
epoch: 25 training_loss 0.17741029553115367 test_loss: 0.18046947717666625
epoch: 26 training_loss 0.17881361842155458 test_loss: 0.17858237028121948
epoch: 27 training_loss 0.18377548523247242 test_loss: 0.18347066640853882
epoch: 28 training_loss 0.18424032047390937 test_loss: 0.18830935955047606
epoch: 29 training_loss 0.18213491782546043 test_loss: 0.18045123815536498
epoch: 30 training_loss 0.19069004274904727 test_loss: 0.17746440172195435
epoch: 31 training_loss 0.18418553560972215 test_loss: 0.18922356367111207
epoch: 32 training_loss 0.18334002517163753 test_loss: 0.2083953619003296
epoch: 33 training_loss 0.17972359716892242 test_loss: 0.20506103038787843
epoch: 34 training_loss 0.17919240713119508 test_loss: 0.19690675735473634
epoch: 35 training_loss 0.17915895327925682 test_loss: 0.18368370532989503
epoch: 36 training_loss 0.17892482563853263 test_loss: 0.17823005914688111
epoch: 37 training_loss 0.17805398873984812 test_loss: 0.19488019943237306
epoch: 38 training_loss 0.18387495398521422 test_loss: 0.1773577332496643
epoch: 39 training_loss 0.1785078602284193 test_loss: 0.19660816192626954
epoch: 40 training_loss 0.17799028590321542 test_loss: 0.16751922369003297
epoch: 41 training_loss 0.1891896365582943 test_loss: 0.19567166566848754
epoch: 42 training_loss 0.18519831210374832 test_loss: 0.19342132806777954
epoch: 43 training_loss 0.17720189355313778 test_loss: 0.189503812789917
epoch: 44 training_loss 0.17730909138917922 test_loss: 0.21052334308624268
epoch: 45 training_loss 0.18252932451665402 test_loss: 0.1921232223510742
epoch: 46 training_loss 0.17614536717534066 test_loss: 0.18850743770599365
epoch: 47 training_loss 0.18772463120520114 test_loss: 0.17749278545379638
epoch: 48 training_loss 0.17306062787771226 test_loss: 0.18541240692138672
epoch: 49 training_loss 0.18112051442265512 test_loss: 0.1653773546218872
epoch: 50 training_loss 0.16892764993011952 test_loss: 0.19757428169250488
epoch: 51 training_loss 0.1792880390584469 test_loss: 0.19622291326522828
epoch: 52 training_loss 0.17543996877968313 test_loss: 0.2099679470062256
epoch: 53 training_loss 0.17599862165749072 test_loss: 0.18484556674957275
epoch: 54 training_loss 0.1765502218157053 test_loss: 0.17435150146484374
epoch: 55 training_loss 0.17324589215219022 test_loss: 0.18967097997665405
epoch: 56 training_loss 0.1755657784640789 test_loss: 0.18418978452682494
epoch: 57 training_loss 0.17922996543347836 test_loss: 0.18758819103240967
epoch: 58 training_loss 0.17885601572692394 test_loss: 0.17592337131500244
epoch: 59 training_loss 0.17305307306349277 test_loss: 0.17869131565093993
epoch: 60 training_loss 0.18074451342225076 test_loss: 0.19059563875198365
epoch: 61 training_loss 0.18404767915606499 test_loss: 0.19892756938934325
epoch: 62 training_loss 0.174798850864172 test_loss: 0.2019434928894043
epoch: 63 training_loss 0.18133378759026528 test_loss: 0.18595160245895387
epoch: 64 training_loss 0.17462966196238994 test_loss: 0.1763203263282776
epoch: 65 training_loss 0.18249706439673902 test_loss: 0.15901364088058473
epoch: 66 training_loss 0.1767547835037112 test_loss: 0.16302088499069214
epoch: 67 training_loss 0.18358616143465042 test_loss: 0.18272366523742675
epoch: 68 training_loss 0.17755094222724438 test_loss: 0.18656336069107055
epoch: 69 training_loss 0.1764844497293234 test_loss: 0.19215551614761353
epoch: 70 training_loss 0.18328288473188878 test_loss: 0.18391414880752563
epoch: 71 training_loss 0.1815665589272976 test_loss: 0.17766835689544677
epoch: 72 training_loss 0.1772371445596218 test_loss: 0.16720833778381347
epoch: 73 training_loss 0.1759648984670639 test_loss: 0.18280287981033325
epoch: 74 training_loss 0.17781232818961143 test_loss: 0.20485906600952147
epoch: 75 training_loss 0.18382730558514596 test_loss: 0.20602550506591796
epoch: 76 training_loss 0.18332621224224568 test_loss: 0.19569216966629027
epoch: 77 training_loss 0.17896890945732594 test_loss: 0.16990716457366944
epoch: 78 training_loss 0.17898447215557098 test_loss: 0.17785229682922363
epoch: 79 training_loss 0.1797748962789774 test_loss: 0.15090872049331666
epoch: 80 training_loss 0.18266422651708125 test_loss: 0.1846841335296631
epoch: 81 training_loss 0.1790113263577223 test_loss: 0.169063937664032
epoch: 82 training_loss 0.18133560985326766 test_loss: 0.18654258251190187
epoch: 83 training_loss 0.17734834253787995 test_loss: 0.1931912422180176
epoch: 84 training_loss 0.18064242608845235 test_loss: 0.17057405710220336
epoch: 85 training_loss 0.17357021160423755 test_loss: 0.17036041021347045
epoch: 86 training_loss 0.18197360441088675 test_loss: 0.2024322271347046
epoch: 87 training_loss 0.1798470363020897 test_loss: 0.1809380054473877
epoch: 88 training_loss 0.17708721220493318 test_loss: 0.19131109714508057
epoch: 89 training_loss 0.17539631590247154 test_loss: 0.17694581747055055
epoch: 90 training_loss 0.17445918604731558 test_loss: 0.17783817052841186
epoch: 91 training_loss 0.17433582440018655 test_loss: 0.18648918867111205
epoch: 92 training_loss 0.17779041327536105 test_loss: 0.20391359329223632
epoch: 93 training_loss 0.1766799905896187 test_loss: 0.1825714588165283
epoch: 94 training_loss 0.18452275410294533 test_loss: 0.16283482313156128
epoch: 95 training_loss 0.17729353845119478 test_loss: 0.1816018581390381
epoch: 96 training_loss 0.17609054781496525 test_loss: 0.17073817253112794
epoch: 97 training_loss 0.17843351036310195 test_loss: 0.1771884083747864
epoch: 98 training_loss 0.1820992647856474 test_loss: 0.19216569662094116
epoch: 99 training_loss 0.18003377623856068 test_loss: 0.17235220670700074
epoch: 100 training_loss 0.1734065344929695 test_loss: 0.16633580923080443
epoch: 101 training_loss 0.1805433089286089 test_loss: 0.1766796350479126
epoch: 102 training_loss 0.1804610016196966 test_loss: 0.17209774255752563
epoch: 103 training_loss 0.1814301074296236 test_loss: 0.1838499426841736
epoch: 104 training_loss 0.17820216946303843 test_loss: 0.192074716091156
epoch: 105 training_loss 0.18101268224418163 test_loss: 0.18185701370239257
epoch: 106 training_loss 0.17152565181255341 test_loss: 0.18725801706314088
epoch: 107 training_loss 0.17985901191830636 test_loss: 0.18604577779769899
epoch: 108 training_loss 0.18011110357940197 test_loss: 0.18534610271453858
epoch: 109 training_loss 0.17306549474596977 test_loss: 0.18866434097290039
epoch: 110 training_loss 0.1768881471455097 test_loss: 0.17245007753372193
epoch: 111 training_loss 0.1727218247950077 test_loss: 0.18424861431121825
epoch: 112 training_loss 0.16889185685664415 test_loss: 0.19085193872451783
epoch: 113 training_loss 0.1808394518494606 test_loss: 0.1594848394393921
epoch: 114 training_loss 0.17621299520134925 test_loss: 0.1913159132003784
epoch: 115 training_loss 0.1851437881588936 test_loss: 0.1722201704978943
epoch: 116 training_loss 0.1776072259247303 test_loss: 0.1775371789932251
epoch: 117 training_loss 0.18178874529898167 test_loss: 0.16971220970153808
epoch: 118 training_loss 0.17129563726484776 test_loss: 0.16889101266860962
epoch: 119 training_loss 0.17371655255556107 test_loss: 0.1862734079360962
epoch: 120 training_loss 0.1867599879205227 test_loss: 0.1865817666053772
epoch: 121 training_loss 0.18219051167368888 test_loss: 0.1784103512763977
epoch: 122 training_loss 0.1751445546001196 test_loss: 0.18364498615264893
epoch: 123 training_loss 0.1842483275383711 test_loss: 0.18697142601013184
epoch: 124 training_loss 0.17641424506902695 test_loss: 0.18850733041763307
epoch: 125 training_loss 0.17146891318261623 test_loss: 0.1902894377708435
epoch: 126 training_loss 0.17231904797255992 test_loss: 0.19361398220062256
epoch: 127 training_loss 0.1822262492775917 test_loss: 0.18446358442306518
epoch: 128 training_loss 0.17616246208548547 test_loss: 0.18947352170944215
epoch: 129 training_loss 0.17217071294784547 test_loss: 0.19073586463928222
epoch: 130 training_loss 0.16883398160338403 test_loss: 0.1804267406463623
epoch: 131 training_loss 0.17685235545039177 test_loss: 0.181313955783844
epoch: 132 training_loss 0.17154572546482086 test_loss: 0.18739286661148072
epoch: 133 training_loss 0.18083162888884544 test_loss: 0.18258357048034668
epoch: 134 training_loss 0.17395240172743798 test_loss: 0.17077987194061278
epoch: 135 training_loss 0.1809712638705969 test_loss: 0.18272229433059692
epoch: 136 training_loss 0.17714176781475544 test_loss: 0.1839617371559143
epoch: 137 training_loss 0.1794418191164732 test_loss: 0.16474132537841796
epoch: 138 training_loss 0.17707318402826785 test_loss: 0.17496498823165893
epoch: 139 training_loss 0.1811390966922045 test_loss: 0.18440845012664794
epoch: 140 training_loss 0.18326212242245674 test_loss: 0.18023304939270018
epoch: 141 training_loss 0.171516951918602 test_loss: 0.1785716414451599
epoch: 142 training_loss 0.17566743575036525 test_loss: 0.2049654722213745
epoch: 143 training_loss 0.17525621183216572 test_loss: 0.18432278633117677
epoch: 144 training_loss 0.18384016543626786 test_loss: 0.1932366371154785
epoch: 145 training_loss 0.17455631271004676 test_loss: 0.173678457736969
epoch: 146 training_loss 0.17190285973250866 test_loss: 0.19280864000320436
epoch: 147 training_loss 0.17626757852733135 test_loss: 0.20200610160827637
epoch: 148 training_loss 0.17594887994229794 test_loss: 0.20105156898498536
epoch: 149 training_loss 0.17204538889229298 test_loss: 0.19476984739303588
epoch: 0 training_loss 8.300443449020385 test_loss: 5.025671768188476
epoch: 1 training_loss 3.858607988357544 test_loss: 3.0133995056152343
epoch: 2 training_loss 2.6057751083374026 test_loss: 2.2755971908569337
epoch: 3 training_loss 2.0972727870941164 test_loss: 1.9030632019042968
epoch: 4 training_loss 1.8135463893413544 test_loss: 1.7006732940673828
epoch: 5 training_loss 1.6453029692173005 test_loss: 1.5898921966552735
epoch: 6 training_loss 1.5056604182720184 test_loss: 1.4399473190307617
epoch: 7 training_loss 1.4225602996349336 test_loss: 1.3297462463378906
epoch: 8 training_loss 1.336461718082428 test_loss: 1.2992019653320312
epoch: 9 training_loss 1.270150077342987 test_loss: 1.236709976196289
epoch: 10 training_loss 1.204644775390625 test_loss: 1.1632195472717286
epoch: 11 training_loss 1.1516130965948106 test_loss: 1.184505558013916
epoch: 12 training_loss 1.111849970817566 test_loss: 1.0648659706115722
epoch: 13 training_loss 1.064785479903221 test_loss: 1.0309648513793945
epoch: 14 training_loss 1.0248901128768921 test_loss: 1.0154740333557128
epoch: 15 training_loss 1.003712105154991 test_loss: 1.0210634231567384
epoch: 16 training_loss 0.9597271031141281 test_loss: 0.9679712295532227
epoch: 17 training_loss 0.9533346116542816 test_loss: 0.9687626838684082
epoch: 18 training_loss 0.9395382934808731 test_loss: 0.9456437110900879
epoch: 19 training_loss 0.9063633573055268 test_loss: 0.9160021781921387
epoch: 20 training_loss 0.8862375336885452 test_loss: 0.9187816619873047
epoch: 21 training_loss 0.8879397070407867 test_loss: 0.8729534149169922
epoch: 22 training_loss 0.8429852962493897 test_loss: 0.8360958099365234
epoch: 23 training_loss 0.8327430135011673 test_loss: 0.8688817024230957
epoch: 24 training_loss 0.8416625416278839 test_loss: 0.8028693199157715
epoch: 25 training_loss 0.8253235369920731 test_loss: 0.7904774188995362
epoch: 26 training_loss 0.8006354886293411 test_loss: 0.8190746307373047
epoch: 27 training_loss 0.7949778413772584 test_loss: 0.8152106285095215
epoch: 28 training_loss 0.7781527179479599 test_loss: 0.7798219203948975
epoch: 29 training_loss 0.7716391211748124 test_loss: 0.7799696445465087
epoch: 30 training_loss 0.7755797451734543 test_loss: 0.779162073135376
epoch: 31 training_loss 0.7493989032506942 test_loss: 0.7462444305419922
epoch: 32 training_loss 0.7365757095813751 test_loss: 0.7300728797912598
epoch: 33 training_loss 0.7368483054637909 test_loss: 0.7358577728271485
epoch: 34 training_loss 0.7225219368934631 test_loss: 0.7188737869262696
epoch: 35 training_loss 0.7137925183773041 test_loss: 0.7044037818908692
epoch: 36 training_loss 0.7193864631652832 test_loss: 0.7787383556365967
epoch: 37 training_loss 0.6998642891645431 test_loss: 0.741864538192749
epoch: 38 training_loss 0.6888529872894287 test_loss: 0.7523475646972656
epoch: 39 training_loss 0.6823464047908783 test_loss: 0.7046971321105957
epoch: 40 training_loss 0.6846114897727966 test_loss: 0.6859057426452637
epoch: 41 training_loss 0.6748395949602127 test_loss: 0.7031927585601807
epoch: 42 training_loss 0.674425106048584 test_loss: 0.6878468990325928
epoch: 43 training_loss 0.6857409751415253 test_loss: 0.7042778491973877
epoch: 44 training_loss 0.6730027019977569 test_loss: 0.6633332729339599
epoch: 45 training_loss 0.6574287217855453 test_loss: 0.6640292167663574
epoch: 46 training_loss 0.6511081463098526 test_loss: 0.6449585914611816
epoch: 47 training_loss 0.6478265208005906 test_loss: 0.6617299079895019
epoch: 48 training_loss 0.646583154797554 test_loss: 0.6630617141723633
epoch: 49 training_loss 0.6496556842327118 test_loss: 0.6563188552856445
epoch: 50 training_loss 0.6503067600727082 test_loss: 0.6711459159851074
epoch: 51 training_loss 0.6352481883764267 test_loss: 0.6427189826965332
epoch: 52 training_loss 0.6234569382667542 test_loss: 0.6398155689239502
epoch: 53 training_loss 0.6172915452718735 test_loss: 0.6353927612304687
epoch: 54 training_loss 0.622971802353859 test_loss: 0.6071887016296387
epoch: 55 training_loss 0.6197590005397796 test_loss: 0.6476751327514648
epoch: 56 training_loss 0.6213763976097106 test_loss: 0.6001608371734619
epoch: 57 training_loss 0.6188102394342423 test_loss: 0.6115490913391113
epoch: 58 training_loss 0.6216923022270202 test_loss: 0.6124660491943359
epoch: 59 training_loss 0.5956609737873078 test_loss: 0.6222886562347412
epoch: 60 training_loss 0.6027750217914581 test_loss: 0.6039785861968994
epoch: 61 training_loss 0.5991471111774445 test_loss: 0.6113566875457763
epoch: 62 training_loss 0.6010267817974091 test_loss: 0.6401748180389404
epoch: 63 training_loss 0.5998596984148026 test_loss: 0.6135859012603759
epoch: 64 training_loss 0.5936209946870804 test_loss: 0.6008001804351807
epoch: 65 training_loss 0.6011191284656525 test_loss: 0.6373939990997315
epoch: 66 training_loss 0.5940800040960312 test_loss: 0.5998698711395264
epoch: 67 training_loss 0.5938430815935135 test_loss: 0.5844285011291503
epoch: 68 training_loss 0.5934987843036652 test_loss: 0.5922587871551513
epoch: 69 training_loss 0.5861284309625625 test_loss: 0.5671315670013428
epoch: 70 training_loss 0.5839850324392318 test_loss: 0.5692400455474853
epoch: 71 training_loss 0.5816531240940094 test_loss: 0.5948506355285644
epoch: 72 training_loss 0.5761363530158996 test_loss: 0.5828845500946045
epoch: 73 training_loss 0.5821269857883453 test_loss: 0.5952373504638672
epoch: 74 training_loss 0.5666468155384063 test_loss: 0.5817987442016601
epoch: 75 training_loss 0.5810137993097305 test_loss: 0.5734909534454345
epoch: 76 training_loss 0.5705383950471878 test_loss: 0.5840303421020507
epoch: 77 training_loss 0.5701653099060059 test_loss: 0.5516878128051758
epoch: 78 training_loss 0.5602931526303291 test_loss: 0.5666290760040283
epoch: 79 training_loss 0.5650557070970536 test_loss: 0.5656450271606446
epoch: 80 training_loss 0.5563520312309265 test_loss: 0.5742997646331787
epoch: 81 training_loss 0.5554287043213845 test_loss: 0.5594586849212646
epoch: 82 training_loss 0.5550806373357773 test_loss: 0.5496464252471924
epoch: 83 training_loss 0.5572583881020546 test_loss: 0.566917610168457
epoch: 84 training_loss 0.5633361631631851 test_loss: 0.5354190349578858
epoch: 85 training_loss 0.5569172322750091 test_loss: 0.5780878067016602
epoch: 86 training_loss 0.5463482689857483 test_loss: 0.5626450538635254
epoch: 87 training_loss 0.5493054237961769 test_loss: 0.5479759216308594
epoch: 88 training_loss 0.553498400747776 test_loss: 0.5348516464233398
epoch: 89 training_loss 0.5392379605770111 test_loss: 0.5690876007080078
epoch: 90 training_loss 0.5562860319018363 test_loss: 0.560947322845459
epoch: 91 training_loss 0.5469540256261826 test_loss: 0.5527422904968262
epoch: 92 training_loss 0.5454317924380302 test_loss: 0.5540192127227783
epoch: 93 training_loss 0.5444886127114296 test_loss: 0.5386119842529297
epoch: 94 training_loss 0.5460749396681785 test_loss: 0.5391959190368653
epoch: 95 training_loss 0.5355206179618835 test_loss: 0.5628550052642822
epoch: 96 training_loss 0.5391884884238243 test_loss: 0.5759172916412354
epoch: 97 training_loss 0.5426531228423118 test_loss: 0.5599192619323731
epoch: 98 training_loss 0.537863082587719 test_loss: 0.552736759185791
epoch: 99 training_loss 0.5424999648332596 test_loss: 0.5700574398040772
epoch: 100 training_loss 0.5368744918704033 test_loss: 0.523682689666748
epoch: 101 training_loss 0.5323299843072892 test_loss: 0.5484156608581543
epoch: 102 training_loss 0.5305745169520378 test_loss: 0.5429928779602051
epoch: 103 training_loss 0.5253596675395965 test_loss: 0.5266136646270752
epoch: 104 training_loss 0.5255689471960068 test_loss: 0.5304516315460205
epoch: 105 training_loss 0.5204202994704247 test_loss: 0.5467281818389893
epoch: 106 training_loss 0.5210213246941566 test_loss: 0.5214148044586182
epoch: 107 training_loss 0.5224367779493332 test_loss: 0.5326932430267334
epoch: 108 training_loss 0.5279249858856201 test_loss: 0.5268308639526367
epoch: 109 training_loss 0.5187890332937241 test_loss: 0.5492537498474122
epoch: 110 training_loss 0.5240639498829842 test_loss: 0.5206698417663574
epoch: 111 training_loss 0.5216853868961334 test_loss: 0.5068074226379394
epoch: 112 training_loss 0.5179240635037422 test_loss: 0.5106751441955566
epoch: 113 training_loss 0.5123376974463463 test_loss: 0.5186463356018066
epoch: 114 training_loss 0.5153593602776527 test_loss: 0.5378114700317382
epoch: 115 training_loss 0.5181046003103256 test_loss: 0.5329484939575195
epoch: 116 training_loss 0.5232768967747689 test_loss: 0.5123836517333984
epoch: 117 training_loss 0.525865506529808 test_loss: 0.539287519454956
epoch: 118 training_loss 0.5125760516524315 test_loss: 0.5164638996124268
epoch: 119 training_loss 0.511456828713417 test_loss: 0.5120866298675537
epoch: 120 training_loss 0.5019541037082672 test_loss: 0.5157974243164063
epoch: 121 training_loss 0.5134611293673516 test_loss: 0.512800645828247
epoch: 122 training_loss 0.5167596316337586 test_loss: 0.5137986183166504
epoch: 123 training_loss 0.5079086881875992 test_loss: 0.49803891181945803
epoch: 124 training_loss 0.5108069095015526 test_loss: 0.500965166091919
epoch: 125 training_loss 0.5152366173267364 test_loss: 0.4950676918029785
epoch: 126 training_loss 0.5103064626455307 test_loss: 0.5050134658813477
epoch: 127 training_loss 0.5142250087857246 test_loss: 0.5305083751678467
epoch: 128 training_loss 0.49822735995054246 test_loss: 0.5184906482696533
epoch: 129 training_loss 0.5152838510274887 test_loss: 0.5100137233734131
epoch: 130 training_loss 0.5064321058988571 test_loss: 0.5149068355560302
epoch: 131 training_loss 0.5051386034488679 test_loss: 0.5265051841735839
epoch: 132 training_loss 0.5063617327809333 test_loss: 0.5250271797180176
epoch: 133 training_loss 0.5051614800095559 test_loss: 0.5080263137817382
epoch: 134 training_loss 0.5100382113456726 test_loss: 0.5048486709594726
epoch: 135 training_loss 0.4983616277575493 test_loss: 0.4846189498901367
epoch: 136 training_loss 0.4902114677429199 test_loss: 0.4933930397033691
epoch: 137 training_loss 0.488867526948452 test_loss: 0.48375444412231444
epoch: 138 training_loss 0.5055802047252655 test_loss: 0.4858293056488037
epoch: 139 training_loss 0.495084168612957 test_loss: 0.49811463356018065
epoch: 140 training_loss 0.5104752439260483 test_loss: 0.5126266956329346
epoch: 141 training_loss 0.49649677485227584 test_loss: 0.4888336658477783
epoch: 142 training_loss 0.4952713790535927 test_loss: 0.5140936374664307
epoch: 143 training_loss 0.5030867472290993 test_loss: 0.4982041358947754
epoch: 144 training_loss 0.4879976016283035 test_loss: 0.5037771701812744
epoch: 145 training_loss 0.49408301621675493 test_loss: 0.5088923931121826
epoch: 146 training_loss 0.5065402185916901 test_loss: 0.48325066566467284
epoch: 147 training_loss 0.48736057311296466 test_loss: 0.5022506713867188
epoch: 148 training_loss 0.49206721007823945 test_loss: 0.4908596992492676
epoch: 149 training_loss 0.49319634556770325 test_loss: 0.5123502254486084
2931.6009269345877
episode: 0 training return: tensor(-313.8307, device='cuda:0')
episode: 1 training return: tensor(-367.9765, device='cuda:0')
episode: 2 training return: tensor(-352.1131, device='cuda:0')
episode: 3 training return: tensor(-339.0071, device='cuda:0')
epoch: 1 test_true_pfm: 2792.7138692331405 sim_pfm: 177.3847153725316
episode: 4 training return: tensor(-155.3018, device='cuda:0')
episode: 5 training return: tensor(-380.7617, device='cuda:0')
episode: 6 training return: tensor(-128.3264, device='cuda:0')
episode: 7 training return: tensor(-61.2747, device='cuda:0')
epoch: 2 test_true_pfm: 3032.1420130040037 sim_pfm: 138.136070049038
episode: 8 training return: tensor(231.6565, device='cuda:0')
episode: 9 training return: tensor(-399.2987, device='cuda:0')
episode: 10 training return: tensor(-135.9162, device='cuda:0')
episode: 11 training return: tensor(-122.0832, device='cuda:0')
epoch: 3 test_true_pfm: 2732.197824135829 sim_pfm: 164.07630130068478
episode: 12 training return: tensor(-255.9606, device='cuda:0')
episode: 13 training return: tensor(279.5608, device='cuda:0')
episode: 14 training return: tensor(-357.7277, device='cuda:0')
episode: 15 training return: tensor(217.6593, device='cuda:0')
epoch: 4 test_true_pfm: 2333.907983566052 sim_pfm: -144.0517165065588
episode: 16 training return: tensor(-279.7716, device='cuda:0')
episode: 17 training return: tensor(-243.0796, device='cuda:0')
episode: 18 training return: tensor(-430.0962, device='cuda:0')
episode: 19 training return: tensor(-375.7056, device='cuda:0')
epoch: 5 test_true_pfm: 1646.0919567892297 sim_pfm: -215.77982269584513
episode: 20 training return: tensor(-316.2972, device='cuda:0')
episode: 21 training return: tensor(-402.1275, device='cuda:0')
episode: 22 training return: tensor(-370.5307, device='cuda:0')
episode: 23 training return: tensor(-373.6134, device='cuda:0')
epoch: 6 test_true_pfm: 2225.3134400999884 sim_pfm: 221.35442700781277
episode: 24 training return: tensor(-125.7150, device='cuda:0')
episode: 25 training return: tensor(-415.6082, device='cuda:0')
episode: 26 training return: tensor(-346.6829, device='cuda:0')
episode: 27 training return: tensor(31.9791, device='cuda:0')
epoch: 7 test_true_pfm: 1935.6192076458326 sim_pfm: -231.9171838760861
episode: 28 training return: tensor(40.4926, device='cuda:0')
episode: 29 training return: tensor(-403.3480, device='cuda:0')
episode: 30 training return: tensor(-359.2531, device='cuda:0')
episode: 31 training return: tensor(-10.5379, device='cuda:0')
epoch: 8 test_true_pfm: 2323.5462844868957 sim_pfm: 81.4281390743078
episode: 32 training return: tensor(-397.1041, device='cuda:0')
episode: 33 training return: tensor(-386.6914, device='cuda:0')
episode: 34 training return: tensor(-142.3659, device='cuda:0')
episode: 35 training return: tensor(-392.7086, device='cuda:0')
epoch: 9 test_true_pfm: 2723.3760629115036 sim_pfm: -46.59003853881344
episode: 36 training return: tensor(-392.6613, device='cuda:0')
episode: 37 training return: tensor(-249.1217, device='cuda:0')
episode: 38 training return: tensor(-329.1862, device='cuda:0')
episode: 39 training return: tensor(-407.7068, device='cuda:0')
epoch: 10 test_true_pfm: 2623.6325485722905 sim_pfm: -97.83940452388681
episode: 40 training return: tensor(-111.8434, device='cuda:0')
episode: 41 training return: tensor(-388.5088, device='cuda:0')
episode: 42 training return: tensor(-299.5929, device='cuda:0')
episode: 43 training return: tensor(-297.2840, device='cuda:0')
epoch: 11 test_true_pfm: 1589.9285337461897 sim_pfm: -260.81430388758116
episode: 44 training return: tensor(-336.4467, device='cuda:0')
episode: 45 training return: tensor(-161.8603, device='cuda:0')
episode: 46 training return: tensor(140.9145, device='cuda:0')
episode: 47 training return: tensor(197.7882, device='cuda:0')
epoch: 12 test_true_pfm: 1682.9264962445056 sim_pfm: -192.08478812033232
episode: 48 training return: tensor(228.8037, device='cuda:0')
episode: 49 training return: tensor(-242.9334, device='cuda:0')
episode: 50 training return: tensor(-110.9008, device='cuda:0')
episode: 51 training return: tensor(-382.5022, device='cuda:0')
epoch: 13 test_true_pfm: 1597.6131657979465 sim_pfm: -279.7318789114167
episode: 52 training return: tensor(-272.6164, device='cuda:0')
episode: 53 training return: tensor(-116.0331, device='cuda:0')
episode: 54 training return: tensor(243.2504, device='cuda:0')
episode: 55 training return: tensor(217.6817, device='cuda:0')
epoch: 14 test_true_pfm: 1619.1676473028856 sim_pfm: -131.03852588207033
episode: 56 training return: tensor(-95.0818, device='cuda:0')
episode: 57 training return: tensor(251.3924, device='cuda:0')
episode: 58 training return: tensor(-379.9157, device='cuda:0')
episode: 59 training return: tensor(-136.1234, device='cuda:0')
epoch: 15 test_true_pfm: 1803.3956502080962 sim_pfm: -73.5253654604797
episode: 60 training return: tensor(-189.1331, device='cuda:0')
episode: 61 training return: tensor(-78.0090, device='cuda:0')
episode: 62 training return: tensor(-214.8517, device='cuda:0')
episode: 63 training return: tensor(-341.9003, device='cuda:0')
epoch: 16 test_true_pfm: 1804.2817245322094 sim_pfm: -239.52895814853642
episode: 64 training return: tensor(-166.3307, device='cuda:0')
episode: 65 training return: tensor(-389.7047, device='cuda:0')
episode: 66 training return: tensor(-104.9576, device='cuda:0')
episode: 67 training return: tensor(-237.9824, device='cuda:0')
epoch: 17 test_true_pfm: 1633.6197072738503 sim_pfm: -270.08477612337447
episode: 68 training return: tensor(-290.1731, device='cuda:0')
episode: 69 training return: tensor(-287.2726, device='cuda:0')
episode: 70 training return: tensor(-41.6003, device='cuda:0')
episode: 71 training return: tensor(283.9821, device='cuda:0')
epoch: 18 test_true_pfm: 1634.0335898231324 sim_pfm: -258.71937527366873
episode: 72 training return: tensor(295.4339, device='cuda:0')
episode: 73 training return: tensor(-168.7663, device='cuda:0')
episode: 74 training return: tensor(304.4964, device='cuda:0')
episode: 75 training return: tensor(211.7614, device='cuda:0')
epoch: 19 test_true_pfm: 1799.2239336240407 sim_pfm: -184.4561433341199
episode: 76 training return: tensor(-51.3006, device='cuda:0')
episode: 77 training return: tensor(-61.7107, device='cuda:0')
episode: 78 training return: tensor(55.9576, device='cuda:0')
episode: 79 training return: tensor(1.0257, device='cuda:0')
epoch: 20 test_true_pfm: 2011.0993134633009 sim_pfm: -96.87030539478292
episode: 80 training return: tensor(-231.5956, device='cuda:0')
episode: 81 training return: tensor(-82.2693, device='cuda:0')
episode: 82 training return: tensor(-361.0022, device='cuda:0')
episode: 83 training return: tensor(-86.4829, device='cuda:0')
epoch: 21 test_true_pfm: 2803.239663499993 sim_pfm: -19.053347187601805
episode: 84 training return: tensor(-37.7128, device='cuda:0')
episode: 85 training return: tensor(-223.2947, device='cuda:0')
episode: 86 training return: tensor(-391.3659, device='cuda:0')
episode: 87 training return: tensor(-309.0457, device='cuda:0')
epoch: 22 test_true_pfm: 1624.3333287838639 sim_pfm: -269.71486545934266
episode: 88 training return: tensor(-334.5091, device='cuda:0')
episode: 89 training return: tensor(-334.7854, device='cuda:0')
episode: 90 training return: tensor(-321.1646, device='cuda:0')
episode: 91 training return: tensor(-337.9997, device='cuda:0')
epoch: 23 test_true_pfm: 2354.365096298215 sim_pfm: -15.289401705163376
episode: 92 training return: tensor(-384.1841, device='cuda:0')
episode: 93 training return: tensor(-60.4072, device='cuda:0')
episode: 94 training return: tensor(21.4593, device='cuda:0')
episode: 95 training return: tensor(263.1758, device='cuda:0')
epoch: 24 test_true_pfm: 1892.4111171065342 sim_pfm: -108.59667767148737
episode: 96 training return: tensor(268.9766, device='cuda:0')
episode: 97 training return: tensor(-212.0140, device='cuda:0')
episode: 98 training return: tensor(-47.0584, device='cuda:0')
episode: 99 training return: tensor(279.7050, device='cuda:0')
epoch: 25 test_true_pfm: 2408.074091803152 sim_pfm: -85.06550324675239
episode: 100 training return: tensor(253.9708, device='cuda:0')
episode: 101 training return: tensor(-116.4692, device='cuda:0')
episode: 102 training return: tensor(-196.0335, device='cuda:0')
episode: 103 training return: tensor(-361.7833, device='cuda:0')
epoch: 26 test_true_pfm: 2527.67013099483 sim_pfm: 78.47102258146818
episode: 104 training return: tensor(-262.8285, device='cuda:0')
episode: 105 training return: tensor(-310.1171, device='cuda:0')
episode: 106 training return: tensor(-121.3979, device='cuda:0')
episode: 107 training return: tensor(247.1343, device='cuda:0')
epoch: 27 test_true_pfm: 2322.8742108529036 sim_pfm: 209.39608140990217
episode: 108 training return: tensor(62.3909, device='cuda:0')
episode: 109 training return: tensor(-326.1928, device='cuda:0')
episode: 110 training return: tensor(309.9524, device='cuda:0')
episode: 111 training return: tensor(-136.6848, device='cuda:0')
epoch: 28 test_true_pfm: 3116.6387882361882 sim_pfm: 34.047640949633205
episode: 112 training return: tensor(-334.9835, device='cuda:0')
episode: 113 training return: tensor(-179.7054, device='cuda:0')
episode: 114 training return: tensor(-339.8815, device='cuda:0')
episode: 115 training return: tensor(74.9115, device='cuda:0')
epoch: 29 test_true_pfm: 2371.60265492855 sim_pfm: 105.50346383477638
episode: 116 training return: tensor(-300.3520, device='cuda:0')
episode: 117 training return: tensor(-154.7625, device='cuda:0')
episode: 118 training return: tensor(-339.7773, device='cuda:0')
episode: 119 training return: tensor(-278.8831, device='cuda:0')
epoch: 30 test_true_pfm: 1647.7732535561563 sim_pfm: -217.1770475351659
episode: 120 training return: tensor(-126.1938, device='cuda:0')
episode: 121 training return: tensor(-63.2966, device='cuda:0')
episode: 122 training return: tensor(-199.9695, device='cuda:0')
episode: 123 training return: tensor(-40.7496, device='cuda:0')
epoch: 31 test_true_pfm: 2345.335492212071 sim_pfm: -182.07684208883438
episode: 124 training return: tensor(209.1934, device='cuda:0')
episode: 125 training return: tensor(51.7552, device='cuda:0')
episode: 126 training return: tensor(-295.6909, device='cuda:0')
episode: 127 training return: tensor(-314.3745, device='cuda:0')
epoch: 32 test_true_pfm: 2973.112152897497 sim_pfm: 157.30550729549336
episode: 128 training return: tensor(71.0773, device='cuda:0')
episode: 129 training return: tensor(259.2504, device='cuda:0')
episode: 130 training return: tensor(-383.2238, device='cuda:0')
episode: 131 training return: tensor(-401.4216, device='cuda:0')
epoch: 33 test_true_pfm: 1825.775134944129 sim_pfm: -196.04112140166885
episode: 132 training return: tensor(225.0235, device='cuda:0')
episode: 133 training return: tensor(-241.6633, device='cuda:0')
episode: 134 training return: tensor(-390.2025, device='cuda:0')
episode: 135 training return: tensor(-385.9144, device='cuda:0')
epoch: 34 test_true_pfm: 2272.9676544049175 sim_pfm: -285.12409972197685
episode: 136 training return: tensor(-322.7593, device='cuda:0')
episode: 137 training return: tensor(-52.9666, device='cuda:0')
episode: 138 training return: tensor(64.8934, device='cuda:0')
episode: 139 training return: tensor(-136.1085, device='cuda:0')
epoch: 35 test_true_pfm: 2336.8383459529755 sim_pfm: -15.156288116491245
episode: 140 training return: tensor(-49.1631, device='cuda:0')
episode: 141 training return: tensor(-136.8033, device='cuda:0')
episode: 142 training return: tensor(-377.6405, device='cuda:0')
episode: 143 training return: tensor(248.4401, device='cuda:0')
epoch: 36 test_true_pfm: 2880.354324277792 sim_pfm: 88.25644717579901
episode: 144 training return: tensor(185.0588, device='cuda:0')
episode: 145 training return: tensor(254.1333, device='cuda:0')
episode: 146 training return: tensor(-227.1422, device='cuda:0')
episode: 147 training return: tensor(-329.2839, device='cuda:0')
epoch: 37 test_true_pfm: 2896.738749583101 sim_pfm: -136.11390500958078
episode: 148 training return: tensor(-217.4841, device='cuda:0')
episode: 149 training return: tensor(-389.4677, device='cuda:0')
episode: 150 training return: tensor(-346.9835, device='cuda:0')
episode: 151 training return: tensor(-244.8521, device='cuda:0')
epoch: 38 test_true_pfm: 2133.343296286784 sim_pfm: 69.17295878690977
episode: 152 training return: tensor(-383.0186, device='cuda:0')
episode: 153 training return: tensor(-344.4034, device='cuda:0')
episode: 154 training return: tensor(-410.1444, device='cuda:0')
episode: 155 training return: tensor(-356.2053, device='cuda:0')
epoch: 39 test_true_pfm: 2561.659781680239 sim_pfm: 85.28445754376783
episode: 156 training return: tensor(-154.6957, device='cuda:0')
episode: 157 training return: tensor(-343.7082, device='cuda:0')
episode: 158 training return: tensor(-299.0955, device='cuda:0')
episode: 159 training return: tensor(-227.0476, device='cuda:0')
epoch: 40 test_true_pfm: 2453.3916715835326 sim_pfm: 55.428406472026836
episode: 160 training return: tensor(35.4426, device='cuda:0')
episode: 161 training return: tensor(268.7352, device='cuda:0')
episode: 162 training return: tensor(-36.7558, device='cuda:0')
episode: 163 training return: tensor(-133.2223, device='cuda:0')
epoch: 41 test_true_pfm: 3350.2483019686274 sim_pfm: 235.44233956602207
episode: 164 training return: tensor(-375.1866, device='cuda:0')
episode: 165 training return: tensor(-347.9777, device='cuda:0')
episode: 166 training return: tensor(-141.9836, device='cuda:0')
episode: 167 training return: tensor(277.7968, device='cuda:0')
epoch: 42 test_true_pfm: 2570.1942709950713 sim_pfm: 91.56693834010123
episode: 168 training return: tensor(-346.8762, device='cuda:0')
episode: 169 training return: tensor(16.2088, device='cuda:0')
episode: 170 training return: tensor(-347.9586, device='cuda:0')
episode: 171 training return: tensor(-127.4808, device='cuda:0')
epoch: 43 test_true_pfm: 2132.6784374316444 sim_pfm: -113.26580881672756
episode: 172 training return: tensor(275.0432, device='cuda:0')
episode: 173 training return: tensor(-112.7253, device='cuda:0')
episode: 174 training return: tensor(-166.3353, device='cuda:0')
episode: 175 training return: tensor(-309.5782, device='cuda:0')
epoch: 44 test_true_pfm: 2998.2444200228724 sim_pfm: 260.23110271282104
episode: 176 training return: tensor(273.4406, device='cuda:0')
episode: 177 training return: tensor(-303.9695, device='cuda:0')
episode: 178 training return: tensor(-277.1322, device='cuda:0')
episode: 179 training return: tensor(-227.3557, device='cuda:0')
epoch: 45 test_true_pfm: 3264.089525470841 sim_pfm: 243.3248367595661
episode: 180 training return: tensor(-72.3999, device='cuda:0')
episode: 181 training return: tensor(280.1936, device='cuda:0')
episode: 182 training return: tensor(48.7980, device='cuda:0')
episode: 183 training return: tensor(-205.7913, device='cuda:0')
epoch: 46 test_true_pfm: 2886.8710178107394 sim_pfm: 250.75369739051288
episode: 184 training return: tensor(-382.8938, device='cuda:0')
episode: 185 training return: tensor(-253.5800, device='cuda:0')
episode: 186 training return: tensor(-382.9807, device='cuda:0')
episode: 187 training return: tensor(-297.0704, device='cuda:0')
epoch: 47 test_true_pfm: 1947.366982482663 sim_pfm: 237.20559876057087
episode: 188 training return: tensor(-315.1001, device='cuda:0')
episode: 189 training return: tensor(-119.2120, device='cuda:0')
episode: 190 training return: tensor(-88.2962, device='cuda:0')
episode: 191 training return: tensor(308.3508, device='cuda:0')
epoch: 48 test_true_pfm: 2307.566658725065 sim_pfm: 219.5362810632602
episode: 192 training return: tensor(259.1285, device='cuda:0')
episode: 193 training return: tensor(-240.8327, device='cuda:0')
episode: 194 training return: tensor(-395.7427, device='cuda:0')
episode: 195 training return: tensor(-362.5422, device='cuda:0')
epoch: 49 test_true_pfm: 2822.6767070843125 sim_pfm: -257.75222586465924
episode: 196 training return: tensor(-154.4825, device='cuda:0')
episode: 197 training return: tensor(-218.2997, device='cuda:0')
episode: 198 training return: tensor(-202.0590, device='cuda:0')
episode: 199 training return: tensor(301.5699, device='cuda:0')
epoch: 50 test_true_pfm: 2714.160759439536 sim_pfm: -143.20721763702264
episode: 200 training return: tensor(310.1313, device='cuda:0')
episode: 201 training return: tensor(107.4408, device='cuda:0')
episode: 202 training return: tensor(-50.1635, device='cuda:0')
episode: 203 training return: tensor(-93.5154, device='cuda:0')
epoch: 51 test_true_pfm: 2894.442749496913 sim_pfm: 111.42871154759389
episode: 204 training return: tensor(-362.3609, device='cuda:0')
episode: 205 training return: tensor(-386.5573, device='cuda:0')
episode: 206 training return: tensor(-384.2229, device='cuda:0')
episode: 207 training return: tensor(26.3745, device='cuda:0')
epoch: 52 test_true_pfm: 2331.4480926554443 sim_pfm: -218.27199357839223
episode: 208 training return: tensor(-133.9948, device='cuda:0')
episode: 209 training return: tensor(-200.6908, device='cuda:0')
episode: 210 training return: tensor(-387.7009, device='cuda:0')
episode: 211 training return: tensor(107.6512, device='cuda:0')
epoch: 53 test_true_pfm: 2203.6759079588755 sim_pfm: 9.239781983119125
episode: 212 training return: tensor(-343.6621, device='cuda:0')
episode: 213 training return: tensor(-376.2998, device='cuda:0')
episode: 214 training return: tensor(-214.2971, device='cuda:0')
episode: 215 training return: tensor(254.5213, device='cuda:0')
epoch: 54 test_true_pfm: 3049.0196000933342 sim_pfm: 283.22577714179835
episode: 216 training return: tensor(-127.3134, device='cuda:0')
episode: 217 training return: tensor(-113.4001, device='cuda:0')
episode: 218 training return: tensor(39.4448, device='cuda:0')
episode: 219 training return: tensor(-338.0685, device='cuda:0')
epoch: 55 test_true_pfm: 3068.1673012537562 sim_pfm: 82.44497349652617
episode: 220 training return: tensor(249.8751, device='cuda:0')
episode: 221 training return: tensor(-81.5058, device='cuda:0')
episode: 222 training return: tensor(269.0915, device='cuda:0')
episode: 223 training return: tensor(-297.9648, device='cuda:0')
epoch: 56 test_true_pfm: 3083.271299678812 sim_pfm: 160.4611531221211
episode: 224 training return: tensor(-53.3688, device='cuda:0')
episode: 225 training return: tensor(76.0510, device='cuda:0')
episode: 226 training return: tensor(240.9542, device='cuda:0')
episode: 227 training return: tensor(-272.9125, device='cuda:0')
epoch: 57 test_true_pfm: 3141.5486656855755 sim_pfm: 148.12331974726598
episode: 228 training return: tensor(-340.2980, device='cuda:0')
episode: 229 training return: tensor(-245.9633, device='cuda:0')
episode: 230 training return: tensor(-393.4714, device='cuda:0')
episode: 231 training return: tensor(-171.3577, device='cuda:0')
epoch: 58 test_true_pfm: 2946.064534952522 sim_pfm: 136.30600340660507
episode: 232 training return: tensor(-135.7492, device='cuda:0')
episode: 233 training return: tensor(-214.1873, device='cuda:0')
episode: 234 training return: tensor(-137.9864, device='cuda:0')
episode: 235 training return: tensor(-39.6830, device='cuda:0')
epoch: 59 test_true_pfm: 3290.26853608423 sim_pfm: 242.19436475289208
episode: 236 training return: tensor(-52.4522, device='cuda:0')
episode: 237 training return: tensor(-378.2837, device='cuda:0')
episode: 238 training return: tensor(-146.9153, device='cuda:0')
episode: 239 training return: tensor(-364.3230, device='cuda:0')
epoch: 60 test_true_pfm: 2752.5200436482396 sim_pfm: 40.49296904169023
episode: 240 training return: tensor(-192.6702, device='cuda:0')
episode: 241 training return: tensor(-223.5795, device='cuda:0')
episode: 242 training return: tensor(-218.6468, device='cuda:0')
episode: 243 training return: tensor(-331.9848, device='cuda:0')
epoch: 61 test_true_pfm: 2410.877297141378 sim_pfm: 281.98621911388665
episode: 244 training return: tensor(-121.0742, device='cuda:0')
episode: 245 training return: tensor(5.5315, device='cuda:0')
episode: 246 training return: tensor(-126.8694, device='cuda:0')
episode: 247 training return: tensor(282.2593, device='cuda:0')
epoch: 62 test_true_pfm: 3283.310408902185 sim_pfm: 91.87905386131024
episode: 248 training return: tensor(-293.0856, device='cuda:0')
episode: 249 training return: tensor(102.4144, device='cuda:0')
episode: 250 training return: tensor(121.4143, device='cuda:0')
episode: 251 training return: tensor(-218.5745, device='cuda:0')
epoch: 63 test_true_pfm: 3045.9629858272306 sim_pfm: 248.9522722355711
episode: 252 training return: tensor(285.5140, device='cuda:0')
episode: 253 training return: tensor(-258.1057, device='cuda:0')
episode: 254 training return: tensor(-298.5302, device='cuda:0')
episode: 255 training return: tensor(-38.4486, device='cuda:0')
epoch: 64 test_true_pfm: 2756.7448768413683 sim_pfm: 51.62718411971582
episode: 256 training return: tensor(-148.5045, device='cuda:0')
episode: 257 training return: tensor(50.1125, device='cuda:0')
episode: 258 training return: tensor(-32.8256, device='cuda:0')
episode: 259 training return: tensor(-274.1811, device='cuda:0')
epoch: 65 test_true_pfm: 2725.0508551961657 sim_pfm: 102.9774876954325
episode: 260 training return: tensor(295.9505, device='cuda:0')
episode: 261 training return: tensor(111.1377, device='cuda:0')
episode: 262 training return: tensor(75.2627, device='cuda:0')
episode: 263 training return: tensor(-382.6722, device='cuda:0')
epoch: 66 test_true_pfm: 2523.9059513150028 sim_pfm: 228.88856244056174
episode: 264 training return: tensor(-214.1366, device='cuda:0')
episode: 265 training return: tensor(-291.7805, device='cuda:0')
episode: 266 training return: tensor(298.8905, device='cuda:0')
episode: 267 training return: tensor(-212.4406, device='cuda:0')
epoch: 67 test_true_pfm: 2622.672467868206 sim_pfm: 175.26596804793613
episode: 268 training return: tensor(242.4998, device='cuda:0')
episode: 269 training return: tensor(-401.7859, device='cuda:0')
episode: 270 training return: tensor(212.7136, device='cuda:0')
episode: 271 training return: tensor(14.8279, device='cuda:0')
epoch: 68 test_true_pfm: 3267.318375437035 sim_pfm: 205.24135098117404
episode: 272 training return: tensor(255.5466, device='cuda:0')
episode: 273 training return: tensor(-262.7975, device='cuda:0')
episode: 274 training return: tensor(-383.3235, device='cuda:0')
episode: 275 training return: tensor(249.8880, device='cuda:0')
epoch: 69 test_true_pfm: 2925.7447483413416 sim_pfm: 185.7198361932145
episode: 276 training return: tensor(-194.9028, device='cuda:0')
episode: 277 training return: tensor(-388.8841, device='cuda:0')
episode: 278 training return: tensor(220.9846, device='cuda:0')
episode: 279 training return: tensor(-359.1232, device='cuda:0')
epoch: 70 test_true_pfm: 2759.5381058590156 sim_pfm: 116.039314265945
episode: 280 training return: tensor(-356.1622, device='cuda:0')
episode: 281 training return: tensor(-343.4670, device='cuda:0')
episode: 282 training return: tensor(112.8269, device='cuda:0')
episode: 283 training return: tensor(-250.4008, device='cuda:0')
epoch: 71 test_true_pfm: 1558.322202628772 sim_pfm: -307.46178636327386
episode: 284 training return: tensor(33.7229, device='cuda:0')
episode: 285 training return: tensor(-254.3123, device='cuda:0')
episode: 286 training return: tensor(-52.3711, device='cuda:0')
episode: 287 training return: tensor(-169.1989, device='cuda:0')
epoch: 72 test_true_pfm: 2963.929334267357 sim_pfm: 159.94298875945
episode: 288 training return: tensor(-0.2234, device='cuda:0')
episode: 289 training return: tensor(-385.8514, device='cuda:0')
episode: 290 training return: tensor(-151.1815, device='cuda:0')
episode: 291 training return: tensor(137.5183, device='cuda:0')
epoch: 73 test_true_pfm: 2704.330839579327 sim_pfm: -4.2929856311529875
episode: 292 training return: tensor(70.8845, device='cuda:0')
episode: 293 training return: tensor(256.5883, device='cuda:0')
episode: 294 training return: tensor(115.9665, device='cuda:0')
episode: 295 training return: tensor(-54.4714, device='cuda:0')
epoch: 74 test_true_pfm: 3259.2216700721197 sim_pfm: 91.49015546255396
episode: 296 training return: tensor(197.9478, device='cuda:0')
episode: 297 training return: tensor(30.7582, device='cuda:0')
episode: 298 training return: tensor(-142.9451, device='cuda:0')
episode: 299 training return: tensor(17.0659, device='cuda:0')
epoch: 75 test_true_pfm: 2498.550163908159 sim_pfm: 267.2931527410789
episode: 300 training return: tensor(38.7499, device='cuda:0')
episode: 301 training return: tensor(-309.7153, device='cuda:0')
episode: 302 training return: tensor(-196.7885, device='cuda:0')
episode: 303 training return: tensor(-408.6634, device='cuda:0')
epoch: 76 test_true_pfm: 2577.7706481400437 sim_pfm: 134.9391525593237
episode: 304 training return: tensor(59.6305, device='cuda:0')
episode: 305 training return: tensor(-334.2321, device='cuda:0')
episode: 306 training return: tensor(-120.0034, device='cuda:0')
episode: 307 training return: tensor(240.8329, device='cuda:0')
epoch: 77 test_true_pfm: 2384.120807350461 sim_pfm: 267.8242980663199
episode: 308 training return: tensor(5.4517, device='cuda:0')
episode: 309 training return: tensor(-303.4438, device='cuda:0')
episode: 310 training return: tensor(-362.2773, device='cuda:0')
episode: 311 training return: tensor(-154.1173, device='cuda:0')
epoch: 78 test_true_pfm: 2894.3269326813825 sim_pfm: 262.2391664837196
episode: 312 training return: tensor(-252.0695, device='cuda:0')
episode: 313 training return: tensor(-27.0686, device='cuda:0')
episode: 314 training return: tensor(-87.9043, device='cuda:0')
episode: 315 training return: tensor(277.4454, device='cuda:0')
epoch: 79 test_true_pfm: 2659.984852097284 sim_pfm: 117.36242642684374
episode: 316 training return: tensor(-386.3170, device='cuda:0')
episode: 317 training return: tensor(266.6379, device='cuda:0')
episode: 318 training return: tensor(-64.3365, device='cuda:0')
episode: 319 training return: tensor(69.2104, device='cuda:0')
epoch: 80 test_true_pfm: 2968.0163050941896 sim_pfm: 174.4556034525449
episode: 320 training return: tensor(141.8372, device='cuda:0')
episode: 321 training return: tensor(35.4304, device='cuda:0')
episode: 322 training return: tensor(-71.3068, device='cuda:0')
episode: 323 training return: tensor(-223.1630, device='cuda:0')
epoch: 81 test_true_pfm: 3273.3103715737984 sim_pfm: 45.89559254841879
episode: 324 training return: tensor(263.1111, device='cuda:0')
episode: 325 training return: tensor(-335.1717, device='cuda:0')
episode: 326 training return: tensor(-66.1676, device='cuda:0')
episode: 327 training return: tensor(-338.4747, device='cuda:0')
epoch: 82 test_true_pfm: 2935.5736844012167 sim_pfm: 137.53553669926865
episode: 328 training return: tensor(-141.6106, device='cuda:0')
episode: 329 training return: tensor(58.4592, device='cuda:0')
episode: 330 training return: tensor(181.2913, device='cuda:0')
episode: 331 training return: tensor(256.2651, device='cuda:0')
epoch: 83 test_true_pfm: 2734.182385749971 sim_pfm: 91.13005900623587
episode: 332 training return: tensor(278.9185, device='cuda:0')
episode: 333 training return: tensor(12.9545, device='cuda:0')
episode: 334 training return: tensor(-45.4953, device='cuda:0')
episode: 335 training return: tensor(-379.6968, device='cuda:0')
epoch: 84 test_true_pfm: 3047.7378313852223 sim_pfm: 89.38145742792403
episode: 336 training return: tensor(-202.3605, device='cuda:0')
episode: 337 training return: tensor(-214.4008, device='cuda:0')
episode: 338 training return: tensor(253.3692, device='cuda:0')
episode: 339 training return: tensor(77.0294, device='cuda:0')
epoch: 85 test_true_pfm: 2915.8174868749775 sim_pfm: 239.6863332055897
episode: 340 training return: tensor(304.1624, device='cuda:0')
episode: 341 training return: tensor(257.9771, device='cuda:0')
episode: 342 training return: tensor(44.9879, device='cuda:0')
episode: 343 training return: tensor(-336.6464, device='cuda:0')
epoch: 86 test_true_pfm: 3252.3364793088913 sim_pfm: 268.513072114205
episode: 344 training return: tensor(-147.6375, device='cuda:0')
episode: 345 training return: tensor(-229.6659, device='cuda:0')
episode: 346 training return: tensor(-219.6120, device='cuda:0')
episode: 347 training return: tensor(-365.2525, device='cuda:0')
epoch: 87 test_true_pfm: 2939.681192733522 sim_pfm: 151.72929550152426
episode: 348 training return: tensor(266.7491, device='cuda:0')
episode: 349 training return: tensor(-197.7682, device='cuda:0')
episode: 350 training return: tensor(262.7921, device='cuda:0')
episode: 351 training return: tensor(-146.0727, device='cuda:0')
epoch: 88 test_true_pfm: 2775.754287639587 sim_pfm: 128.89296975037237
episode: 352 training return: tensor(-230.0308, device='cuda:0')
episode: 353 training return: tensor(-11.8791, device='cuda:0')
episode: 354 training return: tensor(-218.4227, device='cuda:0')
episode: 355 training return: tensor(-127.4237, device='cuda:0')
epoch: 89 test_true_pfm: 2211.5421434932878 sim_pfm: 303.40961225373513
episode: 356 training return: tensor(-333.2482, device='cuda:0')
episode: 357 training return: tensor(-121.3038, device='cuda:0')
episode: 358 training return: tensor(-72.4335, device='cuda:0')
episode: 359 training return: tensor(-330.7676, device='cuda:0')
epoch: 90 test_true_pfm: 2656.1316218007196 sim_pfm: 67.70916860319751
episode: 360 training return: tensor(246.7318, device='cuda:0')
episode: 361 training return: tensor(-349.3270, device='cuda:0')
episode: 362 training return: tensor(224.9207, device='cuda:0')
episode: 363 training return: tensor(-183.9382, device='cuda:0')
epoch: 91 test_true_pfm: 2852.872184297406 sim_pfm: 213.9113086881116
episode: 364 training return: tensor(84.5543, device='cuda:0')
episode: 365 training return: tensor(242.5289, device='cuda:0')
episode: 366 training return: tensor(-335.6499, device='cuda:0')
episode: 367 training return: tensor(-355.7674, device='cuda:0')
epoch: 92 test_true_pfm: 3292.9253486578805 sim_pfm: 235.82205686805537
episode: 368 training return: tensor(273.4484, device='cuda:0')
episode: 369 training return: tensor(102.1698, device='cuda:0')
episode: 370 training return: tensor(129.0725, device='cuda:0')
episode: 371 training return: tensor(-393.7201, device='cuda:0')
epoch: 93 test_true_pfm: 2504.330053613617 sim_pfm: -22.617247651243815
episode: 372 training return: tensor(258.2884, device='cuda:0')
episode: 373 training return: tensor(-40.2631, device='cuda:0')
episode: 374 training return: tensor(-349.9759, device='cuda:0')
episode: 375 training return: tensor(-40.1505, device='cuda:0')
epoch: 94 test_true_pfm: 2933.25071620472 sim_pfm: 283.63630437730654
episode: 376 training return: tensor(-285.3188, device='cuda:0')
episode: 377 training return: tensor(315.6057, device='cuda:0')
episode: 378 training return: tensor(-30.1855, device='cuda:0')
episode: 379 training return: tensor(-34.2623, device='cuda:0')
epoch: 95 test_true_pfm: 2720.1633054471563 sim_pfm: 155.77427705247342
episode: 380 training return: tensor(-144.7997, device='cuda:0')
episode: 381 training return: tensor(-201.8235, device='cuda:0')
episode: 382 training return: tensor(-314.2686, device='cuda:0')
episode: 383 training return: tensor(203.5225, device='cuda:0')
epoch: 96 test_true_pfm: 2741.654332245005 sim_pfm: 161.85724973895898
episode: 384 training return: tensor(265.5995, device='cuda:0')
episode: 385 training return: tensor(-3.3140, device='cuda:0')
episode: 386 training return: tensor(-344.4490, device='cuda:0')
episode: 387 training return: tensor(-382.4179, device='cuda:0')
epoch: 97 test_true_pfm: 3308.5594867198197 sim_pfm: 195.1038578376174
episode: 388 training return: tensor(-378.1809, device='cuda:0')
episode: 389 training return: tensor(-134.1389, device='cuda:0')
episode: 390 training return: tensor(236.0868, device='cuda:0')
episode: 391 training return: tensor(-17.0650, device='cuda:0')
epoch: 98 test_true_pfm: 2925.954647517235 sim_pfm: 40.982677023935445
episode: 392 training return: tensor(236.8438, device='cuda:0')
episode: 393 training return: tensor(-63.4951, device='cuda:0')
episode: 394 training return: tensor(-354.2927, device='cuda:0')
episode: 395 training return: tensor(-97.5000, device='cuda:0')
epoch: 99 test_true_pfm: 2557.2408276234983 sim_pfm: 162.89840685459785
episode: 396 training return: tensor(-294.1822, device='cuda:0')
episode: 397 training return: tensor(-318.8018, device='cuda:0')
episode: 398 training return: tensor(-384.6888, device='cuda:0')
episode: 399 training return: tensor(-309.9647, device='cuda:0')
epoch: 100 test_true_pfm: 1428.235718457617 sim_pfm: -304.60986525169574
episode: 400 training return: tensor(115.1540, device='cuda:0')
episode: 401 training return: tensor(-150.9834, device='cuda:0')
episode: 402 training return: tensor(-391.4774, device='cuda:0')
episode: 403 training return: tensor(23.8695, device='cuda:0')
epoch: 101 test_true_pfm: 3316.316031618366 sim_pfm: 73.96238845070668
episode: 404 training return: tensor(100.8177, device='cuda:0')
episode: 405 training return: tensor(209.5399, device='cuda:0')
episode: 406 training return: tensor(256.2622, device='cuda:0')
episode: 407 training return: tensor(-235.2672, device='cuda:0')
epoch: 102 test_true_pfm: 2899.7174791648117 sim_pfm: 297.5891798181013
episode: 408 training return: tensor(17.5706, device='cuda:0')
episode: 409 training return: tensor(-153.1207, device='cuda:0')
episode: 410 training return: tensor(-78.8709, device='cuda:0')
episode: 411 training return: tensor(-292.6766, device='cuda:0')
epoch: 103 test_true_pfm: 2588.5166429100377 sim_pfm: 140.07890607998706
episode: 412 training return: tensor(244.0089, device='cuda:0')
episode: 413 training return: tensor(-129.1284, device='cuda:0')
episode: 414 training return: tensor(-226.3433, device='cuda:0')
episode: 415 training return: tensor(29.9140, device='cuda:0')
epoch: 104 test_true_pfm: 2608.6757938991946 sim_pfm: 245.15577503251066
episode: 416 training return: tensor(-210.3679, device='cuda:0')
episode: 417 training return: tensor(-41.5061, device='cuda:0')
episode: 418 training return: tensor(-14.4907, device='cuda:0')
episode: 419 training return: tensor(-312.2901, device='cuda:0')
epoch: 105 test_true_pfm: 2752.71912431574 sim_pfm: 274.2070304587639
episode: 420 training return: tensor(243.4891, device='cuda:0')
episode: 421 training return: tensor(96.6560, device='cuda:0')
episode: 422 training return: tensor(-380.8618, device='cuda:0')
episode: 423 training return: tensor(-310.1331, device='cuda:0')
epoch: 106 test_true_pfm: 3269.7027890294817 sim_pfm: 206.88171528368062
episode: 424 training return: tensor(-388.4981, device='cuda:0')
episode: 425 training return: tensor(139.0544, device='cuda:0')
episode: 426 training return: tensor(-133.2452, device='cuda:0')
episode: 427 training return: tensor(244.0938, device='cuda:0')
epoch: 107 test_true_pfm: 3035.702065669095 sim_pfm: 205.6699851731537
episode: 428 training return: tensor(3.2973, device='cuda:0')
episode: 429 training return: tensor(-316.4373, device='cuda:0')
episode: 430 training return: tensor(60.6392, device='cuda:0')
episode: 431 training return: tensor(-244.3328, device='cuda:0')
epoch: 108 test_true_pfm: 2471.682322197136 sim_pfm: 259.7477018655821
episode: 432 training return: tensor(283.1564, device='cuda:0')
episode: 433 training return: tensor(197.7720, device='cuda:0')
episode: 434 training return: tensor(-242.3314, device='cuda:0')
episode: 435 training return: tensor(-384.4265, device='cuda:0')
epoch: 109 test_true_pfm: 3254.1899109154133 sim_pfm: 154.74133032909594
episode: 436 training return: tensor(-360.9082, device='cuda:0')
episode: 437 training return: tensor(49.6936, device='cuda:0')
episode: 438 training return: tensor(-266.9908, device='cuda:0')
episode: 439 training return: tensor(-350.3055, device='cuda:0')
epoch: 110 test_true_pfm: 3281.789565730544 sim_pfm: 272.52853082839283
episode: 440 training return: tensor(26.1978, device='cuda:0')
episode: 441 training return: tensor(-38.9722, device='cuda:0')
episode: 442 training return: tensor(184.0397, device='cuda:0')
episode: 443 training return: tensor(-390.7799, device='cuda:0')
epoch: 111 test_true_pfm: 3175.050690927195 sim_pfm: 117.98583194456296
episode: 444 training return: tensor(272.2969, device='cuda:0')
episode: 445 training return: tensor(-242.2625, device='cuda:0')
episode: 446 training return: tensor(-262.9868, device='cuda:0')
episode: 447 training return: tensor(267.6517, device='cuda:0')
epoch: 112 test_true_pfm: 2865.652026220839 sim_pfm: 166.43380315378695
episode: 448 training return: tensor(-389.9348, device='cuda:0')
episode: 449 training return: tensor(257.8405, device='cuda:0')
episode: 450 training return: tensor(-342.5034, device='cuda:0')
episode: 451 training return: tensor(51.8535, device='cuda:0')
epoch: 113 test_true_pfm: 2720.314367431198 sim_pfm: -323.5628756122508
episode: 452 training return: tensor(-192.1591, device='cuda:0')
episode: 453 training return: tensor(-425.6071, device='cuda:0')
episode: 454 training return: tensor(256.0282, device='cuda:0')
episode: 455 training return: tensor(-267.0914, device='cuda:0')
epoch: 114 test_true_pfm: 2957.0055000768466 sim_pfm: 163.05224002789086
episode: 456 training return: tensor(-243.7033, device='cuda:0')
episode: 457 training return: tensor(-382.9303, device='cuda:0')
episode: 458 training return: tensor(-248.8059, device='cuda:0')
episode: 459 training return: tensor(-331.7958, device='cuda:0')
epoch: 115 test_true_pfm: 2985.5671812547066 sim_pfm: 127.59288538130932
episode: 460 training return: tensor(31.6920, device='cuda:0')
episode: 461 training return: tensor(123.4102, device='cuda:0')
episode: 462 training return: tensor(-90.2457, device='cuda:0')
episode: 463 training return: tensor(-382.0239, device='cuda:0')
epoch: 116 test_true_pfm: 2667.806701442341 sim_pfm: -16.8205668386848
episode: 464 training return: tensor(258.3722, device='cuda:0')
episode: 465 training return: tensor(-387.6244, device='cuda:0')
episode: 466 training return: tensor(-87.9491, device='cuda:0')
episode: 467 training return: tensor(-166.7513, device='cuda:0')
epoch: 117 test_true_pfm: 3193.603865420424 sim_pfm: 198.2402016413398
episode: 468 training return: tensor(87.4461, device='cuda:0')
episode: 469 training return: tensor(290.3886, device='cuda:0')
episode: 470 training return: tensor(-338.0316, device='cuda:0')
episode: 471 training return: tensor(259.3332, device='cuda:0')
epoch: 118 test_true_pfm: 3298.675294403896 sim_pfm: 274.7094522373906
episode: 472 training return: tensor(-339.9869, device='cuda:0')
episode: 473 training return: tensor(55.6309, device='cuda:0')
episode: 474 training return: tensor(-183.0253, device='cuda:0')
episode: 475 training return: tensor(97.2798, device='cuda:0')
epoch: 119 test_true_pfm: 2864.6928334719864 sim_pfm: 157.71126246258305
episode: 476 training return: tensor(263.8565, device='cuda:0')
episode: 477 training return: tensor(-302.5563, device='cuda:0')
episode: 478 training return: tensor(-338.2318, device='cuda:0')
episode: 479 training return: tensor(-299.1046, device='cuda:0')
epoch: 120 test_true_pfm: 3161.812856446002 sim_pfm: 265.3361080174121
episode: 480 training return: tensor(-395.6162, device='cuda:0')
episode: 481 training return: tensor(263.7361, device='cuda:0')
episode: 482 training return: tensor(155.3571, device='cuda:0')
episode: 483 training return: tensor(-398.1780, device='cuda:0')
epoch: 121 test_true_pfm: 2812.216435042393 sim_pfm: 273.65412529642344
episode: 484 training return: tensor(-264.6235, device='cuda:0')
episode: 485 training return: tensor(254.3550, device='cuda:0')
episode: 486 training return: tensor(-341.2299, device='cuda:0')
episode: 487 training return: tensor(296.0222, device='cuda:0')
epoch: 122 test_true_pfm: 2896.1566385284773 sim_pfm: 160.3377132594663
episode: 488 training return: tensor(-297.1647, device='cuda:0')
episode: 489 training return: tensor(-137.4314, device='cuda:0')
episode: 490 training return: tensor(-239.0620, device='cuda:0')
episode: 491 training return: tensor(-117.1314, device='cuda:0')
epoch: 123 test_true_pfm: 2954.3486803715987 sim_pfm: 180.2723462645081
episode: 492 training return: tensor(47.1970, device='cuda:0')
episode: 493 training return: tensor(-298.1664, device='cuda:0')
episode: 494 training return: tensor(269.8199, device='cuda:0')
episode: 495 training return: tensor(102.1012, device='cuda:0')
epoch: 124 test_true_pfm: 2879.5737150772925 sim_pfm: 173.67428262394
episode: 496 training return: tensor(-156.9867, device='cuda:0')
episode: 497 training return: tensor(-267.8799, device='cuda:0')
episode: 498 training return: tensor(169.5039, device='cuda:0')
episode: 499 training return: tensor(-135.4511, device='cuda:0')
epoch: 125 test_true_pfm: 3075.487423163468 sim_pfm: 10.893482902397713
episode: 500 training return: tensor(-379.4550, device='cuda:0')
episode: 501 training return: tensor(-299.5831, device='cuda:0')
episode: 502 training return: tensor(302.4491, device='cuda:0')
episode: 503 training return: tensor(-367.8124, device='cuda:0')
epoch: 126 test_true_pfm: 2662.1673744138875 sim_pfm: 79.3698362672197
episode: 504 training return: tensor(-96.3616, device='cuda:0')
episode: 505 training return: tensor(-322.9803, device='cuda:0')
episode: 506 training return: tensor(-144.5860, device='cuda:0')
episode: 507 training return: tensor(-262.1472, device='cuda:0')
epoch: 127 test_true_pfm: 2925.827677642153 sim_pfm: 174.4151861310432
episode: 508 training return: tensor(309.4991, device='cuda:0')
episode: 509 training return: tensor(-264.2193, device='cuda:0')
episode: 510 training return: tensor(259.4912, device='cuda:0')
episode: 511 training return: tensor(-108.9942, device='cuda:0')
epoch: 128 test_true_pfm: 2707.2711531579625 sim_pfm: 72.42697027213096
episode: 512 training return: tensor(-271.3992, device='cuda:0')
episode: 513 training return: tensor(-95.4151, device='cuda:0')
episode: 514 training return: tensor(-247.3073, device='cuda:0')
episode: 515 training return: tensor(225.6908, device='cuda:0')
epoch: 129 test_true_pfm: 1288.9295187297141 sim_pfm: -152.7721885224455
episode: 516 training return: tensor(264.1350, device='cuda:0')
episode: 517 training return: tensor(-308.4231, device='cuda:0')
episode: 518 training return: tensor(-263.7291, device='cuda:0')
episode: 519 training return: tensor(-165.9028, device='cuda:0')
epoch: 130 test_true_pfm: 2897.3986976060064 sim_pfm: -56.86129188874232
episode: 520 training return: tensor(-365.1023, device='cuda:0')
episode: 521 training return: tensor(-302.2703, device='cuda:0')
episode: 522 training return: tensor(-270.9924, device='cuda:0')
episode: 523 training return: tensor(115.6950, device='cuda:0')
epoch: 131 test_true_pfm: 1459.5036524432317 sim_pfm: -151.48328366681622
episode: 524 training return: tensor(161.1304, device='cuda:0')
episode: 525 training return: tensor(16.1536, device='cuda:0')
episode: 526 training return: tensor(45.5830, device='cuda:0')
episode: 527 training return: tensor(60.8505, device='cuda:0')
epoch: 132 test_true_pfm: 2917.3102572062785 sim_pfm: 168.1745015751027
episode: 528 training return: tensor(-133.3098, device='cuda:0')
episode: 529 training return: tensor(269.7090, device='cuda:0')
episode: 530 training return: tensor(-48.8939, device='cuda:0')
episode: 531 training return: tensor(-222.8982, device='cuda:0')
epoch: 133 test_true_pfm: 3038.1010546495713 sim_pfm: 284.6199902752608
episode: 532 training return: tensor(-349.7274, device='cuda:0')
episode: 533 training return: tensor(-25.2728, device='cuda:0')
episode: 534 training return: tensor(-4.5234, device='cuda:0')
episode: 535 training return: tensor(-229.3680, device='cuda:0')
epoch: 134 test_true_pfm: 3041.067456368833 sim_pfm: 104.52845549839549
episode: 536 training return: tensor(-61.8435, device='cuda:0')
episode: 537 training return: tensor(-324.5566, device='cuda:0')
episode: 538 training return: tensor(34.1862, device='cuda:0')
episode: 539 training return: tensor(21.2439, device='cuda:0')
epoch: 135 test_true_pfm: 1445.0860709591605 sim_pfm: -348.16630390154506
episode: 540 training return: tensor(287.0685, device='cuda:0')
episode: 541 training return: tensor(-5.7917, device='cuda:0')
episode: 542 training return: tensor(-321.7378, device='cuda:0')
episode: 543 training return: tensor(-332.2294, device='cuda:0')
epoch: 136 test_true_pfm: 2987.898308243814 sim_pfm: 28.796476942448255
episode: 544 training return: tensor(-234.5345, device='cuda:0')
episode: 545 training return: tensor(-315.8642, device='cuda:0')
episode: 546 training return: tensor(-59.4737, device='cuda:0')
episode: 547 training return: tensor(24.3665, device='cuda:0')
epoch: 137 test_true_pfm: 3239.2633720636936 sim_pfm: 236.47771907333905
episode: 548 training return: tensor(243.7810, device='cuda:0')
episode: 549 training return: tensor(-336.8856, device='cuda:0')
episode: 550 training return: tensor(-360.3122, device='cuda:0')
episode: 551 training return: tensor(254.5103, device='cuda:0')
epoch: 138 test_true_pfm: 1391.968608020149 sim_pfm: -242.50724257465723
episode: 552 training return: tensor(-247.4385, device='cuda:0')
episode: 553 training return: tensor(-179.1412, device='cuda:0')
episode: 554 training return: tensor(-84.0608, device='cuda:0')
episode: 555 training return: tensor(168.9769, device='cuda:0')
epoch: 139 test_true_pfm: 3014.4433186548827 sim_pfm: 280.4318181242949
episode: 556 training return: tensor(260.6144, device='cuda:0')
episode: 557 training return: tensor(-67.1241, device='cuda:0')
episode: 558 training return: tensor(-330.0373, device='cuda:0')
episode: 559 training return: tensor(-213.5764, device='cuda:0')
epoch: 140 test_true_pfm: 2254.8427363761875 sim_pfm: 58.989195585018024
episode: 560 training return: tensor(-26.0684, device='cuda:0')
episode: 561 training return: tensor(298.0994, device='cuda:0')
episode: 562 training return: tensor(49.3126, device='cuda:0')
episode: 563 training return: tensor(-293.3936, device='cuda:0')
epoch: 141 test_true_pfm: 1720.2397235709388 sim_pfm: -379.53216325390775
episode: 564 training return: tensor(-125.8480, device='cuda:0')
episode: 565 training return: tensor(-326.4195, device='cuda:0')
episode: 566 training return: tensor(121.5373, device='cuda:0')
episode: 567 training return: tensor(-333.7033, device='cuda:0')
epoch: 142 test_true_pfm: 2857.273374399099 sim_pfm: 178.51015684693508
episode: 568 training return: tensor(-252.4642, device='cuda:0')
episode: 569 training return: tensor(183.5057, device='cuda:0')
episode: 570 training return: tensor(166.2681, device='cuda:0')
episode: 571 training return: tensor(-150.7496, device='cuda:0')
epoch: 143 test_true_pfm: 2677.236722021602 sim_pfm: 217.90830326902991
episode: 572 training return: tensor(-40.7010, device='cuda:0')
episode: 573 training return: tensor(219.8438, device='cuda:0')
episode: 574 training return: tensor(-21.5018, device='cuda:0')
episode: 575 training return: tensor(116.6066, device='cuda:0')
epoch: 144 test_true_pfm: 2746.3745250734887 sim_pfm: 175.6294204671091
episode: 576 training return: tensor(-192.8572, device='cuda:0')
episode: 577 training return: tensor(259.3576, device='cuda:0')
episode: 578 training return: tensor(-216.3892, device='cuda:0')
episode: 579 training return: tensor(-254.9746, device='cuda:0')
epoch: 145 test_true_pfm: 2665.8127329294434 sim_pfm: 153.7918038146648
episode: 580 training return: tensor(-125.7700, device='cuda:0')
episode: 581 training return: tensor(31.1175, device='cuda:0')
episode: 582 training return: tensor(30.7616, device='cuda:0')
episode: 583 training return: tensor(45.7239, device='cuda:0')
epoch: 146 test_true_pfm: 1451.3788228285555 sim_pfm: -142.969903088524
episode: 584 training return: tensor(-178.0324, device='cuda:0')
episode: 585 training return: tensor(-312.7650, device='cuda:0')
episode: 586 training return: tensor(17.3923, device='cuda:0')
episode: 587 training return: tensor(-237.9022, device='cuda:0')
epoch: 147 test_true_pfm: 3218.7106849740435 sim_pfm: 56.649097122310195
episode: 588 training return: tensor(262.7369, device='cuda:0')
episode: 589 training return: tensor(-389.5491, device='cuda:0')
episode: 590 training return: tensor(-387.6172, device='cuda:0')
episode: 591 training return: tensor(-17.0034, device='cuda:0')
epoch: 148 test_true_pfm: 2183.9941090831758 sim_pfm: 178.3378797629848
episode: 592 training return: tensor(56.1485, device='cuda:0')
episode: 593 training return: tensor(-140.0559, device='cuda:0')
episode: 594 training return: tensor(283.5657, device='cuda:0')
episode: 595 training return: tensor(258.2323, device='cuda:0')
epoch: 149 test_true_pfm: 3176.7078651204642 sim_pfm: 171.0517488125091
episode: 596 training return: tensor(-146.9249, device='cuda:0')
episode: 597 training return: tensor(257.9798, device='cuda:0')
episode: 598 training return: tensor(-185.9615, device='cuda:0')
episode: 599 training return: tensor(-398.9945, device='cuda:0')
epoch: 150 test_true_pfm: 3232.678036938875 sim_pfm: 262.02805303392233
