['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'behavior', '--traj', 'medium', '--seed', '4', '--data', '3000']
epoch: 0 training_loss 0.29667728409171107 test_loss: 0.08866416215896607
epoch: 1 training_loss 0.15292752180248498 test_loss: 0.0736660897731781
epoch: 2 training_loss 0.12681366082280873 test_loss: 0.05697611570358276
epoch: 3 training_loss 0.10945870514959097 test_loss: 0.059752863645553586
epoch: 4 training_loss 0.09246213901787996 test_loss: 0.060556930303573606
epoch: 5 training_loss 0.08310402991250157 test_loss: 0.04832260608673096
epoch: 6 training_loss 0.07596696641296148 test_loss: 0.06243818998336792
epoch: 7 training_loss 0.07734791943803429 test_loss: 0.061544609069824216
epoch: 8 training_loss 0.06420460510998964 test_loss: 0.05609161853790283
epoch: 9 training_loss 0.06374282490462065 test_loss: 0.07039680480957031
epoch: 10 training_loss 0.059911548886448146 test_loss: 0.05850702524185181
epoch: 11 training_loss 0.06121528970077634 test_loss: 0.06409083008766174
epoch: 12 training_loss 0.06564992558211089 test_loss: 0.07006768584251404
epoch: 13 training_loss 0.06185239523649216 test_loss: 0.05837395191192627
epoch: 14 training_loss 0.046442574076354506 test_loss: 0.06200150847434997
epoch: 15 training_loss 0.05008379419334233 test_loss: 0.07232783436775207
epoch: 16 training_loss 0.042965407073497774 test_loss: 0.06432812213897705
epoch: 17 training_loss 0.04540329775772989 test_loss: 0.06286702752113342
epoch: 18 training_loss 0.03574718912132084 test_loss: 0.06759693622589111
epoch: 19 training_loss 0.0395223484467715 test_loss: 0.063221275806427
epoch: 20 training_loss 0.03575498992577195 test_loss: 0.06834491491317748
epoch: 21 training_loss 0.030844549890607597 test_loss: 0.07775316834449768
epoch: 22 training_loss 0.034819071646779776 test_loss: 0.0755587100982666
epoch: 23 training_loss 0.02898898529820144 test_loss: 0.07255833745002746
epoch: 24 training_loss 0.027335720709525047 test_loss: 0.07328444123268127
epoch: 25 training_loss 0.030537984706461428 test_loss: 0.08275453448295593
epoch: 26 training_loss 0.029663809910416603 test_loss: 0.07373079061508178
epoch: 27 training_loss 0.02906648294068873 test_loss: 0.08168063163757325
epoch: 28 training_loss 0.031172166699543594 test_loss: 0.0744372010231018
epoch: 29 training_loss 0.022241344889625907 test_loss: 0.0806054949760437
epoch: 30 training_loss 0.023122304151766002 test_loss: 0.0674126386642456
epoch: 31 training_loss 0.026696795807220043 test_loss: 0.07353057861328124
epoch: 32 training_loss 0.019614472393877803 test_loss: 0.08184905052185058
epoch: 33 training_loss 0.017064385479316116 test_loss: 0.08025336265563965
epoch: 34 training_loss 0.01846091167535633 test_loss: 0.08693847060203552
epoch: 35 training_loss 0.016854673326015472 test_loss: 0.07954617738723754
epoch: 36 training_loss 0.020347512760199606 test_loss: 0.08001243472099304
epoch: 37 training_loss 0.02123120571952313 test_loss: 0.07877719998359681
epoch: 38 training_loss 0.016511444703210146 test_loss: 0.08021482229232788
epoch: 39 training_loss 0.011001124943140895 test_loss: 0.08289234638214112
epoch: 40 training_loss 0.01364633210003376 test_loss: 0.08293638825416565
epoch: 41 training_loss 0.012197140641510486 test_loss: 0.08474391102790832
epoch: 42 training_loss 0.012067448520101607 test_loss: 0.08184519410133362
epoch: 43 training_loss 0.011768222015816718 test_loss: 0.0911267876625061
epoch: 44 training_loss 0.016963945869356394 test_loss: 0.07763387560844422
epoch: 45 training_loss 0.03720527733908966 test_loss: 0.0929088532924652
epoch: 46 training_loss 0.0341959642060101 test_loss: 0.08174006938934326
epoch: 47 training_loss 0.012791923384647817 test_loss: 0.08223799467086793
epoch: 48 training_loss 0.009974970540497452 test_loss: 0.08115324974060059
epoch: 49 training_loss 0.008864704561419785 test_loss: 0.08317000865936279
epoch: 50 training_loss 0.00790374906384386 test_loss: 0.08233494758605957
epoch: 51 training_loss 0.007298249679151923 test_loss: 0.08764945864677429
epoch: 52 training_loss 0.007255979841575027 test_loss: 0.09016520380973816
epoch: 53 training_loss 0.006198015122208744 test_loss: 0.09401168823242187
epoch: 54 training_loss 0.008993750023655593 test_loss: 0.08270518779754639
epoch: 55 training_loss 0.006334133793134242 test_loss: 0.0825694739818573
epoch: 56 training_loss 0.006783862789161503 test_loss: 0.09489139318466186
epoch: 57 training_loss 0.0053199226839933545 test_loss: 0.09401187896728516
epoch: 58 training_loss 0.004762296928092837 test_loss: 0.09109097123146057
epoch: 59 training_loss 0.005398399560945109 test_loss: 0.08873750567436219
epoch: 60 training_loss 0.005660911514423788 test_loss: 0.10236430168151855
epoch: 61 training_loss 0.005103738069301471 test_loss: 0.09906326532363892
epoch: 62 training_loss 0.009864802467636763 test_loss: 0.10411022901535034
epoch: 63 training_loss 0.12235892612487077 test_loss: 0.08996206521987915
epoch: 64 training_loss 0.03429042906500399 test_loss: 0.07937701940536498
epoch: 65 training_loss 0.016012391787953675 test_loss: 0.09043944478034974
epoch: 66 training_loss 0.010195565500762314 test_loss: 0.09415903687477112
epoch: 67 training_loss 0.008464523607399315 test_loss: 0.0939386546611786
epoch: 68 training_loss 0.008231165694305674 test_loss: 0.0900452196598053
epoch: 69 training_loss 0.0054008454142604025 test_loss: 0.09124555587768554
epoch: 70 training_loss 0.00534702243283391 test_loss: 0.09463372826576233
epoch: 71 training_loss 0.004528693130705506 test_loss: 0.09388272762298584
epoch: 72 training_loss 0.004461054936982691 test_loss: 0.09926068186759948
epoch: 73 training_loss 0.003992835108656436 test_loss: 0.09690155386924744
epoch: 74 training_loss 0.0041917307814583185 test_loss: 0.09831213355064392
epoch: 75 training_loss 0.003715718345483765 test_loss: 0.10087941884994507
epoch: 76 training_loss 0.0035035093617625536 test_loss: 0.10610898733139038
epoch: 77 training_loss 0.003212503300746903 test_loss: 0.10572954416275024
epoch: 78 training_loss 0.003407781535061076 test_loss: 0.09908537864685059
epoch: 79 training_loss 0.005497837882721797 test_loss: 0.10083001852035522
epoch: 80 training_loss 0.10831771784462035 test_loss: 0.08837391138076782
epoch: 81 training_loss 0.05719569914974272 test_loss: 0.07574819326400757
epoch: 82 training_loss 0.021745544266887008 test_loss: 0.08341577053070068
epoch: 83 training_loss 0.013679034532979131 test_loss: 0.09667963385581971
epoch: 84 training_loss 0.00922820849576965 test_loss: 0.09925939440727234
epoch: 85 training_loss 0.007727357528638095 test_loss: 0.10110914707183838
epoch: 86 training_loss 0.005582294219639152 test_loss: 0.09783924221992493
epoch: 87 training_loss 0.004822945554042235 test_loss: 0.09638816714286805
epoch: 88 training_loss 0.004349596631946042 test_loss: 0.10021814107894897
epoch: 89 training_loss 0.004153308174572885 test_loss: 0.09906736612319947
epoch: 90 training_loss 0.004016227426473051 test_loss: 0.10150102376937867
epoch: 91 training_loss 0.0037508612405508755 test_loss: 0.105027174949646
epoch: 92 training_loss 0.0032737290801014753 test_loss: 0.10584136247634887
epoch: 93 training_loss 0.002870560463052243 test_loss: 0.1086287260055542
epoch: 94 training_loss 0.003750859102001414 test_loss: 0.10640753507614135
epoch: 95 training_loss 0.0031323968025390057 test_loss: 0.10474629402160644
epoch: 96 training_loss 0.0028998694429174065 test_loss: 0.11117637157440186
epoch: 97 training_loss 0.002627447658451274 test_loss: 0.10544422864913941
epoch: 98 training_loss 0.002414398418040946 test_loss: 0.10502003431320191
epoch: 99 training_loss 0.0025122686463873835 test_loss: 0.10464473962783813
epoch: 100 training_loss 0.0026825283421203494 test_loss: 0.10927294492721558
epoch: 101 training_loss 0.0033119140309281645 test_loss: 0.11560168266296386
epoch: 102 training_loss 0.0027718673704657704 test_loss: 0.11066919565200806
epoch: 103 training_loss 0.02331244932953268 test_loss: 0.1844433069229126
epoch: 104 training_loss 0.11387581927701831 test_loss: 0.07367751598358155
epoch: 105 training_loss 0.03023665865883231 test_loss: 0.08633553385734558
epoch: 106 training_loss 0.01712125353515148 test_loss: 0.08317991495132446
epoch: 107 training_loss 0.008487390291411429 test_loss: 0.0909174144268036
epoch: 108 training_loss 0.006186214983463287 test_loss: 0.09296992421150208
epoch: 109 training_loss 0.0048218044335953895 test_loss: 0.09484532475471497
epoch: 110 training_loss 0.00444256148301065 test_loss: 0.09199071526527405
epoch: 111 training_loss 0.004927226712461561 test_loss: 0.0979969322681427
epoch: 112 training_loss 0.0037897692457772793 test_loss: 0.09729666113853455
epoch: 113 training_loss 0.0033659317484125493 test_loss: 0.10085197687149047
epoch: 114 training_loss 0.003085555613506585 test_loss: 0.10187411308288574
epoch: 115 training_loss 0.0029324465268291532 test_loss: 0.10632764101028443
epoch: 116 training_loss 0.002671577932778746 test_loss: 0.10042489767074585
epoch: 117 training_loss 0.0026081046351464467 test_loss: 0.10103695392608643
epoch: 118 training_loss 0.002572768069803715 test_loss: 0.10155582427978516
epoch: 119 training_loss 0.0026981462689582257 test_loss: 0.10621200799942017
epoch: 120 training_loss 0.002388878215570003 test_loss: 0.10788067579269409
epoch: 121 training_loss 0.0021589996392140163 test_loss: 0.10600757598876953
epoch: 122 training_loss 0.0021832644549431277 test_loss: 0.10531132221221924
epoch: 123 training_loss 0.0020441299240337683 test_loss: 0.10869718790054321
epoch: 124 training_loss 0.002127213405328803 test_loss: 0.10900360345840454
epoch: 125 training_loss 0.0018991146283224224 test_loss: 0.11441473960876465
epoch: 126 training_loss 0.001993417516350746 test_loss: 0.10859168767929077
epoch: 127 training_loss 0.001922840690240264 test_loss: 0.11115998029708862
epoch: 128 training_loss 0.0019062598497839645 test_loss: 0.1121327519416809
epoch: 129 training_loss 0.07467810030095279 test_loss: 0.18623604774475097
epoch: 130 training_loss 0.24935166012495757 test_loss: 0.08078867793083191
epoch: 131 training_loss 0.09265154436230659 test_loss: 0.07695733904838561
epoch: 132 training_loss 0.05490284289233387 test_loss: 0.07514911293983459
epoch: 133 training_loss 0.03046180211007595 test_loss: 0.07857841849327088
epoch: 134 training_loss 0.021468251612968742 test_loss: 0.08097776770591736
epoch: 135 training_loss 0.01383980830432847 test_loss: 0.09189993143081665
epoch: 136 training_loss 0.01256396233337 test_loss: 0.09388899207115173
epoch: 137 training_loss 0.009689397937618196 test_loss: 0.0927682340145111
epoch: 138 training_loss 0.007307453707326204 test_loss: 0.09381400942802429
epoch: 139 training_loss 0.006954387095756829 test_loss: 0.0985685110092163
epoch: 140 training_loss 0.0064314815343823285 test_loss: 0.0992834746837616
epoch: 141 training_loss 0.005186097898986191 test_loss: 0.09973339438438415
epoch: 142 training_loss 0.004881997122429311 test_loss: 0.1030659794807434
epoch: 143 training_loss 0.0043205905449576676 test_loss: 0.09743872284889221
epoch: 144 training_loss 0.004587987154955044 test_loss: 0.1014478087425232
epoch: 145 training_loss 0.004186296409461648 test_loss: 0.10426983833312989
epoch: 146 training_loss 0.003752673293929547 test_loss: 0.10259138345718384
epoch: 147 training_loss 0.00332755854120478 test_loss: 0.10990933179855347
epoch: 148 training_loss 0.003187535824254155 test_loss: 0.11161655187606812
epoch: 149 training_loss 0.0028806731483200566 test_loss: 0.10106339454650878
epoch: 0 training_loss 49.99617345809936 test_loss: 11.045465850830078
epoch: 1 training_loss 18.367458267211916 test_loss: 7.253760528564453
epoch: 2 training_loss 13.77979263305664 test_loss: 5.808613204956055
epoch: 3 training_loss 11.11843752861023 test_loss: 4.705601501464844
epoch: 4 training_loss 9.051625995635986 test_loss: 3.9526954650878907
epoch: 5 training_loss 7.708091721534729 test_loss: 3.4202342987060548
epoch: 6 training_loss 6.709228434562683 test_loss: 3.0400390625
epoch: 7 training_loss 5.881532740592957 test_loss: 2.8132286071777344
epoch: 8 training_loss 5.391420259475708 test_loss: 2.513584518432617
epoch: 9 training_loss 4.976972441673279 test_loss: 2.3577489852905273
epoch: 10 training_loss 4.645633652210235 test_loss: 2.188431167602539
epoch: 11 training_loss 4.378963274955749 test_loss: 2.093923568725586
epoch: 12 training_loss 4.070297298431396 test_loss: 1.9577293395996094
epoch: 13 training_loss 3.9402874517440796 test_loss: 1.9355031967163085
epoch: 14 training_loss 3.7783989191055296 test_loss: 1.8182365417480468
epoch: 15 training_loss 3.5935888481140137 test_loss: 1.7473499298095703
epoch: 16 training_loss 3.5060688424110413 test_loss: 1.7176897048950195
epoch: 17 training_loss 3.391475794315338 test_loss: 1.642286491394043
epoch: 18 training_loss 3.266245512962341 test_loss: 1.5975738525390626
epoch: 19 training_loss 3.1388602805137635 test_loss: 1.5440455436706544
epoch: 20 training_loss 3.133861713409424 test_loss: 1.5187337875366211
epoch: 21 training_loss 3.054251115322113 test_loss: 1.5092146873474122
epoch: 22 training_loss 2.9328401374816893 test_loss: 1.4555489540100097
epoch: 23 training_loss 2.9246963095664977 test_loss: 1.4208513259887696
epoch: 24 training_loss 2.797869725227356 test_loss: 1.3877254486083985
epoch: 25 training_loss 2.8079316806793213 test_loss: 1.4078367233276368
epoch: 26 training_loss 2.650061135292053 test_loss: 1.3372403144836427
epoch: 27 training_loss 2.599003567695618 test_loss: 1.334632968902588
epoch: 28 training_loss 2.647264995574951 test_loss: 1.311655330657959
epoch: 29 training_loss 2.6092283296585084 test_loss: 1.2748273849487304
epoch: 30 training_loss 2.499938406944275 test_loss: 1.2864837646484375
epoch: 31 training_loss 2.5584477734565736 test_loss: 1.2496939659118653
epoch: 32 training_loss 2.4569697403907775 test_loss: 1.2389161109924316
epoch: 33 training_loss 2.4369320344924925 test_loss: 1.216496467590332
epoch: 34 training_loss 2.3852527165412902 test_loss: 1.210489559173584
epoch: 35 training_loss 2.361580686569214 test_loss: 1.1927875518798827
epoch: 36 training_loss 2.328878707885742 test_loss: 1.1713827133178711
epoch: 37 training_loss 2.304030624628067 test_loss: 1.1777043342590332
epoch: 38 training_loss 2.259953758716583 test_loss: 1.1594014167785645
epoch: 39 training_loss 2.2049855530261993 test_loss: 1.1383362770080567
epoch: 40 training_loss 2.1951103019714355 test_loss: 1.1214971542358398
epoch: 41 training_loss 2.217186678647995 test_loss: 1.156799602508545
epoch: 42 training_loss 2.239673937559128 test_loss: 1.0963918685913085
epoch: 43 training_loss 2.2009235048294067 test_loss: 1.1063026428222655
epoch: 44 training_loss 2.115524994134903 test_loss: 1.0826767921447753
epoch: 45 training_loss 2.0831622922420503 test_loss: 1.0952383041381837
epoch: 46 training_loss 2.133870564699173 test_loss: 1.101595973968506
epoch: 47 training_loss 2.0449500560760496 test_loss: 1.0589337348937988
epoch: 48 training_loss 2.041278198957443 test_loss: 1.0626497268676758
epoch: 49 training_loss 2.015270881652832 test_loss: 1.0566374778747558
epoch: 50 training_loss 2.0540361332893373 test_loss: 1.0498101234436035
epoch: 51 training_loss 2.0193615221977232 test_loss: 1.040721321105957
epoch: 52 training_loss 1.9988587379455567 test_loss: 1.016913604736328
epoch: 53 training_loss 1.9776381862163543 test_loss: 1.0068836212158203
epoch: 54 training_loss 1.9613300931453705 test_loss: 1.0197253227233887
epoch: 55 training_loss 1.9677083945274354 test_loss: 0.9945226669311523
epoch: 56 training_loss 1.9229193532466888 test_loss: 1.0053946495056152
epoch: 57 training_loss 1.9710870826244353 test_loss: 0.9870118141174317
epoch: 58 training_loss 1.9054400372505187 test_loss: 0.9800682067871094
epoch: 59 training_loss 1.8911748719215393 test_loss: 0.9782501220703125
epoch: 60 training_loss 1.9111098980903625 test_loss: 0.9642213821411133
epoch: 61 training_loss 1.882460869550705 test_loss: 0.9792844772338867
epoch: 62 training_loss 1.9028150498867036 test_loss: 0.9585132598876953
epoch: 63 training_loss 1.8346218276023865 test_loss: 0.9530887603759766
epoch: 64 training_loss 1.8798991549015045 test_loss: 0.9448304176330566
epoch: 65 training_loss 1.8538642203807831 test_loss: 0.9494470596313477
epoch: 66 training_loss 1.8069157767295838 test_loss: 0.9440587043762207
epoch: 67 training_loss 1.846480438709259 test_loss: 0.93496732711792
epoch: 68 training_loss 1.8010902905464172 test_loss: 0.9320448875427246
epoch: 69 training_loss 1.8009184682369233 test_loss: 0.9300904273986816
epoch: 70 training_loss 1.769684648513794 test_loss: 0.9350673675537109
epoch: 71 training_loss 1.8079355311393739 test_loss: 0.9186023712158203
epoch: 72 training_loss 1.779112113714218 test_loss: 0.9410340309143066
epoch: 73 training_loss 1.784740422964096 test_loss: 0.9170914649963379
epoch: 74 training_loss 1.7670763146877289 test_loss: 0.9082562446594238
epoch: 75 training_loss 1.7409964072704316 test_loss: 0.9131543159484863
epoch: 76 training_loss 1.7904094457626343 test_loss: 0.9135847091674805
epoch: 77 training_loss 1.747859880924225 test_loss: 0.8983724594116211
epoch: 78 training_loss 1.7610539543628692 test_loss: 0.8998529434204101
epoch: 79 training_loss 1.7026851928234101 test_loss: 0.9089272499084473
epoch: 80 training_loss 1.735064185857773 test_loss: 0.8963491439819335
epoch: 81 training_loss 1.730017867088318 test_loss: 0.9031121253967285
epoch: 82 training_loss 1.721484158039093 test_loss: 0.9045104026794434
epoch: 83 training_loss 1.6578880143165589 test_loss: 0.8908084869384766
epoch: 84 training_loss 1.7020727503299713 test_loss: 0.8815798759460449
epoch: 85 training_loss 1.693452343940735 test_loss: 0.8875233650207519
epoch: 86 training_loss 1.7175207078456878 test_loss: 0.8705890655517579
epoch: 87 training_loss 1.656618446111679 test_loss: 0.890876293182373
epoch: 88 training_loss 1.6607283568382263 test_loss: 0.8778021812438965
epoch: 89 training_loss 1.6721719646453856 test_loss: 0.8796267509460449
epoch: 90 training_loss 1.6474686253070832 test_loss: 0.8741782188415528
epoch: 91 training_loss 1.6893331956863404 test_loss: 0.8743655204772949
epoch: 92 training_loss 1.6744442903995513 test_loss: 0.860985279083252
epoch: 93 training_loss 1.6381566774845124 test_loss: 0.857975959777832
epoch: 94 training_loss 1.6279702627658843 test_loss: 0.8636798858642578
epoch: 95 training_loss 1.65808274269104 test_loss: 0.8733329772949219
epoch: 96 training_loss 1.6654905760288239 test_loss: 0.8580232620239258
epoch: 97 training_loss 1.6145073103904723 test_loss: 0.8551748275756836
epoch: 98 training_loss 1.6320612263679504 test_loss: 0.8475737571716309
epoch: 99 training_loss 1.6227454781532287 test_loss: 0.8808681488037109
epoch: 100 training_loss 1.6285201811790466 test_loss: 0.8591780662536621
epoch: 101 training_loss 1.6370313894748687 test_loss: 0.8587854385375977
epoch: 102 training_loss 1.6529281449317932 test_loss: 0.8454909324645996
epoch: 103 training_loss 1.6395990252494812 test_loss: 0.8465173721313477
epoch: 104 training_loss 1.5966560232639313 test_loss: 0.8405056953430176
epoch: 105 training_loss 1.5904185688495636 test_loss: 0.8366308212280273
epoch: 106 training_loss 1.6075106179714203 test_loss: 0.8448622703552247
epoch: 107 training_loss 1.593573362827301 test_loss: 0.8360503196716309
epoch: 108 training_loss 1.597604205608368 test_loss: 0.8338218688964844
epoch: 109 training_loss 1.615229642391205 test_loss: 0.8368059158325195
epoch: 110 training_loss 1.607563190460205 test_loss: 0.8512960433959961
epoch: 111 training_loss 1.5891746544837952 test_loss: 0.8220162391662598
epoch: 112 training_loss 1.575686993598938 test_loss: 0.8128785133361817
epoch: 113 training_loss 1.5866100823879241 test_loss: 0.8210723876953125
epoch: 114 training_loss 1.5617534852027892 test_loss: 0.8217094421386719
epoch: 115 training_loss 1.5546586835384368 test_loss: 0.8264176368713378
epoch: 116 training_loss 1.557009720802307 test_loss: 0.8258308410644531
epoch: 117 training_loss 1.5783395171165466 test_loss: 0.8133748054504395
epoch: 118 training_loss 1.5554041409492492 test_loss: 0.81588134765625
epoch: 119 training_loss 1.5682666730880737 test_loss: 0.8114190101623535
epoch: 120 training_loss 1.5524584543704987 test_loss: 0.812617301940918
epoch: 121 training_loss 1.534590961933136 test_loss: 0.7964619159698486
epoch: 122 training_loss 1.5455167782306671 test_loss: 0.8084394454956054
epoch: 123 training_loss 1.5438033211231232 test_loss: 0.8113570213317871
epoch: 124 training_loss 1.5421633994579316 test_loss: 0.8018366813659668
epoch: 125 training_loss 1.529833847284317 test_loss: 0.8159692764282227
epoch: 126 training_loss 1.5403372740745545 test_loss: 0.795152235031128
epoch: 127 training_loss 1.5208425688743592 test_loss: 0.8024810791015625
epoch: 128 training_loss 1.535795236825943 test_loss: 0.7902114868164063
epoch: 129 training_loss 1.510764776468277 test_loss: 0.809254264831543
epoch: 130 training_loss 1.5245660758018493 test_loss: 0.7890304088592529
epoch: 131 training_loss 1.5133337163925171 test_loss: 0.7886957168579102
epoch: 132 training_loss 1.5381577956676482 test_loss: 0.7938910484313965
epoch: 133 training_loss 1.51610892534256 test_loss: 0.8091934204101563
epoch: 134 training_loss 1.4994407272338868 test_loss: 0.8009381294250488
epoch: 135 training_loss 1.4993685328960418 test_loss: 0.7893166542053223
epoch: 136 training_loss 1.4917012202739715 test_loss: 0.7919480323791503
epoch: 137 training_loss 1.507583612203598 test_loss: 0.7863237857818604
epoch: 138 training_loss 1.4915452933311462 test_loss: 0.7853734493255615
epoch: 139 training_loss 1.490254271030426 test_loss: 0.7816547870635986
epoch: 140 training_loss 1.4993383467197419 test_loss: 0.8073504447937012
epoch: 141 training_loss 1.4824140548706055 test_loss: 0.7745896339416504
epoch: 142 training_loss 1.5120717930793761 test_loss: 0.7738418102264404
epoch: 143 training_loss 1.4978042006492616 test_loss: 0.7819060325622559
epoch: 144 training_loss 1.4726226711273194 test_loss: 0.7896156787872315
epoch: 145 training_loss 1.4850885474681854 test_loss: 0.7652240753173828
epoch: 146 training_loss 1.4810168278217315 test_loss: 0.7711798191070557
epoch: 147 training_loss 1.4635461747646332 test_loss: 0.76650972366333
epoch: 148 training_loss 1.4879579532146454 test_loss: 0.7807196140289306
epoch: 149 training_loss 1.45264741897583 test_loss: 0.7762730598449707
5076.484239770607
episode: 0 training return: tensor(158.8303, device='cuda:0')
episode: 1 training return: tensor(61.0509, device='cuda:0')
episode: 2 training return: tensor(169.2458, device='cuda:0')
episode: 3 training return: tensor(3.7410, device='cuda:0')
epoch: 1 test_true_pfm: 5173.740431052473 sim_pfm: 130.95488468161784
episode: 4 training return: tensor(-72.9484, device='cuda:0')
episode: 5 training return: tensor(-49.9434, device='cuda:0')
episode: 6 training return: tensor(74.7465, device='cuda:0')
episode: 7 training return: tensor(-96.1152, device='cuda:0')
epoch: 2 test_true_pfm: 5103.46639944482 sim_pfm: 94.88201544880091
episode: 8 training return: tensor(-57.8918, device='cuda:0')
episode: 9 training return: tensor(116.6342, device='cuda:0')
episode: 10 training return: tensor(141.8487, device='cuda:0')
episode: 11 training return: tensor(-110.6580, device='cuda:0')
epoch: 3 test_true_pfm: 4807.831358781761 sim_pfm: 93.06699748182048
episode: 12 training return: tensor(144.8550, device='cuda:0')
episode: 13 training return: tensor(105.1232, device='cuda:0')
episode: 14 training return: tensor(-143.0919, device='cuda:0')
episode: 15 training return: tensor(43.2705, device='cuda:0')
epoch: 4 test_true_pfm: 5064.219889858107 sim_pfm: -250.94727823840608
episode: 16 training return: tensor(-96.0553, device='cuda:0')
episode: 17 training return: tensor(92.7397, device='cuda:0')
episode: 18 training return: tensor(180.7665, device='cuda:0')
episode: 19 training return: tensor(-45.7903, device='cuda:0')
epoch: 5 test_true_pfm: 5095.759415494705 sim_pfm: 147.29791975025242
episode: 20 training return: tensor(163.0972, device='cuda:0')
episode: 21 training return: tensor(42.8084, device='cuda:0')
episode: 22 training return: tensor(163.7995, device='cuda:0')
episode: 23 training return: tensor(161.8943, device='cuda:0')
epoch: 6 test_true_pfm: 5020.064996860826 sim_pfm: 66.4765671902763
episode: 24 training return: tensor(35.7802, device='cuda:0')
episode: 25 training return: tensor(133.4736, device='cuda:0')
episode: 26 training return: tensor(-44.7629, device='cuda:0')
episode: 27 training return: tensor(6.2732, device='cuda:0')
epoch: 7 test_true_pfm: 5030.599915001953 sim_pfm: 256.79464796573546
episode: 28 training return: tensor(69.0947, device='cuda:0')
episode: 29 training return: tensor(-59.3248, device='cuda:0')
episode: 30 training return: tensor(64.8751, device='cuda:0')
episode: 31 training return: tensor(273.9913, device='cuda:0')
epoch: 8 test_true_pfm: 5184.325539898634 sim_pfm: 137.21039497336218
episode: 32 training return: tensor(242.0052, device='cuda:0')
episode: 33 training return: tensor(95.4036, device='cuda:0')
episode: 34 training return: tensor(39.5304, device='cuda:0')
episode: 35 training return: tensor(69.2457, device='cuda:0')
epoch: 9 test_true_pfm: 5156.270296610135 sim_pfm: -116.74263165503119
episode: 36 training return: tensor(188.3917, device='cuda:0')
episode: 37 training return: tensor(104.5657, device='cuda:0')
episode: 38 training return: tensor(311.5444, device='cuda:0')
episode: 39 training return: tensor(283.9461, device='cuda:0')
epoch: 10 test_true_pfm: 5127.475143534411 sim_pfm: 257.0057701386395
episode: 40 training return: tensor(51.2078, device='cuda:0')
episode: 41 training return: tensor(22.8828, device='cuda:0')
episode: 42 training return: tensor(108.7215, device='cuda:0')
episode: 43 training return: tensor(335.6916, device='cuda:0')
epoch: 11 test_true_pfm: 5190.317975039092 sim_pfm: 206.49965232789205
episode: 44 training return: tensor(-2.2296, device='cuda:0')
episode: 45 training return: tensor(50.8941, device='cuda:0')
episode: 46 training return: tensor(236.9822, device='cuda:0')
episode: 47 training return: tensor(163.9018, device='cuda:0')
epoch: 12 test_true_pfm: 5138.066064615097 sim_pfm: 226.39048184237132
episode: 48 training return: tensor(77.9795, device='cuda:0')
episode: 49 training return: tensor(22.3294, device='cuda:0')
episode: 50 training return: tensor(106.3148, device='cuda:0')
episode: 51 training return: tensor(185.2311, device='cuda:0')
epoch: 13 test_true_pfm: 5288.91921146838 sim_pfm: 210.8680717292785
episode: 52 training return: tensor(217.2576, device='cuda:0')
episode: 53 training return: tensor(144.6639, device='cuda:0')
episode: 54 training return: tensor(117.3452, device='cuda:0')
episode: 55 training return: tensor(26.1059, device='cuda:0')
epoch: 14 test_true_pfm: 5203.989362245975 sim_pfm: 276.37474342919694
episode: 56 training return: tensor(287.7666, device='cuda:0')
episode: 57 training return: tensor(393.0659, device='cuda:0')
episode: 58 training return: tensor(366.4652, device='cuda:0')
episode: 59 training return: tensor(315.4301, device='cuda:0')
epoch: 15 test_true_pfm: 5321.180429396055 sim_pfm: 371.70724946210004
episode: 60 training return: tensor(218.0046, device='cuda:0')
episode: 61 training return: tensor(212.0588, device='cuda:0')
episode: 62 training return: tensor(122.8207, device='cuda:0')
episode: 63 training return: tensor(-74.5239, device='cuda:0')
epoch: 16 test_true_pfm: 5200.997059338893 sim_pfm: 244.53235080917753
episode: 64 training return: tensor(94.7200, device='cuda:0')
episode: 65 training return: tensor(193.5361, device='cuda:0')
episode: 66 training return: tensor(242.7105, device='cuda:0')
episode: 67 training return: tensor(320.6458, device='cuda:0')
epoch: 17 test_true_pfm: 5311.664886402824 sim_pfm: 310.08862489514286
episode: 68 training return: tensor(110.5091, device='cuda:0')
episode: 69 training return: tensor(199.5046, device='cuda:0')
episode: 70 training return: tensor(120.6108, device='cuda:0')
episode: 71 training return: tensor(275.7895, device='cuda:0')
epoch: 18 test_true_pfm: 5309.281935090029 sim_pfm: 298.6063463164416
episode: 72 training return: tensor(269.0341, device='cuda:0')
episode: 73 training return: tensor(189.9061, device='cuda:0')
episode: 74 training return: tensor(308.4791, device='cuda:0')
episode: 75 training return: tensor(133.5146, device='cuda:0')
epoch: 19 test_true_pfm: 5390.087524847717 sim_pfm: 402.7115048686198
episode: 76 training return: tensor(206.8501, device='cuda:0')
episode: 77 training return: tensor(125.1305, device='cuda:0')
episode: 78 training return: tensor(441.7710, device='cuda:0')
episode: 79 training return: tensor(88.6071, device='cuda:0')
epoch: 20 test_true_pfm: 5347.851495055237 sim_pfm: 286.24209165524616
episode: 80 training return: tensor(233.1528, device='cuda:0')
episode: 81 training return: tensor(166.4860, device='cuda:0')
episode: 82 training return: tensor(395.1133, device='cuda:0')
episode: 83 training return: tensor(265.5125, device='cuda:0')
epoch: 21 test_true_pfm: 5399.7664735155295 sim_pfm: 356.41420589611516
episode: 84 training return: tensor(218.0434, device='cuda:0')
episode: 85 training return: tensor(294.6158, device='cuda:0')
episode: 86 training return: tensor(166.6804, device='cuda:0')
episode: 87 training return: tensor(266.0396, device='cuda:0')
epoch: 22 test_true_pfm: 5407.146681125244 sim_pfm: 378.467721737766
episode: 88 training return: tensor(-12.9603, device='cuda:0')
episode: 89 training return: tensor(226.0925, device='cuda:0')
episode: 90 training return: tensor(392.6727, device='cuda:0')
episode: 91 training return: tensor(246.4107, device='cuda:0')
epoch: 23 test_true_pfm: 5338.313342532114 sim_pfm: 387.12553850916447
episode: 92 training return: tensor(176.1244, device='cuda:0')
episode: 93 training return: tensor(215.2637, device='cuda:0')
episode: 94 training return: tensor(209.1389, device='cuda:0')
episode: 95 training return: tensor(351.0664, device='cuda:0')
epoch: 24 test_true_pfm: 5516.045693774657 sim_pfm: 398.0331002000409
episode: 96 training return: tensor(157.8046, device='cuda:0')
episode: 97 training return: tensor(397.1994, device='cuda:0')
episode: 98 training return: tensor(305.9349, device='cuda:0')
episode: 99 training return: tensor(404.9830, device='cuda:0')
epoch: 25 test_true_pfm: 5380.514864447828 sim_pfm: 455.1884037195705
episode: 100 training return: tensor(309.8709, device='cuda:0')
episode: 101 training return: tensor(320.1724, device='cuda:0')
episode: 102 training return: tensor(288.3538, device='cuda:0')
episode: 103 training return: tensor(290.7466, device='cuda:0')
epoch: 26 test_true_pfm: 5439.586975249792 sim_pfm: 308.15253243839834
episode: 104 training return: tensor(296.2873, device='cuda:0')
episode: 105 training return: tensor(282.0290, device='cuda:0')
episode: 106 training return: tensor(297.1326, device='cuda:0')
episode: 107 training return: tensor(353.3033, device='cuda:0')
epoch: 27 test_true_pfm: 5401.861523577557 sim_pfm: 435.25274352690514
episode: 108 training return: tensor(168.0590, device='cuda:0')
episode: 109 training return: tensor(145.1278, device='cuda:0')
episode: 110 training return: tensor(330.4596, device='cuda:0')
episode: 111 training return: tensor(311.4780, device='cuda:0')
epoch: 28 test_true_pfm: 5309.889923192342 sim_pfm: 522.8062116613922
episode: 112 training return: tensor(165.9875, device='cuda:0')
episode: 113 training return: tensor(282.5565, device='cuda:0')
episode: 114 training return: tensor(284.7404, device='cuda:0')
episode: 115 training return: tensor(307.6931, device='cuda:0')
epoch: 29 test_true_pfm: 5407.747048052725 sim_pfm: 366.1147208135905
episode: 116 training return: tensor(381.5574, device='cuda:0')
episode: 117 training return: tensor(345.1656, device='cuda:0')
episode: 118 training return: tensor(140.4159, device='cuda:0')
episode: 119 training return: tensor(89.9738, device='cuda:0')
epoch: 30 test_true_pfm: 5439.911667206102 sim_pfm: 458.13863058601663
episode: 120 training return: tensor(370.6531, device='cuda:0')
episode: 121 training return: tensor(221.5940, device='cuda:0')
episode: 122 training return: tensor(296.6663, device='cuda:0')
episode: 123 training return: tensor(400.6019, device='cuda:0')
epoch: 31 test_true_pfm: 5421.198579381517 sim_pfm: 420.4947696872987
episode: 124 training return: tensor(334.4814, device='cuda:0')
episode: 125 training return: tensor(432.1561, device='cuda:0')
episode: 126 training return: tensor(250.0869, device='cuda:0')
episode: 127 training return: tensor(467.9211, device='cuda:0')
epoch: 32 test_true_pfm: 5388.750678405887 sim_pfm: 501.0342892122765
episode: 128 training return: tensor(256.0799, device='cuda:0')
episode: 129 training return: tensor(212.5912, device='cuda:0')
episode: 130 training return: tensor(389.3395, device='cuda:0')
episode: 131 training return: tensor(245.7628, device='cuda:0')
epoch: 33 test_true_pfm: 5475.112851964458 sim_pfm: 405.8392496031399
episode: 132 training return: tensor(122.1048, device='cuda:0')
episode: 133 training return: tensor(371.4729, device='cuda:0')
episode: 134 training return: tensor(239.9077, device='cuda:0')
episode: 135 training return: tensor(195.6338, device='cuda:0')
epoch: 34 test_true_pfm: 5516.393560179291 sim_pfm: 460.4745802715576
episode: 136 training return: tensor(211.5357, device='cuda:0')
episode: 137 training return: tensor(260.4056, device='cuda:0')
episode: 138 training return: tensor(333.9406, device='cuda:0')
episode: 139 training return: tensor(286.2806, device='cuda:0')
epoch: 35 test_true_pfm: 5397.608495899276 sim_pfm: 512.1942185397105
episode: 140 training return: tensor(197.9267, device='cuda:0')
episode: 141 training return: tensor(216.8227, device='cuda:0')
episode: 142 training return: tensor(424.1340, device='cuda:0')
episode: 143 training return: tensor(177.6026, device='cuda:0')
epoch: 36 test_true_pfm: 5553.78460329387 sim_pfm: 507.35640163345187
episode: 144 training return: tensor(348.0863, device='cuda:0')
episode: 145 training return: tensor(427.8352, device='cuda:0')
episode: 146 training return: tensor(444.0374, device='cuda:0')
episode: 147 training return: tensor(246.3927, device='cuda:0')
epoch: 37 test_true_pfm: 5569.6599494115035 sim_pfm: 476.1731503613216
episode: 148 training return: tensor(465.1935, device='cuda:0')
episode: 149 training return: tensor(390.2988, device='cuda:0')
episode: 150 training return: tensor(410.0107, device='cuda:0')
episode: 151 training return: tensor(371.8427, device='cuda:0')
epoch: 38 test_true_pfm: 5494.788399827477 sim_pfm: 481.9463476331827
episode: 152 training return: tensor(211.4178, device='cuda:0')
episode: 153 training return: tensor(192.8005, device='cuda:0')
episode: 154 training return: tensor(423.7471, device='cuda:0')
episode: 155 training return: tensor(456.0968, device='cuda:0')
epoch: 39 test_true_pfm: 5538.460306505567 sim_pfm: 559.5724045420744
episode: 156 training return: tensor(228.3186, device='cuda:0')
episode: 157 training return: tensor(313.8342, device='cuda:0')
episode: 158 training return: tensor(167.7131, device='cuda:0')
episode: 159 training return: tensor(319.6239, device='cuda:0')
epoch: 40 test_true_pfm: 5530.248046995446 sim_pfm: 416.71802665584255
episode: 160 training return: tensor(415.4095, device='cuda:0')
episode: 161 training return: tensor(271.6477, device='cuda:0')
episode: 162 training return: tensor(420.5762, device='cuda:0')
episode: 163 training return: tensor(206.7997, device='cuda:0')
epoch: 41 test_true_pfm: 5610.147701606237 sim_pfm: 538.2151362358903
episode: 164 training return: tensor(136.9030, device='cuda:0')
episode: 165 training return: tensor(316.0283, device='cuda:0')
episode: 166 training return: tensor(479.5502, device='cuda:0')
episode: 167 training return: tensor(331.3108, device='cuda:0')
epoch: 42 test_true_pfm: 5461.815618501253 sim_pfm: 328.3768766799088
episode: 168 training return: tensor(264.8859, device='cuda:0')
episode: 169 training return: tensor(460.9915, device='cuda:0')
episode: 170 training return: tensor(460.4148, device='cuda:0')
episode: 171 training return: tensor(367.1554, device='cuda:0')
epoch: 43 test_true_pfm: 5473.916462296823 sim_pfm: 545.5666485199472
episode: 172 training return: tensor(401.2334, device='cuda:0')
episode: 173 training return: tensor(182.2764, device='cuda:0')
episode: 174 training return: tensor(430.1457, device='cuda:0')
episode: 175 training return: tensor(265.8351, device='cuda:0')
epoch: 44 test_true_pfm: 5465.32881494379 sim_pfm: 545.1372517017493
episode: 176 training return: tensor(234.0227, device='cuda:0')
episode: 177 training return: tensor(448.4088, device='cuda:0')
episode: 178 training return: tensor(451.3794, device='cuda:0')
episode: 179 training return: tensor(412.9776, device='cuda:0')
epoch: 45 test_true_pfm: 5536.01431482106 sim_pfm: 572.1896082086023
episode: 180 training return: tensor(299.3809, device='cuda:0')
episode: 181 training return: tensor(321.9645, device='cuda:0')
episode: 182 training return: tensor(464.5895, device='cuda:0')
episode: 183 training return: tensor(270.1512, device='cuda:0')
epoch: 46 test_true_pfm: 5642.637742672305 sim_pfm: 576.9625220571955
episode: 184 training return: tensor(380.3743, device='cuda:0')
episode: 185 training return: tensor(425.8948, device='cuda:0')
episode: 186 training return: tensor(348.8777, device='cuda:0')
episode: 187 training return: tensor(445.8846, device='cuda:0')
epoch: 47 test_true_pfm: 5541.6321907039055 sim_pfm: 512.1498215524867
episode: 188 training return: tensor(380.6376, device='cuda:0')
episode: 189 training return: tensor(485.7193, device='cuda:0')
episode: 190 training return: tensor(591.4726, device='cuda:0')
episode: 191 training return: tensor(390.5650, device='cuda:0')
epoch: 48 test_true_pfm: 5492.5130660968525 sim_pfm: 462.4778899416172
episode: 192 training return: tensor(480.4576, device='cuda:0')
episode: 193 training return: tensor(429.7415, device='cuda:0')
episode: 194 training return: tensor(182.6151, device='cuda:0')
episode: 195 training return: tensor(361.4028, device='cuda:0')
epoch: 49 test_true_pfm: 5436.774659630116 sim_pfm: 580.9752732551229
episode: 196 training return: tensor(312.2543, device='cuda:0')
episode: 197 training return: tensor(227.1501, device='cuda:0')
episode: 198 training return: tensor(542.3987, device='cuda:0')
episode: 199 training return: tensor(383.1474, device='cuda:0')
epoch: 50 test_true_pfm: 5583.3010387539 sim_pfm: 472.706189924075
episode: 200 training return: tensor(484.0941, device='cuda:0')
episode: 201 training return: tensor(352.7415, device='cuda:0')
episode: 202 training return: tensor(228.6486, device='cuda:0')
episode: 203 training return: tensor(438.1800, device='cuda:0')
epoch: 51 test_true_pfm: 5551.810027285285 sim_pfm: 497.78223091049585
episode: 204 training return: tensor(218.5810, device='cuda:0')
episode: 205 training return: tensor(429.6726, device='cuda:0')
episode: 206 training return: tensor(362.5486, device='cuda:0')
episode: 207 training return: tensor(432.5281, device='cuda:0')
epoch: 52 test_true_pfm: 5521.974889059401 sim_pfm: 513.2181472792096
episode: 208 training return: tensor(489.8647, device='cuda:0')
episode: 209 training return: tensor(283.7473, device='cuda:0')
episode: 210 training return: tensor(367.1349, device='cuda:0')
episode: 211 training return: tensor(345.1060, device='cuda:0')
epoch: 53 test_true_pfm: 5622.528438695345 sim_pfm: 571.418942644193
episode: 212 training return: tensor(425.8685, device='cuda:0')
episode: 213 training return: tensor(439.4917, device='cuda:0')
episode: 214 training return: tensor(516.0765, device='cuda:0')
episode: 215 training return: tensor(336.0599, device='cuda:0')
epoch: 54 test_true_pfm: 5541.591174418662 sim_pfm: 538.0914862867697
episode: 216 training return: tensor(461.9610, device='cuda:0')
episode: 217 training return: tensor(349.9337, device='cuda:0')
episode: 218 training return: tensor(292.5465, device='cuda:0')
episode: 219 training return: tensor(235.3774, device='cuda:0')
epoch: 55 test_true_pfm: 5463.976192284984 sim_pfm: 534.1721698351515
episode: 220 training return: tensor(308.8007, device='cuda:0')
episode: 221 training return: tensor(512.9731, device='cuda:0')
episode: 222 training return: tensor(565.9772, device='cuda:0')
episode: 223 training return: tensor(389.1213, device='cuda:0')
epoch: 56 test_true_pfm: 5661.383947624829 sim_pfm: 474.7941397884085
episode: 224 training return: tensor(563.5264, device='cuda:0')
episode: 225 training return: tensor(513.1002, device='cuda:0')
episode: 226 training return: tensor(486.7356, device='cuda:0')
episode: 227 training return: tensor(437.1153, device='cuda:0')
epoch: 57 test_true_pfm: 5677.026204592058 sim_pfm: 560.8936913849515
episode: 228 training return: tensor(397.0313, device='cuda:0')
episode: 229 training return: tensor(394.9094, device='cuda:0')
episode: 230 training return: tensor(302.2834, device='cuda:0')
episode: 231 training return: tensor(378.9012, device='cuda:0')
epoch: 58 test_true_pfm: 5656.05963985906 sim_pfm: 455.4486159635708
episode: 232 training return: tensor(319.7588, device='cuda:0')
episode: 233 training return: tensor(526.1525, device='cuda:0')
episode: 234 training return: tensor(501.9904, device='cuda:0')
episode: 235 training return: tensor(488.5717, device='cuda:0')
epoch: 59 test_true_pfm: 5638.906670051981 sim_pfm: 526.9494843838426
episode: 236 training return: tensor(494.9737, device='cuda:0')
episode: 237 training return: tensor(424.6848, device='cuda:0')
episode: 238 training return: tensor(471.3525, device='cuda:0')
episode: 239 training return: tensor(495.9955, device='cuda:0')
epoch: 60 test_true_pfm: 5495.171872819136 sim_pfm: 633.69724179393
episode: 240 training return: tensor(466.5258, device='cuda:0')
episode: 241 training return: tensor(486.2751, device='cuda:0')
episode: 242 training return: tensor(415.6252, device='cuda:0')
episode: 243 training return: tensor(418.7214, device='cuda:0')
epoch: 61 test_true_pfm: 5627.766749252503 sim_pfm: 629.6871121656926
episode: 244 training return: tensor(280.5226, device='cuda:0')
episode: 245 training return: tensor(468.8437, device='cuda:0')
episode: 246 training return: tensor(178.2665, device='cuda:0')
episode: 247 training return: tensor(435.2826, device='cuda:0')
epoch: 62 test_true_pfm: 5690.22785227753 sim_pfm: 614.0938001250228
episode: 248 training return: tensor(506.4541, device='cuda:0')
episode: 249 training return: tensor(557.5781, device='cuda:0')
episode: 250 training return: tensor(485.0750, device='cuda:0')
episode: 251 training return: tensor(460.4929, device='cuda:0')
epoch: 63 test_true_pfm: 5628.390209007586 sim_pfm: 458.21318561265554
episode: 252 training return: tensor(466.7708, device='cuda:0')
episode: 253 training return: tensor(315.2454, device='cuda:0')
episode: 254 training return: tensor(571.6794, device='cuda:0')
episode: 255 training return: tensor(511.1070, device='cuda:0')
epoch: 64 test_true_pfm: 5561.877850014723 sim_pfm: 620.7434132302491
episode: 256 training return: tensor(466.3882, device='cuda:0')
episode: 257 training return: tensor(530.5336, device='cuda:0')
episode: 258 training return: tensor(412.0316, device='cuda:0')
episode: 259 training return: tensor(387.0910, device='cuda:0')
epoch: 65 test_true_pfm: 5582.357669309073 sim_pfm: 511.11393011213903
episode: 260 training return: tensor(496.0147, device='cuda:0')
episode: 261 training return: tensor(455.9883, device='cuda:0')
episode: 262 training return: tensor(578.8068, device='cuda:0')
episode: 263 training return: tensor(370.1676, device='cuda:0')
epoch: 66 test_true_pfm: 5636.714520505779 sim_pfm: 656.3224338028425
episode: 264 training return: tensor(341.0790, device='cuda:0')
episode: 265 training return: tensor(441.4686, device='cuda:0')
episode: 266 training return: tensor(262.1765, device='cuda:0')
episode: 267 training return: tensor(571.3188, device='cuda:0')
epoch: 67 test_true_pfm: 5587.827928896612 sim_pfm: 614.833974906709
episode: 268 training return: tensor(586.6842, device='cuda:0')
episode: 269 training return: tensor(447.5112, device='cuda:0')
episode: 270 training return: tensor(416.1092, device='cuda:0')
episode: 271 training return: tensor(335.9235, device='cuda:0')
epoch: 68 test_true_pfm: 5569.506042887268 sim_pfm: 562.5517473433478
episode: 272 training return: tensor(475.7279, device='cuda:0')
episode: 273 training return: tensor(501.7646, device='cuda:0')
episode: 274 training return: tensor(461.2125, device='cuda:0')
episode: 275 training return: tensor(540.5186, device='cuda:0')
epoch: 69 test_true_pfm: 5654.810996387532 sim_pfm: 587.189481070498
episode: 276 training return: tensor(344.1514, device='cuda:0')
episode: 277 training return: tensor(318.6480, device='cuda:0')
episode: 278 training return: tensor(525.1774, device='cuda:0')
episode: 279 training return: tensor(360.6439, device='cuda:0')
epoch: 70 test_true_pfm: 5593.023776169226 sim_pfm: 584.0002548395811
episode: 280 training return: tensor(567.5112, device='cuda:0')
episode: 281 training return: tensor(409.1423, device='cuda:0')
episode: 282 training return: tensor(393.3007, device='cuda:0')
episode: 283 training return: tensor(504.4439, device='cuda:0')
epoch: 71 test_true_pfm: 5658.490247035116 sim_pfm: 620.757323453688
episode: 284 training return: tensor(601.7826, device='cuda:0')
episode: 285 training return: tensor(391.7563, device='cuda:0')
episode: 286 training return: tensor(512.2828, device='cuda:0')
episode: 287 training return: tensor(351.1427, device='cuda:0')
epoch: 72 test_true_pfm: 5718.461791454825 sim_pfm: 553.0270284262951
episode: 288 training return: tensor(555.6130, device='cuda:0')
episode: 289 training return: tensor(440.4391, device='cuda:0')
episode: 290 training return: tensor(447.2347, device='cuda:0')
episode: 291 training return: tensor(589.9265, device='cuda:0')
epoch: 73 test_true_pfm: 5594.336536141068 sim_pfm: 603.1163607304139
episode: 292 training return: tensor(439.7592, device='cuda:0')
episode: 293 training return: tensor(455.5288, device='cuda:0')
episode: 294 training return: tensor(356.3039, device='cuda:0')
episode: 295 training return: tensor(249.9999, device='cuda:0')
epoch: 74 test_true_pfm: 5608.815352825317 sim_pfm: 605.0886799655467
episode: 296 training return: tensor(646.5101, device='cuda:0')
episode: 297 training return: tensor(624.1333, device='cuda:0')
episode: 298 training return: tensor(552.4391, device='cuda:0')
episode: 299 training return: tensor(565.6443, device='cuda:0')
epoch: 75 test_true_pfm: 5533.432533400152 sim_pfm: 507.1606759848073
episode: 300 training return: tensor(596.1041, device='cuda:0')
episode: 301 training return: tensor(452.6624, device='cuda:0')
episode: 302 training return: tensor(490.8760, device='cuda:0')
episode: 303 training return: tensor(547.5013, device='cuda:0')
epoch: 76 test_true_pfm: 5676.633771017671 sim_pfm: 559.793760587209
episode: 304 training return: tensor(394.0518, device='cuda:0')
episode: 305 training return: tensor(536.1489, device='cuda:0')
episode: 306 training return: tensor(524.9821, device='cuda:0')
episode: 307 training return: tensor(472.7082, device='cuda:0')
epoch: 77 test_true_pfm: 5598.612162401269 sim_pfm: 682.3877624257779
episode: 308 training return: tensor(571.9061, device='cuda:0')
episode: 309 training return: tensor(344.0195, device='cuda:0')
episode: 310 training return: tensor(520.2031, device='cuda:0')
episode: 311 training return: tensor(512.5479, device='cuda:0')
epoch: 78 test_true_pfm: 5696.298571915065 sim_pfm: 577.0169144095853
episode: 312 training return: tensor(388.6564, device='cuda:0')
episode: 313 training return: tensor(471.6061, device='cuda:0')
episode: 314 training return: tensor(272.7353, device='cuda:0')
episode: 315 training return: tensor(432.5055, device='cuda:0')
epoch: 79 test_true_pfm: 5649.970713494841 sim_pfm: 600.8108662994733
episode: 316 training return: tensor(520.8414, device='cuda:0')
episode: 317 training return: tensor(545.9998, device='cuda:0')
episode: 318 training return: tensor(334.3696, device='cuda:0')
episode: 319 training return: tensor(633.1566, device='cuda:0')
epoch: 80 test_true_pfm: 5665.338678728774 sim_pfm: 576.3464853904055
episode: 320 training return: tensor(537.8115, device='cuda:0')
episode: 321 training return: tensor(428.8630, device='cuda:0')
episode: 322 training return: tensor(416.3080, device='cuda:0')
episode: 323 training return: tensor(462.7239, device='cuda:0')
epoch: 81 test_true_pfm: 5643.838557106825 sim_pfm: 588.77377207682
episode: 324 training return: tensor(301.3629, device='cuda:0')
episode: 325 training return: tensor(502.7400, device='cuda:0')
episode: 326 training return: tensor(538.3766, device='cuda:0')
episode: 327 training return: tensor(345.7561, device='cuda:0')
epoch: 82 test_true_pfm: 5634.250518823418 sim_pfm: 558.5330327187354
episode: 328 training return: tensor(679.7133, device='cuda:0')
episode: 329 training return: tensor(512.6436, device='cuda:0')
episode: 330 training return: tensor(429.4954, device='cuda:0')
episode: 331 training return: tensor(628.2338, device='cuda:0')
epoch: 83 test_true_pfm: 5748.737437633575 sim_pfm: 697.1527336845174
episode: 332 training return: tensor(538.7180, device='cuda:0')
episode: 333 training return: tensor(581.3494, device='cuda:0')
episode: 334 training return: tensor(543.7637, device='cuda:0')
episode: 335 training return: tensor(461.3988, device='cuda:0')
epoch: 84 test_true_pfm: 5591.4023845226475 sim_pfm: 631.8911806652322
episode: 336 training return: tensor(488.3557, device='cuda:0')
episode: 337 training return: tensor(570.8357, device='cuda:0')
episode: 338 training return: tensor(449.0497, device='cuda:0')
episode: 339 training return: tensor(530.0472, device='cuda:0')
epoch: 85 test_true_pfm: 5651.98570169711 sim_pfm: 634.373456840675
episode: 340 training return: tensor(480.3103, device='cuda:0')
episode: 341 training return: tensor(495.6540, device='cuda:0')
episode: 342 training return: tensor(484.0867, device='cuda:0')
episode: 343 training return: tensor(342.8033, device='cuda:0')
epoch: 86 test_true_pfm: 5597.804339056817 sim_pfm: 547.1964080093118
episode: 344 training return: tensor(611.9748, device='cuda:0')
episode: 345 training return: tensor(413.8649, device='cuda:0')
episode: 346 training return: tensor(539.9973, device='cuda:0')
episode: 347 training return: tensor(424.4618, device='cuda:0')
epoch: 87 test_true_pfm: 5582.228493524995 sim_pfm: 661.0013310230182
episode: 348 training return: tensor(495.8900, device='cuda:0')
episode: 349 training return: tensor(504.0312, device='cuda:0')
episode: 350 training return: tensor(597.5373, device='cuda:0')
episode: 351 training return: tensor(350.3769, device='cuda:0')
epoch: 88 test_true_pfm: 5682.685160883241 sim_pfm: 481.79330131660873
episode: 352 training return: tensor(523.7770, device='cuda:0')
episode: 353 training return: tensor(442.9442, device='cuda:0')
episode: 354 training return: tensor(556.7498, device='cuda:0')
episode: 355 training return: tensor(583.3909, device='cuda:0')
epoch: 89 test_true_pfm: 5659.776085345849 sim_pfm: 611.1291301020732
episode: 356 training return: tensor(333.1495, device='cuda:0')
episode: 357 training return: tensor(488.3162, device='cuda:0')
episode: 358 training return: tensor(606.3752, device='cuda:0')
episode: 359 training return: tensor(530.5942, device='cuda:0')
epoch: 90 test_true_pfm: 5752.995946564831 sim_pfm: 583.6404359214939
episode: 360 training return: tensor(572.9691, device='cuda:0')
episode: 361 training return: tensor(576.2274, device='cuda:0')
episode: 362 training return: tensor(489.8312, device='cuda:0')
episode: 363 training return: tensor(477.1444, device='cuda:0')
epoch: 91 test_true_pfm: 5574.893123605248 sim_pfm: 678.22695806801
episode: 364 training return: tensor(542.0117, device='cuda:0')
episode: 365 training return: tensor(416.2090, device='cuda:0')
episode: 366 training return: tensor(515.4069, device='cuda:0')
episode: 367 training return: tensor(551.3164, device='cuda:0')
epoch: 92 test_true_pfm: 5654.258915033381 sim_pfm: 621.6138926359126
episode: 368 training return: tensor(481.8589, device='cuda:0')
episode: 369 training return: tensor(559.6873, device='cuda:0')
episode: 370 training return: tensor(558.8256, device='cuda:0')
episode: 371 training return: tensor(467.3647, device='cuda:0')
epoch: 93 test_true_pfm: 5628.631370019098 sim_pfm: 613.2471090705754
episode: 372 training return: tensor(589.2847, device='cuda:0')
episode: 373 training return: tensor(524.2368, device='cuda:0')
episode: 374 training return: tensor(497.1201, device='cuda:0')
episode: 375 training return: tensor(427.9132, device='cuda:0')
epoch: 94 test_true_pfm: 5636.7791255846605 sim_pfm: 671.0609413076503
episode: 376 training return: tensor(449.4455, device='cuda:0')
episode: 377 training return: tensor(550.3307, device='cuda:0')
episode: 378 training return: tensor(321.4999, device='cuda:0')
episode: 379 training return: tensor(589.1207, device='cuda:0')
epoch: 95 test_true_pfm: 5697.232610846378 sim_pfm: 602.8390045805523
episode: 380 training return: tensor(473.1377, device='cuda:0')
episode: 381 training return: tensor(552.9138, device='cuda:0')
episode: 382 training return: tensor(494.5611, device='cuda:0')
episode: 383 training return: tensor(534.5002, device='cuda:0')
epoch: 96 test_true_pfm: 5785.369743095936 sim_pfm: 702.9041522811943
episode: 384 training return: tensor(462.6429, device='cuda:0')
episode: 385 training return: tensor(587.4752, device='cuda:0')
episode: 386 training return: tensor(590.6954, device='cuda:0')
episode: 387 training return: tensor(520.1528, device='cuda:0')
epoch: 97 test_true_pfm: 5736.952870061257 sim_pfm: 583.7438221695678
episode: 388 training return: tensor(562.3211, device='cuda:0')
episode: 389 training return: tensor(504.1272, device='cuda:0')
episode: 390 training return: tensor(420.4647, device='cuda:0')
episode: 391 training return: tensor(386.6284, device='cuda:0')
epoch: 98 test_true_pfm: 5657.495607194124 sim_pfm: 615.9875913191742
episode: 392 training return: tensor(518.0934, device='cuda:0')
episode: 393 training return: tensor(484.0989, device='cuda:0')
episode: 394 training return: tensor(571.6036, device='cuda:0')
episode: 395 training return: tensor(557.4949, device='cuda:0')
epoch: 99 test_true_pfm: 5655.702586609346 sim_pfm: 605.1363069595924
episode: 396 training return: tensor(488.7375, device='cuda:0')
episode: 397 training return: tensor(586.3477, device='cuda:0')
episode: 398 training return: tensor(569.3167, device='cuda:0')
episode: 399 training return: tensor(574.6595, device='cuda:0')
epoch: 100 test_true_pfm: 5582.194669595422 sim_pfm: 658.2409190897209
episode: 400 training return: tensor(388.5827, device='cuda:0')
episode: 401 training return: tensor(527.3068, device='cuda:0')
episode: 402 training return: tensor(595.9261, device='cuda:0')
episode: 403 training return: tensor(618.8010, device='cuda:0')
epoch: 101 test_true_pfm: 5776.5623924036545 sim_pfm: 665.6525993907513
episode: 404 training return: tensor(507.3731, device='cuda:0')
episode: 405 training return: tensor(559.1733, device='cuda:0')
episode: 406 training return: tensor(527.3264, device='cuda:0')
episode: 407 training return: tensor(625.4542, device='cuda:0')
epoch: 102 test_true_pfm: 5675.246802999504 sim_pfm: 595.9111634607738
episode: 408 training return: tensor(569.3563, device='cuda:0')
episode: 409 training return: tensor(428.7110, device='cuda:0')
episode: 410 training return: tensor(540.5866, device='cuda:0')
episode: 411 training return: tensor(438.7668, device='cuda:0')
epoch: 103 test_true_pfm: 5717.907471246034 sim_pfm: 705.4721721199263
episode: 412 training return: tensor(452.1364, device='cuda:0')
episode: 413 training return: tensor(636.0594, device='cuda:0')
episode: 414 training return: tensor(533.9001, device='cuda:0')
episode: 415 training return: tensor(637.3962, device='cuda:0')
epoch: 104 test_true_pfm: 5697.529509941555 sim_pfm: 670.1127579491973
episode: 416 training return: tensor(574.2726, device='cuda:0')
episode: 417 training return: tensor(516.1718, device='cuda:0')
episode: 418 training return: tensor(484.5763, device='cuda:0')
episode: 419 training return: tensor(460.6124, device='cuda:0')
epoch: 105 test_true_pfm: 5594.067012442921 sim_pfm: 617.2046540214991
episode: 420 training return: tensor(587.0519, device='cuda:0')
episode: 421 training return: tensor(470.3388, device='cuda:0')
episode: 422 training return: tensor(419.4515, device='cuda:0')
episode: 423 training return: tensor(559.5825, device='cuda:0')
epoch: 106 test_true_pfm: 5688.046113136198 sim_pfm: 637.8687720166054
episode: 424 training return: tensor(451.7580, device='cuda:0')
episode: 425 training return: tensor(456.8031, device='cuda:0')
episode: 426 training return: tensor(511.5068, device='cuda:0')
episode: 427 training return: tensor(613.6753, device='cuda:0')
epoch: 107 test_true_pfm: 5642.796080517267 sim_pfm: 687.5913685333993
episode: 428 training return: tensor(494.5056, device='cuda:0')
episode: 429 training return: tensor(618.8132, device='cuda:0')
episode: 430 training return: tensor(599.3239, device='cuda:0')
episode: 431 training return: tensor(421.8714, device='cuda:0')
epoch: 108 test_true_pfm: 5640.810888325351 sim_pfm: 611.0523832251007
episode: 432 training return: tensor(515.2677, device='cuda:0')
episode: 433 training return: tensor(313.7122, device='cuda:0')
episode: 434 training return: tensor(454.1998, device='cuda:0')
episode: 435 training return: tensor(509.6754, device='cuda:0')
epoch: 109 test_true_pfm: 5647.348957347355 sim_pfm: 605.390695823667
episode: 436 training return: tensor(494.1228, device='cuda:0')
episode: 437 training return: tensor(559.7653, device='cuda:0')
episode: 438 training return: tensor(529.3242, device='cuda:0')
episode: 439 training return: tensor(604.7457, device='cuda:0')
epoch: 110 test_true_pfm: 5710.360837284018 sim_pfm: 638.9432751465841
episode: 440 training return: tensor(499.6406, device='cuda:0')
episode: 441 training return: tensor(578.3458, device='cuda:0')
episode: 442 training return: tensor(412.8622, device='cuda:0')
episode: 443 training return: tensor(519.8433, device='cuda:0')
epoch: 111 test_true_pfm: 5583.571317455425 sim_pfm: 543.3065054070127
episode: 444 training return: tensor(498.3501, device='cuda:0')
episode: 445 training return: tensor(529.4952, device='cuda:0')
episode: 446 training return: tensor(621.0922, device='cuda:0')
episode: 447 training return: tensor(496.5847, device='cuda:0')
epoch: 112 test_true_pfm: 5671.728303028558 sim_pfm: 622.0801624666783
episode: 448 training return: tensor(561.5877, device='cuda:0')
episode: 449 training return: tensor(513.5443, device='cuda:0')
episode: 450 training return: tensor(668.0204, device='cuda:0')
episode: 451 training return: tensor(613.4049, device='cuda:0')
epoch: 113 test_true_pfm: 5707.439272594452 sim_pfm: 668.3556087144243
episode: 452 training return: tensor(515.3583, device='cuda:0')
episode: 453 training return: tensor(554.4670, device='cuda:0')
episode: 454 training return: tensor(505.2327, device='cuda:0')
episode: 455 training return: tensor(411.2085, device='cuda:0')
epoch: 114 test_true_pfm: 5694.313847835579 sim_pfm: 665.3419139040925
episode: 456 training return: tensor(604.2217, device='cuda:0')
episode: 457 training return: tensor(492.9500, device='cuda:0')
episode: 458 training return: tensor(652.3958, device='cuda:0')
episode: 459 training return: tensor(573.0408, device='cuda:0')
epoch: 115 test_true_pfm: 5730.331938682714 sim_pfm: 616.2481175442226
episode: 460 training return: tensor(555.4481, device='cuda:0')
episode: 461 training return: tensor(621.5488, device='cuda:0')
episode: 462 training return: tensor(551.0254, device='cuda:0')
episode: 463 training return: tensor(548.7405, device='cuda:0')
epoch: 116 test_true_pfm: 5615.262897378452 sim_pfm: 697.5105630253189
episode: 464 training return: tensor(595.6856, device='cuda:0')
episode: 465 training return: tensor(556.3821, device='cuda:0')
episode: 466 training return: tensor(517.0801, device='cuda:0')
episode: 467 training return: tensor(522.1918, device='cuda:0')
epoch: 117 test_true_pfm: 5783.0242753415 sim_pfm: 682.8394041807429
episode: 468 training return: tensor(609.1371, device='cuda:0')
episode: 469 training return: tensor(509.2984, device='cuda:0')
episode: 470 training return: tensor(556.4755, device='cuda:0')
episode: 471 training return: tensor(480.9500, device='cuda:0')
epoch: 118 test_true_pfm: 5678.9617953040715 sim_pfm: 615.7912654968289
episode: 472 training return: tensor(605.3851, device='cuda:0')
episode: 473 training return: tensor(475.3012, device='cuda:0')
episode: 474 training return: tensor(441.7350, device='cuda:0')
episode: 475 training return: tensor(497.8726, device='cuda:0')
epoch: 119 test_true_pfm: 5750.677899140349 sim_pfm: 691.0453767369812
episode: 476 training return: tensor(485.8042, device='cuda:0')
episode: 477 training return: tensor(523.8635, device='cuda:0')
episode: 478 training return: tensor(-49.1796, device='cuda:0')
episode: 479 training return: tensor(563.3040, device='cuda:0')
epoch: 120 test_true_pfm: 5681.623208567413 sim_pfm: 558.1852987772436
episode: 480 training return: tensor(549.3090, device='cuda:0')
episode: 481 training return: tensor(643.9973, device='cuda:0')
episode: 482 training return: tensor(442.2863, device='cuda:0')
episode: 483 training return: tensor(626.6157, device='cuda:0')
epoch: 121 test_true_pfm: 5711.643201191858 sim_pfm: 663.8692351662806
episode: 484 training return: tensor(659.7631, device='cuda:0')
episode: 485 training return: tensor(500.7706, device='cuda:0')
episode: 486 training return: tensor(449.2823, device='cuda:0')
episode: 487 training return: tensor(500.7401, device='cuda:0')
epoch: 122 test_true_pfm: 5605.272858294833 sim_pfm: 587.7340022653807
episode: 488 training return: tensor(517.4273, device='cuda:0')
episode: 489 training return: tensor(647.1430, device='cuda:0')
episode: 490 training return: tensor(575.4120, device='cuda:0')
episode: 491 training return: tensor(448.3723, device='cuda:0')
epoch: 123 test_true_pfm: 5708.769489793472 sim_pfm: 691.9535189799111
episode: 492 training return: tensor(485.4660, device='cuda:0')
episode: 493 training return: tensor(351.8554, device='cuda:0')
episode: 494 training return: tensor(501.7122, device='cuda:0')
episode: 495 training return: tensor(704.3974, device='cuda:0')
epoch: 124 test_true_pfm: 5708.2796548877295 sim_pfm: 653.4576992574148
episode: 496 training return: tensor(587.0991, device='cuda:0')
episode: 497 training return: tensor(632.4512, device='cuda:0')
episode: 498 training return: tensor(552.4868, device='cuda:0')
episode: 499 training return: tensor(519.9943, device='cuda:0')
epoch: 125 test_true_pfm: 5680.675951218793 sim_pfm: 697.0884898857524
episode: 500 training return: tensor(419.3447, device='cuda:0')
episode: 501 training return: tensor(390.8325, device='cuda:0')
episode: 502 training return: tensor(573.2948, device='cuda:0')
episode: 503 training return: tensor(469.1079, device='cuda:0')
epoch: 126 test_true_pfm: 5712.368359306199 sim_pfm: 668.4185681509165
episode: 504 training return: tensor(571.2426, device='cuda:0')
episode: 505 training return: tensor(439.8614, device='cuda:0')
episode: 506 training return: tensor(660.0667, device='cuda:0')
episode: 507 training return: tensor(680.4959, device='cuda:0')
epoch: 127 test_true_pfm: 5647.762315239796 sim_pfm: 631.3646486346843
episode: 508 training return: tensor(542.7698, device='cuda:0')
episode: 509 training return: tensor(543.7277, device='cuda:0')
episode: 510 training return: tensor(592.8024, device='cuda:0')
episode: 511 training return: tensor(549.9454, device='cuda:0')
epoch: 128 test_true_pfm: 5715.4926158076705 sim_pfm: 592.9939387782166
episode: 512 training return: tensor(397.4122, device='cuda:0')
episode: 513 training return: tensor(491.8069, device='cuda:0')
episode: 514 training return: tensor(505.0175, device='cuda:0')
episode: 515 training return: tensor(601.0859, device='cuda:0')
epoch: 129 test_true_pfm: 5754.715503658675 sim_pfm: 666.6378712483371
episode: 516 training return: tensor(575.3638, device='cuda:0')
episode: 517 training return: tensor(503.0178, device='cuda:0')
episode: 518 training return: tensor(678.2425, device='cuda:0')
episode: 519 training return: tensor(556.7137, device='cuda:0')
epoch: 130 test_true_pfm: 5686.626013196554 sim_pfm: 741.7610959470427
episode: 520 training return: tensor(410.7516, device='cuda:0')
episode: 521 training return: tensor(435.0413, device='cuda:0')
episode: 522 training return: tensor(628.9780, device='cuda:0')
episode: 523 training return: tensor(642.3019, device='cuda:0')
epoch: 131 test_true_pfm: 5744.6554756689375 sim_pfm: 692.8550840434813
episode: 524 training return: tensor(598.4870, device='cuda:0')
episode: 525 training return: tensor(641.7372, device='cuda:0')
episode: 526 training return: tensor(509.5367, device='cuda:0')
episode: 527 training return: tensor(506.4899, device='cuda:0')
epoch: 132 test_true_pfm: 5723.620612547125 sim_pfm: 551.1807139078543
episode: 528 training return: tensor(298.6768, device='cuda:0')
episode: 529 training return: tensor(618.6984, device='cuda:0')
episode: 530 training return: tensor(524.6522, device='cuda:0')
episode: 531 training return: tensor(622.7859, device='cuda:0')
epoch: 133 test_true_pfm: 5645.662533184295 sim_pfm: 614.6467679130534
episode: 532 training return: tensor(573.9158, device='cuda:0')
episode: 533 training return: tensor(437.7080, device='cuda:0')
episode: 534 training return: tensor(553.8973, device='cuda:0')
episode: 535 training return: tensor(460.9545, device='cuda:0')
epoch: 134 test_true_pfm: 5789.333451130387 sim_pfm: 678.330385207897
episode: 536 training return: tensor(510.7568, device='cuda:0')
episode: 537 training return: tensor(605.6886, device='cuda:0')
episode: 538 training return: tensor(473.5598, device='cuda:0')
episode: 539 training return: tensor(532.4290, device='cuda:0')
epoch: 135 test_true_pfm: 5725.554942064951 sim_pfm: 684.0953909865542
episode: 540 training return: tensor(417.5288, device='cuda:0')
episode: 541 training return: tensor(431.5332, device='cuda:0')
episode: 542 training return: tensor(440.3426, device='cuda:0')
episode: 543 training return: tensor(-504.3951, device='cuda:0')
epoch: 136 test_true_pfm: 5732.276466680113 sim_pfm: 741.5720815770328
episode: 544 training return: tensor(435.5953, device='cuda:0')
episode: 545 training return: tensor(594.6045, device='cuda:0')
episode: 546 training return: tensor(414.0143, device='cuda:0')
episode: 547 training return: tensor(572.1487, device='cuda:0')
epoch: 137 test_true_pfm: 5777.989163629459 sim_pfm: 741.5590039338762
episode: 548 training return: tensor(584.1924, device='cuda:0')
episode: 549 training return: tensor(361.5152, device='cuda:0')
episode: 550 training return: tensor(523.6943, device='cuda:0')
episode: 551 training return: tensor(599.8167, device='cuda:0')
epoch: 138 test_true_pfm: 5684.138977883106 sim_pfm: 650.8988662317861
episode: 552 training return: tensor(598.0608, device='cuda:0')
episode: 553 training return: tensor(534.7115, device='cuda:0')
episode: 554 training return: tensor(695.0608, device='cuda:0')
episode: 555 training return: tensor(574.7594, device='cuda:0')
epoch: 139 test_true_pfm: 5645.41011482707 sim_pfm: 593.185785861065
episode: 556 training return: tensor(519.9467, device='cuda:0')
episode: 557 training return: tensor(681.3604, device='cuda:0')
episode: 558 training return: tensor(532.8401, device='cuda:0')
episode: 559 training return: tensor(563.8837, device='cuda:0')
epoch: 140 test_true_pfm: 5743.194038101253 sim_pfm: 739.788522063444
episode: 560 training return: tensor(490.7625, device='cuda:0')
episode: 561 training return: tensor(514.8573, device='cuda:0')
episode: 562 training return: tensor(570.4680, device='cuda:0')
episode: 563 training return: tensor(529.9418, device='cuda:0')
epoch: 141 test_true_pfm: 5739.3703810022735 sim_pfm: 679.4321400621169
episode: 564 training return: tensor(594.2112, device='cuda:0')
episode: 565 training return: tensor(585.3086, device='cuda:0')
episode: 566 training return: tensor(686.8162, device='cuda:0')
episode: 567 training return: tensor(580.1809, device='cuda:0')
epoch: 142 test_true_pfm: 5643.735505949863 sim_pfm: 601.0459862531958
episode: 568 training return: tensor(368.7419, device='cuda:0')
episode: 569 training return: tensor(586.7078, device='cuda:0')
episode: 570 training return: tensor(461.5625, device='cuda:0')
episode: 571 training return: tensor(391.7341, device='cuda:0')
epoch: 143 test_true_pfm: 5652.068025623907 sim_pfm: 636.9979885613915
episode: 572 training return: tensor(394.4337, device='cuda:0')
episode: 573 training return: tensor(520.3574, device='cuda:0')
episode: 574 training return: tensor(417.4266, device='cuda:0')
episode: 575 training return: tensor(404.0883, device='cuda:0')
epoch: 144 test_true_pfm: 5627.4461708065255 sim_pfm: 650.132624916189
episode: 576 training return: tensor(500.9170, device='cuda:0')
episode: 577 training return: tensor(392.5084, device='cuda:0')
episode: 578 training return: tensor(524.9025, device='cuda:0')
episode: 579 training return: tensor(571.0488, device='cuda:0')
epoch: 145 test_true_pfm: 5688.1579485052025 sim_pfm: 613.9336261140028
episode: 580 training return: tensor(539.8724, device='cuda:0')
episode: 581 training return: tensor(524.2377, device='cuda:0')
episode: 582 training return: tensor(436.7185, device='cuda:0')
episode: 583 training return: tensor(626.6755, device='cuda:0')
epoch: 146 test_true_pfm: 5766.866408903487 sim_pfm: 671.9508777115649
episode: 584 training return: tensor(584.4134, device='cuda:0')
episode: 585 training return: tensor(445.6916, device='cuda:0')
episode: 586 training return: tensor(688.1552, device='cuda:0')
episode: 587 training return: tensor(608.9875, device='cuda:0')
epoch: 147 test_true_pfm: 5782.68180658192 sim_pfm: 671.1122826140685
episode: 588 training return: tensor(468.6859, device='cuda:0')
episode: 589 training return: tensor(543.7094, device='cuda:0')
episode: 590 training return: tensor(590.6992, device='cuda:0')
episode: 591 training return: tensor(448.5040, device='cuda:0')
epoch: 148 test_true_pfm: 5673.857770639268 sim_pfm: 668.5595873810817
episode: 592 training return: tensor(608.1826, device='cuda:0')
episode: 593 training return: tensor(593.6172, device='cuda:0')
episode: 594 training return: tensor(570.5366, device='cuda:0')
episode: 595 training return: tensor(501.1348, device='cuda:0')
epoch: 149 test_true_pfm: 5633.481905426287 sim_pfm: 725.771142234696
episode: 596 training return: tensor(615.9101, device='cuda:0')
episode: 597 training return: tensor(534.9568, device='cuda:0')
episode: 598 training return: tensor(607.3629, device='cuda:0')
episode: 599 training return: tensor(576.7437, device='cuda:0')
epoch: 150 test_true_pfm: 5765.042461056266 sim_pfm: 652.375089124466
