['--alg', 'sac', '--env', 'Swimmer-v3', '--learn', 'uncertainty', '--traj', 'medium', '--seed', '4', '--data', '100000']
epoch: 0 training_loss 0.41718455120921133 test_loss: 0.31116328239440916
epoch: 1 training_loss 0.2996169285476208 test_loss: 0.25390698909759524
epoch: 2 training_loss 0.2627550534904003 test_loss: 0.2502033472061157
epoch: 3 training_loss 0.24740127429366113 test_loss: 0.29272725582122805
epoch: 4 training_loss 0.23147748783230782 test_loss: 0.23793978691101075
epoch: 5 training_loss 0.24227616503834726 test_loss: 0.25391511917114257
epoch: 6 training_loss 0.24723492562770844 test_loss: 0.21677522659301757
epoch: 7 training_loss 0.22302713952958583 test_loss: 0.2159219741821289
epoch: 8 training_loss 0.21171534709632397 test_loss: 0.20913970470428467
epoch: 9 training_loss 0.2108372473716736 test_loss: 0.2232673406600952
epoch: 10 training_loss 0.22490276619791985 test_loss: 0.21634149551391602
epoch: 11 training_loss 0.21032246559858322 test_loss: 0.20851376056671142
epoch: 12 training_loss 0.21002266496419908 test_loss: 0.23388900756835937
epoch: 13 training_loss 0.21142679437994957 test_loss: 0.20938487052917482
epoch: 14 training_loss 0.21309212997555732 test_loss: 0.2104719877243042
epoch: 15 training_loss 0.20313185974955558 test_loss: 0.19030122756958007
epoch: 16 training_loss 0.2208137223869562 test_loss: 0.20783886909484864
epoch: 17 training_loss 0.20416837751865388 test_loss: 0.2002997636795044
epoch: 18 training_loss 0.19607037968933583 test_loss: 0.21146996021270753
epoch: 19 training_loss 0.21033193469047545 test_loss: 0.23591935634613037
epoch: 20 training_loss 0.20516203120350837 test_loss: 0.2091010570526123
epoch: 21 training_loss 0.2064513948559761 test_loss: 0.22426857948303222
epoch: 22 training_loss 0.1930360460281372 test_loss: 0.2275193452835083
epoch: 23 training_loss 0.20501850426197052 test_loss: 0.20515778064727783
epoch: 24 training_loss 0.20203845702111722 test_loss: 0.21316921710968018
epoch: 25 training_loss 0.20082896143198015 test_loss: 0.1958270788192749
epoch: 26 training_loss 0.1984639422595501 test_loss: 0.20258738994598388
epoch: 27 training_loss 0.20355580151081085 test_loss: 0.20984876155853271
epoch: 28 training_loss 0.19579909414052962 test_loss: 0.20161812305450438
epoch: 29 training_loss 0.19764572218060494 test_loss: 0.21689908504486083
epoch: 30 training_loss 0.20239334389567376 test_loss: 0.21003458499908448
epoch: 31 training_loss 0.1993079026043415 test_loss: 0.20144248008728027
epoch: 32 training_loss 0.19942311733961104 test_loss: 0.2114337921142578
epoch: 33 training_loss 0.19782276913523675 test_loss: 0.19388248920440673
epoch: 34 training_loss 0.19465055510401727 test_loss: 0.2013951539993286
epoch: 35 training_loss 0.19480591207742692 test_loss: 0.19334144592285157
epoch: 36 training_loss 0.1886801768094301 test_loss: 0.21117260456085205
epoch: 37 training_loss 0.1922532320022583 test_loss: 0.20380089282989503
epoch: 38 training_loss 0.202094102576375 test_loss: 0.21112825870513915
epoch: 39 training_loss 0.1907456099241972 test_loss: 0.19972214698791504
epoch: 40 training_loss 0.1879461929947138 test_loss: 0.18277240991592408
epoch: 41 training_loss 0.18634136818349362 test_loss: 0.21762850284576415
epoch: 42 training_loss 0.18890801809728144 test_loss: 0.19679865837097169
epoch: 43 training_loss 0.2086174350976944 test_loss: 0.20385260581970216
epoch: 44 training_loss 0.19523376554250718 test_loss: 0.20852527618408204
epoch: 45 training_loss 0.19041362397372721 test_loss: 0.194488263130188
epoch: 46 training_loss 0.18831796385347843 test_loss: 0.21996614933013917
epoch: 47 training_loss 0.18600114040076732 test_loss: 0.19732450246810912
epoch: 48 training_loss 0.1902579042315483 test_loss: 0.18791625499725342
epoch: 49 training_loss 0.18915183551609516 test_loss: 0.1854897141456604
epoch: 50 training_loss 0.18854911766946317 test_loss: 0.1980821132659912
epoch: 51 training_loss 0.18950302630662919 test_loss: 0.19196488857269287
epoch: 52 training_loss 0.1989773006737232 test_loss: 0.1903443455696106
epoch: 53 training_loss 0.1913880191743374 test_loss: 0.192882239818573
epoch: 54 training_loss 0.18347810789942742 test_loss: 0.218172287940979
epoch: 55 training_loss 0.1831544654071331 test_loss: 0.195401394367218
epoch: 56 training_loss 0.1837086084485054 test_loss: 0.1964370846748352
epoch: 57 training_loss 0.19123100377619268 test_loss: 0.2022623300552368
epoch: 58 training_loss 0.19434348963201045 test_loss: 0.20583004951477052
epoch: 59 training_loss 0.18146039679646492 test_loss: 0.19291183948516846
epoch: 60 training_loss 0.18671381011605262 test_loss: 0.18774702548980712
epoch: 61 training_loss 0.18626321569085122 test_loss: 0.19355518817901612
epoch: 62 training_loss 0.18359521798789502 test_loss: 0.18795568943023683
epoch: 63 training_loss 0.18423344649374485 test_loss: 0.20829598903656005
epoch: 64 training_loss 0.19347351290285586 test_loss: 0.19124870300292968
epoch: 65 training_loss 0.19615925461053849 test_loss: 0.20769271850585938
epoch: 66 training_loss 0.19034500569105148 test_loss: 0.188590669631958
epoch: 67 training_loss 0.18461820408701896 test_loss: 0.19878861904144288
epoch: 68 training_loss 0.19480683900415896 test_loss: 0.18626049757003785
epoch: 69 training_loss 0.1834506306052208 test_loss: 0.19168936014175414
epoch: 70 training_loss 0.18142925642430782 test_loss: 0.18728101253509521
epoch: 71 training_loss 0.18934305146336555 test_loss: 0.18839571475982667
epoch: 72 training_loss 0.18278863914310933 test_loss: 0.20074269771575928
epoch: 73 training_loss 0.18074473805725574 test_loss: 0.18204262256622314
epoch: 74 training_loss 0.18407593205571174 test_loss: 0.18294098377227783
epoch: 75 training_loss 0.18134840466082097 test_loss: 0.18538265228271483
epoch: 76 training_loss 0.18466844715178013 test_loss: 0.18840280771255494
epoch: 77 training_loss 0.18150438562035562 test_loss: 0.21001806259155273
epoch: 78 training_loss 0.18607451260089874 test_loss: 0.20138297080993653
epoch: 79 training_loss 0.17683725826442243 test_loss: 0.18969945907592772
epoch: 80 training_loss 0.18428272739052773 test_loss: 0.1978076696395874
epoch: 81 training_loss 0.18202568240463735 test_loss: 0.17995425462722778
epoch: 82 training_loss 0.19288730524480344 test_loss: 0.2191753387451172
epoch: 83 training_loss 0.18350629545748234 test_loss: 0.1857777714729309
epoch: 84 training_loss 0.1840056661516428 test_loss: 0.1761702060699463
epoch: 85 training_loss 0.17949636749923228 test_loss: 0.20780770778656005
epoch: 86 training_loss 0.18476204864680768 test_loss: 0.19547978639602662
epoch: 87 training_loss 0.17031936034560202 test_loss: 0.18644320964813232
epoch: 88 training_loss 0.18292009733617307 test_loss: 0.2018970489501953
epoch: 89 training_loss 0.179762115329504 test_loss: 0.2077198028564453
epoch: 90 training_loss 0.18337231874465942 test_loss: 0.19241422414779663
epoch: 91 training_loss 0.1803149353712797 test_loss: 0.18854179382324218
epoch: 92 training_loss 0.1789088974148035 test_loss: 0.19197566509246827
epoch: 93 training_loss 0.1868290887773037 test_loss: 0.19652453660964966
epoch: 94 training_loss 0.18646928146481515 test_loss: 0.18385963439941405
epoch: 95 training_loss 0.18426490120589734 test_loss: 0.1925103783607483
epoch: 96 training_loss 0.18289950244128705 test_loss: 0.20801644325256347
epoch: 97 training_loss 0.1804187736660242 test_loss: 0.20038273334503173
epoch: 98 training_loss 0.17861041001975536 test_loss: 0.2040797233581543
epoch: 99 training_loss 0.17773449175059797 test_loss: 0.1942540407180786
epoch: 100 training_loss 0.17532092072069644 test_loss: 0.18147244453430175
epoch: 101 training_loss 0.17901441939175128 test_loss: 0.17662394046783447
epoch: 102 training_loss 0.18301296681165696 test_loss: 0.18944242000579833
epoch: 103 training_loss 0.18176450602710248 test_loss: 0.1912032723426819
epoch: 104 training_loss 0.18622731149196625 test_loss: 0.23882713317871093
epoch: 105 training_loss 0.182741065248847 test_loss: 0.19188687801361085
epoch: 106 training_loss 0.18239918865263463 test_loss: 0.18522491455078124
epoch: 107 training_loss 0.17664301976561547 test_loss: 0.18355520963668823
epoch: 108 training_loss 0.17844882495701314 test_loss: 0.18956348896026612
epoch: 109 training_loss 0.17286034993827343 test_loss: 0.18361194133758546
epoch: 110 training_loss 0.1806968518346548 test_loss: 0.18252031803131102
epoch: 111 training_loss 0.18535531997680665 test_loss: 0.20391449928283692
epoch: 112 training_loss 0.1892629236727953 test_loss: 0.1939162015914917
epoch: 113 training_loss 0.17721081770956515 test_loss: 0.21613683700561523
epoch: 114 training_loss 0.17833317004144192 test_loss: 0.19617750644683837
epoch: 115 training_loss 0.17786321982741357 test_loss: 0.18987786769866943
epoch: 116 training_loss 0.18224925927817823 test_loss: 0.1970294237136841
epoch: 117 training_loss 0.17607850171625614 test_loss: 0.20039920806884765
epoch: 118 training_loss 0.17204609856009484 test_loss: 0.17541153430938722
epoch: 119 training_loss 0.1802310860902071 test_loss: 0.1907572865486145
epoch: 120 training_loss 0.1810514321178198 test_loss: 0.19910848140716553
epoch: 121 training_loss 0.1848548649251461 test_loss: 0.20221035480499266
epoch: 122 training_loss 0.1840803773701191 test_loss: 0.2014616012573242
epoch: 123 training_loss 0.19577275097370148 test_loss: 0.21202259063720702
epoch: 124 training_loss 0.18573920197784902 test_loss: 0.21327195167541504
epoch: 125 training_loss 0.18137165166437627 test_loss: 0.18411699533462525
epoch: 126 training_loss 0.17698286652565 test_loss: 0.17650631666183472
epoch: 127 training_loss 0.17677096098661424 test_loss: 0.181485915184021
epoch: 128 training_loss 0.1693044902384281 test_loss: 0.18670116662979125
epoch: 129 training_loss 0.17914594292640687 test_loss: 0.18244057893753052
epoch: 130 training_loss 0.1702949111163616 test_loss: 0.17113131284713745
epoch: 131 training_loss 0.17344110302627086 test_loss: 0.17997803688049316
epoch: 132 training_loss 0.18316018857061864 test_loss: 0.2014636754989624
epoch: 133 training_loss 0.17246925577521324 test_loss: 0.19091978073120117
epoch: 134 training_loss 0.17540914125740528 test_loss: 0.1842046022415161
epoch: 135 training_loss 0.18125498980283739 test_loss: 0.18838375806808472
epoch: 136 training_loss 0.1723222690075636 test_loss: 0.18277353048324585
epoch: 137 training_loss 0.16739217564463615 test_loss: 0.19460639953613282
epoch: 138 training_loss 0.18323722891509533 test_loss: 0.1838040828704834
epoch: 139 training_loss 0.17670139856636524 test_loss: 0.17812567949295044
epoch: 140 training_loss 0.1747937987744808 test_loss: 0.1942082405090332
epoch: 141 training_loss 0.17471460543572903 test_loss: 0.18771895170211791
epoch: 142 training_loss 0.1764180860668421 test_loss: 0.23317537307739258
epoch: 143 training_loss 0.17598512262105942 test_loss: 0.18361954689025878
epoch: 144 training_loss 0.18005520306527614 test_loss: 0.18852206468582153
epoch: 145 training_loss 0.1781030598282814 test_loss: 0.19093836545944215
epoch: 146 training_loss 0.1712943609058857 test_loss: 0.18537358045578003
epoch: 147 training_loss 0.1814250348508358 test_loss: 0.18327755928039552
epoch: 148 training_loss 0.17081826277077197 test_loss: 0.17884423732757568
epoch: 149 training_loss 0.17927243754267694 test_loss: 0.19900621175765992
epoch: 0 training_loss 0.42731067910790443 test_loss: 0.3200256109237671
epoch: 1 training_loss 0.3015253607928753 test_loss: 0.3057805299758911
epoch: 2 training_loss 0.2634187889099121 test_loss: 0.24356362819671631
epoch: 3 training_loss 0.25705220192670825 test_loss: 0.24190945625305177
epoch: 4 training_loss 0.2409505121409893 test_loss: 0.23105952739715577
epoch: 5 training_loss 0.23188523709774017 test_loss: 0.22440626621246337
epoch: 6 training_loss 0.2303049984574318 test_loss: 0.21887543201446533
epoch: 7 training_loss 0.22492463789880277 test_loss: 0.26370012760162354
epoch: 8 training_loss 0.2257652300596237 test_loss: 0.25235786437988283
epoch: 9 training_loss 0.2299723319709301 test_loss: 0.28244500160217284
epoch: 10 training_loss 0.22491119533777237 test_loss: 0.23949947357177734
epoch: 11 training_loss 0.2154589104652405 test_loss: 0.20382139682769776
epoch: 12 training_loss 0.21255881048738956 test_loss: 0.2153611421585083
epoch: 13 training_loss 0.21014643602073194 test_loss: 0.21251473426818848
epoch: 14 training_loss 0.2134595863521099 test_loss: 0.17989838123321533
epoch: 15 training_loss 0.21401968315243722 test_loss: 0.21470134258270263
epoch: 16 training_loss 0.21855176635086537 test_loss: 0.1969576358795166
epoch: 17 training_loss 0.20940807804465295 test_loss: 0.19214084148406982
epoch: 18 training_loss 0.20751723378896714 test_loss: 0.20907056331634521
epoch: 19 training_loss 0.20954591348767282 test_loss: 0.2116403102874756
epoch: 20 training_loss 0.20136300660669804 test_loss: 0.20185461044311523
epoch: 21 training_loss 0.21226989097893237 test_loss: 0.19484193325042726
epoch: 22 training_loss 0.20483755268156528 test_loss: 0.19262864589691162
epoch: 23 training_loss 0.2155421656370163 test_loss: 0.23741939067840576
epoch: 24 training_loss 0.21403767064213752 test_loss: 0.20300498008728027
epoch: 25 training_loss 0.20766821578145028 test_loss: 0.19712356328964234
epoch: 26 training_loss 0.20232060186564924 test_loss: 0.1957452416419983
epoch: 27 training_loss 0.20928777672350407 test_loss: 0.19256608486175536
epoch: 28 training_loss 0.1991348011046648 test_loss: 0.20348455905914306
epoch: 29 training_loss 0.20056717291474344 test_loss: 0.20861728191375734
epoch: 30 training_loss 0.20721238285303115 test_loss: 0.2024845838546753
epoch: 31 training_loss 0.19841917239129545 test_loss: 0.20293524265289306
epoch: 32 training_loss 0.2022105310857296 test_loss: 0.19232960939407348
epoch: 33 training_loss 0.20914750799536705 test_loss: 0.23836424350738525
epoch: 34 training_loss 0.19998991057276727 test_loss: 0.23935608863830565
epoch: 35 training_loss 0.20650161869823933 test_loss: 0.22635138034820557
epoch: 36 training_loss 0.19790148228406906 test_loss: 0.2111271619796753
epoch: 37 training_loss 0.2008150650560856 test_loss: 0.20319030284881592
epoch: 38 training_loss 0.19569276310503483 test_loss: 0.19139899015426637
epoch: 39 training_loss 0.20027728170156478 test_loss: 0.19712700843811035
epoch: 40 training_loss 0.20097531601786614 test_loss: 0.18590078353881836
epoch: 41 training_loss 0.18410616390407086 test_loss: 0.1775697112083435
epoch: 42 training_loss 0.20076371781527996 test_loss: 0.18922971487045287
epoch: 43 training_loss 0.1972068291902542 test_loss: 0.20851459503173828
epoch: 44 training_loss 0.19328633412718774 test_loss: 0.19261977672576905
epoch: 45 training_loss 0.19574006631970406 test_loss: 0.19583029747009278
epoch: 46 training_loss 0.19160585694015025 test_loss: 0.19994447231292725
epoch: 47 training_loss 0.19590223990380765 test_loss: 0.20233235359191895
epoch: 48 training_loss 0.19287572406232356 test_loss: 0.19504436254501342
epoch: 49 training_loss 0.19763719879090785 test_loss: 0.19203176498413085
epoch: 50 training_loss 0.19283077590167522 test_loss: 0.19694284200668336
epoch: 51 training_loss 0.19077622920274734 test_loss: 0.19780006408691406
epoch: 52 training_loss 0.201906303986907 test_loss: 0.2032303810119629
epoch: 53 training_loss 0.18900101236999034 test_loss: 0.19527463912963866
epoch: 54 training_loss 0.18772556781768798 test_loss: 0.17808902263641357
epoch: 55 training_loss 0.18927414432168008 test_loss: 0.20063369274139403
epoch: 56 training_loss 0.18770867690443993 test_loss: 0.20186357498168944
epoch: 57 training_loss 0.19190319024026395 test_loss: 0.1898847699165344
epoch: 58 training_loss 0.2019668719917536 test_loss: 0.1871273398399353
epoch: 59 training_loss 0.19234037071466445 test_loss: 0.18889561891555787
epoch: 60 training_loss 0.18618550688028335 test_loss: 0.20153849124908446
epoch: 61 training_loss 0.18529406681656838 test_loss: 0.183063280582428
epoch: 62 training_loss 0.18804470628499984 test_loss: 0.19681752920150758
epoch: 63 training_loss 0.18996755607426166 test_loss: 0.18727703094482423
epoch: 64 training_loss 0.19108225181698799 test_loss: 0.18266161680221557
epoch: 65 training_loss 0.2039335172623396 test_loss: 0.19308834075927733
epoch: 66 training_loss 0.18581210866570472 test_loss: 0.18838075399398804
epoch: 67 training_loss 0.18113974519073964 test_loss: 0.19293354749679564
epoch: 68 training_loss 0.18944839566946028 test_loss: 0.18921703100204468
epoch: 69 training_loss 0.18974409580230714 test_loss: 0.2022231101989746
epoch: 70 training_loss 0.18635816887021064 test_loss: 0.18831881284713745
epoch: 71 training_loss 0.18835915103554726 test_loss: 0.17597246170043945
epoch: 72 training_loss 0.19406823799014092 test_loss: 0.2103496313095093
epoch: 73 training_loss 0.18920757591724396 test_loss: 0.20408267974853517
epoch: 74 training_loss 0.18982188507914544 test_loss: 0.18265390396118164
epoch: 75 training_loss 0.18856834001839162 test_loss: 0.21967520713806152
epoch: 76 training_loss 0.19242372527718543 test_loss: 0.18873242139816285
epoch: 77 training_loss 0.1868615946918726 test_loss: 0.1987607479095459
epoch: 78 training_loss 0.1838583240658045 test_loss: 0.18596335649490356
epoch: 79 training_loss 0.18880590036511422 test_loss: 0.19344714879989625
epoch: 80 training_loss 0.1837922466546297 test_loss: 0.1965949773788452
epoch: 81 training_loss 0.18250224664807319 test_loss: 0.17581391334533691
epoch: 82 training_loss 0.18478713534772395 test_loss: 0.18840477466583253
epoch: 83 training_loss 0.1792870020866394 test_loss: 0.17802470922470093
epoch: 84 training_loss 0.17954514779150485 test_loss: 0.2076894998550415
epoch: 85 training_loss 0.19988417774438857 test_loss: 0.1911570906639099
epoch: 86 training_loss 0.18882093742489814 test_loss: 0.16890745162963866
epoch: 87 training_loss 0.18144600853323936 test_loss: 0.18636494874954224
epoch: 88 training_loss 0.18605550602078438 test_loss: 0.17687749862670898
epoch: 89 training_loss 0.18270326271653176 test_loss: 0.20662693977355956
epoch: 90 training_loss 0.18661241561174394 test_loss: 0.16833395957946778
epoch: 91 training_loss 0.18290973000228405 test_loss: 0.18612343072891235
epoch: 92 training_loss 0.17746701583266258 test_loss: 0.18523527383804322
epoch: 93 training_loss 0.18516484685242177 test_loss: 0.20907261371612548
epoch: 94 training_loss 0.1864243659377098 test_loss: 0.2252664566040039
epoch: 95 training_loss 0.18329163908958435 test_loss: 0.19024264812469482
epoch: 96 training_loss 0.1804793606698513 test_loss: 0.17959802150726317
epoch: 97 training_loss 0.18258620865643024 test_loss: 0.19619193077087402
epoch: 98 training_loss 0.1788410972058773 test_loss: 0.17073866128921508
epoch: 99 training_loss 0.18295790694653988 test_loss: 0.1937810182571411
epoch: 100 training_loss 0.1833103869855404 test_loss: 0.1852823853492737
epoch: 101 training_loss 0.18709719471633435 test_loss: 0.19738837480545043
epoch: 102 training_loss 0.18405549451708794 test_loss: 0.19269945621490478
epoch: 103 training_loss 0.18120948709547519 test_loss: 0.20241882801055908
epoch: 104 training_loss 0.18445162266492843 test_loss: 0.1905989170074463
epoch: 105 training_loss 0.18321638770401477 test_loss: 0.20499491691589355
epoch: 106 training_loss 0.17453197598457337 test_loss: 0.1926979899406433
epoch: 107 training_loss 0.17737964391708375 test_loss: 0.21624429225921632
epoch: 108 training_loss 0.18318308934569358 test_loss: 0.18985873460769653
epoch: 109 training_loss 0.18649039216339588 test_loss: 0.1923591136932373
epoch: 110 training_loss 0.17985695354640485 test_loss: 0.181751549243927
epoch: 111 training_loss 0.1839570704102516 test_loss: 0.18448233604431152
epoch: 112 training_loss 0.18923504322767257 test_loss: 0.19508516788482666
epoch: 113 training_loss 0.18755307532846927 test_loss: 0.23115081787109376
epoch: 114 training_loss 0.19123852126300334 test_loss: 0.18651373386383058
epoch: 115 training_loss 0.1780007215589285 test_loss: 0.18045711517333984
epoch: 116 training_loss 0.17264664337038993 test_loss: 0.18455677032470702
epoch: 117 training_loss 0.17949621573090555 test_loss: 0.18304957151412965
epoch: 118 training_loss 0.18056637406349182 test_loss: 0.1803802490234375
epoch: 119 training_loss 0.178685200214386 test_loss: 0.18369399309158324
epoch: 120 training_loss 0.1749153409898281 test_loss: 0.1835312843322754
epoch: 121 training_loss 0.17565568566322326 test_loss: 0.19232970476150513
epoch: 122 training_loss 0.18140650235116482 test_loss: 0.18547120094299316
epoch: 123 training_loss 0.17777829557657243 test_loss: 0.18388983011245727
epoch: 124 training_loss 0.18071945503354073 test_loss: 0.1887371301651001
epoch: 125 training_loss 0.17485707186162472 test_loss: 0.19573676586151123
epoch: 126 training_loss 0.18672411285340787 test_loss: 0.19201943874359131
epoch: 127 training_loss 0.18069743625819684 test_loss: 0.18922460079193115
epoch: 128 training_loss 0.17459240071475507 test_loss: 0.1864860773086548
epoch: 129 training_loss 0.1712101900577545 test_loss: 0.1938068985939026
epoch: 130 training_loss 0.17898703567683696 test_loss: 0.18688790798187255
epoch: 131 training_loss 0.16953605011105538 test_loss: 0.1956135392189026
epoch: 132 training_loss 0.18102133892476557 test_loss: 0.19753295183181763
epoch: 133 training_loss 0.18148451663553714 test_loss: 0.17560117244720458
epoch: 134 training_loss 0.1781816602498293 test_loss: 0.18756870031356812
epoch: 135 training_loss 0.17651305072009563 test_loss: 0.18503345251083375
epoch: 136 training_loss 0.17159235060214997 test_loss: 0.18428937196731568
epoch: 137 training_loss 0.17730085343122481 test_loss: 0.1769916296005249
epoch: 138 training_loss 0.1753488512337208 test_loss: 0.19785255193710327
epoch: 139 training_loss 0.1734439976140857 test_loss: 0.18857213258743286
epoch: 140 training_loss 0.18649141922593115 test_loss: 0.18453035354614258
epoch: 141 training_loss 0.17397101268172263 test_loss: 0.1732361674308777
epoch: 142 training_loss 0.19164584919810296 test_loss: 0.18239872455596923
epoch: 143 training_loss 0.17771208770573138 test_loss: 0.18506121635437012
epoch: 144 training_loss 0.1713043048977852 test_loss: 0.15931037664413453
epoch: 145 training_loss 0.16916447840631008 test_loss: 0.19796671867370605
epoch: 146 training_loss 0.1755118490755558 test_loss: 0.18454742431640625
epoch: 147 training_loss 0.1725994297862053 test_loss: 0.18907604217529297
epoch: 148 training_loss 0.17169427245855332 test_loss: 0.19710878133773804
epoch: 149 training_loss 0.17437827482819557 test_loss: 0.1770877480506897
epoch: 0 training_loss 0.42459436506032944 test_loss: 0.3564002990722656
epoch: 1 training_loss 0.28761505246162417 test_loss: 0.24086487293243408
epoch: 2 training_loss 0.25864041030406953 test_loss: 0.23504228591918946
epoch: 3 training_loss 0.24213461697101593 test_loss: 0.225730562210083
epoch: 4 training_loss 0.22314048871397973 test_loss: 0.22757017612457275
epoch: 5 training_loss 0.2295239271223545 test_loss: 0.22088384628295898
epoch: 6 training_loss 0.22300339132547378 test_loss: 0.22723796367645263
epoch: 7 training_loss 0.22196227937936783 test_loss: 0.22482359409332275
epoch: 8 training_loss 0.2300598603487015 test_loss: 0.25180504322052
epoch: 9 training_loss 0.21233911849558354 test_loss: 0.20790348052978516
epoch: 10 training_loss 0.21053947046399116 test_loss: 0.19995707273483276
epoch: 11 training_loss 0.20687647432088851 test_loss: 0.21851685047149658
epoch: 12 training_loss 0.21012280225753785 test_loss: 0.20496728420257568
epoch: 13 training_loss 0.21533604174852372 test_loss: 0.216990065574646
epoch: 14 training_loss 0.20963565707206727 test_loss: 0.20630273818969727
epoch: 15 training_loss 0.21025431640446185 test_loss: 0.2087641477584839
epoch: 16 training_loss 0.2082333292067051 test_loss: 0.20948302745819092
epoch: 17 training_loss 0.20457779981195925 test_loss: 0.22014923095703126
epoch: 18 training_loss 0.19937813557684422 test_loss: 0.23169515132904053
epoch: 19 training_loss 0.19423380084335803 test_loss: 0.20452768802642823
epoch: 20 training_loss 0.203643896356225 test_loss: 0.2031261682510376
epoch: 21 training_loss 0.21185175001621245 test_loss: 0.21911633014678955
epoch: 22 training_loss 0.20308356389403343 test_loss: 0.19284862279891968
epoch: 23 training_loss 0.20343730337917804 test_loss: 0.20802912712097169
epoch: 24 training_loss 0.2021491166204214 test_loss: 0.19376181364059447
epoch: 25 training_loss 0.1953773745149374 test_loss: 0.21638028621673583
epoch: 26 training_loss 0.20768158175051213 test_loss: 0.20587821006774903
epoch: 27 training_loss 0.19985422618687154 test_loss: 0.19706902503967286
epoch: 28 training_loss 0.20680188499391078 test_loss: 0.2086106538772583
epoch: 29 training_loss 0.20517876356840134 test_loss: 0.19083316326141359
epoch: 30 training_loss 0.19792250163853167 test_loss: 0.2026196002960205
epoch: 31 training_loss 0.19708317950367926 test_loss: 0.18988617658615112
epoch: 32 training_loss 0.19791338987648488 test_loss: 0.1947927713394165
epoch: 33 training_loss 0.1980490858107805 test_loss: 0.19330291748046874
epoch: 34 training_loss 0.19469625689089298 test_loss: 0.19935063123703003
epoch: 35 training_loss 0.19915763922035695 test_loss: 0.1929130434989929
epoch: 36 training_loss 0.1924362612515688 test_loss: 0.21968061923980714
epoch: 37 training_loss 0.19937191635370255 test_loss: 0.18976353406906127
epoch: 38 training_loss 0.19699243701994418 test_loss: 0.19803783893585206
epoch: 39 training_loss 0.19824160754680634 test_loss: 0.23032104969024658
epoch: 40 training_loss 0.1988401063531637 test_loss: 0.19857983589172362
epoch: 41 training_loss 0.1883333220332861 test_loss: 0.20457072257995607
epoch: 42 training_loss 0.19482081055641173 test_loss: 0.1926914095878601
epoch: 43 training_loss 0.19761704593896867 test_loss: 0.20988619327545166
epoch: 44 training_loss 0.18672991871833802 test_loss: 0.18521900177001954
epoch: 45 training_loss 0.18539097227156162 test_loss: 0.18802101612091066
epoch: 46 training_loss 0.19320630155503749 test_loss: 0.1914821982383728
epoch: 47 training_loss 0.19020768448710443 test_loss: 0.20531938076019288
epoch: 48 training_loss 0.18531698890030385 test_loss: 0.194705069065094
epoch: 49 training_loss 0.1964597411453724 test_loss: 0.200109601020813
epoch: 50 training_loss 0.190355723798275 test_loss: 0.20264501571655275
epoch: 51 training_loss 0.1944396434724331 test_loss: 0.19554450511932372
epoch: 52 training_loss 0.1935933205485344 test_loss: 0.20504913330078126
epoch: 53 training_loss 0.19107596889138223 test_loss: 0.18662337064743043
epoch: 54 training_loss 0.1860793048888445 test_loss: 0.19223781824111938
epoch: 55 training_loss 0.1903392071276903 test_loss: 0.19634968042373657
epoch: 56 training_loss 0.19502439707517624 test_loss: 0.18624016046524047
epoch: 57 training_loss 0.1887115314602852 test_loss: 0.1963981032371521
epoch: 58 training_loss 0.1905136127024889 test_loss: 0.1943256139755249
epoch: 59 training_loss 0.1946718916296959 test_loss: 0.1911030650138855
epoch: 60 training_loss 0.1905976814031601 test_loss: 0.19249193668365477
epoch: 61 training_loss 0.1825798011571169 test_loss: 0.1920287013053894
epoch: 62 training_loss 0.18804668821394444 test_loss: 0.188989520072937
epoch: 63 training_loss 0.19026718132197856 test_loss: 0.18227109909057618
epoch: 64 training_loss 0.18884138248860835 test_loss: 0.19598571062088013
epoch: 65 training_loss 0.19071587905287743 test_loss: 0.18223321437835693
epoch: 66 training_loss 0.19099072881042958 test_loss: 0.18598220348358155
epoch: 67 training_loss 0.18862881429493428 test_loss: 0.19709084033966065
epoch: 68 training_loss 0.19348259411752225 test_loss: 0.19687879085540771
epoch: 69 training_loss 0.18482990838587285 test_loss: 0.18788857460021974
epoch: 70 training_loss 0.19195879653096198 test_loss: 0.1897262692451477
epoch: 71 training_loss 0.18096612989902497 test_loss: 0.18479796648025512
epoch: 72 training_loss 0.19182939283549785 test_loss: 0.19553298950195314
epoch: 73 training_loss 0.1996552535891533 test_loss: 0.182657790184021
epoch: 74 training_loss 0.19166675209999084 test_loss: 0.22029988765716552
epoch: 75 training_loss 0.18937324605882166 test_loss: 0.18272294998168945
epoch: 76 training_loss 0.17813194677233696 test_loss: 0.18566579818725587
epoch: 77 training_loss 0.1888620976358652 test_loss: 0.18722267150878907
epoch: 78 training_loss 0.18562762558460236 test_loss: 0.21365044116973878
epoch: 79 training_loss 0.18150348111987114 test_loss: 0.2031027555465698
epoch: 80 training_loss 0.18214297510683536 test_loss: 0.19603878259658813
epoch: 81 training_loss 0.18339396238327027 test_loss: 0.17825719118118286
epoch: 82 training_loss 0.18847413681447506 test_loss: 0.19077547788619995
epoch: 83 training_loss 0.17807083949446678 test_loss: 0.1857777714729309
epoch: 84 training_loss 0.19016849242150782 test_loss: 0.179959237575531
epoch: 85 training_loss 0.18644923500716687 test_loss: 0.19554895162582397
epoch: 86 training_loss 0.18422068379819392 test_loss: 0.1840166926383972
epoch: 87 training_loss 0.17657094612717628 test_loss: 0.19285845756530762
epoch: 88 training_loss 0.1864385025203228 test_loss: 0.19220349788665772
epoch: 89 training_loss 0.17597040355205537 test_loss: 0.18307271003723144
epoch: 90 training_loss 0.18601358525454997 test_loss: 0.19453988075256348
epoch: 91 training_loss 0.18571791268885135 test_loss: 0.1818867802619934
epoch: 92 training_loss 0.19027809470891952 test_loss: 0.20259628295898438
epoch: 93 training_loss 0.18273306757211685 test_loss: 0.1916689395904541
epoch: 94 training_loss 0.18432408772408962 test_loss: 0.19232784509658812
epoch: 95 training_loss 0.18543351553380488 test_loss: 0.19337096214294433
epoch: 96 training_loss 0.18803440898656845 test_loss: 0.1781529664993286
epoch: 97 training_loss 0.1812610001116991 test_loss: 0.19720600843429564
epoch: 98 training_loss 0.18115449592471122 test_loss: 0.18449769020080567
epoch: 99 training_loss 0.18416536673903466 test_loss: 0.19347553253173827
epoch: 100 training_loss 0.18248808182775975 test_loss: 0.18208831548690796
epoch: 101 training_loss 0.1782732303440571 test_loss: 0.1876118302345276
epoch: 102 training_loss 0.1854569421708584 test_loss: 0.17442402839660645
epoch: 103 training_loss 0.1823907858133316 test_loss: 0.19173113107681275
epoch: 104 training_loss 0.17870519906282425 test_loss: 0.20009820461273192
epoch: 105 training_loss 0.18123040229082107 test_loss: 0.18774940967559814
epoch: 106 training_loss 0.1876294217258692 test_loss: 0.17808114290237426
epoch: 107 training_loss 0.17951184414327145 test_loss: 0.1900144100189209
epoch: 108 training_loss 0.18572173722088337 test_loss: 0.19069104194641112
epoch: 109 training_loss 0.18151794001460075 test_loss: 0.19807004928588867
epoch: 110 training_loss 0.17310603015124798 test_loss: 0.19997867345809936
epoch: 111 training_loss 0.18221930138766765 test_loss: 0.18477017879486085
epoch: 112 training_loss 0.18473800212144853 test_loss: 0.18486528396606444
epoch: 113 training_loss 0.17974416702985763 test_loss: 0.20922553539276123
epoch: 114 training_loss 0.18541010186076165 test_loss: 0.1857448101043701
epoch: 115 training_loss 0.1792160677164793 test_loss: 0.1904840350151062
epoch: 116 training_loss 0.17566258415579797 test_loss: 0.19652315378189086
epoch: 117 training_loss 0.18334170043468476 test_loss: 0.18703948259353637
epoch: 118 training_loss 0.1886327274143696 test_loss: 0.19160776138305663
epoch: 119 training_loss 0.18096167385578155 test_loss: 0.17764438390731813
epoch: 120 training_loss 0.1740856885164976 test_loss: 0.18683068752288817
epoch: 121 training_loss 0.17650093637406827 test_loss: 0.17818368673324586
epoch: 122 training_loss 0.1849604357033968 test_loss: 0.2056563377380371
epoch: 123 training_loss 0.1750076712667942 test_loss: 0.18975539207458497
epoch: 124 training_loss 0.17607722140848636 test_loss: 0.19467257261276244
epoch: 125 training_loss 0.19033966109156608 test_loss: 0.1871896505355835
epoch: 126 training_loss 0.1811478776484728 test_loss: 0.2004981279373169
epoch: 127 training_loss 0.18366565652191638 test_loss: 0.19534281492233277
epoch: 128 training_loss 0.18121660090982913 test_loss: 0.17895677089691162
epoch: 129 training_loss 0.18036956824362277 test_loss: 0.18060951232910155
epoch: 130 training_loss 0.1745716577768326 test_loss: 0.19464612007141113
epoch: 131 training_loss 0.18479286707937717 test_loss: 0.19018405675888062
epoch: 132 training_loss 0.18060339756309987 test_loss: 0.1840085506439209
epoch: 133 training_loss 0.17676516145467758 test_loss: 0.18807108402252198
epoch: 134 training_loss 0.17550813920795919 test_loss: 0.19440798759460448
epoch: 135 training_loss 0.17973239593207835 test_loss: 0.19781843423843384
epoch: 136 training_loss 0.1764435162395239 test_loss: 0.18389129638671875
epoch: 137 training_loss 0.18209985353052616 test_loss: 0.176603364944458
epoch: 138 training_loss 0.1741365087032318 test_loss: 0.18918336629867555
epoch: 139 training_loss 0.17763801485300065 test_loss: 0.1922824501991272
epoch: 140 training_loss 0.18472094736993314 test_loss: 0.20632083415985109
epoch: 141 training_loss 0.17621696785092353 test_loss: 0.1845036745071411
epoch: 142 training_loss 0.16701154679059982 test_loss: 0.19253307580947876
epoch: 143 training_loss 0.17432290002703665 test_loss: 0.1827468156814575
epoch: 144 training_loss 0.18503632113337518 test_loss: 0.18507267236709596
epoch: 145 training_loss 0.1703845002502203 test_loss: 0.17992404699325562
epoch: 146 training_loss 0.17124146819114686 test_loss: 0.18440252542495728
epoch: 147 training_loss 0.18331901229918002 test_loss: 0.18197159767150878
epoch: 148 training_loss 0.17358707077801228 test_loss: 0.1697969079017639
epoch: 149 training_loss 0.17718309611082078 test_loss: 0.20133390426635742
epoch: 0 training_loss 0.43155630230903624 test_loss: 0.32076168060302734
epoch: 1 training_loss 0.2943142938613892 test_loss: 0.2638729095458984
epoch: 2 training_loss 0.25699750930070875 test_loss: 0.26097745895385743
epoch: 3 training_loss 0.245306104272604 test_loss: 0.24694321155548096
epoch: 4 training_loss 0.23442384526133536 test_loss: 0.24483180046081543
epoch: 5 training_loss 0.2210594195127487 test_loss: 0.22813284397125244
epoch: 6 training_loss 0.22153890430927276 test_loss: 0.2048184871673584
epoch: 7 training_loss 0.22208112224936485 test_loss: 0.2609918832778931
epoch: 8 training_loss 0.23595759481191636 test_loss: 0.21215577125549318
epoch: 9 training_loss 0.21860260710120202 test_loss: 0.21953840255737306
epoch: 10 training_loss 0.23409013271331788 test_loss: 0.22213122844696045
epoch: 11 training_loss 0.21859951466321945 test_loss: 0.22925539016723634
epoch: 12 training_loss 0.21987310752272607 test_loss: 0.20153393745422363
epoch: 13 training_loss 0.2116025387495756 test_loss: 0.2115629196166992
epoch: 14 training_loss 0.22410181239247323 test_loss: 0.2285527467727661
epoch: 15 training_loss 0.22188381522893905 test_loss: 0.2500073671340942
epoch: 16 training_loss 0.2137212735414505 test_loss: 0.21121068000793458
epoch: 17 training_loss 0.21457042783498764 test_loss: 0.21969952583312988
epoch: 18 training_loss 0.22816453203558923 test_loss: 0.2205172061920166
epoch: 19 training_loss 0.21027902767062187 test_loss: 0.2050467014312744
epoch: 20 training_loss 0.2216957475990057 test_loss: 0.22381534576416015
epoch: 21 training_loss 0.20775323793292044 test_loss: 0.21952860355377196
epoch: 22 training_loss 0.21032014288008213 test_loss: 0.2028977870941162
epoch: 23 training_loss 0.20472425170242786 test_loss: 0.21939623355865479
epoch: 24 training_loss 0.20366148345172405 test_loss: 0.21714394092559813
epoch: 25 training_loss 0.2072893086820841 test_loss: 0.19474161863327027
epoch: 26 training_loss 0.21361098140478135 test_loss: 0.19903920888900756
epoch: 27 training_loss 0.20345168963074683 test_loss: 0.20193393230438234
epoch: 28 training_loss 0.20134990513324738 test_loss: 0.19439952373504638
epoch: 29 training_loss 0.20698495678603648 test_loss: 0.20560934543609619
epoch: 30 training_loss 0.20559507474303246 test_loss: 0.19881366491317748
epoch: 31 training_loss 0.20439014859497548 test_loss: 0.19295203685760498
epoch: 32 training_loss 0.21002388827502727 test_loss: 0.20435593128204346
epoch: 33 training_loss 0.19253023579716683 test_loss: 0.22169106006622313
epoch: 34 training_loss 0.19758064076304435 test_loss: 0.21267378330230713
epoch: 35 training_loss 0.21112838238477707 test_loss: 0.1913178563117981
epoch: 36 training_loss 0.2056830806285143 test_loss: 0.20275306701660156
epoch: 37 training_loss 0.19659746147692203 test_loss: 0.19383183717727662
epoch: 38 training_loss 0.19532535761594771 test_loss: 0.2199106454849243
epoch: 39 training_loss 0.19460325554013252 test_loss: 0.18857909440994264
epoch: 40 training_loss 0.20445247076451778 test_loss: 0.19324311017990112
epoch: 41 training_loss 0.19853552587330342 test_loss: 0.20139315128326415
epoch: 42 training_loss 0.19484925128519534 test_loss: 0.20342733860015869
epoch: 43 training_loss 0.19358211405575276 test_loss: 0.21035101413726806
epoch: 44 training_loss 0.19884407944977284 test_loss: 0.19915834665298462
epoch: 45 training_loss 0.19091797359287738 test_loss: 0.19868592023849488
epoch: 46 training_loss 0.19195869579911232 test_loss: 0.18769338130950927
epoch: 47 training_loss 0.19009328991174698 test_loss: 0.18446407318115235
epoch: 48 training_loss 0.18790663167834282 test_loss: 0.17835958003997804
epoch: 49 training_loss 0.2011609910428524 test_loss: 0.1905229926109314
epoch: 50 training_loss 0.1956922321021557 test_loss: 0.2000784158706665
epoch: 51 training_loss 0.2024404650181532 test_loss: 0.18587037324905395
epoch: 52 training_loss 0.1933109411597252 test_loss: 0.19011718034744263
epoch: 53 training_loss 0.1973816379904747 test_loss: 0.21236422061920165
epoch: 54 training_loss 0.1919575724005699 test_loss: 0.19666379690170288
epoch: 55 training_loss 0.20113636642694474 test_loss: 0.19693615436553955
epoch: 56 training_loss 0.19108470983803272 test_loss: 0.19910671710968017
epoch: 57 training_loss 0.19768776170909405 test_loss: 0.20320920944213866
epoch: 58 training_loss 0.1934133291244507 test_loss: 0.19907740354537964
epoch: 59 training_loss 0.1885246818512678 test_loss: 0.19087685346603395
epoch: 60 training_loss 0.19297972872853278 test_loss: 0.1878493070602417
epoch: 61 training_loss 0.1899308054894209 test_loss: 0.19674574136734008
epoch: 62 training_loss 0.18302664995193482 test_loss: 0.19361838102340698
epoch: 63 training_loss 0.19721969336271286 test_loss: 0.1861438512802124
epoch: 64 training_loss 0.18960323810577392 test_loss: 0.19189345836639404
epoch: 65 training_loss 0.1893924269080162 test_loss: 0.1851397395133972
epoch: 66 training_loss 0.1913258034735918 test_loss: 0.1794085383415222
epoch: 67 training_loss 0.18316391192376613 test_loss: 0.17621519565582275
epoch: 68 training_loss 0.1940407456457615 test_loss: 0.18360753059387208
epoch: 69 training_loss 0.205679978877306 test_loss: 0.1966361403465271
epoch: 70 training_loss 0.1921367110311985 test_loss: 0.19911503791809082
epoch: 71 training_loss 0.19454157412052153 test_loss: 0.18333495855331422
epoch: 72 training_loss 0.1935311186313629 test_loss: 0.1918163299560547
epoch: 73 training_loss 0.19018086723983288 test_loss: 0.18529375791549682
epoch: 74 training_loss 0.1858030565083027 test_loss: 0.19844970703125
epoch: 75 training_loss 0.18854044042527676 test_loss: 0.18761860132217406
epoch: 76 training_loss 0.18794820532202722 test_loss: 0.18835538625717163
epoch: 77 training_loss 0.17887441076338292 test_loss: 0.1931815266609192
epoch: 78 training_loss 0.18815582044422627 test_loss: 0.2034684419631958
epoch: 79 training_loss 0.20802466310560702 test_loss: 0.18063778877258302
epoch: 80 training_loss 0.19546874225139618 test_loss: 0.1964152932167053
epoch: 81 training_loss 0.18698451310396194 test_loss: 0.1735494017601013
epoch: 82 training_loss 0.20056954354047776 test_loss: 0.19215723276138305
epoch: 83 training_loss 0.1799916361272335 test_loss: 0.1791880249977112
epoch: 84 training_loss 0.18366479851305484 test_loss: 0.18153681755065917
epoch: 85 training_loss 0.18769500002264977 test_loss: 0.19851967096328735
epoch: 86 training_loss 0.1859885600954294 test_loss: 0.2098918676376343
epoch: 87 training_loss 0.18544128783047198 test_loss: 0.20372884273529052
epoch: 88 training_loss 0.1893638326227665 test_loss: 0.20601894855499267
epoch: 89 training_loss 0.18090976774692535 test_loss: 0.185790753364563
epoch: 90 training_loss 0.18743921630084515 test_loss: 0.17496010065078735
epoch: 91 training_loss 0.17972299546003342 test_loss: 0.1919601559638977
epoch: 92 training_loss 0.19164938129484654 test_loss: 0.20339529514312743
epoch: 93 training_loss 0.18558176547288896 test_loss: 0.19648728370666504
epoch: 94 training_loss 0.18329990424215795 test_loss: 0.18452552556991578
epoch: 95 training_loss 0.18759278550744057 test_loss: 0.18436495065689087
epoch: 96 training_loss 0.18126090116798876 test_loss: 0.1801857590675354
epoch: 97 training_loss 0.19199206955730916 test_loss: 0.19771664142608641
epoch: 98 training_loss 0.18164742432534695 test_loss: 0.20247375965118408
epoch: 99 training_loss 0.18514858804643153 test_loss: 0.18597023487091063
epoch: 100 training_loss 0.1886527856439352 test_loss: 0.1878315806388855
epoch: 101 training_loss 0.18218555741012096 test_loss: 0.19685150384902955
epoch: 102 training_loss 0.1894578082114458 test_loss: 0.1874642014503479
epoch: 103 training_loss 0.18867127679288387 test_loss: 0.18354164361953734
epoch: 104 training_loss 0.18340740658342838 test_loss: 0.18193768262863158
epoch: 105 training_loss 0.17467388972640038 test_loss: 0.19913097620010375
epoch: 106 training_loss 0.192962114661932 test_loss: 0.21932265758514405
epoch: 107 training_loss 0.1830092190951109 test_loss: 0.1808169960975647
epoch: 108 training_loss 0.18173937641084195 test_loss: 0.19909207820892333
epoch: 109 training_loss 0.18697047621011734 test_loss: 0.17631109952926635
epoch: 110 training_loss 0.1797042914479971 test_loss: 0.19990839958190917
epoch: 111 training_loss 0.1806679666787386 test_loss: 0.18908092975616456
epoch: 112 training_loss 0.18913728110492228 test_loss: 0.17820082902908324
epoch: 113 training_loss 0.18473105259239675 test_loss: 0.18997161388397216
epoch: 114 training_loss 0.19785374768078326 test_loss: 0.2047106981277466
epoch: 115 training_loss 0.20124002628028392 test_loss: 0.18679842948913575
epoch: 116 training_loss 0.1850985649228096 test_loss: 0.18086838722229004
epoch: 117 training_loss 0.18436979740858078 test_loss: 0.21130619049072266
epoch: 118 training_loss 0.17548594914376736 test_loss: 0.17844198942184447
epoch: 119 training_loss 0.18220721691846847 test_loss: 0.19327346086502076
epoch: 120 training_loss 0.1855756851285696 test_loss: 0.18019683361053468
epoch: 121 training_loss 0.1773767114430666 test_loss: 0.19184597730636596
epoch: 122 training_loss 0.1800887594372034 test_loss: 0.19568971395492554
epoch: 123 training_loss 0.18429750092327596 test_loss: 0.19667333364486694
epoch: 124 training_loss 0.1841969972103834 test_loss: 0.18811450004577637
epoch: 125 training_loss 0.18583306685090065 test_loss: 0.18479472398757935
epoch: 126 training_loss 0.17992342047393323 test_loss: 0.19029368162155152
epoch: 127 training_loss 0.183265775218606 test_loss: 0.18246697187423705
epoch: 128 training_loss 0.18456181146204473 test_loss: 0.2093400239944458
epoch: 129 training_loss 0.18295659095048905 test_loss: 0.18140166997909546
epoch: 130 training_loss 0.18785329066216946 test_loss: 0.19382195472717284
epoch: 131 training_loss 0.18590204544365407 test_loss: 0.17547910213470458
epoch: 132 training_loss 0.1826448977738619 test_loss: 0.19335023164749146
epoch: 133 training_loss 0.18452229097485542 test_loss: 0.19685883522033693
epoch: 134 training_loss 0.17939671039581298 test_loss: 0.18696863651275636
epoch: 135 training_loss 0.18716969683766366 test_loss: 0.17706410884857177
epoch: 136 training_loss 0.17710219398140908 test_loss: 0.2002183675765991
epoch: 137 training_loss 0.1909343207627535 test_loss: 0.20349886417388915
epoch: 138 training_loss 0.1765413411706686 test_loss: 0.17620911598205566
epoch: 139 training_loss 0.17881179541349412 test_loss: 0.19728695154190062
epoch: 140 training_loss 0.18372838586568832 test_loss: 0.19199520349502563
epoch: 141 training_loss 0.1895114079117775 test_loss: 0.18498378992080688
epoch: 142 training_loss 0.18164207503199578 test_loss: 0.1891131043434143
epoch: 143 training_loss 0.18576268114149572 test_loss: 0.1948786973953247
epoch: 144 training_loss 0.18413083001971245 test_loss: 0.16964865922927858
epoch: 145 training_loss 0.18519074216485024 test_loss: 0.18840171098709108
epoch: 146 training_loss 0.18462930105626582 test_loss: 0.2019913911819458
epoch: 147 training_loss 0.1820644798874855 test_loss: 0.19120261669158936
epoch: 148 training_loss 0.1751908230781555 test_loss: 0.1915408968925476
epoch: 149 training_loss 0.1779451449215412 test_loss: 0.1949275851249695
episode: 0 training return: -481.9435114683484
episode: 1 training return: -445.37425365140496
episode: 2 training return: -396.26725467181757
episode: 3 training return: -320.6590635109544
epoch: 1 test_true_pfm: 30.716708003834295 sim_pfm: -193.29580603991448
episode: 4 training return: -270.7425539519083
episode: 5 training return: -449.65794539391607
episode: 6 training return: -436.0928365710853
episode: 7 training return: -374.6829724194235
epoch: 2 test_true_pfm: 19.921923137714828 sim_pfm: -131.03550196304377
episode: 8 training return: -497.78539026673997
episode: 9 training return: -418.8142972786789
episode: 10 training return: -357.70239305348747
episode: 11 training return: -222.03736116265333
epoch: 3 test_true_pfm: 27.20924260331704 sim_pfm: -126.52743563721162
episode: 12 training return: -187.76247294720858
episode: 13 training return: -212.75716507775076
episode: 14 training return: -230.37804880234853
episode: 15 training return: -224.19174552165464
epoch: 4 test_true_pfm: 30.011941773678554 sim_pfm: -77.42752254475312
episode: 16 training return: -180.38170336696302
episode: 17 training return: -98.67132909304573
episode: 18 training return: -126.99755769705965
episode: 19 training return: -13.926522626066962
epoch: 5 test_true_pfm: 26.392105177030327 sim_pfm: 133.77908816352047
episode: 20 training return: -39.46085551434781
episode: 21 training return: -31.142249210842202
episode: 22 training return: -112.40808801050183
episode: 23 training return: -38.238569786303295
epoch: 6 test_true_pfm: 25.96959122766607 sim_pfm: -118.91157827020432
episode: 24 training return: 48.60462399372198
episode: 25 training return: -62.06736016767618
episode: 26 training return: 53.256718388706076
episode: 27 training return: -49.34951688084224
epoch: 7 test_true_pfm: 37.81695341074678 sim_pfm: 19.50679793893791
episode: 28 training return: -59.155754570278106
episode: 29 training return: -3.0573089788323125
episode: 30 training return: 15.861141065645606
episode: 31 training return: -32.899334002797566
epoch: 8 test_true_pfm: 33.28227585237273 sim_pfm: 18.79951258767794
episode: 32 training return: 31.138015070304213
episode: 33 training return: -10.926457975424572
episode: 34 training return: 20.472972753329685
episode: 35 training return: 22.134103358956466
epoch: 9 test_true_pfm: 27.022769902116305 sim_pfm: 80.65566579189083
episode: 36 training return: 42.81112811952007
episode: 37 training return: 103.35813198541989
episode: 38 training return: 61.75911076992058
episode: 39 training return: 131.66890377325356
epoch: 10 test_true_pfm: 26.53679095507705 sim_pfm: 182.07419111755283
episode: 40 training return: 139.02786357148088
episode: 41 training return: 158.62523690325193
episode: 42 training return: 123.97867515557434
episode: 43 training return: 166.4488209478878
epoch: 11 test_true_pfm: 36.30287373805387 sim_pfm: 208.47267330996365
episode: 44 training return: 165.39638525035272
episode: 45 training return: 134.30912184222532
episode: 46 training return: 186.69299376414355
episode: 47 training return: 179.86651751327324
epoch: 12 test_true_pfm: 41.393599160806986 sim_pfm: 138.00972104266737
episode: 48 training return: 289.3809382797297
episode: 49 training return: 225.70664036629935
episode: 50 training return: 173.94854618183754
episode: 51 training return: 203.77184736835395
epoch: 13 test_true_pfm: 47.754653152854864 sim_pfm: 430.05887954390687
episode: 52 training return: 370.9988341408928
episode: 53 training return: 364.7250249424272
episode: 54 training return: 406.6075365585296
episode: 55 training return: 255.9242914947377
epoch: 14 test_true_pfm: 39.314820657526546 sim_pfm: 407.6858430739946
episode: 56 training return: 446.72407392537554
episode: 57 training return: 291.8269552915989
episode: 58 training return: 460.59125457058735
episode: 59 training return: 266.463156810809
epoch: 15 test_true_pfm: 39.77255066842671 sim_pfm: 384.02545812099
episode: 60 training return: 256.2172706963613
episode: 61 training return: 445.6419500021297
episode: 62 training return: 479.85876162668956
episode: 63 training return: 441.90447979121126
epoch: 16 test_true_pfm: 40.887872120006286 sim_pfm: 48.94689164626053
episode: 64 training return: 454.4563438986847
episode: 65 training return: 444.091139339829
episode: 66 training return: 459.81946080644406
episode: 67 training return: 456.63356651760154
epoch: 17 test_true_pfm: 41.7689328516227 sim_pfm: 277.353315505118
episode: 68 training return: 450.9285043459324
episode: 69 training return: 471.45420097381094
episode: 70 training return: 272.51331398389226
episode: 71 training return: 451.01867077452124
epoch: 18 test_true_pfm: 49.09159037309191 sim_pfm: 596.3708963463872
episode: 72 training return: 475.57754926742257
episode: 73 training return: 470.9954485893081
episode: 74 training return: 466.6016440503927
episode: 75 training return: 469.26097635876687
epoch: 19 test_true_pfm: 46.38557735815231 sim_pfm: 590.4174691358794
episode: 76 training return: 477.6219089334771
episode: 77 training return: 470.7204693700966
episode: 78 training return: 499.1841253843713
episode: 79 training return: 483.5948786616311
epoch: 20 test_true_pfm: 40.44662346435157 sim_pfm: 602.11428823811
episode: 80 training return: 462.71199043248276
episode: 81 training return: 463.44142993907604
episode: 82 training return: 458.87507836060496
episode: 83 training return: 494.6495660942095
epoch: 21 test_true_pfm: 47.358161177216175 sim_pfm: 595.6232672237023
episode: 84 training return: 479.43524092242757
episode: 85 training return: 469.3089936220989
episode: 86 training return: 489.8184647694309
episode: 87 training return: 480.7502001588314
epoch: 22 test_true_pfm: 50.27845894466984 sim_pfm: 609.1303993445497
episode: 88 training return: 479.7759904854303
episode: 89 training return: 475.1944319150068
episode: 90 training return: 451.4338429723778
episode: 91 training return: 462.09740228528875
epoch: 23 test_true_pfm: 49.73425692880488 sim_pfm: 607.3869187384516
episode: 92 training return: 460.21407840354715
episode: 93 training return: 492.9710961115881
episode: 94 training return: 487.0904703650768
episode: 95 training return: 482.5280485213864
epoch: 24 test_true_pfm: 46.51238885256104 sim_pfm: 602.7573649025343
episode: 96 training return: 452.87376965078124
episode: 97 training return: 492.79354080939925
episode: 98 training return: 487.77703789267434
episode: 99 training return: 480.5936613595092
epoch: 25 test_true_pfm: 50.82720588724681 sim_pfm: 609.9634473465394
episode: 100 training return: 478.6683595195987
episode: 101 training return: 482.50014031996
episode: 102 training return: 478.428035533903
episode: 103 training return: 468.9821351335657
epoch: 26 test_true_pfm: 47.2787522256364 sim_pfm: 617.2296407263043
episode: 104 training return: 460.9970059014543
episode: 105 training return: 492.57009045412775
episode: 106 training return: 499.0587034769962
episode: 107 training return: 491.4937621086273
epoch: 27 test_true_pfm: 40.63963128084644 sim_pfm: 624.0944488746383
episode: 108 training return: 484.0973750135167
episode: 109 training return: 482.3842756350166
episode: 110 training return: 482.835417566762
episode: 111 training return: 498.2056603010206
epoch: 28 test_true_pfm: 38.39607354726383 sim_pfm: 604.3925858026621
episode: 112 training return: 472.3665872412637
episode: 113 training return: 466.8536338097945
episode: 114 training return: 489.27488021759586
episode: 115 training return: 482.4990185281145
epoch: 29 test_true_pfm: 44.00387050186611 sim_pfm: 636.1984975087934
episode: 116 training return: 500.64748233621947
episode: 117 training return: 477.856619702593
episode: 118 training return: 473.54455093627746
episode: 119 training return: 486.33889274435126
epoch: 30 test_true_pfm: 45.66853940424414 sim_pfm: 630.3131963019046
episode: 120 training return: 503.0812057667784
episode: 121 training return: 521.3237120175022
episode: 122 training return: 509.9613268441047
episode: 123 training return: 501.44530007274375
epoch: 31 test_true_pfm: 40.041783151670664 sim_pfm: 621.4938808825335
episode: 124 training return: 500.1810971733713
episode: 125 training return: 484.24406791237817
episode: 126 training return: 496.89510218368366
episode: 127 training return: 506.24622977258974
epoch: 32 test_true_pfm: 46.71261989917988 sim_pfm: 650.0666074204795
episode: 128 training return: 490.8559946086118
episode: 129 training return: 483.0073247497176
episode: 130 training return: 499.93938803833606
episode: 131 training return: 495.3461528381057
epoch: 33 test_true_pfm: 41.832832349516174 sim_pfm: 609.2419480347982
episode: 132 training return: 492.7521113493648
episode: 133 training return: 476.5697716706721
episode: 134 training return: 498.65852654852586
episode: 135 training return: 495.901461802714
epoch: 34 test_true_pfm: 37.934024622005104 sim_pfm: 615.7553447007934
episode: 136 training return: 497.84036070167963
episode: 137 training return: 508.5917604994904
episode: 138 training return: 488.0846724137201
episode: 139 training return: 507.5820825060179
epoch: 35 test_true_pfm: 40.89039770157782 sim_pfm: 624.1409247359858
episode: 140 training return: 474.11315001178
episode: 141 training return: 498.12823983107495
episode: 142 training return: 494.8388804329561
episode: 143 training return: 477.46809321455623
epoch: 36 test_true_pfm: 44.77183682485628 sim_pfm: 631.5132970125388
episode: 144 training return: 500.44594994465405
episode: 145 training return: 497.03496062130375
episode: 146 training return: 479.44347763956426
episode: 147 training return: 477.28623774646275
epoch: 37 test_true_pfm: 43.54653558604041 sim_pfm: 616.125038410787
episode: 148 training return: 498.01039749277845
episode: 149 training return: 480.97396666617936
episode: 150 training return: 493.9661908481601
episode: 151 training return: 486.0969638625143
epoch: 38 test_true_pfm: 45.46768677365646 sim_pfm: 627.6711783931033
episode: 152 training return: 519.4476793486683
episode: 153 training return: 512.2302563843414
episode: 154 training return: 474.9071097153872
episode: 155 training return: 489.5614498908539
epoch: 39 test_true_pfm: 45.90950146036942 sim_pfm: 636.9955526574942
episode: 156 training return: 494.48735841299685
episode: 157 training return: 496.0328775115461
episode: 158 training return: 503.596805403343
episode: 159 training return: 514.4787652870895
epoch: 40 test_true_pfm: 40.46197166315309 sim_pfm: 622.432974516239
episode: 160 training return: 501.0038531671991
episode: 161 training return: 503.6223833973932
episode: 162 training return: 489.3869008314746
episode: 163 training return: 488.1823155806964
epoch: 41 test_true_pfm: 48.22630101755926 sim_pfm: 630.8695593040559
episode: 164 training return: 507.9785493436754
episode: 165 training return: 510.3389525258695
episode: 166 training return: 504.2434373206166
episode: 167 training return: 452.5384685685411
epoch: 42 test_true_pfm: 42.106492132628865 sim_pfm: 636.1874396359336
episode: 168 training return: 513.0494783710296
episode: 169 training return: 494.6961171686628
episode: 170 training return: 502.5430855329835
episode: 171 training return: 486.48998060962833
epoch: 43 test_true_pfm: 42.04685814995876 sim_pfm: 628.339437774005
episode: 172 training return: 499.00913204900576
episode: 173 training return: 496.869857212059
episode: 174 training return: 488.08828427441813
episode: 175 training return: 499.6105635416045
epoch: 44 test_true_pfm: 42.196615649627674 sim_pfm: 629.7125405877503
episode: 176 training return: 514.849820000766
episode: 177 training return: 508.62901756297725
episode: 178 training return: 500.9930817698908
episode: 179 training return: 497.3584349604291
epoch: 45 test_true_pfm: 46.32963235956248 sim_pfm: 634.400347884422
episode: 180 training return: 498.8313289862483
episode: 181 training return: 489.9463227743178
episode: 182 training return: 498.52667535087284
episode: 183 training return: 508.8046919199664
epoch: 46 test_true_pfm: 45.607080258376904 sim_pfm: 151.7476344626867
episode: 184 training return: 484.46062435174167
episode: 185 training return: 491.2961351791039
episode: 186 training return: 500.9100266382502
episode: 187 training return: 485.0854162475909
epoch: 47 test_true_pfm: 47.101162355274255 sim_pfm: 665.8424532973448
episode: 188 training return: 513.4667533288605
episode: 189 training return: 489.8837480745611
episode: 190 training return: 515.399722520456
episode: 191 training return: 509.1500152012006
epoch: 48 test_true_pfm: 35.91326868416462 sim_pfm: 641.3817014517573
episode: 192 training return: 516.6164714959363
episode: 193 training return: 497.68341511376417
episode: 194 training return: 503.76544938335894
episode: 195 training return: 481.9948016963239
epoch: 49 test_true_pfm: 45.60199569554916 sim_pfm: 648.2634423404212
episode: 196 training return: 502.5256498964851
episode: 197 training return: 513.5798963727921
episode: 198 training return: 503.32487914743734
episode: 199 training return: 504.1397163491738
epoch: 50 test_true_pfm: 44.311724968298996 sim_pfm: 633.4885650921598
episode: 200 training return: 528.0680185169913
episode: 201 training return: 504.81368492362196
episode: 202 training return: 503.007294712561
episode: 203 training return: 495.72578818424347
epoch: 51 test_true_pfm: 39.11409691780773 sim_pfm: 625.7627871522162
episode: 204 training return: 492.54429557667993
episode: 205 training return: 491.318140871723
episode: 206 training return: 504.9511500226274
episode: 207 training return: 502.9158048671728
epoch: 52 test_true_pfm: 39.425871185276826 sim_pfm: 658.6841231031384
episode: 208 training return: 509.3829480236315
episode: 209 training return: 503.1152248589598
episode: 210 training return: 490.652234003816
episode: 211 training return: 505.17153347105966
epoch: 53 test_true_pfm: 50.51152640075993 sim_pfm: 610.3365258784174
episode: 212 training return: 495.56401228274655
episode: 213 training return: 497.29770364028764
episode: 214 training return: 512.2270490133683
episode: 215 training return: 481.4339965814282
epoch: 54 test_true_pfm: 41.09319798757474 sim_pfm: 631.0938607855842
episode: 216 training return: 487.5061407776852
episode: 217 training return: 490.0393186792246
episode: 218 training return: 502.9713166235922
episode: 219 training return: 487.28797188710865
epoch: 55 test_true_pfm: 38.235601989067526 sim_pfm: 654.382271004843
episode: 220 training return: 496.63358721341876
episode: 221 training return: 522.7435965033587
episode: 222 training return: 491.3966683123283
episode: 223 training return: 490.08994772927696
epoch: 56 test_true_pfm: 36.37276218497849 sim_pfm: 637.0046906289268
episode: 224 training return: 504.024276971853
episode: 225 training return: 504.45802283331676
episode: 226 training return: 508.6757757386311
episode: 227 training return: 518.585430631267
epoch: 57 test_true_pfm: 42.836822425404854 sim_pfm: 644.2495528636059
episode: 228 training return: 519.1514365509126
episode: 229 training return: 498.98142853507534
episode: 230 training return: 515.9416576498465
episode: 231 training return: 497.66857664693003
epoch: 58 test_true_pfm: 45.459431246104465 sim_pfm: 647.1923925055961
episode: 232 training return: 530.1298863279189
episode: 233 training return: 509.393122408468
episode: 234 training return: 495.47616912793364
episode: 235 training return: 526.2886887772737
epoch: 59 test_true_pfm: 46.38463110008627 sim_pfm: 641.3544880843546
episode: 236 training return: 491.3546889179586
episode: 237 training return: 497.13235410108416
episode: 238 training return: 513.9757182636043
episode: 239 training return: 508.2334646325682
epoch: 60 test_true_pfm: 41.68375963050926 sim_pfm: 637.947475147009
episode: 240 training return: 517.9521884837472
episode: 241 training return: 532.2943194228048
episode: 242 training return: 514.0998928684787
episode: 243 training return: 503.7987627319679
epoch: 61 test_true_pfm: 46.94265257731124 sim_pfm: 647.9911713559068
episode: 244 training return: 508.9628781240281
episode: 245 training return: 512.8476189094451
episode: 246 training return: 523.7965499929085
episode: 247 training return: 491.78035743509844
epoch: 62 test_true_pfm: 47.78349785207522 sim_pfm: 658.8499991318134
episode: 248 training return: 488.76000082447655
episode: 249 training return: 494.5813384172728
episode: 250 training return: 494.70369088896047
episode: 251 training return: 501.08238151399695
epoch: 63 test_true_pfm: 49.48150432848294 sim_pfm: 641.9945073457945
episode: 252 training return: 497.4843376971373
episode: 253 training return: 504.1175239289119
episode: 254 training return: 495.4189270049009
episode: 255 training return: 500.13661924067526
epoch: 64 test_true_pfm: 36.94323478973511 sim_pfm: 624.5634196471885
episode: 256 training return: 501.7715181802274
episode: 257 training return: 512.4225174556349
episode: 258 training return: 495.8382610685779
episode: 259 training return: 499.75518992316734
epoch: 65 test_true_pfm: 43.01542743760007 sim_pfm: 646.0938106767588
episode: 260 training return: 498.4847022556623
episode: 261 training return: 514.3106219039507
episode: 262 training return: 501.44630915677135
episode: 263 training return: 498.70686219164736
epoch: 66 test_true_pfm: 34.7351917253953 sim_pfm: 607.2511689287231
episode: 264 training return: 515.4089392041532
episode: 265 training return: 515.9063843646285
episode: 266 training return: 498.56142061976186
episode: 267 training return: 509.75870066721984
epoch: 67 test_true_pfm: 44.7416107819746 sim_pfm: 639.5441874829486
episode: 268 training return: 500.71692976792986
episode: 269 training return: 519.3694900062951
episode: 270 training return: 486.2035689029704
episode: 271 training return: 511.713584512869
epoch: 68 test_true_pfm: 36.15511266113349 sim_pfm: 642.9595296981092
episode: 272 training return: 494.5093555982345
episode: 273 training return: 497.79450377077416
episode: 274 training return: 515.1178617209805
episode: 275 training return: 508.5516505751126
epoch: 69 test_true_pfm: 42.20759808444154 sim_pfm: 630.9610968050418
episode: 276 training return: 480.509678292352
episode: 277 training return: 494.19988737053495
episode: 278 training return: 512.9489964866253
episode: 279 training return: 513.5023520313234
epoch: 70 test_true_pfm: 39.250320970238675 sim_pfm: 628.0418195985247
episode: 280 training return: 487.83427143406055
episode: 281 training return: 476.12036578857527
episode: 282 training return: 518.3679589768609
episode: 283 training return: 511.1639755496659
epoch: 71 test_true_pfm: 45.95823980989389 sim_pfm: 649.5440429875655
episode: 284 training return: 498.6321603749949
episode: 285 training return: 493.8198149797337
episode: 286 training return: 523.7429830297007
episode: 287 training return: 494.7778594616614
epoch: 72 test_true_pfm: 43.718678516906955 sim_pfm: 641.6336989384357
episode: 288 training return: 506.145832345381
episode: 289 training return: 522.0525088996119
episode: 290 training return: 517.3584005927146
episode: 291 training return: 525.5164756007553
epoch: 73 test_true_pfm: 47.97623322866523 sim_pfm: 653.1128684701702
episode: 292 training return: 502.69565834087064
episode: 293 training return: 511.14773973759674
episode: 294 training return: 502.87188756033754
episode: 295 training return: 497.1896369891515
epoch: 74 test_true_pfm: 45.17757846924619 sim_pfm: 652.2473322252927
episode: 296 training return: 496.5312482427036
episode: 297 training return: 508.05998470422327
episode: 298 training return: 491.7276966371416
episode: 299 training return: 504.136120576703
epoch: 75 test_true_pfm: 38.47455078225506 sim_pfm: 624.4801373285236
episode: 300 training return: 498.97350844042074
episode: 301 training return: 526.4629926731568
episode: 302 training return: 493.56655461144703
episode: 303 training return: 497.5755670307927
epoch: 76 test_true_pfm: 37.07721365536873 sim_pfm: 625.9685764752683
episode: 304 training return: 512.4687072799325
episode: 305 training return: 513.8656510216146
episode: 306 training return: 496.3571099310606
episode: 307 training return: 502.52151449966857
epoch: 77 test_true_pfm: 45.01326771251851 sim_pfm: 626.9732005062798
episode: 308 training return: 493.88091131285216
episode: 309 training return: 522.0974011842035
episode: 310 training return: 505.4613783949886
episode: 311 training return: 509.0538965845689
epoch: 78 test_true_pfm: 42.52773347582932 sim_pfm: 623.7568177466795
episode: 312 training return: 512.9288301334248
episode: 313 training return: 507.64421815815865
episode: 314 training return: 498.83791922381494
episode: 315 training return: 521.7216631791646
epoch: 79 test_true_pfm: 42.06421281247882 sim_pfm: 646.7782677253987
episode: 316 training return: 512.840571401884
episode: 317 training return: 499.53139239662073
episode: 318 training return: 490.54321787359606
episode: 319 training return: 504.5755076969291
epoch: 80 test_true_pfm: 36.24841371180188 sim_pfm: 649.8837864920652
episode: 320 training return: 489.6975144898176
episode: 321 training return: 506.71530926354524
episode: 322 training return: 485.71267820809203
episode: 323 training return: 503.8247396025328
epoch: 81 test_true_pfm: 44.074005890785436 sim_pfm: 623.3926631495875
episode: 324 training return: 491.5497621742739
episode: 325 training return: 502.8208559857016
episode: 326 training return: 513.6710850572139
episode: 327 training return: 492.2282398958174
epoch: 82 test_true_pfm: 43.266033579514705 sim_pfm: 611.7824591671155
episode: 328 training return: 519.851852474247
episode: 329 training return: 503.9883941970074
episode: 330 training return: 503.8056976348691
episode: 331 training return: 514.2134319243306
epoch: 83 test_true_pfm: 44.478665986826165 sim_pfm: 651.5955454008744
episode: 332 training return: 513.8476757636972
episode: 333 training return: 494.1283820303087
episode: 334 training return: 501.7449001362357
episode: 335 training return: 509.18817497546877
epoch: 84 test_true_pfm: 44.30226192352874 sim_pfm: 636.9044006138321
episode: 336 training return: 522.0811862877139
episode: 337 training return: 517.403450015089
episode: 338 training return: 517.6962943971278
episode: 339 training return: 500.42198384040523
epoch: 85 test_true_pfm: 43.695464973013465 sim_pfm: 639.609403923035
episode: 340 training return: 511.1678634535117
episode: 341 training return: 512.1851782473664
episode: 342 training return: 507.5850942917783
episode: 343 training return: 489.20878054128406
epoch: 86 test_true_pfm: 40.943362853055966 sim_pfm: 621.6353614054742
episode: 344 training return: 499.2060674290392
episode: 345 training return: 503.7107931466667
episode: 346 training return: 501.6087207638825
episode: 347 training return: 503.9958044724224
epoch: 87 test_true_pfm: 40.809338697434725 sim_pfm: 627.2605013267117
episode: 348 training return: 502.6244495993809
episode: 349 training return: 490.42467378123473
episode: 350 training return: 512.9667408392753
episode: 351 training return: 498.20906400618367
epoch: 88 test_true_pfm: 43.09942171957105 sim_pfm: 619.142036489263
episode: 352 training return: 502.0991716254178
episode: 353 training return: 516.7892151562047
episode: 354 training return: 520.9370040469594
episode: 355 training return: 512.9896698061823
epoch: 89 test_true_pfm: 42.16174782404559 sim_pfm: 633.2284896801635
episode: 356 training return: 518.6889636018309
episode: 357 training return: 518.2255926157065
episode: 358 training return: 521.4117687450674
episode: 359 training return: 494.6250450665192
epoch: 90 test_true_pfm: 42.50793819919007 sim_pfm: 648.9484348577741
episode: 360 training return: 510.90483712940784
episode: 361 training return: 509.950903396613
episode: 362 training return: 518.6467057890002
episode: 363 training return: 503.842012283085
epoch: 91 test_true_pfm: 41.248280384083245 sim_pfm: 643.8244181226504
episode: 364 training return: 508.73082203266955
episode: 365 training return: 513.4024015781586
episode: 366 training return: 508.4018118496185
episode: 367 training return: 522.7005143400447
epoch: 92 test_true_pfm: 38.73719204364615 sim_pfm: 644.6943705078169
episode: 368 training return: 488.29048555751376
episode: 369 training return: 498.0052103076969
episode: 370 training return: 521.0929209230457
episode: 371 training return: 505.77030302019045
epoch: 93 test_true_pfm: 39.62891123471621 sim_pfm: 651.9291571377129
episode: 372 training return: 488.5686492350093
episode: 373 training return: 505.5436530259738
episode: 374 training return: 524.2452224343617
episode: 375 training return: 497.4298625584519
epoch: 94 test_true_pfm: 44.27004902254817 sim_pfm: 629.7400901901757
episode: 376 training return: 505.38123344283036
episode: 377 training return: 500.19891590555096
episode: 378 training return: 516.6624549718783
episode: 379 training return: 510.4736419637489
epoch: 95 test_true_pfm: 44.81329406389144 sim_pfm: 645.0814816900098
episode: 380 training return: 507.8821952283878
episode: 381 training return: 493.89962180454665
episode: 382 training return: 486.01296133003774
episode: 383 training return: 527.6222893851328
epoch: 96 test_true_pfm: 44.00215064005558 sim_pfm: 625.9938840854153
episode: 384 training return: 515.7577388422471
episode: 385 training return: 516.0450213825623
episode: 386 training return: 507.45380223165944
episode: 387 training return: 520.1593153989181
epoch: 97 test_true_pfm: 46.8051691558106 sim_pfm: 635.1707414235258
episode: 388 training return: 482.8440327568939
episode: 389 training return: 515.1174752991078
episode: 390 training return: 502.66206864793224
episode: 391 training return: 506.43032517059413
epoch: 98 test_true_pfm: 37.25093824565072 sim_pfm: 625.0820233495125
episode: 392 training return: 497.80835741069836
episode: 393 training return: 508.3655748066515
episode: 394 training return: 479.5827120518395
episode: 395 training return: 496.4767310151604
epoch: 99 test_true_pfm: 42.1632616870581 sim_pfm: 654.619517977476
episode: 396 training return: 510.31089549718024
episode: 397 training return: 509.24770716957937
episode: 398 training return: 517.9020183191351
episode: 399 training return: 520.3791724581531
epoch: 100 test_true_pfm: 43.856694374558714 sim_pfm: 657.1448185906024
episode: 400 training return: 502.0658915635751
episode: 401 training return: 469.25648200011466
episode: 402 training return: 506.42327690717474
episode: 403 training return: 509.7663532872134
epoch: 101 test_true_pfm: 39.89895553358358 sim_pfm: 628.9538242530992
episode: 404 training return: 510.9568082273014
episode: 405 training return: 493.35939778083616
episode: 406 training return: 509.0487160264148
episode: 407 training return: 503.14583171522065
epoch: 102 test_true_pfm: 41.010517145660685 sim_pfm: 656.764744739913
episode: 408 training return: 516.9633697575407
episode: 409 training return: 495.3205288557728
episode: 410 training return: 512.8557057154484
episode: 411 training return: 517.2792532656574
epoch: 103 test_true_pfm: 46.28996952974816 sim_pfm: 639.1358008207116
episode: 412 training return: 510.4113142507368
episode: 413 training return: 510.8062808690943
episode: 414 training return: 518.515836881826
episode: 415 training return: 504.7560687669353
epoch: 104 test_true_pfm: 46.00764878729342 sim_pfm: 660.4400851276274
episode: 416 training return: 504.5255772416325
episode: 417 training return: 526.3353414010527
episode: 418 training return: 493.211398759695
episode: 419 training return: 521.6959171515732
epoch: 105 test_true_pfm: 38.86415615444475 sim_pfm: 640.927567243169
episode: 420 training return: 513.0844099082984
episode: 421 training return: 517.9183642888186
episode: 422 training return: 528.0311371967413
episode: 423 training return: 506.5907708355135
epoch: 106 test_true_pfm: 35.42167704110761 sim_pfm: 608.1804690349633
episode: 424 training return: 496.3954677869356
episode: 425 training return: 520.1607007627007
episode: 426 training return: 509.72386526258083
episode: 427 training return: 518.5924247868705
epoch: 107 test_true_pfm: 43.32031874762381 sim_pfm: 655.6782315923738
episode: 428 training return: 497.77749348081596
episode: 429 training return: 501.22397300344846
episode: 430 training return: 504.41662033282165
episode: 431 training return: 512.1231144447429
epoch: 108 test_true_pfm: 40.21650231259603 sim_pfm: 645.0497034657063
episode: 432 training return: 521.8757200359603
episode: 433 training return: 508.3252845354689
episode: 434 training return: 514.63557277523
episode: 435 training return: 524.729108616747
epoch: 109 test_true_pfm: 42.543233776621356 sim_pfm: 634.7525150879445
episode: 436 training return: 498.24935136133377
episode: 437 training return: 512.324335399773
episode: 438 training return: 519.8085315216048
episode: 439 training return: 502.31377235375766
epoch: 110 test_true_pfm: 43.48208547071004 sim_pfm: 633.3368695528357
episode: 440 training return: 527.8135934656591
episode: 441 training return: 492.3046659985141
episode: 442 training return: 488.093760472024
episode: 443 training return: 515.8102646193701
epoch: 111 test_true_pfm: 43.77833793350461 sim_pfm: 656.9798817375626
episode: 444 training return: 500.2210897846695
episode: 445 training return: 526.6490678956679
episode: 446 training return: 500.78527001822084
episode: 447 training return: 503.2046159867725
epoch: 112 test_true_pfm: 38.46945363483428 sim_pfm: 644.310340532306
episode: 448 training return: 503.5455330924117
episode: 449 training return: 509.7795427601593
episode: 450 training return: 528.1381714248453
episode: 451 training return: 497.2958227485222
epoch: 113 test_true_pfm: 47.102507114861616 sim_pfm: 635.1442915955114
episode: 452 training return: 512.9126974832684
episode: 453 training return: 504.2913448145937
episode: 454 training return: 530.8908687576402
episode: 455 training return: 515.1653781531144
epoch: 114 test_true_pfm: 39.67495688302476 sim_pfm: 651.9851946161663
episode: 456 training return: 510.99477420804817
episode: 457 training return: 511.5297362995092
episode: 458 training return: 533.0782532857745
episode: 459 training return: 503.2426021334306
epoch: 115 test_true_pfm: 44.27965484818319 sim_pfm: 649.491171107674
episode: 460 training return: 506.88000555303205
episode: 461 training return: 490.71965459964974
episode: 462 training return: 506.38590692559137
episode: 463 training return: 506.5962102388603
epoch: 116 test_true_pfm: 43.04364489985252 sim_pfm: 638.1235291670617
episode: 464 training return: 516.116901763671
episode: 465 training return: 522.7268952224612
episode: 466 training return: 523.0565273353017
episode: 467 training return: 516.2396463188301
epoch: 117 test_true_pfm: 39.13566118169789 sim_pfm: 639.363139910359
episode: 468 training return: 510.48839679974844
episode: 469 training return: 510.7433466150415
episode: 470 training return: 494.5916895733736
episode: 471 training return: 522.5137920686045
epoch: 118 test_true_pfm: 41.47789967432601 sim_pfm: 627.089411750827
episode: 472 training return: 503.73088954001577
episode: 473 training return: 512.6202633693352
episode: 474 training return: 506.3776549153521
episode: 475 training return: 497.8868236676976
epoch: 119 test_true_pfm: 38.21822333769741 sim_pfm: 632.2626108489029
episode: 476 training return: 505.9236582129543
episode: 477 training return: 514.1074636313292
episode: 478 training return: 487.8216658830848
episode: 479 training return: 516.1192756271809
epoch: 120 test_true_pfm: 40.05677645948936 sim_pfm: 655.1597036786208
episode: 480 training return: 511.03894866444125
episode: 481 training return: 507.03495377871866
episode: 482 training return: 494.167565167187
episode: 483 training return: 522.0840029416242
epoch: 121 test_true_pfm: 34.564040938161526 sim_pfm: 619.2239033129722
episode: 484 training return: 508.19498366027756
episode: 485 training return: 517.0685611736194
episode: 486 training return: 500.56304181685374
episode: 487 training return: 524.0810589955216
epoch: 122 test_true_pfm: 36.97605873854503 sim_pfm: 643.6533776694781
episode: 488 training return: 510.90745961019627
episode: 489 training return: 526.1016196260565
episode: 490 training return: 514.0754520624453
episode: 491 training return: 495.4730961628201
epoch: 123 test_true_pfm: 46.304011607184464 sim_pfm: 592.6935240931298
episode: 492 training return: 487.9295623727015
episode: 493 training return: 507.72757439495615
episode: 494 training return: 532.6415601828103
episode: 495 training return: 500.3419281561797
epoch: 124 test_true_pfm: 41.61177746101382 sim_pfm: 618.9715524692048
episode: 496 training return: 530.3070517792264
episode: 497 training return: 496.866640365747
episode: 498 training return: 476.87390039651723
episode: 499 training return: 519.2129389186648
epoch: 125 test_true_pfm: 43.783275786586444 sim_pfm: 604.8348108022047
episode: 500 training return: 502.4737482024415
episode: 501 training return: 494.99288551415725
episode: 502 training return: 505.59799919106854
episode: 503 training return: 515.797474667141
epoch: 126 test_true_pfm: 38.39617586035211 sim_pfm: 640.2712135459
episode: 504 training return: 499.07982965653383
episode: 505 training return: 505.8482525415652
episode: 506 training return: 503.4843637998632
episode: 507 training return: 517.8530988663309
epoch: 127 test_true_pfm: 35.376548249623404 sim_pfm: 643.0484197013667
episode: 508 training return: 524.9879393009845
episode: 509 training return: 505.162751846252
episode: 510 training return: 510.2956463209753
episode: 511 training return: 508.64524821375767
epoch: 128 test_true_pfm: 45.149981594360696 sim_pfm: 635.9827053307749
episode: 512 training return: 513.4826255381919
episode: 513 training return: 518.4182410769923
episode: 514 training return: 501.2269576538747
episode: 515 training return: 511.35689224836176
epoch: 129 test_true_pfm: 44.985637939502006 sim_pfm: 658.479925335792
episode: 516 training return: 517.8559369691226
episode: 517 training return: 509.64913724876226
episode: 518 training return: 523.6735489728975
episode: 519 training return: 519.1044820183222
epoch: 130 test_true_pfm: 43.51311906693737 sim_pfm: 657.0803658450095
episode: 520 training return: 527.2464228730705
episode: 521 training return: 512.5446754241284
episode: 522 training return: 524.6830392924634
episode: 523 training return: 503.69148017485844
epoch: 131 test_true_pfm: 38.31153645179953 sim_pfm: 615.8461187615059
episode: 524 training return: 521.5888587107694
episode: 525 training return: 509.34635832859453
episode: 526 training return: 527.136798135308
episode: 527 training return: 504.30181762148214
epoch: 132 test_true_pfm: 45.963065066814295 sim_pfm: 640.0498554986879
episode: 528 training return: 520.7805057013276
episode: 529 training return: 482.278979916529
episode: 530 training return: 527.9419155813343
episode: 531 training return: 533.4141084843502
epoch: 133 test_true_pfm: 40.355627890256464 sim_pfm: 647.5623328265549
episode: 532 training return: 503.48578825724405
episode: 533 training return: 501.6607916262361
episode: 534 training return: 506.6617395836574
episode: 535 training return: 498.7433119754697
epoch: 134 test_true_pfm: 40.12551271723979 sim_pfm: 639.0547981759624
episode: 536 training return: 516.691334345157
episode: 537 training return: 510.193517627917
episode: 538 training return: 495.6557070800519
episode: 539 training return: 504.95212066848245
epoch: 135 test_true_pfm: 42.5160721928083 sim_pfm: 653.0683481691314
episode: 540 training return: 520.1099349471983
episode: 541 training return: 531.3603348615587
episode: 542 training return: 500.387688365636
episode: 543 training return: 493.7658729924805
epoch: 136 test_true_pfm: 41.00592784742276 sim_pfm: 650.1737813897535
episode: 544 training return: 519.9702273021422
episode: 545 training return: 520.3735964636622
episode: 546 training return: 518.3215698696475
episode: 547 training return: 517.3489779681502
epoch: 137 test_true_pfm: 45.43813841357029 sim_pfm: 659.9413674188962
episode: 548 training return: 527.5094487806919
episode: 549 training return: 491.94880824244126
episode: 550 training return: 511.70425024987765
episode: 551 training return: 523.8570361430086
epoch: 138 test_true_pfm: 41.32298498080922 sim_pfm: 659.157886363034
episode: 552 training return: 496.5880683908728
episode: 553 training return: 517.5626522258658
episode: 554 training return: 495.62638641535
episode: 555 training return: 494.5636481204062
epoch: 139 test_true_pfm: 44.17580372611221 sim_pfm: 657.2887460369898
episode: 556 training return: 512.062464306045
episode: 557 training return: 510.12641477454576
episode: 558 training return: 521.0433477814347
episode: 559 training return: 521.50297540417
epoch: 140 test_true_pfm: 40.64820705339955 sim_pfm: 640.292701745897
episode: 560 training return: 505.5224156492734
episode: 561 training return: 531.873295776118
episode: 562 training return: 507.5185827967636
episode: 563 training return: 507.77208813855566
epoch: 141 test_true_pfm: 44.01422350110931 sim_pfm: 658.3897532263153
episode: 564 training return: 514.43077135841
episode: 565 training return: 516.5203795228607
episode: 566 training return: 505.76849122849666
episode: 567 training return: 522.529685069376
epoch: 142 test_true_pfm: 37.722327954008755 sim_pfm: 666.4898566887572
episode: 568 training return: 525.4681285786788
episode: 569 training return: 524.0844534648311
episode: 570 training return: 499.53489277884586
episode: 571 training return: 487.0580524287249
epoch: 143 test_true_pfm: 44.501391915649755 sim_pfm: 666.4697157221436
episode: 572 training return: 501.34230783144324
episode: 573 training return: 485.13426084382485
episode: 574 training return: 516.7513566797138
episode: 575 training return: 528.5941699230689
epoch: 144 test_true_pfm: 37.154328313330645 sim_pfm: 662.3628190815682
episode: 576 training return: 515.1014147871607
episode: 577 training return: 510.0878731832872
episode: 578 training return: 514.2106049686884
episode: 579 training return: 494.50299823245774
epoch: 145 test_true_pfm: 42.638516889788995 sim_pfm: 654.8591864203084
episode: 580 training return: 522.9916433274709
episode: 581 training return: 505.7630021209546
episode: 582 training return: 514.2159949833265
episode: 583 training return: 526.5391581318431
epoch: 146 test_true_pfm: 43.1815115134803 sim_pfm: 637.2181294537314
episode: 584 training return: 495.6102186127767
episode: 585 training return: 520.819581750868
episode: 586 training return: 513.5319385542311
episode: 587 training return: 501.6829953274954
epoch: 147 test_true_pfm: 36.12929111492044 sim_pfm: 641.6007100335815
episode: 588 training return: 515.1936092558293
episode: 589 training return: 513.5654915050898
episode: 590 training return: 502.13240867656606
episode: 591 training return: 500.5347191436422
epoch: 148 test_true_pfm: 44.29713254573257 sim_pfm: 633.6389755176116
episode: 592 training return: 506.9996122791239
episode: 593 training return: 517.3563064117425
episode: 594 training return: 502.44053645393586
episode: 595 training return: 510.83749891000434
epoch: 149 test_true_pfm: 39.55743011731895 sim_pfm: 662.8269020974359
episode: 596 training return: 499.38439819811543
episode: 597 training return: 522.5285557597311
episode: 598 training return: 511.9427198633991
episode: 599 training return: 517.6392749732903
epoch: 150 test_true_pfm: 39.68654041781872 sim_pfm: 654.4956650034507
