['--alg', 'sac', '--env', 'Swimmer-v3', '--learn', 'uncertainty', '--traj', 'medium', '--seed', '1', '--data', '100000']
epoch: 0 training_loss 0.4274217699468136 test_loss: 0.3893416166305542
epoch: 1 training_loss 0.2951147738099098 test_loss: 0.2621182441711426
epoch: 2 training_loss 0.2647391484677792 test_loss: 0.2627967596054077
epoch: 3 training_loss 0.245932487398386 test_loss: 0.2277966022491455
epoch: 4 training_loss 0.23736724980175494 test_loss: 0.24297525882720947
epoch: 5 training_loss 0.23655084669589996 test_loss: 0.2155216693878174
epoch: 6 training_loss 0.22157614111900328 test_loss: 0.2280376672744751
epoch: 7 training_loss 0.2215758600085974 test_loss: 0.24619677066802978
epoch: 8 training_loss 0.20995627462863922 test_loss: 0.2095653772354126
epoch: 9 training_loss 0.21991902247071266 test_loss: 0.21774017810821533
epoch: 10 training_loss 0.2149858896434307 test_loss: 0.2161548137664795
epoch: 11 training_loss 0.21727401822805403 test_loss: 0.2198241949081421
epoch: 12 training_loss 0.20597697496414186 test_loss: 0.2010009765625
epoch: 13 training_loss 0.22285959497094154 test_loss: 0.21358423233032225
epoch: 14 training_loss 0.20931502245366573 test_loss: 0.21889665126800537
epoch: 15 training_loss 0.21792662307620048 test_loss: 0.22252814769744872
epoch: 16 training_loss 0.2149955704063177 test_loss: 0.20658693313598633
epoch: 17 training_loss 0.1993350798636675 test_loss: 0.21036829948425292
epoch: 18 training_loss 0.2061761100590229 test_loss: 0.20739221572875977
epoch: 19 training_loss 0.2035453952103853 test_loss: 0.21942288875579835
epoch: 20 training_loss 0.2044629604369402 test_loss: 0.1965256452560425
epoch: 21 training_loss 0.20048329085111619 test_loss: 0.2111201763153076
epoch: 22 training_loss 0.19825691677629947 test_loss: 0.2165670871734619
epoch: 23 training_loss 0.19705623887479307 test_loss: 0.20713417530059813
epoch: 24 training_loss 0.20317767083644866 test_loss: 0.19810713529586793
epoch: 25 training_loss 0.21134225964546205 test_loss: 0.1883295178413391
epoch: 26 training_loss 0.20409294888377189 test_loss: 0.19892194271087646
epoch: 27 training_loss 0.21339757069945336 test_loss: 0.20213329792022705
epoch: 28 training_loss 0.19922434031963349 test_loss: 0.22127923965454102
epoch: 29 training_loss 0.1902686069905758 test_loss: 0.20068683624267578
epoch: 30 training_loss 0.19130836464464665 test_loss: 0.20430343151092528
epoch: 31 training_loss 0.1931911140680313 test_loss: 0.19395524263381958
epoch: 32 training_loss 0.20550259128212928 test_loss: 0.2006976842880249
epoch: 33 training_loss 0.19003984674811364 test_loss: 0.20492346286773683
epoch: 34 training_loss 0.19245239660143854 test_loss: 0.20611052513122557
epoch: 35 training_loss 0.1931975217163563 test_loss: 0.1908373713493347
epoch: 36 training_loss 0.19556487500667571 test_loss: 0.20990021228790284
epoch: 37 training_loss 0.1929386468231678 test_loss: 0.19865976572036742
epoch: 38 training_loss 0.20337186105549335 test_loss: 0.20464797019958497
epoch: 39 training_loss 0.1912837442755699 test_loss: 0.20167937278747558
epoch: 40 training_loss 0.19068396985530853 test_loss: 0.18845574855804442
epoch: 41 training_loss 0.19272630758583545 test_loss: 0.2000345230102539
epoch: 42 training_loss 0.19451519057154656 test_loss: 0.19107111692428588
epoch: 43 training_loss 0.19110791198909283 test_loss: 0.19946033954620362
epoch: 44 training_loss 0.19109130695462226 test_loss: 0.19881365299224854
epoch: 45 training_loss 0.1944386574625969 test_loss: 0.2066720247268677
epoch: 46 training_loss 0.19061691738665104 test_loss: 0.21463420391082763
epoch: 47 training_loss 0.18230563402175903 test_loss: 0.2309025287628174
epoch: 48 training_loss 0.192842895463109 test_loss: 0.1881527066230774
epoch: 49 training_loss 0.19912755399942397 test_loss: 0.18843963146209716
epoch: 50 training_loss 0.18574998393654824 test_loss: 0.1914048671722412
epoch: 51 training_loss 0.1879755412787199 test_loss: 0.19781433343887328
epoch: 52 training_loss 0.18418162368237972 test_loss: 0.1948479652404785
epoch: 53 training_loss 0.18473845466971398 test_loss: 0.1959211826324463
epoch: 54 training_loss 0.18793889105319977 test_loss: 0.17951338291168212
epoch: 55 training_loss 0.1883402693271637 test_loss: 0.20003437995910645
epoch: 56 training_loss 0.17523518681526185 test_loss: 0.19495916366577148
epoch: 57 training_loss 0.19251875512301922 test_loss: 0.18618993759155272
epoch: 58 training_loss 0.18720699541270733 test_loss: 0.21492538452148438
epoch: 59 training_loss 0.1894781182706356 test_loss: 0.20459771156311035
epoch: 60 training_loss 0.1827427300065756 test_loss: 0.18599956035614013
epoch: 61 training_loss 0.18665518887341023 test_loss: 0.1916848063468933
epoch: 62 training_loss 0.18303289975970982 test_loss: 0.20441901683807373
epoch: 63 training_loss 0.18717312380671502 test_loss: 0.19381674528121948
epoch: 64 training_loss 0.187394467741251 test_loss: 0.18454889059066773
epoch: 65 training_loss 0.19037660837173462 test_loss: 0.19077563285827637
epoch: 66 training_loss 0.18565418265759945 test_loss: 0.18428847789764405
epoch: 67 training_loss 0.18154519647359849 test_loss: 0.1970962405204773
epoch: 68 training_loss 0.19719080239534378 test_loss: 0.1876633048057556
epoch: 69 training_loss 0.17999037668108941 test_loss: 0.17579976320266724
epoch: 70 training_loss 0.18497068911790848 test_loss: 0.17370517253875734
epoch: 71 training_loss 0.18609255768358707 test_loss: 0.18605391979217528
epoch: 72 training_loss 0.190237892344594 test_loss: 0.18705631494522096
epoch: 73 training_loss 0.1886301926523447 test_loss: 0.18212426900863649
epoch: 74 training_loss 0.1845522103458643 test_loss: 0.19266099929809571
epoch: 75 training_loss 0.18741071201860904 test_loss: 0.19071003198623657
epoch: 76 training_loss 0.18465623527765274 test_loss: 0.20258798599243164
epoch: 77 training_loss 0.17872350268065929 test_loss: 0.17708746194839478
epoch: 78 training_loss 0.18704957969486713 test_loss: 0.20002985000610352
epoch: 79 training_loss 0.18885932132601738 test_loss: 0.20536537170410157
epoch: 80 training_loss 0.1876836735010147 test_loss: 0.17701820135116578
epoch: 81 training_loss 0.1799701876938343 test_loss: 0.1920868992805481
epoch: 82 training_loss 0.17805750004947185 test_loss: 0.1925025224685669
epoch: 83 training_loss 0.18525421991944313 test_loss: 0.20421290397644043
epoch: 84 training_loss 0.18202352099120617 test_loss: 0.19169024229049683
epoch: 85 training_loss 0.18617992423474788 test_loss: 0.18780378103256226
epoch: 86 training_loss 0.18341035686433316 test_loss: 0.17852699756622314
epoch: 87 training_loss 0.18444457568228245 test_loss: 0.19116415977478027
epoch: 88 training_loss 0.1828321521729231 test_loss: 0.17356706857681276
epoch: 89 training_loss 0.18167961008846759 test_loss: 0.19392359256744385
epoch: 90 training_loss 0.19011719800531865 test_loss: 0.19930837154388428
epoch: 91 training_loss 0.1870231368392706 test_loss: 0.1861151337623596
epoch: 92 training_loss 0.1874293687194586 test_loss: 0.19228659868240355
epoch: 93 training_loss 0.17340874657034874 test_loss: 0.19732062816619872
epoch: 94 training_loss 0.17837052293121813 test_loss: 0.18268976211547852
epoch: 95 training_loss 0.18301251024007797 test_loss: 0.1924544930458069
epoch: 96 training_loss 0.17509282469749451 test_loss: 0.1921502709388733
epoch: 97 training_loss 0.17969601221382617 test_loss: 0.18815133571624756
epoch: 98 training_loss 0.1774652487784624 test_loss: 0.21641802787780762
epoch: 99 training_loss 0.176559878885746 test_loss: 0.19794199466705323
epoch: 100 training_loss 0.1774235662072897 test_loss: 0.20238468647003174
epoch: 101 training_loss 0.17917858436703682 test_loss: 0.19113726615905763
epoch: 102 training_loss 0.1850136411935091 test_loss: 0.18968396186828612
epoch: 103 training_loss 0.18488301627337933 test_loss: 0.1882575273513794
epoch: 104 training_loss 0.1729387242347002 test_loss: 0.19124743938446045
epoch: 105 training_loss 0.17905695214867592 test_loss: 0.19349656105041504
epoch: 106 training_loss 0.17739681221544742 test_loss: 0.195063316822052
epoch: 107 training_loss 0.17276619434356688 test_loss: 0.18213621377944947
epoch: 108 training_loss 0.17684310480952262 test_loss: 0.19506170749664306
epoch: 109 training_loss 0.17814309969544412 test_loss: 0.17596930265426636
epoch: 110 training_loss 0.18812393069267272 test_loss: 0.18159495592117308
epoch: 111 training_loss 0.1768140208721161 test_loss: 0.17463849782943724
epoch: 112 training_loss 0.1796923430263996 test_loss: 0.1952928900718689
epoch: 113 training_loss 0.1825415963679552 test_loss: 0.18575098514556884
epoch: 114 training_loss 0.17295700497925282 test_loss: 0.20113639831542968
epoch: 115 training_loss 0.1810740517824888 test_loss: 0.19557081460952758
epoch: 116 training_loss 0.1828089215606451 test_loss: 0.18942271471023558
epoch: 117 training_loss 0.1832917420566082 test_loss: 0.192361581325531
epoch: 118 training_loss 0.17942067831754685 test_loss: 0.20120978355407715
epoch: 119 training_loss 0.17710627742111684 test_loss: 0.1776193380355835
epoch: 120 training_loss 0.1780579187721014 test_loss: 0.1805514693260193
epoch: 121 training_loss 0.1762939950823784 test_loss: 0.1790143609046936
epoch: 122 training_loss 0.18383293323218822 test_loss: 0.2010806083679199
epoch: 123 training_loss 0.17787524335086347 test_loss: 0.18487406969070436
epoch: 124 training_loss 0.18331524915993214 test_loss: 0.19295451641082764
epoch: 125 training_loss 0.175666059628129 test_loss: 0.18911343812942505
epoch: 126 training_loss 0.17769208997488023 test_loss: 0.22085554599761964
epoch: 127 training_loss 0.1849065924435854 test_loss: 0.19745705127716065
epoch: 128 training_loss 0.18393936134874822 test_loss: 0.19617348909378052
epoch: 129 training_loss 0.18057444281876087 test_loss: 0.1991652250289917
epoch: 130 training_loss 0.16655444607138634 test_loss: 0.18742771148681642
epoch: 131 training_loss 0.16417828507721424 test_loss: 0.1875491738319397
epoch: 132 training_loss 0.17491423435509204 test_loss: 0.18777027130126953
epoch: 133 training_loss 0.18519516177475454 test_loss: 0.23196132183074952
epoch: 134 training_loss 0.17658775821328163 test_loss: 0.17323508262634277
epoch: 135 training_loss 0.17552609950304032 test_loss: 0.18941805362701417
epoch: 136 training_loss 0.17985085934400558 test_loss: 0.1725008249282837
epoch: 137 training_loss 0.17865184344351293 test_loss: 0.17763384580612182
epoch: 138 training_loss 0.17217322424054146 test_loss: 0.19465402364730836
epoch: 139 training_loss 0.18019406974315644 test_loss: 0.18712786436080933
epoch: 140 training_loss 0.17453319795429706 test_loss: 0.1817731738090515
epoch: 141 training_loss 0.17614553719758988 test_loss: 0.18600125312805177
epoch: 142 training_loss 0.17603589832782746 test_loss: 0.17893513441085815
epoch: 143 training_loss 0.1740850415825844 test_loss: 0.18639057874679565
epoch: 144 training_loss 0.17553254179656505 test_loss: 0.19740688800811768
epoch: 145 training_loss 0.17619507662951947 test_loss: 0.1851195812225342
epoch: 146 training_loss 0.1741125548630953 test_loss: 0.1730779767036438
epoch: 147 training_loss 0.1767864828556776 test_loss: 0.18476732969284057
epoch: 148 training_loss 0.17772123575210572 test_loss: 0.183288311958313
epoch: 149 training_loss 0.17252280361950398 test_loss: 0.20373315811157228
epoch: 0 training_loss 0.42676897034049033 test_loss: 0.29958224296569824
epoch: 1 training_loss 0.26628282204270365 test_loss: 0.2682598829269409
epoch: 2 training_loss 0.2620331312716007 test_loss: 0.23358392715454102
epoch: 3 training_loss 0.24906741246581077 test_loss: 0.2390745162963867
epoch: 4 training_loss 0.24331736572086812 test_loss: 0.2285205602645874
epoch: 5 training_loss 0.22262871846556664 test_loss: 0.23168544769287108
epoch: 6 training_loss 0.22422666065394878 test_loss: 0.23726074695587157
epoch: 7 training_loss 0.22951166704297066 test_loss: 0.21995234489440918
epoch: 8 training_loss 0.22657015353441237 test_loss: 0.21578328609466552
epoch: 9 training_loss 0.21426488369703292 test_loss: 0.22636241912841798
epoch: 10 training_loss 0.21623220570385457 test_loss: 0.2191462755203247
epoch: 11 training_loss 0.21970734879374504 test_loss: 0.22132697105407714
epoch: 12 training_loss 0.21467568971216677 test_loss: 0.22244751453399658
epoch: 13 training_loss 0.21954295307397842 test_loss: 0.20719568729400634
epoch: 14 training_loss 0.2048801187425852 test_loss: 0.19713430404663085
epoch: 15 training_loss 0.2116537693142891 test_loss: 0.20478253364562987
epoch: 16 training_loss 0.2175480554997921 test_loss: 0.20887882709503175
epoch: 17 training_loss 0.2132267501950264 test_loss: 0.21308567523956298
epoch: 18 training_loss 0.2146539030969143 test_loss: 0.20957319736480712
epoch: 19 training_loss 0.2128171645849943 test_loss: 0.21499669551849365
epoch: 20 training_loss 0.20866333827376365 test_loss: 0.19898656606674195
epoch: 21 training_loss 0.2179846529662609 test_loss: 0.23522274494171141
epoch: 22 training_loss 0.21080535002052783 test_loss: 0.21247358322143556
epoch: 23 training_loss 0.21736893132328988 test_loss: 0.19697146415710448
epoch: 24 training_loss 0.2062472192943096 test_loss: 0.19583020210266114
epoch: 25 training_loss 0.21458505123853683 test_loss: 0.20802366733551025
epoch: 26 training_loss 0.19947606332600118 test_loss: 0.20871641635894775
epoch: 27 training_loss 0.19800543844699858 test_loss: 0.18098893165588378
epoch: 28 training_loss 0.20393979020416736 test_loss: 0.21133899688720703
epoch: 29 training_loss 0.20834852173924445 test_loss: 0.19656978845596312
epoch: 30 training_loss 0.20452453650534153 test_loss: 0.20964643955230713
epoch: 31 training_loss 0.21556461326777934 test_loss: 0.1932855248451233
epoch: 32 training_loss 0.20431478217244148 test_loss: 0.19438910484313965
epoch: 33 training_loss 0.20103873535990716 test_loss: 0.2083456039428711
epoch: 34 training_loss 0.19967837400734426 test_loss: 0.20565004348754884
epoch: 35 training_loss 0.20337060354650022 test_loss: 0.19469215869903564
epoch: 36 training_loss 0.20329864911735057 test_loss: 0.20534696578979492
epoch: 37 training_loss 0.20284348398447036 test_loss: 0.19257373809814454
epoch: 38 training_loss 0.2027064311504364 test_loss: 0.19185831546783447
epoch: 39 training_loss 0.19642907932400702 test_loss: 0.20437781810760497
epoch: 40 training_loss 0.1977833753079176 test_loss: 0.1852855920791626
epoch: 41 training_loss 0.19794518426060675 test_loss: 0.212933349609375
epoch: 42 training_loss 0.19343712478876113 test_loss: 0.19243072271347045
epoch: 43 training_loss 0.18967010490596295 test_loss: 0.2008991241455078
epoch: 44 training_loss 0.19905835516750814 test_loss: 0.19479715824127197
epoch: 45 training_loss 0.19372812181711196 test_loss: 0.19803227186203004
epoch: 46 training_loss 0.18852646671235562 test_loss: 0.18554484844207764
epoch: 47 training_loss 0.19057664155960083 test_loss: 0.1812658429145813
epoch: 48 training_loss 0.19480570837855338 test_loss: 0.1832824468612671
epoch: 49 training_loss 0.1979601151496172 test_loss: 0.1940865159034729
epoch: 50 training_loss 0.19799410611391066 test_loss: 0.20287001132965088
epoch: 51 training_loss 0.18923684507608413 test_loss: 0.18925132751464843
epoch: 52 training_loss 0.18791753619909288 test_loss: 0.25316786766052246
epoch: 53 training_loss 0.1934860994666815 test_loss: 0.18426599502563476
epoch: 54 training_loss 0.19891469180583954 test_loss: 0.17189472913742065
epoch: 55 training_loss 0.19178957395255566 test_loss: 0.198538076877594
epoch: 56 training_loss 0.1894788008928299 test_loss: 0.18048563003540039
epoch: 57 training_loss 0.1925150191783905 test_loss: 0.18454965353012084
epoch: 58 training_loss 0.18756354793906213 test_loss: 0.1855444073677063
epoch: 59 training_loss 0.1849768042564392 test_loss: 0.19748414754867555
epoch: 60 training_loss 0.1858428655564785 test_loss: 0.20243995189666747
epoch: 61 training_loss 0.19360613845288754 test_loss: 0.1930406928062439
epoch: 62 training_loss 0.192406138330698 test_loss: 0.19104191064834594
epoch: 63 training_loss 0.19389293394982815 test_loss: 0.17993538379669188
epoch: 64 training_loss 0.18910145439207554 test_loss: 0.18974493741989135
epoch: 65 training_loss 0.19067050941288471 test_loss: 0.18574302196502684
epoch: 66 training_loss 0.2011837251484394 test_loss: 0.19798502922058106
epoch: 67 training_loss 0.18798418961465357 test_loss: 0.20305659770965576
epoch: 68 training_loss 0.18885238692164422 test_loss: 0.187706458568573
epoch: 69 training_loss 0.18998669363558293 test_loss: 0.17843263149261473
epoch: 70 training_loss 0.18736854441463946 test_loss: 0.20635614395141602
epoch: 71 training_loss 0.18716148480772973 test_loss: 0.17756106853485107
epoch: 72 training_loss 0.1888004720956087 test_loss: 0.1887723207473755
epoch: 73 training_loss 0.18764447942376136 test_loss: 0.19164495468139647
epoch: 74 training_loss 0.18767392821609974 test_loss: 0.20054707527160645
epoch: 75 training_loss 0.18977218486368655 test_loss: 0.2034456253051758
epoch: 76 training_loss 0.18767232954502105 test_loss: 0.19178431034088134
epoch: 77 training_loss 0.19128239557147025 test_loss: 0.17944167852401732
epoch: 78 training_loss 0.18430542327463628 test_loss: 0.19506027698516845
epoch: 79 training_loss 0.18379529483616353 test_loss: 0.16963794231414794
epoch: 80 training_loss 0.19662642806768418 test_loss: 0.19034942388534545
epoch: 81 training_loss 0.18647480987012385 test_loss: 0.18464524745941163
epoch: 82 training_loss 0.18983005940914155 test_loss: 0.17461888790130614
epoch: 83 training_loss 0.18165123954415321 test_loss: 0.17422115802764893
epoch: 84 training_loss 0.18416392512619495 test_loss: 0.19012596607208251
epoch: 85 training_loss 0.18637616418302058 test_loss: 0.19861427545547486
epoch: 86 training_loss 0.18509507969021796 test_loss: 0.19564051628112794
epoch: 87 training_loss 0.18152182079851628 test_loss: 0.17499613761901855
epoch: 88 training_loss 0.18688756361603737 test_loss: 0.1950196623802185
epoch: 89 training_loss 0.19113618455827236 test_loss: 0.1897553563117981
epoch: 90 training_loss 0.18373215325176717 test_loss: 0.18080801963806153
epoch: 91 training_loss 0.1877825627475977 test_loss: 0.20015978813171387
epoch: 92 training_loss 0.18095110379159451 test_loss: 0.17565065622329712
epoch: 93 training_loss 0.18337126545608043 test_loss: 0.18875570297241212
epoch: 94 training_loss 0.19059231333434581 test_loss: 0.1818259596824646
epoch: 95 training_loss 0.18523857191205026 test_loss: 0.19640389680862427
epoch: 96 training_loss 0.18746411375701427 test_loss: 0.17296326160430908
epoch: 97 training_loss 0.18271048031747342 test_loss: 0.18493040800094604
epoch: 98 training_loss 0.18745955608785153 test_loss: 0.1932835340499878
epoch: 99 training_loss 0.18413427531719206 test_loss: 0.18243404626846313
epoch: 100 training_loss 0.19136039607226848 test_loss: 0.18514397144317626
epoch: 101 training_loss 0.18697948433458805 test_loss: 0.1883861780166626
epoch: 102 training_loss 0.18559654094278813 test_loss: 0.18165000677108764
epoch: 103 training_loss 0.1915774577856064 test_loss: 0.1925272226333618
epoch: 104 training_loss 0.1748462314158678 test_loss: 0.19410579204559325
epoch: 105 training_loss 0.18590086422860622 test_loss: 0.1884804844856262
epoch: 106 training_loss 0.18464977703988553 test_loss: 0.19204074144363403
epoch: 107 training_loss 0.18156644739210606 test_loss: 0.1938799023628235
epoch: 108 training_loss 0.18597096957266332 test_loss: 0.19354406595230103
epoch: 109 training_loss 0.19304817132651805 test_loss: 0.20710535049438478
epoch: 110 training_loss 0.19106934249401092 test_loss: 0.17431628704071045
epoch: 111 training_loss 0.1897360011190176 test_loss: 0.19307752847671508
epoch: 112 training_loss 0.19002530090510844 test_loss: 0.1869913697242737
epoch: 113 training_loss 0.1795767655223608 test_loss: 0.16187719106674195
epoch: 114 training_loss 0.17967679925262928 test_loss: 0.19286253452301025
epoch: 115 training_loss 0.18289331644773482 test_loss: 0.1843276023864746
epoch: 116 training_loss 0.17821403168141842 test_loss: 0.1834326982498169
epoch: 117 training_loss 0.1838054621219635 test_loss: 0.20006179809570312
epoch: 118 training_loss 0.18756881572306155 test_loss: 0.1897115468978882
epoch: 119 training_loss 0.19214270755648613 test_loss: 0.18615204095840454
epoch: 120 training_loss 0.19119903698563576 test_loss: 0.186159884929657
epoch: 121 training_loss 0.18301355987787246 test_loss: 0.1827839970588684
epoch: 122 training_loss 0.18446193926036358 test_loss: 0.17535569667816162
epoch: 123 training_loss 0.17904296092689037 test_loss: 0.18742085695266725
epoch: 124 training_loss 0.18141428895294667 test_loss: 0.1901449203491211
epoch: 125 training_loss 0.17913832761347293 test_loss: 0.19758371114730836
epoch: 126 training_loss 0.17778492748737335 test_loss: 0.1797603964805603
epoch: 127 training_loss 0.1820886192470789 test_loss: 0.17949429750442505
epoch: 128 training_loss 0.1831420088559389 test_loss: 0.18623639345169068
epoch: 129 training_loss 0.17765616938471795 test_loss: 0.17682591676712037
epoch: 130 training_loss 0.1816083438694477 test_loss: 0.19843755960464476
epoch: 131 training_loss 0.1831727809458971 test_loss: 0.19134758710861205
epoch: 132 training_loss 0.1824790195375681 test_loss: 0.15997661352157594
epoch: 133 training_loss 0.18500276930630208 test_loss: 0.18069138526916503
epoch: 134 training_loss 0.1825561013072729 test_loss: 0.1902426838874817
epoch: 135 training_loss 0.17899671338498593 test_loss: 0.19991589784622193
epoch: 136 training_loss 0.17314553402364255 test_loss: 0.18073123693466187
epoch: 137 training_loss 0.17904129259288312 test_loss: 0.18965684175491332
epoch: 138 training_loss 0.18990109145641326 test_loss: 0.1831190586090088
epoch: 139 training_loss 0.18565279982984065 test_loss: 0.19294008016586303
epoch: 140 training_loss 0.17497847020626067 test_loss: 0.1779070019721985
epoch: 141 training_loss 0.18320384338498116 test_loss: 0.17993617057800293
epoch: 142 training_loss 0.18807146906852723 test_loss: 0.23113102912902833
epoch: 143 training_loss 0.18307374335825444 test_loss: 0.1812382936477661
epoch: 144 training_loss 0.18144248217344283 test_loss: 0.17688199281692504
epoch: 145 training_loss 0.18020293593406678 test_loss: 0.18451522588729857
epoch: 146 training_loss 0.17177068263292314 test_loss: 0.1914613962173462
epoch: 147 training_loss 0.17986591443419456 test_loss: 0.19121893644332885
epoch: 148 training_loss 0.18436822675168515 test_loss: 0.18603616952896118
epoch: 149 training_loss 0.18014323011040687 test_loss: 0.17308164834976197
epoch: 0 training_loss 0.4397762015461922 test_loss: 0.3188836336135864
epoch: 1 training_loss 0.28141927540302275 test_loss: 0.2358481168746948
epoch: 2 training_loss 0.26771066546440125 test_loss: 0.22953445911407472
epoch: 3 training_loss 0.24187641873955726 test_loss: 0.24793503284454346
epoch: 4 training_loss 0.24449225544929504 test_loss: 0.2543152332305908
epoch: 5 training_loss 0.2569136102497578 test_loss: 0.263992166519165
epoch: 6 training_loss 0.22715109661221505 test_loss: 0.22695493698120117
epoch: 7 training_loss 0.21097971484065056 test_loss: 0.21738083362579347
epoch: 8 training_loss 0.21848270073533058 test_loss: 0.21408066749572754
epoch: 9 training_loss 0.2251299524307251 test_loss: 0.2150655508041382
epoch: 10 training_loss 0.22079391196370124 test_loss: 0.24467685222625732
epoch: 11 training_loss 0.2228195585310459 test_loss: 0.2249636173248291
epoch: 12 training_loss 0.210823874771595 test_loss: 0.2272470474243164
epoch: 13 training_loss 0.21079867504537106 test_loss: 0.21632864475250244
epoch: 14 training_loss 0.21012462936341764 test_loss: 0.20227503776550293
epoch: 15 training_loss 0.2078106278926134 test_loss: 0.21022143363952636
epoch: 16 training_loss 0.2119077168405056 test_loss: 0.2134394645690918
epoch: 17 training_loss 0.21165042005479337 test_loss: 0.2294245719909668
epoch: 18 training_loss 0.20506610706448555 test_loss: 0.23367562294006347
epoch: 19 training_loss 0.20945536553859712 test_loss: 0.21882243156433107
epoch: 20 training_loss 0.20419409714639186 test_loss: 0.19962576627731324
epoch: 21 training_loss 0.21225294172763826 test_loss: 0.22610180377960204
epoch: 22 training_loss 0.2081179165095091 test_loss: 0.2115173578262329
epoch: 23 training_loss 0.21580821335315703 test_loss: 0.20679752826690673
epoch: 24 training_loss 0.19652017459273338 test_loss: 0.2167651891708374
epoch: 25 training_loss 0.2078451194614172 test_loss: 0.22524232864379884
epoch: 26 training_loss 0.2013362793624401 test_loss: 0.19502192735671997
epoch: 27 training_loss 0.20640164002776146 test_loss: 0.19009393453598022
epoch: 28 training_loss 0.1945658481121063 test_loss: 0.21512868404388427
epoch: 29 training_loss 0.1951349073648453 test_loss: 0.20622363090515136
epoch: 30 training_loss 0.2003193363547325 test_loss: 0.1989719271659851
epoch: 31 training_loss 0.20653058923780918 test_loss: 0.19323068857192993
epoch: 32 training_loss 0.19761841908097266 test_loss: 0.2520778179168701
epoch: 33 training_loss 0.19433846101164817 test_loss: 0.1959740161895752
epoch: 34 training_loss 0.19148553818464278 test_loss: 0.22068519592285157
epoch: 35 training_loss 0.19903573468327523 test_loss: 0.2008798360824585
epoch: 36 training_loss 0.2030697313696146 test_loss: 0.21218581199645997
epoch: 37 training_loss 0.20229101046919823 test_loss: 0.18580254316329955
epoch: 38 training_loss 0.19357797019183637 test_loss: 0.21418895721435546
epoch: 39 training_loss 0.20256098523736 test_loss: 0.2072592258453369
epoch: 40 training_loss 0.19200270250439644 test_loss: 0.2033390522003174
epoch: 41 training_loss 0.19977486021816732 test_loss: 0.1957991600036621
epoch: 42 training_loss 0.19668949648737907 test_loss: 0.18098758459091185
epoch: 43 training_loss 0.18995836570858957 test_loss: 0.21589365005493164
epoch: 44 training_loss 0.18393884368240834 test_loss: 0.22115736007690429
epoch: 45 training_loss 0.19365617357194423 test_loss: 0.1952691435813904
epoch: 46 training_loss 0.19634194865822793 test_loss: 0.2035038948059082
epoch: 47 training_loss 0.19706249877810478 test_loss: 0.20178892612457275
epoch: 48 training_loss 0.1934790989756584 test_loss: 0.18475747108459473
epoch: 49 training_loss 0.19268019899725913 test_loss: 0.2184974431991577
epoch: 50 training_loss 0.202549342289567 test_loss: 0.21748180389404298
epoch: 51 training_loss 0.19871760077774525 test_loss: 0.1978192448616028
epoch: 52 training_loss 0.18991576328873636 test_loss: 0.1923113226890564
epoch: 53 training_loss 0.18539786525070667 test_loss: 0.18816125392913818
epoch: 54 training_loss 0.20020617306232452 test_loss: 0.19706156253814697
epoch: 55 training_loss 0.19252547360956668 test_loss: 0.20358312129974365
epoch: 56 training_loss 0.19352454662323 test_loss: 0.19735506772994996
epoch: 57 training_loss 0.1903686273097992 test_loss: 0.20327575206756593
epoch: 58 training_loss 0.18940014511346817 test_loss: 0.19624358415603638
epoch: 59 training_loss 0.18534079380333424 test_loss: 0.20446555614471434
epoch: 60 training_loss 0.19034407570958137 test_loss: 0.2067314863204956
epoch: 61 training_loss 0.1956140047311783 test_loss: 0.18734269142150878
epoch: 62 training_loss 0.18761469431221486 test_loss: 0.1914017915725708
epoch: 63 training_loss 0.184834725856781 test_loss: 0.19749096632003785
epoch: 64 training_loss 0.184239561855793 test_loss: 0.19800394773483276
epoch: 65 training_loss 0.1908024052530527 test_loss: 0.21804513931274414
epoch: 66 training_loss 0.1888620363175869 test_loss: 0.19423469305038452
epoch: 67 training_loss 0.1890221145004034 test_loss: 0.19876880645751954
epoch: 68 training_loss 0.18883496843278408 test_loss: 0.20251863002777098
epoch: 69 training_loss 0.19465036250650883 test_loss: 0.19867829084396363
epoch: 70 training_loss 0.1907135245203972 test_loss: 0.19636051654815673
epoch: 71 training_loss 0.19232330851256849 test_loss: 0.2100130558013916
epoch: 72 training_loss 0.18288779951632023 test_loss: 0.18864247798919678
epoch: 73 training_loss 0.1909004095941782 test_loss: 0.2081733226776123
epoch: 74 training_loss 0.18709052264690398 test_loss: 0.1910964846611023
epoch: 75 training_loss 0.18839019194245338 test_loss: 0.22632408142089844
epoch: 76 training_loss 0.18895285941660403 test_loss: 0.19943718910217284
epoch: 77 training_loss 0.18541086301207543 test_loss: 0.19216455221176149
epoch: 78 training_loss 0.17767879627645017 test_loss: 0.20083146095275878
epoch: 79 training_loss 0.18276765383780003 test_loss: 0.18843883275985718
epoch: 80 training_loss 0.19254445552825927 test_loss: 0.1816328763961792
epoch: 81 training_loss 0.1908077622205019 test_loss: 0.22311360836029054
epoch: 82 training_loss 0.18662559397518635 test_loss: 0.18911457061767578
epoch: 83 training_loss 0.19375330939888954 test_loss: 0.18485560417175292
epoch: 84 training_loss 0.1856347008794546 test_loss: 0.19550917148590088
epoch: 85 training_loss 0.1817682982981205 test_loss: 0.19407081604003906
epoch: 86 training_loss 0.18106643714010714 test_loss: 0.19626197814941407
epoch: 87 training_loss 0.1819079339504242 test_loss: 0.19987795352935792
epoch: 88 training_loss 0.18549186818301677 test_loss: 0.18645368814468383
epoch: 89 training_loss 0.18692216150462626 test_loss: 0.19989445209503173
epoch: 90 training_loss 0.1836336463689804 test_loss: 0.18183504343032836
epoch: 91 training_loss 0.19406965136528015 test_loss: 0.19293222427368165
epoch: 92 training_loss 0.18203459843993186 test_loss: 0.1949556827545166
epoch: 93 training_loss 0.1838645140081644 test_loss: 0.2009028434753418
epoch: 94 training_loss 0.17638554200530052 test_loss: 0.18902865648269654
epoch: 95 training_loss 0.18111755579710007 test_loss: 0.21319317817687988
epoch: 96 training_loss 0.1806069356203079 test_loss: 0.1886562943458557
epoch: 97 training_loss 0.18357447102665903 test_loss: 0.18341944217681885
epoch: 98 training_loss 0.18710846364498138 test_loss: 0.19972931146621703
epoch: 99 training_loss 0.18269655138254165 test_loss: 0.18384698629379273
epoch: 100 training_loss 0.18682253777980803 test_loss: 0.1819595217704773
epoch: 101 training_loss 0.18127704940736294 test_loss: 0.17923052310943605
epoch: 102 training_loss 0.1726324949413538 test_loss: 0.1820363759994507
epoch: 103 training_loss 0.18517535626888276 test_loss: 0.20770771503448487
epoch: 104 training_loss 0.18641741104424 test_loss: 0.18156219720840455
epoch: 105 training_loss 0.18637672826647758 test_loss: 0.19799619913101196
epoch: 106 training_loss 0.1869699927419424 test_loss: 0.18568847179412842
epoch: 107 training_loss 0.18186107613146305 test_loss: 0.184770667552948
epoch: 108 training_loss 0.1781733513623476 test_loss: 0.16600751876831055
epoch: 109 training_loss 0.180629069134593 test_loss: 0.1739575147628784
epoch: 110 training_loss 0.18559823542833329 test_loss: 0.1936766982078552
epoch: 111 training_loss 0.18109493412077426 test_loss: 0.18119646310806276
epoch: 112 training_loss 0.1781513189524412 test_loss: 0.19945708513259888
epoch: 113 training_loss 0.18381099447607993 test_loss: 0.18709890842437743
epoch: 114 training_loss 0.18310005970299245 test_loss: 0.18141870498657225
epoch: 115 training_loss 0.17850266013294458 test_loss: 0.18263329267501832
epoch: 116 training_loss 0.18462291069328784 test_loss: 0.20131311416625977
epoch: 117 training_loss 0.17970493353903294 test_loss: 0.19255374670028685
epoch: 118 training_loss 0.18187147162854672 test_loss: 0.17643262147903443
epoch: 119 training_loss 0.18348557211458683 test_loss: 0.1737888216972351
epoch: 120 training_loss 0.1805952398478985 test_loss: 0.20793144702911376
epoch: 121 training_loss 0.1870396126061678 test_loss: 0.18226560354232788
epoch: 122 training_loss 0.1776915680617094 test_loss: 0.1826922059059143
epoch: 123 training_loss 0.18049963265657426 test_loss: 0.1799992561340332
epoch: 124 training_loss 0.17352604001760483 test_loss: 0.17572275400161744
epoch: 125 training_loss 0.16681268699467183 test_loss: 0.18138589859008789
epoch: 126 training_loss 0.17919672779738904 test_loss: 0.17964028120040892
epoch: 127 training_loss 0.1760310971736908 test_loss: 0.1839622974395752
epoch: 128 training_loss 0.17856350660324097 test_loss: 0.18228012323379517
epoch: 129 training_loss 0.1813632870465517 test_loss: 0.19401533603668214
epoch: 130 training_loss 0.17849277272820474 test_loss: 0.18984849452972413
epoch: 131 training_loss 0.16810189187526703 test_loss: 0.19508308172225952
epoch: 132 training_loss 0.17829515174031257 test_loss: 0.197567617893219
epoch: 133 training_loss 0.17928674206137657 test_loss: 0.1795273542404175
epoch: 134 training_loss 0.1744944128394127 test_loss: 0.17501455545425415
epoch: 135 training_loss 0.18347931139171123 test_loss: 0.18451199531555176
epoch: 136 training_loss 0.1834733173251152 test_loss: 0.19072930812835692
epoch: 137 training_loss 0.17636569693684578 test_loss: 0.18417402505874633
epoch: 138 training_loss 0.17747078582644463 test_loss: 0.1728774666786194
epoch: 139 training_loss 0.17813380993902683 test_loss: 0.19451477527618408
epoch: 140 training_loss 0.17054686419665813 test_loss: 0.19963773488998413
epoch: 141 training_loss 0.17599317900836467 test_loss: 0.18198784589767455
epoch: 142 training_loss 0.18537658840417862 test_loss: 0.1903875708580017
epoch: 143 training_loss 0.17803555198013782 test_loss: 0.19519826173782348
epoch: 144 training_loss 0.17213291801512243 test_loss: 0.18386564254760743
epoch: 145 training_loss 0.17587872013449668 test_loss: 0.17097088098526
epoch: 146 training_loss 0.177319350913167 test_loss: 0.1827308416366577
epoch: 147 training_loss 0.18150942333042622 test_loss: 0.19295241832733154
epoch: 148 training_loss 0.177881895378232 test_loss: 0.1841692566871643
epoch: 149 training_loss 0.17222057737410068 test_loss: 0.1947952389717102
epoch: 0 training_loss 0.3974740281701088 test_loss: 0.2909038305282593
epoch: 1 training_loss 0.278431563526392 test_loss: 0.25769240856170655
epoch: 2 training_loss 0.25247805640101434 test_loss: 0.24542300701141356
epoch: 3 training_loss 0.2444690902531147 test_loss: 0.2674457788467407
epoch: 4 training_loss 0.24123350247740746 test_loss: 0.2266925096511841
epoch: 5 training_loss 0.22661110624670983 test_loss: 0.2339158296585083
epoch: 6 training_loss 0.22985295653343202 test_loss: 0.22197742462158204
epoch: 7 training_loss 0.22065058425068856 test_loss: 0.19485191106796265
epoch: 8 training_loss 0.22048834100365639 test_loss: 0.23476223945617675
epoch: 9 training_loss 0.2200363366305828 test_loss: 0.2336299180984497
epoch: 10 training_loss 0.21891985163092614 test_loss: 0.2105125904083252
epoch: 11 training_loss 0.226856799274683 test_loss: 0.20499916076660157
epoch: 12 training_loss 0.2203522888571024 test_loss: 0.20677340030670166
epoch: 13 training_loss 0.2174997153878212 test_loss: 0.2020951271057129
epoch: 14 training_loss 0.21376529440283776 test_loss: 0.23450539112091065
epoch: 15 training_loss 0.2134954537451267 test_loss: 0.21850125789642333
epoch: 16 training_loss 0.2038321849703789 test_loss: 0.2103886365890503
epoch: 17 training_loss 0.21353991650044918 test_loss: 0.20842623710632324
epoch: 18 training_loss 0.21246106885373592 test_loss: 0.20807769298553466
epoch: 19 training_loss 0.20824728801846504 test_loss: 0.20695035457611083
epoch: 20 training_loss 0.21081917822360993 test_loss: 0.2021861791610718
epoch: 21 training_loss 0.21041207522153854 test_loss: 0.20836083889007567
epoch: 22 training_loss 0.20228084325790405 test_loss: 0.20275406837463378
epoch: 23 training_loss 0.2191864573955536 test_loss: 0.20187833309173583
epoch: 24 training_loss 0.20620789982378482 test_loss: 0.19373931884765624
epoch: 25 training_loss 0.1996185988932848 test_loss: 0.22928254604339598
epoch: 26 training_loss 0.20631881326436996 test_loss: 0.21168091297149658
epoch: 27 training_loss 0.20989439867436885 test_loss: 0.1879979133605957
epoch: 28 training_loss 0.2027139412611723 test_loss: 0.21185920238494874
epoch: 29 training_loss 0.20592918403446675 test_loss: 0.20977518558502198
epoch: 30 training_loss 0.19964655697345735 test_loss: 0.19561113119125367
epoch: 31 training_loss 0.19418801471590996 test_loss: 0.18779296875
epoch: 32 training_loss 0.20490315355360508 test_loss: 0.19855868816375732
epoch: 33 training_loss 0.2129753991961479 test_loss: 0.1858474850654602
epoch: 34 training_loss 0.20803504265844822 test_loss: 0.23729267120361328
epoch: 35 training_loss 0.21053341120481492 test_loss: 0.21793088912963868
epoch: 36 training_loss 0.20449326813220978 test_loss: 0.1987065315246582
epoch: 37 training_loss 0.19472142972052098 test_loss: 0.18989073038101195
epoch: 38 training_loss 0.19780323587357998 test_loss: 0.22022879123687744
epoch: 39 training_loss 0.19248272374272346 test_loss: 0.19736049175262452
epoch: 40 training_loss 0.20597460322082042 test_loss: 0.20434305667877198
epoch: 41 training_loss 0.19705407619476317 test_loss: 0.19900217056274414
epoch: 42 training_loss 0.2001825124770403 test_loss: 0.19674344062805177
epoch: 43 training_loss 0.2099203445762396 test_loss: 0.1857720732688904
epoch: 44 training_loss 0.19785475298762323 test_loss: 0.20649213790893556
epoch: 45 training_loss 0.2042528200894594 test_loss: 0.1975342035293579
epoch: 46 training_loss 0.19532301396131516 test_loss: 0.18946995735168456
epoch: 47 training_loss 0.19395451083779336 test_loss: 0.19858256578445435
epoch: 48 training_loss 0.20109388142824172 test_loss: 0.18923580646514893
epoch: 49 training_loss 0.19739522114396096 test_loss: 0.1763131856918335
epoch: 50 training_loss 0.19669619277119638 test_loss: 0.20901262760162354
epoch: 51 training_loss 0.19914704106748105 test_loss: 0.18828089237213136
epoch: 52 training_loss 0.19311339624226093 test_loss: 0.197933828830719
epoch: 53 training_loss 0.18718801856040954 test_loss: 0.18520901203155518
epoch: 54 training_loss 0.18831218644976616 test_loss: 0.18488857746124268
epoch: 55 training_loss 0.19280053116381168 test_loss: 0.1919850468635559
epoch: 56 training_loss 0.2034542640298605 test_loss: 0.19649275541305541
epoch: 57 training_loss 0.2051247364282608 test_loss: 0.23500776290893555
epoch: 58 training_loss 0.19699505418539048 test_loss: 0.19292840957641602
epoch: 59 training_loss 0.19292932406067848 test_loss: 0.18458839654922485
epoch: 60 training_loss 0.19109171710908412 test_loss: 0.19039590358734132
epoch: 61 training_loss 0.19409142270684243 test_loss: 0.19275074005126952
epoch: 62 training_loss 0.18744595550000667 test_loss: 0.19645549058914186
epoch: 63 training_loss 0.19061402268707753 test_loss: 0.18999676704406737
epoch: 64 training_loss 0.19196540139615537 test_loss: 0.18731389045715333
epoch: 65 training_loss 0.19509221196174623 test_loss: 0.1841396927833557
epoch: 66 training_loss 0.18911957122385503 test_loss: 0.16982860565185548
epoch: 67 training_loss 0.19457786783576012 test_loss: 0.21295197010040284
epoch: 68 training_loss 0.18706579104065896 test_loss: 0.17523907423019408
epoch: 69 training_loss 0.1895832581073046 test_loss: 0.19789689779281616
epoch: 70 training_loss 0.19815428525209428 test_loss: 0.19374979734420777
epoch: 71 training_loss 0.1982849331945181 test_loss: 0.1923748731613159
epoch: 72 training_loss 0.1866044469922781 test_loss: 0.19358305931091307
epoch: 73 training_loss 0.18905562974512577 test_loss: 0.1930302858352661
epoch: 74 training_loss 0.19359374701976775 test_loss: 0.19433051347732544
epoch: 75 training_loss 0.1983147157728672 test_loss: 0.18442727327346803
epoch: 76 training_loss 0.18744320444762708 test_loss: 0.1797669053077698
epoch: 77 training_loss 0.1884544701129198 test_loss: 0.18473366498947144
epoch: 78 training_loss 0.19099765077233313 test_loss: 0.18880609273910523
epoch: 79 training_loss 0.1900923376530409 test_loss: 0.19313803911209107
epoch: 80 training_loss 0.1878740283846855 test_loss: 0.1979319453239441
epoch: 81 training_loss 0.1826964918524027 test_loss: 0.18419815301895143
epoch: 82 training_loss 0.1946336267888546 test_loss: 0.19140774011611938
epoch: 83 training_loss 0.19732024773955345 test_loss: 0.18374648094177246
epoch: 84 training_loss 0.18532255254685878 test_loss: 0.20035133361816407
epoch: 85 training_loss 0.1857550134509802 test_loss: 0.22064895629882814
epoch: 86 training_loss 0.1916722508519888 test_loss: 0.18195356130599977
epoch: 87 training_loss 0.18643043957650662 test_loss: 0.1941381573677063
epoch: 88 training_loss 0.18566997081041337 test_loss: 0.18792580366134642
epoch: 89 training_loss 0.1871330001950264 test_loss: 0.205039119720459
epoch: 90 training_loss 0.1849166862666607 test_loss: 0.20345380306243896
epoch: 91 training_loss 0.1883448960632086 test_loss: 0.19100303649902345
epoch: 92 training_loss 0.18988951750099659 test_loss: 0.20146629810333253
epoch: 93 training_loss 0.1847896732389927 test_loss: 0.18125109672546386
epoch: 94 training_loss 0.1856154379248619 test_loss: 0.16780368089675904
epoch: 95 training_loss 0.18041290678083896 test_loss: 0.201452898979187
epoch: 96 training_loss 0.20080640330910682 test_loss: 0.1840355157852173
epoch: 97 training_loss 0.1874189282953739 test_loss: 0.18051711320877076
epoch: 98 training_loss 0.19400290541350843 test_loss: 0.18206846714019775
epoch: 99 training_loss 0.19072191908955574 test_loss: 0.1825800895690918
epoch: 100 training_loss 0.18810662031173705 test_loss: 0.18712128400802613
epoch: 101 training_loss 0.18685644418001174 test_loss: 0.19723730087280272
epoch: 102 training_loss 0.18171487957239152 test_loss: 0.17744884490966797
epoch: 103 training_loss 0.177235594317317 test_loss: 0.19676402807235718
epoch: 104 training_loss 0.18169684655964374 test_loss: 0.18084524869918822
epoch: 105 training_loss 0.19196434885263444 test_loss: 0.19346808195114135
epoch: 106 training_loss 0.1885651895403862 test_loss: 0.1913095474243164
epoch: 107 training_loss 0.18525422520935536 test_loss: 0.18095816373825074
epoch: 108 training_loss 0.18051532223820688 test_loss: 0.18626689910888672
epoch: 109 training_loss 0.18853926852345468 test_loss: 0.17394871711730958
epoch: 110 training_loss 0.18854808956384658 test_loss: 0.17779815196990967
epoch: 111 training_loss 0.18573583483695985 test_loss: 0.19960321187973024
epoch: 112 training_loss 0.18507568240165712 test_loss: 0.1867806077003479
epoch: 113 training_loss 0.1837113604694605 test_loss: 0.18689779043197632
epoch: 114 training_loss 0.19328741557896137 test_loss: 0.1924600124359131
epoch: 115 training_loss 0.18994578272104262 test_loss: 0.19302324056625367
epoch: 116 training_loss 0.19340569034218788 test_loss: 0.18747485876083375
epoch: 117 training_loss 0.179743977189064 test_loss: 0.20705668926239013
epoch: 118 training_loss 0.1891096880286932 test_loss: 0.1774514675140381
epoch: 119 training_loss 0.18571870431303977 test_loss: 0.18107181787490845
epoch: 120 training_loss 0.18248568043112756 test_loss: 0.1971440315246582
epoch: 121 training_loss 0.18367583028972148 test_loss: 0.1822843074798584
epoch: 122 training_loss 0.186885234862566 test_loss: 0.17745825052261352
epoch: 123 training_loss 0.1792087269574404 test_loss: 0.20223536491394042
epoch: 124 training_loss 0.18079584777355195 test_loss: 0.18756805658340453
epoch: 125 training_loss 0.18406275570392608 test_loss: 0.1919718861579895
epoch: 126 training_loss 0.18215263970196247 test_loss: 0.21892914772033692
epoch: 127 training_loss 0.17887846775352956 test_loss: 0.20065803527832032
epoch: 128 training_loss 0.1840392265468836 test_loss: 0.21249003410339357
epoch: 129 training_loss 0.18463135235011577 test_loss: 0.19719619750976564
epoch: 130 training_loss 0.1869089188426733 test_loss: 0.1889886736869812
epoch: 131 training_loss 0.1832528945058584 test_loss: 0.18891119956970215
epoch: 132 training_loss 0.175412260517478 test_loss: 0.1820740818977356
epoch: 133 training_loss 0.1823785937577486 test_loss: 0.20794332027435303
epoch: 134 training_loss 0.18430891193449497 test_loss: 0.17129232883453369
epoch: 135 training_loss 0.18212253011763097 test_loss: 0.18276215791702272
epoch: 136 training_loss 0.19189441211521627 test_loss: 0.17195662260055541
epoch: 137 training_loss 0.17938325650990009 test_loss: 0.17774126529693604
epoch: 138 training_loss 0.1735765802115202 test_loss: 0.19668630361557007
epoch: 139 training_loss 0.18280071914196014 test_loss: 0.18399735689163207
epoch: 140 training_loss 0.19225367590785025 test_loss: 0.1850131630897522
epoch: 141 training_loss 0.19339384019374847 test_loss: 0.18324977159500122
epoch: 142 training_loss 0.1891837077587843 test_loss: 0.18842025995254516
epoch: 143 training_loss 0.17446063838899137 test_loss: 0.18731386661529542
epoch: 144 training_loss 0.1848405520617962 test_loss: 0.1739865779876709
epoch: 145 training_loss 0.1770695824176073 test_loss: 0.18242646455764772
epoch: 146 training_loss 0.18162379488348962 test_loss: 0.17677594423294068
epoch: 147 training_loss 0.1773725863546133 test_loss: 0.19171904325485228
epoch: 148 training_loss 0.1750295889377594 test_loss: 0.18982502222061157
epoch: 149 training_loss 0.18269162967801095 test_loss: 0.175984787940979
episode: 0 training return: -363.5508641468648
episode: 1 training return: -343.95665586127706
episode: 2 training return: -550.0056060371396
episode: 3 training return: -407.50315270126356
epoch: 1 test_true_pfm: 29.195582960693958 sim_pfm: -312.59563665645226
episode: 4 training return: -542.987328854106
episode: 5 training return: -251.32540924830465
episode: 6 training return: -319.74961521894875
episode: 7 training return: -471.10467287576563
epoch: 2 test_true_pfm: 29.97786418644219 sim_pfm: -323.9988949872474
episode: 8 training return: -321.1307263484798
episode: 9 training return: -461.1851708597428
episode: 10 training return: -222.8341041464123
episode: 11 training return: -152.7899710816969
epoch: 3 test_true_pfm: 25.875481374303632 sim_pfm: -91.43177890835868
episode: 12 training return: -116.72081555406682
episode: 13 training return: -52.142344987458785
episode: 14 training return: -29.658188860205485
episode: 15 training return: -55.8367351674661
epoch: 4 test_true_pfm: 28.46159504704268 sim_pfm: -69.127040825058
episode: 16 training return: -108.86404024110394
episode: 17 training return: -51.18317543041956
episode: 18 training return: -240.21467490689398
episode: 19 training return: -128.91822600093533
epoch: 5 test_true_pfm: 58.08001159832344 sim_pfm: 119.01304048967472
episode: 20 training return: -33.24195049654833
episode: 21 training return: -32.332435149393085
episode: 22 training return: 57.24300398714253
episode: 23 training return: 78.43257448757404
epoch: 6 test_true_pfm: 42.16226631593663 sim_pfm: -53.64465541827351
episode: 24 training return: 231.42991299719952
episode: 25 training return: 200.01526500488646
episode: 26 training return: 146.5203043318795
episode: 27 training return: 394.3529142380977
epoch: 7 test_true_pfm: 52.42302715803143 sim_pfm: 425.6213821864985
episode: 28 training return: 398.7643755926973
episode: 29 training return: 414.12062937287703
episode: 30 training return: 405.2465205440525
episode: 31 training return: 418.944285731514
epoch: 8 test_true_pfm: 47.10329419573478 sim_pfm: 470.59501785295873
episode: 32 training return: 436.18268023738943
episode: 33 training return: 464.50224070202916
episode: 34 training return: 422.25894098154896
episode: 35 training return: 444.19452901110964
epoch: 9 test_true_pfm: 44.159898647855236 sim_pfm: 499.187866815344
episode: 36 training return: 457.69053038972083
episode: 37 training return: 432.01361909791973
episode: 38 training return: 441.8281874350455
episode: 39 training return: 435.23846739683427
epoch: 10 test_true_pfm: 52.65757116562572 sim_pfm: 559.4044889768953
episode: 40 training return: 425.7838984718919
episode: 41 training return: 458.4963047177727
episode: 42 training return: 454.7213536879775
episode: 43 training return: 447.2283070667761
epoch: 11 test_true_pfm: 52.51994486931531 sim_pfm: 579.8345389604391
episode: 44 training return: 448.63061350845084
episode: 45 training return: 468.14898703777385
episode: 46 training return: 460.6117891810697
episode: 47 training return: 469.603963284919
epoch: 12 test_true_pfm: 49.682752031662666 sim_pfm: 601.4194583295889
episode: 48 training return: 468.9849287205418
episode: 49 training return: 480.3996135214558
episode: 50 training return: 419.0725020310257
episode: 51 training return: 450.92613038579583
epoch: 13 test_true_pfm: 49.32931870079535 sim_pfm: 598.1944519593585
episode: 52 training return: 477.2697561924664
episode: 53 training return: 464.0052003273897
episode: 54 training return: 476.4978514668374
episode: 55 training return: 481.0882842461002
epoch: 14 test_true_pfm: 45.52372592070765 sim_pfm: 602.3290435089895
episode: 56 training return: 484.8432762695715
episode: 57 training return: 470.5274683070819
episode: 58 training return: 477.4833561336122
episode: 59 training return: 462.99511488981426
epoch: 15 test_true_pfm: 53.48481700273001 sim_pfm: 614.3071386848304
episode: 60 training return: 459.49160804873213
episode: 61 training return: 490.5194437901407
episode: 62 training return: 478.69731871847404
episode: 63 training return: 488.17743575188666
epoch: 16 test_true_pfm: 53.25285681163041 sim_pfm: 532.671290637248
episode: 64 training return: 487.01893588067816
episode: 65 training return: 464.7656186736737
episode: 66 training return: 477.68054356389683
episode: 67 training return: 456.790826477149
epoch: 17 test_true_pfm: 56.82190486110344 sim_pfm: 577.7761781782237
episode: 68 training return: 449.26101703331744
episode: 69 training return: 488.91619829175494
episode: 70 training return: 454.91531105666553
episode: 71 training return: 456.0129926909634
epoch: 18 test_true_pfm: 47.44249247012334 sim_pfm: 580.2436484771084
episode: 72 training return: 460.20183352427256
episode: 73 training return: 474.29622559917345
episode: 74 training return: 445.6725742853955
episode: 75 training return: 477.363315736698
epoch: 19 test_true_pfm: 52.271410182644225 sim_pfm: 572.0664163263096
episode: 76 training return: 480.95665941438523
episode: 77 training return: 450.9865385774761
episode: 78 training return: 448.7512776322058
episode: 79 training return: 450.07830833449833
epoch: 20 test_true_pfm: 50.58037427423661 sim_pfm: 594.9932656963606
episode: 80 training return: 455.3445787583402
episode: 81 training return: 462.3118411572101
episode: 82 training return: 447.4784770516846
episode: 83 training return: 475.8873876806746
epoch: 21 test_true_pfm: 46.222723207818596 sim_pfm: 573.7679575821621
episode: 84 training return: 460.21185812077067
episode: 85 training return: 451.42004253029637
episode: 86 training return: 485.98385300766375
episode: 87 training return: 467.1163776812874
epoch: 22 test_true_pfm: 50.91704461493052 sim_pfm: 579.0104625027919
episode: 88 training return: 475.4756920259172
episode: 89 training return: 480.80712676326027
episode: 90 training return: 472.72927364344946
episode: 91 training return: 455.68121646100025
epoch: 23 test_true_pfm: 57.51727126187857 sim_pfm: 557.347999039288
episode: 92 training return: 452.43478911535385
episode: 93 training return: 470.1655019058614
episode: 94 training return: 488.91746784546115
episode: 95 training return: 470.04739437159714
epoch: 24 test_true_pfm: 55.13451256698693 sim_pfm: 570.9738045963494
episode: 96 training return: 485.4478600875609
episode: 97 training return: 455.9601075066086
episode: 98 training return: 484.5299977714847
episode: 99 training return: 466.85128065663906
epoch: 25 test_true_pfm: 42.58149812969621 sim_pfm: 575.9329534046205
episode: 100 training return: 478.918260727595
episode: 101 training return: 495.3783918735299
episode: 102 training return: 467.6689045666891
episode: 103 training return: 478.7894295631291
epoch: 26 test_true_pfm: 41.2255320935814 sim_pfm: 602.2382754855549
episode: 104 training return: 466.8165236368552
episode: 105 training return: 485.4092641149167
episode: 106 training return: 468.1585372096003
episode: 107 training return: 477.2965973644748
epoch: 27 test_true_pfm: 48.66262964661362 sim_pfm: 558.0051952390029
episode: 108 training return: 485.9761890249682
episode: 109 training return: 459.7118378404299
episode: 110 training return: 482.3793288184123
episode: 111 training return: 466.35384904300486
epoch: 28 test_true_pfm: 55.92916266531137 sim_pfm: 611.5307178841996
episode: 112 training return: 470.2743924256474
episode: 113 training return: 491.6838904504239
episode: 114 training return: 483.3947239993712
episode: 115 training return: 468.82253285868103
epoch: 29 test_true_pfm: 48.221804696020776 sim_pfm: 566.7494166833116
episode: 116 training return: 472.20815333493096
episode: 117 training return: 469.56930511722175
episode: 118 training return: 473.35659829331854
episode: 119 training return: 453.4408296512132
epoch: 30 test_true_pfm: 47.469037935789935 sim_pfm: 597.6796688683925
episode: 120 training return: 460.79481887382497
episode: 121 training return: 484.8352496860385
episode: 122 training return: 465.9233520394291
episode: 123 training return: 476.6774225725047
epoch: 31 test_true_pfm: 50.09584853386567 sim_pfm: 608.4843719255227
episode: 124 training return: 465.2891412419916
episode: 125 training return: 466.4638509426279
episode: 126 training return: 466.11812318897586
episode: 127 training return: 461.6153327647456
epoch: 32 test_true_pfm: 44.365900579172965 sim_pfm: 580.8856194321701
episode: 128 training return: 464.902737505338
episode: 129 training return: 466.39126281392566
episode: 130 training return: 470.9496554382657
episode: 131 training return: 481.30017891829004
epoch: 33 test_true_pfm: 56.062433328614226 sim_pfm: 600.0412096342576
episode: 132 training return: 464.37907108306683
episode: 133 training return: 468.51429221272105
episode: 134 training return: 479.8239694314581
episode: 135 training return: 463.6123979400157
epoch: 34 test_true_pfm: 60.122975217097235 sim_pfm: 588.6329558684901
episode: 136 training return: 454.0488849386852
episode: 137 training return: 454.2954656410888
episode: 138 training return: 484.1259032304858
episode: 139 training return: 481.4776542083157
epoch: 35 test_true_pfm: 62.68878676525183 sim_pfm: 591.7533821841502
episode: 140 training return: 463.66738851054987
episode: 141 training return: 468.1506028134258
episode: 142 training return: 485.56644683689296
episode: 143 training return: 459.1829093412741
epoch: 36 test_true_pfm: 48.67251816916356 sim_pfm: 588.3812052408686
episode: 144 training return: 471.976221865834
episode: 145 training return: 466.2435241257463
episode: 146 training return: 477.6133140805469
episode: 147 training return: 495.78224040627384
epoch: 37 test_true_pfm: 49.656629884049615 sim_pfm: 584.532440700723
episode: 148 training return: 486.5482188929649
episode: 149 training return: 479.09346288667365
episode: 150 training return: 468.2021528647547
episode: 151 training return: 445.5586209894142
epoch: 38 test_true_pfm: 53.01240664021242 sim_pfm: 576.8771252146195
episode: 152 training return: 475.95512557648743
episode: 153 training return: 480.13099563621057
episode: 154 training return: 467.76026005532185
episode: 155 training return: 464.1219283284065
epoch: 39 test_true_pfm: 59.51266526014396 sim_pfm: 597.8294628752164
episode: 156 training return: 463.8063033088549
episode: 157 training return: 472.54912189170227
episode: 158 training return: 474.33887175255563
episode: 159 training return: 452.84969408345705
epoch: 40 test_true_pfm: 47.639192111105736 sim_pfm: 565.646873161749
episode: 160 training return: 482.3882779562458
episode: 161 training return: 462.4532318577532
episode: 162 training return: 479.31376967597606
episode: 163 training return: 465.1561032226975
epoch: 41 test_true_pfm: 51.351506123867885 sim_pfm: 604.1103544294962
episode: 164 training return: 486.9512534318036
episode: 165 training return: 491.0896547202351
episode: 166 training return: 494.4613722388033
episode: 167 training return: 477.9376954333139
epoch: 42 test_true_pfm: 53.416631291331704 sim_pfm: 555.5577552811998
episode: 168 training return: 441.60414859555283
episode: 169 training return: 470.7242205860076
episode: 170 training return: 449.76407588687835
episode: 171 training return: 472.2365868081451
epoch: 43 test_true_pfm: 46.58986916279465 sim_pfm: 546.8575102541607
episode: 172 training return: 496.938714678905
episode: 173 training return: 498.7116663465148
episode: 174 training return: 478.74515147230295
episode: 175 training return: 482.6880332390816
epoch: 44 test_true_pfm: 47.16268761765218 sim_pfm: 593.1052108832808
episode: 176 training return: 479.5688737469039
episode: 177 training return: 478.14682116081207
episode: 178 training return: 494.8343064809216
episode: 179 training return: 482.17828483257017
epoch: 45 test_true_pfm: 56.45585898283118 sim_pfm: 584.3822355861773
episode: 180 training return: 484.17611215313747
episode: 181 training return: 485.91977870454366
episode: 182 training return: 474.26772667976246
episode: 183 training return: 466.3282292125814
epoch: 46 test_true_pfm: 52.39772570359737 sim_pfm: 602.1365335400009
episode: 184 training return: 491.784432414977
episode: 185 training return: 472.86440681048765
episode: 186 training return: 475.4036807572133
episode: 187 training return: 488.48639066841105
epoch: 47 test_true_pfm: 55.8657625244926 sim_pfm: 574.7509078785318
episode: 188 training return: 461.81864949778077
episode: 189 training return: 495.0630097805663
episode: 190 training return: 497.72547445137803
episode: 191 training return: 503.42128111300383
epoch: 48 test_true_pfm: 54.689521050181156 sim_pfm: 585.6533489854854
episode: 192 training return: 474.8538450841924
episode: 193 training return: 461.4433587446389
episode: 194 training return: 502.0785601888189
episode: 195 training return: 467.80693475636406
epoch: 49 test_true_pfm: 61.385868221158084 sim_pfm: 579.699545418784
episode: 196 training return: 497.8612641213458
episode: 197 training return: 484.53863019020525
episode: 198 training return: 485.95805696464913
episode: 199 training return: 468.72500978810695
epoch: 50 test_true_pfm: 52.10038472314175 sim_pfm: 593.0120252471604
episode: 200 training return: 478.96457459307277
episode: 201 training return: 472.882279340329
episode: 202 training return: 488.2806632684655
episode: 203 training return: 491.2401409138496
epoch: 51 test_true_pfm: 59.293776874523985 sim_pfm: 592.4369938743657
episode: 204 training return: 462.12303985210735
episode: 205 training return: 467.0824906920772
episode: 206 training return: 480.94480682546566
episode: 207 training return: 483.0479361242766
epoch: 52 test_true_pfm: 46.94474702521842 sim_pfm: 597.7025994270349
episode: 208 training return: 489.6614010590899
episode: 209 training return: 461.98515192737335
episode: 210 training return: 485.2903556981772
episode: 211 training return: 481.57202340847607
epoch: 53 test_true_pfm: 41.707042197851415 sim_pfm: 575.5624782849162
episode: 212 training return: 465.1094213474858
episode: 213 training return: 474.6566816995463
episode: 214 training return: 479.8396157921942
episode: 215 training return: 486.4412721982363
epoch: 54 test_true_pfm: 43.409057612026565 sim_pfm: 582.0994035846861
episode: 216 training return: 490.8372590779352
episode: 217 training return: 490.8996002460636
episode: 218 training return: 496.34766605642045
episode: 219 training return: 490.23626401188466
epoch: 55 test_true_pfm: 54.04006071239457 sim_pfm: 604.6420813782719
episode: 220 training return: 503.11504617829195
episode: 221 training return: 474.6645218981451
episode: 222 training return: 492.1704744399714
episode: 223 training return: 486.5844758232821
epoch: 56 test_true_pfm: 41.82258247076982 sim_pfm: 577.5401631059552
episode: 224 training return: 473.34331296809546
episode: 225 training return: 473.25563281430317
episode: 226 training return: 448.96425527853427
episode: 227 training return: 491.99643097365185
epoch: 57 test_true_pfm: 44.42513798409686 sim_pfm: 601.0170528764822
episode: 228 training return: 474.32604663508766
episode: 229 training return: 471.4204410610525
episode: 230 training return: 498.65945298917893
episode: 231 training return: 487.61466945479606
epoch: 58 test_true_pfm: 43.54223090887471 sim_pfm: 584.7258593157263
episode: 232 training return: 479.53029266322505
episode: 233 training return: 491.10853607345587
episode: 234 training return: 480.0606664839594
episode: 235 training return: 494.4490408979478
epoch: 59 test_true_pfm: 47.97376389159971 sim_pfm: 593.1964310738613
episode: 236 training return: 482.38726861722125
episode: 237 training return: 494.01393497424135
episode: 238 training return: 488.47054352429933
episode: 239 training return: 475.4161373891397
epoch: 60 test_true_pfm: 48.84680501270514 sim_pfm: 596.7595613892939
episode: 240 training return: 494.9381049393099
episode: 241 training return: 499.8528466675256
episode: 242 training return: 484.4732947666612
episode: 243 training return: 484.6044802321356
epoch: 61 test_true_pfm: 51.285627435038755 sim_pfm: 580.1244834594321
episode: 244 training return: 495.65537778642357
episode: 245 training return: 506.7424977546065
episode: 246 training return: 479.3213257652263
episode: 247 training return: 492.35445390752426
epoch: 62 test_true_pfm: 60.057684940078055 sim_pfm: 579.8951562049409
episode: 248 training return: 499.77290969218325
episode: 249 training return: 485.12717091663245
episode: 250 training return: 507.49348124979485
episode: 251 training return: 481.1932607219659
epoch: 63 test_true_pfm: 52.6987873093991 sim_pfm: 602.1498294448307
episode: 252 training return: 474.61703961439605
episode: 253 training return: 507.1712075568578
episode: 254 training return: 479.3252537026824
episode: 255 training return: 498.433708381398
epoch: 64 test_true_pfm: 44.08318922702315 sim_pfm: 606.7547370468084
episode: 256 training return: 501.04763696421526
episode: 257 training return: 511.12310632819765
episode: 258 training return: 481.78826981080664
episode: 259 training return: 487.4697496717213
epoch: 65 test_true_pfm: 41.756422494422175 sim_pfm: 573.9595929538116
episode: 260 training return: 478.9035223841532
episode: 261 training return: 493.1236593933144
episode: 262 training return: 503.9192691877619
episode: 263 training return: 499.1230426389607
epoch: 66 test_true_pfm: 50.122669398628155 sim_pfm: 559.9619658238793
episode: 264 training return: 504.8878837928348
episode: 265 training return: 491.0674154049183
episode: 266 training return: 500.84589640930426
episode: 267 training return: 486.29681870187846
epoch: 67 test_true_pfm: 47.18704385363418 sim_pfm: 595.9063464724013
episode: 268 training return: 478.7128808833234
episode: 269 training return: 468.67552276019757
episode: 270 training return: 492.5724554207013
episode: 271 training return: 498.186297282744
epoch: 68 test_true_pfm: 48.819976853082586 sim_pfm: 598.4486834476269
episode: 272 training return: 501.93122380663567
episode: 273 training return: 484.99763402287243
episode: 274 training return: 475.52884783317546
episode: 275 training return: 484.1136807163062
epoch: 69 test_true_pfm: 56.24513934147209 sim_pfm: 592.4073349080666
episode: 276 training return: 491.31698537383414
episode: 277 training return: 512.5442763977333
episode: 278 training return: 507.4961148321853
episode: 279 training return: 489.668843120491
epoch: 70 test_true_pfm: 56.91998143267201 sim_pfm: 602.8592428142256
episode: 280 training return: 503.0740541720811
episode: 281 training return: 484.99028187804174
episode: 282 training return: 495.6291241646864
episode: 283 training return: 486.142567621051
epoch: 71 test_true_pfm: 44.6438634010939 sim_pfm: 633.1296622528989
episode: 284 training return: 488.5747370118911
episode: 285 training return: 504.9809795719275
episode: 286 training return: 498.995519912127
episode: 287 training return: 494.48479223339643
epoch: 72 test_true_pfm: 52.31585405552507 sim_pfm: 586.9858793944672
episode: 288 training return: 503.6255178867058
episode: 289 training return: 493.2802495816327
episode: 290 training return: 465.73925226386507
episode: 291 training return: 479.47143742553664
epoch: 73 test_true_pfm: 53.98750421284748 sim_pfm: 580.5210833428187
episode: 292 training return: 497.5927346931191
episode: 293 training return: 466.49251182940156
episode: 294 training return: 486.69900380717337
episode: 295 training return: 503.4148906754318
epoch: 74 test_true_pfm: 51.75544991943192 sim_pfm: 620.9394369456483
episode: 296 training return: 494.4177551212613
episode: 297 training return: 471.9054154119615
episode: 298 training return: 472.1909430352249
episode: 299 training return: 486.9916001319547
epoch: 75 test_true_pfm: 45.0919530498359 sim_pfm: 569.3893827247645
episode: 300 training return: 508.0093532418069
episode: 301 training return: 472.49305195681444
episode: 302 training return: 494.6052870341464
episode: 303 training return: 495.888995749554
epoch: 76 test_true_pfm: 41.37340890528379 sim_pfm: 570.6958803663146
episode: 304 training return: 474.2855905632701
episode: 305 training return: 495.0437506698918
episode: 306 training return: 486.98460869425094
episode: 307 training return: 475.65999033184534
epoch: 77 test_true_pfm: 52.78037593160665 sim_pfm: 617.0616364136807
episode: 308 training return: 486.6663490960515
episode: 309 training return: 496.2539446768941
episode: 310 training return: 509.60450989695926
episode: 311 training return: 504.2050756582203
epoch: 78 test_true_pfm: 43.70961974056733 sim_pfm: 613.1853710140753
episode: 312 training return: 486.44205456277484
episode: 313 training return: 487.8915744090047
episode: 314 training return: 488.62288036383717
episode: 315 training return: 495.32473273243244
epoch: 79 test_true_pfm: 46.03420520493827 sim_pfm: 608.9410670705407
episode: 316 training return: 472.16543266257736
episode: 317 training return: 485.368133981838
episode: 318 training return: 486.75868591289714
episode: 319 training return: 480.4621853043297
epoch: 80 test_true_pfm: 46.76513878203581 sim_pfm: 575.2396984084103
episode: 320 training return: 495.5722709058118
episode: 321 training return: 478.38852281630136
episode: 322 training return: 474.76168463888666
episode: 323 training return: 474.88775511632264
epoch: 81 test_true_pfm: 54.71981031670905 sim_pfm: 598.9603983231815
episode: 324 training return: 484.78415281118083
episode: 325 training return: 486.8770429706175
episode: 326 training return: 490.4567599722552
episode: 327 training return: 488.97960785121165
epoch: 82 test_true_pfm: 47.45536283109674 sim_pfm: 594.5878494058665
episode: 328 training return: 495.0702264521953
episode: 329 training return: 482.5029679257374
episode: 330 training return: 480.0420096569397
episode: 331 training return: 483.0625987887874
epoch: 83 test_true_pfm: 52.2851029334493 sim_pfm: 608.4400073135614
episode: 332 training return: 506.46099435807486
episode: 333 training return: 499.2695199251502
episode: 334 training return: 498.00778544227177
episode: 335 training return: 493.31989578998633
epoch: 84 test_true_pfm: 46.48606423524023 sim_pfm: 580.4455707094312
episode: 336 training return: 499.4527989439913
episode: 337 training return: 487.39096996920216
episode: 338 training return: 487.4392545576909
episode: 339 training return: 492.0105867083702
epoch: 85 test_true_pfm: 51.451436388242314 sim_pfm: 633.0885467478996
episode: 340 training return: 480.1921151565171
episode: 341 training return: 502.0366038653272
episode: 342 training return: 495.66532518821293
episode: 343 training return: 488.2776107317514
epoch: 86 test_true_pfm: 44.75951763613896 sim_pfm: 587.3013260018064
episode: 344 training return: 492.31321687164245
episode: 345 training return: 492.23096578555845
episode: 346 training return: 485.82281621320476
episode: 347 training return: 484.59961684858797
epoch: 87 test_true_pfm: 46.016115033188505 sim_pfm: 616.4236923976504
episode: 348 training return: 492.3116607575027
episode: 349 training return: 491.20900721442683
episode: 350 training return: 494.58630011440835
episode: 351 training return: 507.85880405602
epoch: 88 test_true_pfm: 54.720379850836906 sim_pfm: 599.9043788657574
episode: 352 training return: 503.51296651585983
episode: 353 training return: 489.8226289062938
episode: 354 training return: 492.7850161823488
episode: 355 training return: 483.83463736145774
epoch: 89 test_true_pfm: 52.65945946413124 sim_pfm: 597.0786941637779
episode: 356 training return: 483.2342654242742
episode: 357 training return: 480.5779267156025
episode: 358 training return: 500.76517032920924
episode: 359 training return: 499.89527438485646
epoch: 90 test_true_pfm: 44.533050026373076 sim_pfm: 588.1744264443258
episode: 360 training return: 503.9983254312168
episode: 361 training return: 500.9048732814701
episode: 362 training return: 486.5605112212368
episode: 363 training return: 498.06182619183716
epoch: 91 test_true_pfm: 40.93959748405816 sim_pfm: 586.3049089819287
episode: 364 training return: 510.7668527763354
episode: 365 training return: 468.30187022805626
episode: 366 training return: 503.5099353260699
episode: 367 training return: 501.5170105571179
epoch: 92 test_true_pfm: 51.46778837676004 sim_pfm: 600.1527378695682
episode: 368 training return: 464.17829520553306
episode: 369 training return: 485.83173025862436
episode: 370 training return: 496.36715461577353
episode: 371 training return: 497.4138862208628
epoch: 93 test_true_pfm: 42.02612181857243 sim_pfm: 590.9115273144824
episode: 372 training return: 487.0753096592605
episode: 373 training return: 477.22081798247615
episode: 374 training return: 480.877118819032
episode: 375 training return: 474.3581506254838
epoch: 94 test_true_pfm: 42.36304406526218 sim_pfm: 583.2848435414245
episode: 376 training return: 481.02625614944833
episode: 377 training return: 479.04667336073277
episode: 378 training return: 498.55184786948206
episode: 379 training return: 492.1657571056765
epoch: 95 test_true_pfm: 45.44069124644429 sim_pfm: 595.652303858445
episode: 380 training return: 504.7231186989266
episode: 381 training return: 499.9171377687543
episode: 382 training return: 488.3834201124755
episode: 383 training return: 504.310719158631
epoch: 96 test_true_pfm: 42.11790377300976 sim_pfm: 605.4503550833014
episode: 384 training return: 495.760900617819
episode: 385 training return: 505.7575007899432
episode: 386 training return: 485.0150252170709
episode: 387 training return: 495.17334973966877
epoch: 97 test_true_pfm: 42.387212014883595 sim_pfm: 589.2766458203049
episode: 388 training return: 475.74081220612175
episode: 389 training return: 496.42695186051213
episode: 390 training return: 460.5226167902251
episode: 391 training return: 481.80680709803676
epoch: 98 test_true_pfm: 49.178968112892676 sim_pfm: 589.1447857600309
episode: 392 training return: 491.2743101659398
episode: 393 training return: 478.7819613275088
episode: 394 training return: 476.59003941314097
episode: 395 training return: 478.05877277966664
epoch: 99 test_true_pfm: 45.58065901114353 sim_pfm: 604.5584327647863
episode: 396 training return: 485.6285246032914
episode: 397 training return: 500.99566832845494
episode: 398 training return: 504.35631221294955
episode: 399 training return: 486.1184966367625
epoch: 100 test_true_pfm: 45.77836350701176 sim_pfm: 605.6350323279472
episode: 400 training return: 482.0412512273332
episode: 401 training return: 469.0491457446178
episode: 402 training return: 507.7912695115082
episode: 403 training return: 504.58545607155924
epoch: 101 test_true_pfm: 47.3148787010093 sim_pfm: 597.7128226491221
episode: 404 training return: 486.8815449207806
episode: 405 training return: 481.3590850582481
episode: 406 training return: 498.04986312895215
episode: 407 training return: 491.31251921216614
epoch: 102 test_true_pfm: 47.34010255405972 sim_pfm: 593.4042688172528
episode: 408 training return: 489.6338436639906
episode: 409 training return: 491.04987196381336
episode: 410 training return: 481.25081765011663
episode: 411 training return: 507.03169599812816
epoch: 103 test_true_pfm: 45.71960500494243 sim_pfm: 617.3118698907479
episode: 412 training return: 489.98046144239555
episode: 413 training return: 491.54178683714434
episode: 414 training return: 502.5006804240587
episode: 415 training return: 513.8639260039043
epoch: 104 test_true_pfm: 52.8572232072685 sim_pfm: 593.607388402836
episode: 416 training return: 478.1001035199973
episode: 417 training return: 496.23152149736984
episode: 418 training return: 481.0428039996748
episode: 419 training return: 497.6222723591472
epoch: 105 test_true_pfm: 48.81195403857205 sim_pfm: 619.6957305711007
episode: 420 training return: 473.46348992577833
episode: 421 training return: 500.508856089086
episode: 422 training return: 500.1902764514941
episode: 423 training return: 487.5908567375208
epoch: 106 test_true_pfm: 37.07463853162936 sim_pfm: 606.8176471873735
episode: 424 training return: 515.1735029935205
episode: 425 training return: 500.0623921886292
episode: 426 training return: 492.8626937194932
episode: 427 training return: 484.95043251970156
epoch: 107 test_true_pfm: 48.058714871566245 sim_pfm: 615.6260231761735
episode: 428 training return: 500.6800908745626
episode: 429 training return: 476.2142977814311
episode: 430 training return: 470.89759444894247
episode: 431 training return: 476.16679309681757
epoch: 108 test_true_pfm: 53.551907306030735 sim_pfm: 600.8311586802623
episode: 432 training return: 461.98295241997045
episode: 433 training return: 472.09505173043954
episode: 434 training return: 480.7844071444824
episode: 435 training return: 478.0568219414876
epoch: 109 test_true_pfm: 46.39400758088539 sim_pfm: 599.7128149293039
episode: 436 training return: 472.6407915856072
episode: 437 training return: 485.1876575927176
episode: 438 training return: 511.0378527354757
episode: 439 training return: 507.28162773367893
epoch: 110 test_true_pfm: 51.61931281575364 sim_pfm: 598.9207365094578
episode: 440 training return: 485.72243921332085
episode: 441 training return: 495.6561838634867
episode: 442 training return: 505.1487894416302
episode: 443 training return: 506.85315938056857
epoch: 111 test_true_pfm: 39.810311515275885 sim_pfm: 630.972223369627
episode: 444 training return: 490.6870453831108
episode: 445 training return: 503.8654339353778
episode: 446 training return: 500.0609297614215
episode: 447 training return: 505.286481206712
epoch: 112 test_true_pfm: 42.3430428623491 sim_pfm: 578.7940141647451
episode: 448 training return: 484.9553660004761
episode: 449 training return: 493.42734704212126
episode: 450 training return: 499.5859352269786
episode: 451 training return: 468.9265206696781
epoch: 113 test_true_pfm: 45.089701752985256 sim_pfm: 606.4663697873301
episode: 452 training return: 479.1735999826699
episode: 453 training return: 498.061272651133
episode: 454 training return: 477.0040401391518
episode: 455 training return: 507.45419758520814
epoch: 114 test_true_pfm: 47.862804220622856 sim_pfm: 603.1645284306861
episode: 456 training return: 494.0409310169919
episode: 457 training return: 488.44660627178337
episode: 458 training return: 480.8057920197412
episode: 459 training return: 499.6125056107715
epoch: 115 test_true_pfm: 44.15530991778567 sim_pfm: 617.9417444354688
episode: 460 training return: 487.18978801737325
episode: 461 training return: 496.9353048451573
episode: 462 training return: 482.81792298371636
episode: 463 training return: 486.0423067372942
epoch: 116 test_true_pfm: 52.85341717581176 sim_pfm: 607.1974198407584
episode: 464 training return: 505.45767814552215
episode: 465 training return: 479.3231418717882
episode: 466 training return: 493.0247645615418
episode: 467 training return: 483.00613712847894
epoch: 117 test_true_pfm: 42.8152879471891 sim_pfm: 615.4517003415713
episode: 468 training return: 481.1210511410204
episode: 469 training return: 493.1567876157447
episode: 470 training return: 514.0686420105509
episode: 471 training return: 497.6185315170927
epoch: 118 test_true_pfm: 41.64452452244854 sim_pfm: 582.0576244446208
episode: 472 training return: 513.48707131135
episode: 473 training return: 498.01029310306893
episode: 474 training return: 505.20702563735995
episode: 475 training return: 473.18522626677645
epoch: 119 test_true_pfm: 48.13285875600326 sim_pfm: 588.0731789019871
episode: 476 training return: 493.2502876467436
episode: 477 training return: 500.56995620831776
episode: 478 training return: 479.96714148162374
episode: 479 training return: 508.8231307116544
epoch: 120 test_true_pfm: 41.92877741696241 sim_pfm: 590.5821006401145
episode: 480 training return: 495.58628988378354
episode: 481 training return: 483.63915654968696
episode: 482 training return: 502.9246203759287
episode: 483 training return: 492.92893555230484
epoch: 121 test_true_pfm: 44.867062054494596 sim_pfm: 620.3076029753493
episode: 484 training return: 502.3600036912792
episode: 485 training return: 483.51201291474604
episode: 486 training return: 493.1652753839483
episode: 487 training return: 506.9231461813211
epoch: 122 test_true_pfm: 50.61662532822904 sim_pfm: 584.3556266380197
episode: 488 training return: 515.4261750662166
episode: 489 training return: 481.7413996238478
episode: 490 training return: 494.09285198734443
episode: 491 training return: 489.22400102356175
epoch: 123 test_true_pfm: 53.61719062269802 sim_pfm: 605.0577890780978
episode: 492 training return: 500.1755397194625
episode: 493 training return: 482.31831334069057
episode: 494 training return: 496.7528675293988
episode: 495 training return: 484.9840398014154
epoch: 124 test_true_pfm: 44.20729671718468 sim_pfm: 611.8572991681684
episode: 496 training return: 490.76482470989237
episode: 497 training return: 501.7901371292744
episode: 498 training return: 484.98238076645157
episode: 499 training return: 501.29045318105176
epoch: 125 test_true_pfm: 41.604321631372265 sim_pfm: 604.5554561509707
episode: 500 training return: 489.2087741950028
episode: 501 training return: 488.7804183025213
episode: 502 training return: 521.4300429980196
episode: 503 training return: 479.7319388077116
epoch: 126 test_true_pfm: 52.06486100374201 sim_pfm: 616.2695676450428
episode: 504 training return: 488.5422401092247
episode: 505 training return: 487.2005188138317
episode: 506 training return: 481.87535272176103
episode: 507 training return: 498.43571161681325
epoch: 127 test_true_pfm: 48.1448770351959 sim_pfm: 592.697463802684
episode: 508 training return: 519.7419710921994
episode: 509 training return: 480.75754671876126
episode: 510 training return: 503.55713970807193
episode: 511 training return: 498.6504164755936
epoch: 128 test_true_pfm: 41.660739869321766 sim_pfm: 593.6561470985797
episode: 512 training return: 484.0884008737692
episode: 513 training return: 500.8347213701643
episode: 514 training return: 490.02247981017035
episode: 515 training return: 476.2349585213253
epoch: 129 test_true_pfm: 47.08644718138567 sim_pfm: 589.8195594671034
episode: 516 training return: 473.18608335821136
episode: 517 training return: 495.6786452059876
episode: 518 training return: 485.74460697440094
episode: 519 training return: 499.2044494982823
epoch: 130 test_true_pfm: 44.70415680816499 sim_pfm: 596.912391791832
episode: 520 training return: 494.3255879885479
episode: 521 training return: 509.6122581136588
episode: 522 training return: 474.6505603937897
episode: 523 training return: 505.8239444590766
epoch: 131 test_true_pfm: 47.990033751278695 sim_pfm: 593.7572844348316
episode: 524 training return: 477.1573389234352
episode: 525 training return: 509.64043125977486
episode: 526 training return: 494.3176818207241
episode: 527 training return: 484.5311741140127
epoch: 132 test_true_pfm: 46.5177749729163 sim_pfm: 608.4913593174448
episode: 528 training return: 479.1612385546161
episode: 529 training return: 506.27965424246673
episode: 530 training return: 482.72432977081894
episode: 531 training return: 526.5842200214948
epoch: 133 test_true_pfm: 41.086291385084515 sim_pfm: 607.8604855027223
episode: 532 training return: 492.8697514623779
episode: 533 training return: 489.2787129464849
episode: 534 training return: 493.35442659680007
episode: 535 training return: 517.7370387390854
epoch: 134 test_true_pfm: 49.918899842909376 sim_pfm: 618.5956881934102
episode: 536 training return: 488.2760844111747
episode: 537 training return: 503.7581747713976
episode: 538 training return: 487.35955297519064
episode: 539 training return: 466.59622323903153
epoch: 135 test_true_pfm: 38.716059544959265 sim_pfm: 605.2708498765866
episode: 540 training return: 510.5113916585877
episode: 541 training return: 491.25561466264844
episode: 542 training return: 498.2466704982617
episode: 543 training return: 478.16193177602486
epoch: 136 test_true_pfm: 48.80517793198165 sim_pfm: 590.4770747646426
episode: 544 training return: 494.7830217012555
episode: 545 training return: 503.4777673461647
episode: 546 training return: 502.6019575339946
episode: 547 training return: 471.70583045023324
epoch: 137 test_true_pfm: 42.88689435832213 sim_pfm: 596.1180613626511
episode: 548 training return: 496.72864834159367
episode: 549 training return: 469.42081831994795
episode: 550 training return: 503.2744991139762
episode: 551 training return: 482.9298673664321
epoch: 138 test_true_pfm: 48.577257276165845 sim_pfm: 613.0314566774618
episode: 552 training return: 493.3663317524655
episode: 553 training return: 492.09829961248255
episode: 554 training return: 509.9656317523752
episode: 555 training return: 493.1030670026742
epoch: 139 test_true_pfm: 49.52986135712106 sim_pfm: 616.6700790717374
episode: 556 training return: 499.39719147006736
episode: 557 training return: 501.8359680836409
episode: 558 training return: 513.8926633808162
episode: 559 training return: 499.2254855750391
epoch: 140 test_true_pfm: 48.604490163898426 sim_pfm: 594.6378413162776
episode: 560 training return: 512.9703453408638
episode: 561 training return: 490.58787802674
episode: 562 training return: 498.38408998193415
episode: 563 training return: 485.40194650949934
epoch: 141 test_true_pfm: 46.17868499196283 sim_pfm: 602.8254994716035
episode: 564 training return: 516.2837271073944
episode: 565 training return: 499.4986708440592
episode: 566 training return: 496.5488949983792
episode: 567 training return: 485.19197330782
epoch: 142 test_true_pfm: 42.41813701953257 sim_pfm: 585.3110378759978
episode: 568 training return: 481.75542780133935
episode: 569 training return: 490.48567098500246
episode: 570 training return: 496.12121969864666
episode: 571 training return: 485.40847931737693
epoch: 143 test_true_pfm: 47.27177584213508 sim_pfm: 617.2547415377412
episode: 572 training return: 501.31253252720893
episode: 573 training return: 493.22771206206414
episode: 574 training return: 494.848291329724
episode: 575 training return: 498.1357707649877
epoch: 144 test_true_pfm: 38.531774182054406 sim_pfm: 595.6604068617484
episode: 576 training return: 499.4697398519328
episode: 577 training return: 485.65255630904375
episode: 578 training return: 481.86586196887583
episode: 579 training return: 478.0002716265722
epoch: 145 test_true_pfm: 36.59932428560983 sim_pfm: 620.7816603523328
episode: 580 training return: 509.44975963850663
episode: 581 training return: 498.6396956101245
episode: 582 training return: 480.92735612478646
episode: 583 training return: 493.82846860268387
epoch: 146 test_true_pfm: 50.261922372129845 sim_pfm: 586.3452114471808
episode: 584 training return: 518.6086443319313
episode: 585 training return: 498.8904742244549
episode: 586 training return: 496.74882204963774
episode: 587 training return: 488.56369533676195
epoch: 147 test_true_pfm: 43.080655212292974 sim_pfm: 605.9529472527596
episode: 588 training return: 503.5703873405153
episode: 589 training return: 497.83427221426194
episode: 590 training return: 494.37511106910705
episode: 591 training return: 501.9098457897816
epoch: 148 test_true_pfm: 42.01136116061158 sim_pfm: 607.1833978083523
episode: 592 training return: 497.8485891944309
episode: 593 training return: 484.75642070233573
episode: 594 training return: 503.945659316284
episode: 595 training return: 497.61021985685477
epoch: 149 test_true_pfm: 42.83583225766887 sim_pfm: 581.0204087618561
episode: 596 training return: 476.9567301387759
episode: 597 training return: 517.4673317808742
episode: 598 training return: 496.2889788961699
episode: 599 training return: 497.4182355489013
epoch: 150 test_true_pfm: 47.59793095896425 sim_pfm: 601.9966775703551
