['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'uncertainty', '--traj', 'medium', '--seed', '4', '--data', '100000']
epoch: 0 training_loss 0.2658893695473671 test_loss: 0.20800721645355225
epoch: 1 training_loss 0.20872994855046273 test_loss: 0.19642817974090576
epoch: 2 training_loss 0.20066534526646138 test_loss: 0.20784907341003417
epoch: 3 training_loss 0.19276281587779523 test_loss: 0.17438244819641113
epoch: 4 training_loss 0.18855402879416944 test_loss: 0.17302045822143555
epoch: 5 training_loss 0.19839842438697816 test_loss: 0.17520056962966918
epoch: 6 training_loss 0.19176609352231025 test_loss: 0.17318172454833985
epoch: 7 training_loss 0.1850059699267149 test_loss: 0.1925772786140442
epoch: 8 training_loss 0.1977523612231016 test_loss: 0.21610467433929442
epoch: 9 training_loss 0.1828736472129822 test_loss: 0.180449378490448
epoch: 10 training_loss 0.19100862063467502 test_loss: 0.19552696943283082
epoch: 11 training_loss 0.18366326339542866 test_loss: 0.1696150541305542
epoch: 12 training_loss 0.18305779211223125 test_loss: 0.19431008100509645
epoch: 13 training_loss 0.18921696215867997 test_loss: 0.19100722074508666
epoch: 14 training_loss 0.18847303219139577 test_loss: 0.18843866586685182
epoch: 15 training_loss 0.18483230851590635 test_loss: 0.1727260947227478
epoch: 16 training_loss 0.17712910078465938 test_loss: 0.19195796251296998
epoch: 17 training_loss 0.18499008424580096 test_loss: 0.184600830078125
epoch: 18 training_loss 0.18729058884084224 test_loss: 0.16145586967468262
epoch: 19 training_loss 0.18084205463528633 test_loss: 0.18858522176742554
epoch: 20 training_loss 0.1819990672171116 test_loss: 0.17840555906295777
epoch: 21 training_loss 0.17548076674342156 test_loss: 0.1841982126235962
epoch: 22 training_loss 0.1879112845659256 test_loss: 0.187561559677124
epoch: 23 training_loss 0.1903588181734085 test_loss: 0.17712182998657228
epoch: 24 training_loss 0.18083435729146002 test_loss: 0.1795359492301941
epoch: 25 training_loss 0.17965419240295888 test_loss: 0.18604358434677123
epoch: 26 training_loss 0.18172061160206796 test_loss: 0.1722311019897461
epoch: 27 training_loss 0.17843347743153573 test_loss: 0.17187730073928834
epoch: 28 training_loss 0.1839580714702606 test_loss: 0.17293357849121094
epoch: 29 training_loss 0.1791065163165331 test_loss: 0.1831381916999817
epoch: 30 training_loss 0.1854312216490507 test_loss: 0.1729215621948242
epoch: 31 training_loss 0.18101235948503017 test_loss: 0.1647209644317627
epoch: 32 training_loss 0.18082510657608508 test_loss: 0.17499361038208008
epoch: 33 training_loss 0.1809086012095213 test_loss: 0.1974504828453064
epoch: 34 training_loss 0.18578752622008324 test_loss: 0.1847752332687378
epoch: 35 training_loss 0.17438928112387658 test_loss: 0.1701862096786499
epoch: 36 training_loss 0.17806041300296782 test_loss: 0.1925837755203247
epoch: 37 training_loss 0.17901151195168497 test_loss: 0.18082132339477539
epoch: 38 training_loss 0.17841298162937164 test_loss: 0.1682364821434021
epoch: 39 training_loss 0.1767097082734108 test_loss: 0.17914161682128907
epoch: 40 training_loss 0.1810802971571684 test_loss: 0.18402740955352784
epoch: 41 training_loss 0.18204510994255543 test_loss: 0.17557376623153687
epoch: 42 training_loss 0.1757076044380665 test_loss: 0.1836259365081787
epoch: 43 training_loss 0.18414610736072062 test_loss: 0.1839958667755127
epoch: 44 training_loss 0.18211979076266288 test_loss: 0.18936511278152465
epoch: 45 training_loss 0.17721085391938687 test_loss: 0.15904560089111328
epoch: 46 training_loss 0.18661168463528155 test_loss: 0.17327604293823243
epoch: 47 training_loss 0.17871583573520183 test_loss: 0.18095723390579224
epoch: 48 training_loss 0.17476196832954882 test_loss: 0.16698468923568727
epoch: 49 training_loss 0.17604098670184612 test_loss: 0.1775546193122864
epoch: 50 training_loss 0.17963762670755387 test_loss: 0.1787063717842102
epoch: 51 training_loss 0.18088210560381413 test_loss: 0.19780994653701783
epoch: 52 training_loss 0.16933202646672726 test_loss: 0.16284884214401246
epoch: 53 training_loss 0.17404878973960877 test_loss: 0.18251701593399047
epoch: 54 training_loss 0.18083091214299202 test_loss: 0.1564178466796875
epoch: 55 training_loss 0.18071982629597186 test_loss: 0.18758211135864258
epoch: 56 training_loss 0.1808976974338293 test_loss: 0.17767395973205566
epoch: 57 training_loss 0.17220670655369757 test_loss: 0.17800209522247315
epoch: 58 training_loss 0.18051956102252006 test_loss: 0.18514424562454224
epoch: 59 training_loss 0.18002677857875823 test_loss: 0.18538892269134521
epoch: 60 training_loss 0.18096967414021492 test_loss: 0.1753021240234375
epoch: 61 training_loss 0.17585339657962323 test_loss: 0.16579147577285766
epoch: 62 training_loss 0.18231469728052616 test_loss: 0.17140425443649293
epoch: 63 training_loss 0.18115989346057176 test_loss: 0.1822709321975708
epoch: 64 training_loss 0.1829951634258032 test_loss: 0.16985446214675903
epoch: 65 training_loss 0.18531281508505346 test_loss: 0.17516918182373048
epoch: 66 training_loss 0.17645046964287758 test_loss: 0.18669959306716918
epoch: 67 training_loss 0.18456239990890025 test_loss: 0.18032032251358032
epoch: 68 training_loss 0.17891615331172944 test_loss: 0.18914033174514772
epoch: 69 training_loss 0.17918738171458246 test_loss: 0.17784000635147096
epoch: 70 training_loss 0.17416484601795673 test_loss: 0.18389061689376832
epoch: 71 training_loss 0.18183724887669087 test_loss: 0.17824603319168092
epoch: 72 training_loss 0.18138839848339558 test_loss: 0.1643943428993225
epoch: 73 training_loss 0.16954270958900453 test_loss: 0.18218655586242677
epoch: 74 training_loss 0.1778852766752243 test_loss: 0.1796800374984741
epoch: 75 training_loss 0.17610272094607354 test_loss: 0.1849621891975403
epoch: 76 training_loss 0.18434747084975242 test_loss: 0.18459991216659546
epoch: 77 training_loss 0.17731546714901925 test_loss: 0.17868322134017944
epoch: 78 training_loss 0.176986880004406 test_loss: 0.16692728996276857
epoch: 79 training_loss 0.1790246481448412 test_loss: 0.19017733335494996
epoch: 80 training_loss 0.17384370625019074 test_loss: 0.1687405824661255
epoch: 81 training_loss 0.18404060028493405 test_loss: 0.16865454912185668
epoch: 82 training_loss 0.1718825715780258 test_loss: 0.18400804996490477
epoch: 83 training_loss 0.1803739310801029 test_loss: 0.1879616618156433
epoch: 84 training_loss 0.1779428319260478 test_loss: 0.18141545057296754
epoch: 85 training_loss 0.17878455825150014 test_loss: 0.16500501632690429
epoch: 86 training_loss 0.18043866828083993 test_loss: 0.19230880737304687
epoch: 87 training_loss 0.180101952329278 test_loss: 0.15604019165039062
epoch: 88 training_loss 0.18319914616644384 test_loss: 0.1617365837097168
epoch: 89 training_loss 0.17787244379520417 test_loss: 0.1785205125808716
epoch: 90 training_loss 0.17945073522627353 test_loss: 0.1802862763404846
epoch: 91 training_loss 0.18208922185003756 test_loss: 0.1796963095664978
epoch: 92 training_loss 0.17263905115425587 test_loss: 0.175337815284729
epoch: 93 training_loss 0.18034396059811114 test_loss: 0.18856364488601685
epoch: 94 training_loss 0.16569922372698784 test_loss: 0.1683534264564514
epoch: 95 training_loss 0.177890175357461 test_loss: 0.17965664863586425
epoch: 96 training_loss 0.17947787865996362 test_loss: 0.16823832988739013
epoch: 97 training_loss 0.17452956214547158 test_loss: 0.1618110179901123
epoch: 98 training_loss 0.17561889849603177 test_loss: 0.1711362600326538
epoch: 99 training_loss 0.18257867515087128 test_loss: 0.15831234455108642
epoch: 100 training_loss 0.18494601152837276 test_loss: 0.17099684476852417
epoch: 101 training_loss 0.17251879200339318 test_loss: 0.1607515335083008
epoch: 102 training_loss 0.17447121299803256 test_loss: 0.17837461233139038
epoch: 103 training_loss 0.1745063115656376 test_loss: 0.18814537525177003
epoch: 104 training_loss 0.17376043424010276 test_loss: 0.17165874242782592
epoch: 105 training_loss 0.16982894696295261 test_loss: 0.1774931311607361
epoch: 106 training_loss 0.1724699231982231 test_loss: 0.16631335020065308
epoch: 107 training_loss 0.17694914437830447 test_loss: 0.16484154462814332
epoch: 108 training_loss 0.17335467331111432 test_loss: 0.1860916256904602
epoch: 109 training_loss 0.16998118184506894 test_loss: 0.17652225494384766
epoch: 110 training_loss 0.1779486595094204 test_loss: 0.17487585544586182
epoch: 111 training_loss 0.17201025113463403 test_loss: 0.16421529054641723
epoch: 112 training_loss 0.16865856919437647 test_loss: 0.17901500463485717
epoch: 113 training_loss 0.18114255830645562 test_loss: 0.17900735139846802
epoch: 114 training_loss 0.18887199804186822 test_loss: 0.17379682064056395
epoch: 115 training_loss 0.18150175496935844 test_loss: 0.16439858675003052
epoch: 116 training_loss 0.17535420406609772 test_loss: 0.17452459335327147
epoch: 117 training_loss 0.18330666303634643 test_loss: 0.17395410537719727
epoch: 118 training_loss 0.1736697068810463 test_loss: 0.15484464168548584
epoch: 119 training_loss 0.18161435127258302 test_loss: 0.18748657703399657
epoch: 120 training_loss 0.17233177848160267 test_loss: 0.16856406927108764
epoch: 121 training_loss 0.17406794361770153 test_loss: 0.1769604802131653
epoch: 122 training_loss 0.18029413364827632 test_loss: 0.18479429483413695
epoch: 123 training_loss 0.184771031960845 test_loss: 0.16107391119003295
epoch: 124 training_loss 0.17310279577970505 test_loss: 0.1898755669593811
epoch: 125 training_loss 0.1735394499450922 test_loss: 0.1862316608428955
epoch: 126 training_loss 0.17699617713689805 test_loss: 0.1589071273803711
epoch: 127 training_loss 0.17398919597268103 test_loss: 0.18548433780670165
epoch: 128 training_loss 0.17860052190721035 test_loss: 0.18168208599090577
epoch: 129 training_loss 0.17507520765066148 test_loss: 0.1636821985244751
epoch: 130 training_loss 0.17206513278186322 test_loss: 0.18812769651412964
epoch: 131 training_loss 0.17912782728672028 test_loss: 0.18225474357604982
epoch: 132 training_loss 0.1730483692139387 test_loss: 0.1884007453918457
epoch: 133 training_loss 0.17541495256125927 test_loss: 0.1839954972267151
epoch: 134 training_loss 0.17572862580418586 test_loss: 0.17978286743164062
epoch: 135 training_loss 0.17210318073630332 test_loss: 0.19550271034240724
epoch: 136 training_loss 0.1714512724429369 test_loss: 0.17837742567062378
epoch: 137 training_loss 0.1817567189782858 test_loss: 0.19004757404327394
epoch: 138 training_loss 0.1741250039637089 test_loss: 0.17116475105285645
epoch: 139 training_loss 0.17378252185881138 test_loss: 0.17205866575241088
epoch: 140 training_loss 0.17951587840914726 test_loss: 0.1785385251045227
epoch: 141 training_loss 0.18869159452617168 test_loss: 0.1754970908164978
epoch: 142 training_loss 0.18241532802581786 test_loss: 0.16813905239105226
epoch: 143 training_loss 0.1821412605047226 test_loss: 0.16802295446395873
epoch: 144 training_loss 0.18296907611191274 test_loss: 0.18234071731567383
epoch: 145 training_loss 0.173506535962224 test_loss: 0.16683006286621094
epoch: 146 training_loss 0.16715876556932927 test_loss: 0.16846327781677245
epoch: 147 training_loss 0.1787144597619772 test_loss: 0.1732580065727234
epoch: 148 training_loss 0.17106109246611595 test_loss: 0.1804590106010437
epoch: 149 training_loss 0.17356207959353923 test_loss: 0.1771097421646118
epoch: 0 training_loss 0.2560409776866436 test_loss: 0.21374995708465577
epoch: 1 training_loss 0.20592902600765228 test_loss: 0.20699236392974854
epoch: 2 training_loss 0.21302007764577865 test_loss: 0.24231748580932616
epoch: 3 training_loss 0.2068731240183115 test_loss: 0.2115990161895752
epoch: 4 training_loss 0.193058445379138 test_loss: 0.19822771549224855
epoch: 5 training_loss 0.191501949056983 test_loss: 0.21671969890594484
epoch: 6 training_loss 0.187181748598814 test_loss: 0.22567281723022461
epoch: 7 training_loss 0.18985697366297244 test_loss: 0.18319153785705566
epoch: 8 training_loss 0.19179808095097542 test_loss: 0.19011342525482178
epoch: 9 training_loss 0.18407685585319997 test_loss: 0.19426863193511962
epoch: 10 training_loss 0.17666218988597393 test_loss: 0.19663964509963988
epoch: 11 training_loss 0.1867248646169901 test_loss: 0.18354610204696656
epoch: 12 training_loss 0.18202874533832072 test_loss: 0.16609750986099242
epoch: 13 training_loss 0.1829075350239873 test_loss: 0.19590548276901246
epoch: 14 training_loss 0.18708168871700764 test_loss: 0.19411442279815674
epoch: 15 training_loss 0.18354466028511524 test_loss: 0.17774758338928223
epoch: 16 training_loss 0.17685355611145495 test_loss: 0.20521023273468017
epoch: 17 training_loss 0.1808930466324091 test_loss: 0.18217544555664061
epoch: 18 training_loss 0.1796987595409155 test_loss: 0.19804445505142212
epoch: 19 training_loss 0.185354572981596 test_loss: 0.19250035285949707
epoch: 20 training_loss 0.18822239562869073 test_loss: 0.18260114192962645
epoch: 21 training_loss 0.18166173443198205 test_loss: 0.1809286117553711
epoch: 22 training_loss 0.18229841075837613 test_loss: 0.18898216485977173
epoch: 23 training_loss 0.18600549787282944 test_loss: 0.16461389064788817
epoch: 24 training_loss 0.18020797349512577 test_loss: 0.174507737159729
epoch: 25 training_loss 0.18235865600407122 test_loss: 0.1704428791999817
epoch: 26 training_loss 0.18654991418123246 test_loss: 0.19150105714797974
epoch: 27 training_loss 0.17999698102474213 test_loss: 0.18718454837799073
epoch: 28 training_loss 0.17906010039150716 test_loss: 0.18580032587051393
epoch: 29 training_loss 0.17861873418092727 test_loss: 0.18975330591201783
epoch: 30 training_loss 0.18319807320833206 test_loss: 0.1758117437362671
epoch: 31 training_loss 0.18074200108647345 test_loss: 0.1980779528617859
epoch: 32 training_loss 0.1841947853937745 test_loss: 0.1797735095024109
epoch: 33 training_loss 0.18425873801112175 test_loss: 0.18222976922988893
epoch: 34 training_loss 0.18400339372456073 test_loss: 0.19358526468276976
epoch: 35 training_loss 0.1807716128230095 test_loss: 0.1870398998260498
epoch: 36 training_loss 0.18576420031487942 test_loss: 0.2011174201965332
epoch: 37 training_loss 0.18810717932879925 test_loss: 0.18278201818466186
epoch: 38 training_loss 0.17937671162188054 test_loss: 0.16964221000671387
epoch: 39 training_loss 0.18060737617313863 test_loss: 0.20501363277435303
epoch: 40 training_loss 0.17771912083029748 test_loss: 0.1913711905479431
epoch: 41 training_loss 0.18307082310318948 test_loss: 0.17922228574752808
epoch: 42 training_loss 0.17538745187222957 test_loss: 0.18872014284133912
epoch: 43 training_loss 0.17199396461248398 test_loss: 0.20067250728607178
epoch: 44 training_loss 0.18148824140429498 test_loss: 0.17990940809249878
epoch: 45 training_loss 0.18272251985967158 test_loss: 0.18101600408554078
epoch: 46 training_loss 0.18431078411638738 test_loss: 0.17910685539245605
epoch: 47 training_loss 0.1774313899874687 test_loss: 0.17760419845581055
epoch: 48 training_loss 0.18325225181877614 test_loss: 0.17394284009933472
epoch: 49 training_loss 0.17374046996235848 test_loss: 0.1956963300704956
epoch: 50 training_loss 0.18135711193084716 test_loss: 0.19120244979858397
epoch: 51 training_loss 0.18479708693921565 test_loss: 0.18211519718170166
epoch: 52 training_loss 0.1835684683173895 test_loss: 0.1785711884498596
epoch: 53 training_loss 0.18371129304170608 test_loss: 0.17709691524505616
epoch: 54 training_loss 0.18043617613613605 test_loss: 0.19002474546432496
epoch: 55 training_loss 0.18251746341586114 test_loss: 0.18385438919067382
epoch: 56 training_loss 0.18404767915606499 test_loss: 0.1815374970436096
epoch: 57 training_loss 0.1796742108464241 test_loss: 0.18418740034103392
epoch: 58 training_loss 0.1782381459325552 test_loss: 0.18675262928009034
epoch: 59 training_loss 0.17933784998953342 test_loss: 0.1719174265861511
epoch: 60 training_loss 0.1729732745140791 test_loss: 0.17605844736099244
epoch: 61 training_loss 0.17412438683211803 test_loss: 0.19159131050109862
epoch: 62 training_loss 0.17284543946385383 test_loss: 0.17773336172103882
epoch: 63 training_loss 0.17721266999840737 test_loss: 0.1773709774017334
epoch: 64 training_loss 0.17962769098579884 test_loss: 0.1778712749481201
epoch: 65 training_loss 0.17366187706589697 test_loss: 0.19084852933883667
epoch: 66 training_loss 0.1682790892571211 test_loss: 0.17349817752838134
epoch: 67 training_loss 0.18187958680093289 test_loss: 0.17307417392730712
epoch: 68 training_loss 0.1860690012574196 test_loss: 0.18163137435913085
epoch: 69 training_loss 0.1782488463073969 test_loss: 0.18335953950881959
epoch: 70 training_loss 0.1786461677774787 test_loss: 0.1654926657676697
epoch: 71 training_loss 0.1748268397897482 test_loss: 0.17074612379074097
epoch: 72 training_loss 0.1768535217642784 test_loss: 0.173198401927948
epoch: 73 training_loss 0.18062347635626794 test_loss: 0.18530434370040894
epoch: 74 training_loss 0.17688654974102974 test_loss: 0.1964935302734375
epoch: 75 training_loss 0.178964559212327 test_loss: 0.1752694845199585
epoch: 76 training_loss 0.17473013781011104 test_loss: 0.17420530319213867
epoch: 77 training_loss 0.17470667995512484 test_loss: 0.19156177043914796
epoch: 78 training_loss 0.17454857043921948 test_loss: 0.17599833011627197
epoch: 79 training_loss 0.17622561760246755 test_loss: 0.1905754804611206
epoch: 80 training_loss 0.17765134632587432 test_loss: 0.18903961181640624
epoch: 81 training_loss 0.18196817770600318 test_loss: 0.19032776355743408
epoch: 82 training_loss 0.17535797722637653 test_loss: 0.17637157440185547
epoch: 83 training_loss 0.1707607137411833 test_loss: 0.19665281772613524
epoch: 84 training_loss 0.18110561355948449 test_loss: 0.18495477437973024
epoch: 85 training_loss 0.18340367823839188 test_loss: 0.18640754222869874
epoch: 86 training_loss 0.1834297538548708 test_loss: 0.19716737270355225
epoch: 87 training_loss 0.17599607490003108 test_loss: 0.1830671548843384
epoch: 88 training_loss 0.17902533389627934 test_loss: 0.18859246969223023
epoch: 89 training_loss 0.17716917738318444 test_loss: 0.1806192636489868
epoch: 90 training_loss 0.18124021843075752 test_loss: 0.1792625069618225
epoch: 91 training_loss 0.17314282402396203 test_loss: 0.1633859634399414
epoch: 92 training_loss 0.17697571843862533 test_loss: 0.18187592029571534
epoch: 93 training_loss 0.17973431140184404 test_loss: 0.18458523750305175
epoch: 94 training_loss 0.17382783442735672 test_loss: 0.17729271650314332
epoch: 95 training_loss 0.17646900981664657 test_loss: 0.18066866397857667
epoch: 96 training_loss 0.17359722308814526 test_loss: 0.17450509071350098
epoch: 97 training_loss 0.17457863964140416 test_loss: 0.1862301230430603
epoch: 98 training_loss 0.18198305673897266 test_loss: 0.1775197982788086
epoch: 99 training_loss 0.17415235988795758 test_loss: 0.19314147233963014
epoch: 100 training_loss 0.18033009670674802 test_loss: 0.19142708778381348
epoch: 101 training_loss 0.17755818396806716 test_loss: 0.18765827417373657
epoch: 102 training_loss 0.17716647133231164 test_loss: 0.18318852186203002
epoch: 103 training_loss 0.17097202099859715 test_loss: 0.19615195989608764
epoch: 104 training_loss 0.1775127373635769 test_loss: 0.18375736474990845
epoch: 105 training_loss 0.17574134059250354 test_loss: 0.18275272846221924
epoch: 106 training_loss 0.17680084325373172 test_loss: 0.18430482149124144
epoch: 107 training_loss 0.17324949838221074 test_loss: 0.1758859395980835
epoch: 108 training_loss 0.17962597407400607 test_loss: 0.17714895009994508
epoch: 109 training_loss 0.17043329812586308 test_loss: 0.18276394605636598
epoch: 110 training_loss 0.17748674437403678 test_loss: 0.16496388912200927
epoch: 111 training_loss 0.17613548591732978 test_loss: 0.1820871949195862
epoch: 112 training_loss 0.18665752954781056 test_loss: 0.19287046194076538
epoch: 113 training_loss 0.17190989077091218 test_loss: 0.17213282585144044
epoch: 114 training_loss 0.17582174234092235 test_loss: 0.17339601516723632
epoch: 115 training_loss 0.18329334992915391 test_loss: 0.19062589406967162
epoch: 116 training_loss 0.1714374702423811 test_loss: 0.18226773738861085
epoch: 117 training_loss 0.1830725495517254 test_loss: 0.18631329536437988
epoch: 118 training_loss 0.17654032669961453 test_loss: 0.1754976987838745
epoch: 119 training_loss 0.17042793586850166 test_loss: 0.18947594165802
epoch: 120 training_loss 0.17291458524763584 test_loss: 0.17564610242843628
epoch: 121 training_loss 0.1701229341328144 test_loss: 0.15641090869903565
epoch: 122 training_loss 0.1775389088690281 test_loss: 0.16370301246643065
epoch: 123 training_loss 0.17005344681441784 test_loss: 0.19196643829345703
epoch: 124 training_loss 0.1788075118511915 test_loss: 0.18467403650283815
epoch: 125 training_loss 0.17684812881052495 test_loss: 0.19947240352630616
epoch: 126 training_loss 0.17085633479058743 test_loss: 0.1775018811225891
epoch: 127 training_loss 0.17855738505721092 test_loss: 0.19554872512817384
epoch: 128 training_loss 0.17288065254688262 test_loss: 0.1713431715965271
epoch: 129 training_loss 0.17433464132249354 test_loss: 0.18735255002975465
epoch: 130 training_loss 0.17506505213677884 test_loss: 0.1931186079978943
epoch: 131 training_loss 0.169692093282938 test_loss: 0.16606826782226564
epoch: 132 training_loss 0.17890068039298057 test_loss: 0.18769000768661498
epoch: 133 training_loss 0.17862118124961854 test_loss: 0.16932237148284912
epoch: 134 training_loss 0.17911495968699456 test_loss: 0.19319344758987428
epoch: 135 training_loss 0.1840829648077488 test_loss: 0.21320004463195802
epoch: 136 training_loss 0.1702043464034796 test_loss: 0.18437784910202026
epoch: 137 training_loss 0.16628588281571866 test_loss: 0.16601401567459106
epoch: 138 training_loss 0.17648687079548836 test_loss: 0.16798677444458007
epoch: 139 training_loss 0.17283318132162095 test_loss: 0.18319613933563234
epoch: 140 training_loss 0.17454298205673693 test_loss: 0.16809860467910767
epoch: 141 training_loss 0.17364545293152334 test_loss: 0.1746471643447876
epoch: 142 training_loss 0.17145840153098107 test_loss: 0.1925329804420471
epoch: 143 training_loss 0.17401512123644353 test_loss: 0.18885961771011353
epoch: 144 training_loss 0.17192543394863605 test_loss: 0.17615246772766113
epoch: 145 training_loss 0.16833898715674878 test_loss: 0.1713501214981079
epoch: 146 training_loss 0.17562408674508334 test_loss: 0.18814188241958618
epoch: 147 training_loss 0.17195128880441188 test_loss: 0.18547000885009765
epoch: 148 training_loss 0.17528580069541932 test_loss: 0.18557955026626588
epoch: 149 training_loss 0.17434302106499672 test_loss: 0.17571570873260497
epoch: 0 training_loss 0.234325497969985 test_loss: 0.21661088466644288
epoch: 1 training_loss 0.19583092704415322 test_loss: 0.199815034866333
epoch: 2 training_loss 0.20199710480868815 test_loss: 0.19611589908599852
epoch: 3 training_loss 0.19324718929827214 test_loss: 0.1970537304878235
epoch: 4 training_loss 0.19677471034228802 test_loss: 0.18471578359603882
epoch: 5 training_loss 0.19313066318631172 test_loss: 0.21499624252319335
epoch: 6 training_loss 0.18974346190690994 test_loss: 0.21317121982574463
epoch: 7 training_loss 0.18122188091278077 test_loss: 0.2068082571029663
epoch: 8 training_loss 0.17983516313135625 test_loss: 0.2086961269378662
epoch: 9 training_loss 0.18811073396354913 test_loss: 0.19394177198410034
epoch: 10 training_loss 0.18826159693300723 test_loss: 0.19022849798202515
epoch: 11 training_loss 0.18516178496181965 test_loss: 0.17539966106414795
epoch: 12 training_loss 0.18511179253458976 test_loss: 0.17820619344711303
epoch: 13 training_loss 0.17862800776958465 test_loss: 0.18367390632629393
epoch: 14 training_loss 0.18431347951292992 test_loss: 0.1883520245552063
epoch: 15 training_loss 0.17787396661937238 test_loss: 0.19278855323791505
epoch: 16 training_loss 0.19443310379981996 test_loss: 0.18918584585189818
epoch: 17 training_loss 0.17792395770549774 test_loss: 0.1882564902305603
epoch: 18 training_loss 0.1758807161450386 test_loss: 0.18819137811660766
epoch: 19 training_loss 0.17861130498349667 test_loss: 0.2004364013671875
epoch: 20 training_loss 0.1786216825991869 test_loss: 0.1912337064743042
epoch: 21 training_loss 0.17782429151237011 test_loss: 0.2102268695831299
epoch: 22 training_loss 0.19472835540771485 test_loss: 0.19818203449249266
epoch: 23 training_loss 0.18550821468234063 test_loss: 0.21226866245269777
epoch: 24 training_loss 0.18426985032856463 test_loss: 0.175010085105896
epoch: 25 training_loss 0.17146129235625268 test_loss: 0.1763007164001465
epoch: 26 training_loss 0.18213034361600877 test_loss: 0.19278353452682495
epoch: 27 training_loss 0.17202675923705102 test_loss: 0.18760164976119995
epoch: 28 training_loss 0.17500486232340337 test_loss: 0.19927748441696166
epoch: 29 training_loss 0.1756861973553896 test_loss: 0.2032548666000366
epoch: 30 training_loss 0.17687012150883674 test_loss: 0.20215635299682616
epoch: 31 training_loss 0.18703281976282596 test_loss: 0.17500818967819215
epoch: 32 training_loss 0.1790766942501068 test_loss: 0.19666454792022706
epoch: 33 training_loss 0.17431765146553516 test_loss: 0.18863383531570435
epoch: 34 training_loss 0.17704022310674192 test_loss: 0.17553462982177734
epoch: 35 training_loss 0.17626004472374915 test_loss: 0.19028847217559813
epoch: 36 training_loss 0.1738840928673744 test_loss: 0.1934374451637268
epoch: 37 training_loss 0.1745734253525734 test_loss: 0.1623895764350891
epoch: 38 training_loss 0.17843608170747757 test_loss: 0.184261691570282
epoch: 39 training_loss 0.17339214988052845 test_loss: 0.19227732419967652
epoch: 40 training_loss 0.1778722693771124 test_loss: 0.17514207363128662
epoch: 41 training_loss 0.17041828945279122 test_loss: 0.1889287233352661
epoch: 42 training_loss 0.18412996180355548 test_loss: 0.2000964641571045
epoch: 43 training_loss 0.1780902637541294 test_loss: 0.1834688663482666
epoch: 44 training_loss 0.17561784632503985 test_loss: 0.17491309642791747
epoch: 45 training_loss 0.17880235515534879 test_loss: 0.18330589532852173
epoch: 46 training_loss 0.178018217086792 test_loss: 0.20444226264953613
epoch: 47 training_loss 0.1755485635250807 test_loss: 0.18556636571884155
epoch: 48 training_loss 0.17594393752515317 test_loss: 0.18921724557876587
epoch: 49 training_loss 0.182585793659091 test_loss: 0.1798573613166809
epoch: 50 training_loss 0.18109178125858308 test_loss: 0.18824489116668702
epoch: 51 training_loss 0.17356882140040397 test_loss: 0.178174889087677
epoch: 52 training_loss 0.17454428367316724 test_loss: 0.1747756004333496
epoch: 53 training_loss 0.17656683251261712 test_loss: 0.18737289905548096
epoch: 54 training_loss 0.17364794105291367 test_loss: 0.18940815925598145
epoch: 55 training_loss 0.1759049990773201 test_loss: 0.1928098201751709
epoch: 56 training_loss 0.18370673205703497 test_loss: 0.19363603591918946
epoch: 57 training_loss 0.16676146872341632 test_loss: 0.18111225366592407
epoch: 58 training_loss 0.17359837111085652 test_loss: 0.18885480165481566
epoch: 59 training_loss 0.1731332840025425 test_loss: 0.18725459575653075
epoch: 60 training_loss 0.16891562223434448 test_loss: 0.18707106113433838
epoch: 61 training_loss 0.17202104754745962 test_loss: 0.20117297172546386
epoch: 62 training_loss 0.16701929606497287 test_loss: 0.20226795673370362
epoch: 63 training_loss 0.16999904982745648 test_loss: 0.17272311449050903
epoch: 64 training_loss 0.1714772403985262 test_loss: 0.1901584506034851
epoch: 65 training_loss 0.17359752237796783 test_loss: 0.1949758768081665
epoch: 66 training_loss 0.17253175117075442 test_loss: 0.18140113353729248
epoch: 67 training_loss 0.1757319865375757 test_loss: 0.17110872268676758
epoch: 68 training_loss 0.17628711350262166 test_loss: 0.17939079999923707
epoch: 69 training_loss 0.17264524184167385 test_loss: 0.1817789316177368
epoch: 70 training_loss 0.17582308433949947 test_loss: 0.19272314310073851
epoch: 71 training_loss 0.17665109284222125 test_loss: 0.19063687324523926
epoch: 72 training_loss 0.1711519218236208 test_loss: 0.19322944879531861
epoch: 73 training_loss 0.18181872934103013 test_loss: 0.19474225044250487
epoch: 74 training_loss 0.17696873255074025 test_loss: 0.19173299074172973
epoch: 75 training_loss 0.16647130355238915 test_loss: 0.18779619932174682
epoch: 76 training_loss 0.17330934673547746 test_loss: 0.19924970865249633
epoch: 77 training_loss 0.18527820184826851 test_loss: 0.18056209087371827
epoch: 78 training_loss 0.16952576272189618 test_loss: 0.19302537441253662
epoch: 79 training_loss 0.17558461382985116 test_loss: 0.17604165077209472
epoch: 80 training_loss 0.17218406639993192 test_loss: 0.1789494514465332
epoch: 81 training_loss 0.17266410693526268 test_loss: 0.17961663007736206
epoch: 82 training_loss 0.17103811897337437 test_loss: 0.17353928089141846
epoch: 83 training_loss 0.17528701603412628 test_loss: 0.17650163173675537
epoch: 84 training_loss 0.18029327273368836 test_loss: 0.19124460220336914
epoch: 85 training_loss 0.17326642349362373 test_loss: 0.19227062463760375
epoch: 86 training_loss 0.17739628084003925 test_loss: 0.1912132978439331
epoch: 87 training_loss 0.1745044432580471 test_loss: 0.17527612447738647
epoch: 88 training_loss 0.16538668520748614 test_loss: 0.19357539415359498
epoch: 89 training_loss 0.18155904315412044 test_loss: 0.1797475814819336
epoch: 90 training_loss 0.17500259138643742 test_loss: 0.19526982307434082
epoch: 91 training_loss 0.1773674177378416 test_loss: 0.18464367389678954
epoch: 92 training_loss 0.18088813677430152 test_loss: 0.16816011667251587
epoch: 93 training_loss 0.17566850066184997 test_loss: 0.18114662170410156
epoch: 94 training_loss 0.17623811129480602 test_loss: 0.18048394918441774
epoch: 95 training_loss 0.17240595310926438 test_loss: 0.18631123304367064
epoch: 96 training_loss 0.17483846768736838 test_loss: 0.18453643321990967
epoch: 97 training_loss 0.17321517195552588 test_loss: 0.1935647249221802
epoch: 98 training_loss 0.17870604686439037 test_loss: 0.17670369148254395
epoch: 99 training_loss 0.18120731376111507 test_loss: 0.17749171257019042
epoch: 100 training_loss 0.17091532088816166 test_loss: 0.18146663904190063
epoch: 101 training_loss 0.18144185341894625 test_loss: 0.17708088159561158
epoch: 102 training_loss 0.16647680722177027 test_loss: 0.18522350788116454
epoch: 103 training_loss 0.17496638029813766 test_loss: 0.19245104789733886
epoch: 104 training_loss 0.18232565730810166 test_loss: 0.17214503288269042
epoch: 105 training_loss 0.17457618594169616 test_loss: 0.18106954097747802
epoch: 106 training_loss 0.17550890952348708 test_loss: 0.19109755754470825
epoch: 107 training_loss 0.17389420069754125 test_loss: 0.17998628616333007
epoch: 108 training_loss 0.1774661473184824 test_loss: 0.1905062675476074
epoch: 109 training_loss 0.17483935818076135 test_loss: 0.1724892020225525
epoch: 110 training_loss 0.17267070569097995 test_loss: 0.19169094562530517
epoch: 111 training_loss 0.17639347404241562 test_loss: 0.18691228628158568
epoch: 112 training_loss 0.17123549364507198 test_loss: 0.16319842338562013
epoch: 113 training_loss 0.1680640960484743 test_loss: 0.2024161100387573
epoch: 114 training_loss 0.17542377077043056 test_loss: 0.19769930839538574
epoch: 115 training_loss 0.17260715626180173 test_loss: 0.1987307071685791
epoch: 116 training_loss 0.171811698153615 test_loss: 0.19323745965957642
epoch: 117 training_loss 0.18103337422013283 test_loss: 0.18662210702896118
epoch: 118 training_loss 0.16609844125807285 test_loss: 0.19115220308303832
epoch: 119 training_loss 0.18322535496205092 test_loss: 0.17530542612075806
epoch: 120 training_loss 0.1678865223377943 test_loss: 0.17324683666229249
epoch: 121 training_loss 0.17588140457868576 test_loss: 0.193902850151062
epoch: 122 training_loss 0.17869807198643683 test_loss: 0.1819237232208252
epoch: 123 training_loss 0.17833432607352734 test_loss: 0.17001403570175172
epoch: 124 training_loss 0.16426621280610562 test_loss: 0.19455863237380983
epoch: 125 training_loss 0.17852764427661896 test_loss: 0.19365371465682985
epoch: 126 training_loss 0.17597885690629483 test_loss: 0.18026349544525147
epoch: 127 training_loss 0.17517502956092357 test_loss: 0.17450451850891113
epoch: 128 training_loss 0.1768117017298937 test_loss: 0.18957945108413696
epoch: 129 training_loss 0.1651716011017561 test_loss: 0.1955897569656372
epoch: 130 training_loss 0.17851697906851768 test_loss: 0.19862074851989747
epoch: 131 training_loss 0.17725547678768636 test_loss: 0.16861308813095094
epoch: 132 training_loss 0.16860521361231803 test_loss: 0.1979462742805481
epoch: 133 training_loss 0.17362439729273318 test_loss: 0.1837581515312195
epoch: 134 training_loss 0.16566690124571323 test_loss: 0.18801240921020507
epoch: 135 training_loss 0.18228473227471112 test_loss: 0.17518723011016846
epoch: 136 training_loss 0.17927246160805224 test_loss: 0.19324924945831298
epoch: 137 training_loss 0.17567120857536792 test_loss: 0.17830764055252074
epoch: 138 training_loss 0.16999332420527935 test_loss: 0.18832424879074097
epoch: 139 training_loss 0.1692509965598583 test_loss: 0.18495385646820067
epoch: 140 training_loss 0.17599128186702728 test_loss: 0.18626960515975952
epoch: 141 training_loss 0.17013368256390093 test_loss: 0.1900332450866699
epoch: 142 training_loss 0.17428774729371072 test_loss: 0.18962585926055908
epoch: 143 training_loss 0.17287250734865667 test_loss: 0.16848522424697876
epoch: 144 training_loss 0.16919370546936988 test_loss: 0.18594452142715454
epoch: 145 training_loss 0.16956929363310336 test_loss: 0.1895615816116333
epoch: 146 training_loss 0.17012778893113137 test_loss: 0.18778940439224243
epoch: 147 training_loss 0.16708049960434437 test_loss: 0.17383097410202025
epoch: 148 training_loss 0.18066356033086778 test_loss: 0.19554445743560792
epoch: 149 training_loss 0.18116785071790217 test_loss: 0.20209755897521972
epoch: 0 training_loss 0.248357023447752 test_loss: 0.20339457988739013
epoch: 1 training_loss 0.20332692973315716 test_loss: 0.1856081485748291
epoch: 2 training_loss 0.20459426797926425 test_loss: 0.19474133253097534
epoch: 3 training_loss 0.1890219124406576 test_loss: 0.19437747001647948
epoch: 4 training_loss 0.19498681463301182 test_loss: 0.18988734483718872
epoch: 5 training_loss 0.18376569397747516 test_loss: 0.18400591611862183
epoch: 6 training_loss 0.18980648577213288 test_loss: 0.17822275161743165
epoch: 7 training_loss 0.18760471768677234 test_loss: 0.22923285961151124
epoch: 8 training_loss 0.19906472198665143 test_loss: 0.1904695987701416
epoch: 9 training_loss 0.1907152585685253 test_loss: 0.20107128620147705
epoch: 10 training_loss 0.1961666002869606 test_loss: 0.1931178092956543
epoch: 11 training_loss 0.18059055976569652 test_loss: 0.18568917512893676
epoch: 12 training_loss 0.1833392395079136 test_loss: 0.19227566719055175
epoch: 13 training_loss 0.19232580564916135 test_loss: 0.18815045356750487
epoch: 14 training_loss 0.18280337311327458 test_loss: 0.17841582298278807
epoch: 15 training_loss 0.1929720675945282 test_loss: 0.18297452926635743
epoch: 16 training_loss 0.1912676052749157 test_loss: 0.18437085151672364
epoch: 17 training_loss 0.18617062382400035 test_loss: 0.19827800989151
epoch: 18 training_loss 0.18357645332813263 test_loss: 0.18524168729782103
epoch: 19 training_loss 0.18472244523465634 test_loss: 0.19146922826766968
epoch: 20 training_loss 0.19318560212850572 test_loss: 0.19905210733413697
epoch: 21 training_loss 0.1838707559555769 test_loss: 0.18894394636154174
epoch: 22 training_loss 0.18514975890517235 test_loss: 0.16763418912887573
epoch: 23 training_loss 0.18894952416419983 test_loss: 0.1879584312438965
epoch: 24 training_loss 0.18043114095926285 test_loss: 0.17078914642333984
epoch: 25 training_loss 0.18806956067681313 test_loss: 0.18372364044189454
epoch: 26 training_loss 0.18542305782437324 test_loss: 0.18168253898620607
epoch: 27 training_loss 0.18980524823069572 test_loss: 0.17745124101638793
epoch: 28 training_loss 0.18833342634141445 test_loss: 0.18971978425979613
epoch: 29 training_loss 0.182019005343318 test_loss: 0.18244141340255737
epoch: 30 training_loss 0.1775527712702751 test_loss: 0.1804721713066101
epoch: 31 training_loss 0.1857597018033266 test_loss: 0.17799924612045287
epoch: 32 training_loss 0.18872560419142245 test_loss: 0.17468457221984862
epoch: 33 training_loss 0.18442784368991852 test_loss: 0.18123787641525269
epoch: 34 training_loss 0.1911126398295164 test_loss: 0.17567405700683594
epoch: 35 training_loss 0.17522648744285108 test_loss: 0.17504721879959106
epoch: 36 training_loss 0.1860706901550293 test_loss: 0.1849303960800171
epoch: 37 training_loss 0.18010186299681663 test_loss: 0.1814229369163513
epoch: 38 training_loss 0.18521129958331584 test_loss: 0.182035493850708
epoch: 39 training_loss 0.18403871275484562 test_loss: 0.18931179046630858
epoch: 40 training_loss 0.18461039684712888 test_loss: 0.18425716161727906
epoch: 41 training_loss 0.1807825542986393 test_loss: 0.17810606956481934
epoch: 42 training_loss 0.17729642011225225 test_loss: 0.18617286682128906
epoch: 43 training_loss 0.18368343725800515 test_loss: 0.17782586812973022
epoch: 44 training_loss 0.17887481331825256 test_loss: 0.17387197017669678
epoch: 45 training_loss 0.17558139987289906 test_loss: 0.19694738388061522
epoch: 46 training_loss 0.18538649767637252 test_loss: 0.1889391541481018
epoch: 47 training_loss 0.18527913205325602 test_loss: 0.18392491340637207
epoch: 48 training_loss 0.18531472519040107 test_loss: 0.17708065509796142
epoch: 49 training_loss 0.18154992878437043 test_loss: 0.18118706941604615
epoch: 50 training_loss 0.18196324221789836 test_loss: 0.17819153070449828
epoch: 51 training_loss 0.17827534399926662 test_loss: 0.1716444969177246
epoch: 52 training_loss 0.18751226864755155 test_loss: 0.17145414352416993
epoch: 53 training_loss 0.17799050420522688 test_loss: 0.15789154767990113
epoch: 54 training_loss 0.18553916312754154 test_loss: 0.1824025273323059
epoch: 55 training_loss 0.17843539640307426 test_loss: 0.17328269481658937
epoch: 56 training_loss 0.1800078009814024 test_loss: 0.16998822689056398
epoch: 57 training_loss 0.18265284717082977 test_loss: 0.19877709150314332
epoch: 58 training_loss 0.1787400559335947 test_loss: 0.17540568113327026
epoch: 59 training_loss 0.17723937548696994 test_loss: 0.19796924591064452
epoch: 60 training_loss 0.18064305871725084 test_loss: 0.17746338844299317
epoch: 61 training_loss 0.17449698217213153 test_loss: 0.1813580274581909
epoch: 62 training_loss 0.1826278705149889 test_loss: 0.1860630750656128
epoch: 63 training_loss 0.18033885419368745 test_loss: 0.1653037667274475
epoch: 64 training_loss 0.17361006177961827 test_loss: 0.1863715887069702
epoch: 65 training_loss 0.17887317843735218 test_loss: 0.1675235152244568
epoch: 66 training_loss 0.18486021772027017 test_loss: 0.1824345350265503
epoch: 67 training_loss 0.187380286231637 test_loss: 0.17096632719039917
epoch: 68 training_loss 0.1826237939298153 test_loss: 0.1834447979927063
epoch: 69 training_loss 0.18573644176125526 test_loss: 0.16305079460144042
epoch: 70 training_loss 0.18009240821003913 test_loss: 0.18488401174545288
epoch: 71 training_loss 0.18355267949402332 test_loss: 0.19004515409469605
epoch: 72 training_loss 0.1800041326880455 test_loss: 0.17402974367141724
epoch: 73 training_loss 0.18599916838109493 test_loss: 0.19185323715209962
epoch: 74 training_loss 0.18633623644709588 test_loss: 0.18471858501434327
epoch: 75 training_loss 0.17892723955214024 test_loss: 0.19649821519851685
epoch: 76 training_loss 0.18152105681598185 test_loss: 0.1769588589668274
epoch: 77 training_loss 0.18376202322542667 test_loss: 0.17745712995529175
epoch: 78 training_loss 0.18282031506299973 test_loss: 0.17826606035232545
epoch: 79 training_loss 0.17895067170262335 test_loss: 0.18091263771057128
epoch: 80 training_loss 0.1837429066002369 test_loss: 0.19128291606903075
epoch: 81 training_loss 0.1849400455504656 test_loss: 0.18853847980499266
epoch: 82 training_loss 0.17119457326829435 test_loss: 0.18390737771987914
epoch: 83 training_loss 0.18390942804515362 test_loss: 0.1766832709312439
epoch: 84 training_loss 0.17924889653921128 test_loss: 0.16393195390701293
epoch: 85 training_loss 0.17607412792742252 test_loss: 0.17135953903198242
epoch: 86 training_loss 0.17454004943370818 test_loss: 0.17021437883377075
epoch: 87 training_loss 0.1806318884342909 test_loss: 0.17879441976547242
epoch: 88 training_loss 0.1808240408450365 test_loss: 0.19497276544570924
epoch: 89 training_loss 0.1818613538891077 test_loss: 0.18487174510955812
epoch: 90 training_loss 0.180133046656847 test_loss: 0.17276270389556886
epoch: 91 training_loss 0.177619144693017 test_loss: 0.1683474898338318
epoch: 92 training_loss 0.18432925298810005 test_loss: 0.16587989330291747
epoch: 93 training_loss 0.17409024626016617 test_loss: 0.17146674394607545
epoch: 94 training_loss 0.1849527632445097 test_loss: 0.17340335845947266
epoch: 95 training_loss 0.17355323575437068 test_loss: 0.17649046182632447
epoch: 96 training_loss 0.1835096228867769 test_loss: 0.18886502981185913
epoch: 97 training_loss 0.1781136827170849 test_loss: 0.16243975162506102
epoch: 98 training_loss 0.17467926777899265 test_loss: 0.17699431180953978
epoch: 99 training_loss 0.18154755122959615 test_loss: 0.1674143671989441
epoch: 100 training_loss 0.17866809144616128 test_loss: 0.16846579313278198
epoch: 101 training_loss 0.18459867767989635 test_loss: 0.15449641942977904
epoch: 102 training_loss 0.1782524685561657 test_loss: 0.18691177368164064
epoch: 103 training_loss 0.18346621714532374 test_loss: 0.17642383575439452
epoch: 104 training_loss 0.18446906492114068 test_loss: 0.18333690166473388
epoch: 105 training_loss 0.17714327566325663 test_loss: 0.18075189590454102
epoch: 106 training_loss 0.179280479028821 test_loss: 0.18622359037399291
epoch: 107 training_loss 0.18110098816454412 test_loss: 0.16827263832092285
epoch: 108 training_loss 0.18290940344333648 test_loss: 0.17347240447998047
epoch: 109 training_loss 0.17464636504650116 test_loss: 0.17824718952178956
epoch: 110 training_loss 0.17010351248085498 test_loss: 0.18276896476745605
epoch: 111 training_loss 0.18689829789102078 test_loss: 0.1818473219871521
epoch: 112 training_loss 0.18011434108018876 test_loss: 0.17551958560943604
epoch: 113 training_loss 0.17679492346942424 test_loss: 0.18159420490264894
epoch: 114 training_loss 0.17864030964672564 test_loss: 0.18174725770950317
epoch: 115 training_loss 0.17966886393725873 test_loss: 0.17958866357803344
epoch: 116 training_loss 0.17454088389873504 test_loss: 0.18086458444595338
epoch: 117 training_loss 0.1785015431791544 test_loss: 0.1786160945892334
epoch: 118 training_loss 0.1746869330108166 test_loss: 0.17903168201446534
epoch: 119 training_loss 0.1829086845368147 test_loss: 0.175538969039917
epoch: 120 training_loss 0.18475580729544164 test_loss: 0.18260890245437622
epoch: 121 training_loss 0.17693184949457647 test_loss: 0.18601591587066652
epoch: 122 training_loss 0.17604258842766285 test_loss: 0.1759038209915161
epoch: 123 training_loss 0.1860251610726118 test_loss: 0.17111018896102906
epoch: 124 training_loss 0.17428454980254174 test_loss: 0.16955214738845825
epoch: 125 training_loss 0.17798844583332538 test_loss: 0.18453022241592407
epoch: 126 training_loss 0.1684494636952877 test_loss: 0.18827719688415528
epoch: 127 training_loss 0.17430645279586315 test_loss: 0.17711374759674073
epoch: 128 training_loss 0.18035405576229097 test_loss: 0.1872994303703308
epoch: 129 training_loss 0.17212888598442078 test_loss: 0.1673394560813904
epoch: 130 training_loss 0.1784970024228096 test_loss: 0.18058226108551026
epoch: 131 training_loss 0.1793423979729414 test_loss: 0.17239757776260375
epoch: 132 training_loss 0.17558785192668439 test_loss: 0.1865597128868103
epoch: 133 training_loss 0.17751470163464547 test_loss: 0.18960756063461304
epoch: 134 training_loss 0.17926023736596108 test_loss: 0.16048309803009034
epoch: 135 training_loss 0.1777517857402563 test_loss: 0.18542616367340087
epoch: 136 training_loss 0.1718886536359787 test_loss: 0.1761074423789978
epoch: 137 training_loss 0.18186915740370752 test_loss: 0.17777057886123657
epoch: 138 training_loss 0.17458166494965555 test_loss: 0.18702605962753296
epoch: 139 training_loss 0.17583779834210872 test_loss: 0.1791669249534607
epoch: 140 training_loss 0.18019830405712128 test_loss: 0.17927343845367433
epoch: 141 training_loss 0.17739427410066128 test_loss: 0.1759866237640381
epoch: 142 training_loss 0.18715694971382618 test_loss: 0.1641959309577942
epoch: 143 training_loss 0.17876982487738133 test_loss: 0.17044012546539306
epoch: 144 training_loss 0.18287365943193434 test_loss: 0.16475315093994142
epoch: 145 training_loss 0.18029927775263788 test_loss: 0.17880895137786865
epoch: 146 training_loss 0.17955559507012367 test_loss: 0.1566334366798401
epoch: 147 training_loss 0.1787746561318636 test_loss: 0.17484235763549805
epoch: 148 training_loss 0.17836685568094254 test_loss: 0.16940248012542725
epoch: 149 training_loss 0.17745878450572491 test_loss: 0.16806313991546631
episode: 0 training return: -711.3869833614772
episode: 1 training return: -682.1720679434679
episode: 2 training return: -808.10766740118
episode: 3 training return: -732.3417518814032
epoch: 1 test_true_pfm: 211.37861648077265 sim_pfm: -884.8274555111947
episode: 4 training return: -734.8208475414127
episode: 5 training return: -697.6803788994345
episode: 6 training return: -625.0009069225496
episode: 7 training return: -752.3190659957262
epoch: 2 test_true_pfm: 198.78312799632184 sim_pfm: -882.6343376626987
episode: 8 training return: -632.7446547508692
episode: 9 training return: -447.8318335401016
episode: 10 training return: -846.9917862941538
episode: 11 training return: -864.8546621306147
epoch: 3 test_true_pfm: 226.96055357291763 sim_pfm: -876.039910416256
episode: 12 training return: -863.8397407453652
episode: 13 training return: -879.6135371283167
episode: 14 training return: -864.547561903061
episode: 15 training return: -864.3924869665675
epoch: 4 test_true_pfm: 243.27048571062372 sim_pfm: -865.5858811321455
episode: 16 training return: -866.2762118041865
episode: 17 training return: -855.5247879785286
episode: 18 training return: -765.0661723860864
episode: 19 training return: -823.4610191000719
epoch: 5 test_true_pfm: 4.352533499298774 sim_pfm: -554.5286947652676
episode: 20 training return: -609.1482576444431
episode: 21 training return: -796.017416976633
episode: 22 training return: -825.8129769855043
episode: 23 training return: -820.369781526075
epoch: 6 test_true_pfm: 224.01775639088305 sim_pfm: -835.347300982119
episode: 24 training return: -823.968761486752
episode: 25 training return: -841.6752820842398
episode: 26 training return: -829.4687534946719
episode: 27 training return: -823.9309787203669
epoch: 7 test_true_pfm: 260.7244471609349 sim_pfm: -823.0417226488502
episode: 28 training return: -818.8805043343099
episode: 29 training return: -803.277000416824
episode: 30 training return: -807.3783352828063
episode: 31 training return: -791.7891780174042
epoch: 8 test_true_pfm: 340.26205011108453 sim_pfm: -802.1613311393336
episode: 32 training return: -797.9835883186198
episode: 33 training return: -788.2256584791253
episode: 34 training return: -771.0995772756509
episode: 35 training return: -779.2800370827565
epoch: 9 test_true_pfm: 285.2471400534904 sim_pfm: -815.2131395576944
episode: 36 training return: -782.4666641391818
episode: 37 training return: -762.928150714533
episode: 38 training return: -767.9038119445377
episode: 39 training return: -741.7838866647057
epoch: 10 test_true_pfm: 240.91340401428934 sim_pfm: -750.598244444733
episode: 40 training return: -743.4185385067866
episode: 41 training return: -739.4401855746781
episode: 42 training return: -740.8969614751574
episode: 43 training return: -714.9061263546921
epoch: 11 test_true_pfm: 300.97142170423456 sim_pfm: -720.2518737957095
episode: 44 training return: -717.9955676780759
episode: 45 training return: -688.7373233798384
episode: 46 training return: -668.8805197870514
episode: 47 training return: -683.5562610943506
epoch: 12 test_true_pfm: 262.4002468071446 sim_pfm: -647.5576770021497
episode: 48 training return: -688.4723854679044
episode: 49 training return: -633.8922533456489
episode: 50 training return: -622.9705519963278
episode: 51 training return: -616.3348292890032
epoch: 13 test_true_pfm: 238.09486110210273 sim_pfm: -520.6545712486763
episode: 52 training return: -604.893816653664
episode: 53 training return: -547.3567370596028
episode: 54 training return: -553.9208840036544
episode: 55 training return: -523.9283574637237
epoch: 14 test_true_pfm: 236.4703911559932 sim_pfm: -606.7087586300213
episode: 56 training return: -564.8904004428007
episode: 57 training return: -554.866167245143
episode: 58 training return: -586.8435286006924
episode: 59 training return: -618.3296767026002
epoch: 15 test_true_pfm: 222.351182260173 sim_pfm: -626.2513138761518
episode: 60 training return: -631.3214304955334
episode: 61 training return: -672.2297801405895
episode: 62 training return: -628.5522125767092
episode: 63 training return: -680.1704320466454
epoch: 16 test_true_pfm: 286.09258186836087 sim_pfm: -618.7332917914777
episode: 64 training return: -646.5736179606195
episode: 65 training return: -618.4355954571412
episode: 66 training return: -577.7132985949881
episode: 67 training return: -547.0208338203812
epoch: 17 test_true_pfm: 266.14256955095016 sim_pfm: -497.7252024749828
episode: 68 training return: -536.0232628644876
episode: 69 training return: -514.3043348676248
episode: 70 training return: -538.3984817670521
episode: 71 training return: -546.879643614236
epoch: 18 test_true_pfm: 317.6356432090159 sim_pfm: -480.36357283422575
episode: 72 training return: -521.9578464762856
episode: 73 training return: -538.4439962275679
episode: 74 training return: -524.6707959706396
episode: 75 training return: -523.1756445778742
epoch: 19 test_true_pfm: 287.33986917126305 sim_pfm: -490.5845079455164
episode: 76 training return: -537.6444418449458
episode: 77 training return: -519.6489546118246
episode: 78 training return: -520.0447141687976
episode: 79 training return: -524.373504211156
epoch: 20 test_true_pfm: 307.0364632318389 sim_pfm: -476.73849667243695
episode: 80 training return: -532.8404963879168
episode: 81 training return: -523.088848277141
episode: 82 training return: -525.1153358750731
episode: 83 training return: -522.2528454213119
epoch: 21 test_true_pfm: 292.01400232067067 sim_pfm: -503.1518788293611
episode: 84 training return: -519.2052423724637
episode: 85 training return: -518.8913927488042
episode: 86 training return: -527.2982595559289
episode: 87 training return: -511.77170220635816
epoch: 22 test_true_pfm: 334.13047277136735 sim_pfm: -467.1605954254298
episode: 88 training return: -522.292897186168
episode: 89 training return: -538.4936338800044
episode: 90 training return: -525.9484455572392
episode: 91 training return: -533.0197400304237
epoch: 23 test_true_pfm: 289.76809407899424 sim_pfm: -477.79563045693686
episode: 92 training return: -520.437664705631
episode: 93 training return: -515.3522475058215
episode: 94 training return: -508.65589639689534
episode: 95 training return: -512.998986615194
epoch: 24 test_true_pfm: 295.8492517441823 sim_pfm: -478.8508583455455
episode: 96 training return: -514.3734187128211
episode: 97 training return: -510.23167439770305
episode: 98 training return: -512.0386917335262
episode: 99 training return: -514.6740512275708
epoch: 25 test_true_pfm: 317.83602548218556 sim_pfm: -473.4110457613586
episode: 100 training return: -511.3304861059991
episode: 101 training return: -512.032370446288
episode: 102 training return: -526.0485543643939
episode: 103 training return: -515.4095989710884
epoch: 26 test_true_pfm: 292.2124054618019 sim_pfm: -478.19247968372065
episode: 104 training return: -512.376806777255
episode: 105 training return: -510.6793322352443
episode: 106 training return: -512.7692760795306
episode: 107 training return: -518.8852560995923
epoch: 27 test_true_pfm: 296.83104498859035 sim_pfm: -471.88366232836347
episode: 108 training return: -503.56549235621327
episode: 109 training return: -497.0153481675222
episode: 110 training return: -527.0757778723448
episode: 111 training return: -514.6626573007559
epoch: 28 test_true_pfm: 303.8708030297532 sim_pfm: -471.4393942812133
episode: 112 training return: -517.5507442181349
episode: 113 training return: -511.3819086824007
episode: 114 training return: -506.2524416744929
episode: 115 training return: -513.6042261580955
epoch: 29 test_true_pfm: 318.2296364443219 sim_pfm: -471.5632361230191
episode: 116 training return: -517.4407843502933
episode: 117 training return: -509.3519802868789
episode: 118 training return: -512.2298807011231
episode: 119 training return: -506.54296696780364
epoch: 30 test_true_pfm: 306.11233513928016 sim_pfm: -461.2290545158091
episode: 120 training return: -503.8196796579997
episode: 121 training return: -512.721957657259
episode: 122 training return: -503.939636244815
episode: 123 training return: -513.6058831495641
epoch: 31 test_true_pfm: 303.7128635747237 sim_pfm: -464.4503150276692
episode: 124 training return: -502.5559814103672
episode: 125 training return: -529.9664904160092
episode: 126 training return: -509.3379437312311
episode: 127 training return: -501.7390041009082
epoch: 32 test_true_pfm: 292.67353609145056 sim_pfm: -464.894927633246
episode: 128 training return: -502.3025222171137
episode: 129 training return: -505.0679606431437
episode: 130 training return: -504.1470078925476
episode: 131 training return: -503.6811611090014
epoch: 33 test_true_pfm: 299.9349796842751 sim_pfm: -469.92905102780975
episode: 132 training return: -523.3729271825727
episode: 133 training return: -510.8495395556321
episode: 134 training return: -505.02474989035846
episode: 135 training return: -501.7948925478768
epoch: 34 test_true_pfm: 311.9033131072137 sim_pfm: -459.904283087648
episode: 136 training return: -505.97938967661537
episode: 137 training return: -515.1398787632395
episode: 138 training return: -507.4474436801195
episode: 139 training return: -505.63668429889714
epoch: 35 test_true_pfm: 310.02388206320455 sim_pfm: -456.9299037477031
episode: 140 training return: -513.1886807880805
episode: 141 training return: -497.548231201697
episode: 142 training return: -502.537150341809
episode: 143 training return: -506.32815398500975
epoch: 36 test_true_pfm: 278.4306027592029 sim_pfm: -473.90039250861037
episode: 144 training return: -509.1289525039029
episode: 145 training return: -506.34857125561166
episode: 146 training return: -512.8231778290913
episode: 147 training return: -510.8734913341658
epoch: 37 test_true_pfm: 302.8120683731338 sim_pfm: -471.7749363482773
episode: 148 training return: -503.0304290285684
episode: 149 training return: -508.40162154698896
episode: 150 training return: -509.4990890226559
episode: 151 training return: -507.3883837093927
epoch: 38 test_true_pfm: 316.2028040465212 sim_pfm: -468.9663722011093
episode: 152 training return: -503.34177259824463
episode: 153 training return: -503.7930945463589
episode: 154 training return: -505.7729006981924
episode: 155 training return: -504.2781017108913
epoch: 39 test_true_pfm: 291.812917233483 sim_pfm: -460.67048845139783
episode: 156 training return: -511.0035367477681
episode: 157 training return: -507.71039963421856
episode: 158 training return: -515.8154234402025
episode: 159 training return: -494.6088670328556
epoch: 40 test_true_pfm: 286.26888480758123 sim_pfm: -470.51831132892534
episode: 160 training return: -511.7996275434424
episode: 161 training return: -496.24788742978444
episode: 162 training return: -504.4945300915373
episode: 163 training return: -504.41555992672824
epoch: 41 test_true_pfm: 293.9973446984539 sim_pfm: -463.1594798737224
episode: 164 training return: -500.8241905674876
episode: 165 training return: -501.0252216376792
episode: 166 training return: -493.30712511983006
episode: 167 training return: -498.51109549835815
epoch: 42 test_true_pfm: 297.9766572367891 sim_pfm: -465.9280099222924
episode: 168 training return: -496.37175099586153
episode: 169 training return: -502.9982103096467
episode: 170 training return: -496.81853482028254
episode: 171 training return: -495.3826787177066
epoch: 43 test_true_pfm: 326.3262977932977 sim_pfm: -457.7094776470176
episode: 172 training return: -496.70261562853665
episode: 173 training return: -486.7763402158215
episode: 174 training return: -497.3478783713989
episode: 175 training return: -493.4398079325119
epoch: 44 test_true_pfm: 294.6486194742045 sim_pfm: -459.5322104270304
episode: 176 training return: -489.72499901223586
episode: 177 training return: -497.07384238485355
episode: 178 training return: -485.7884815635118
episode: 179 training return: -497.48334673351104
epoch: 45 test_true_pfm: 334.491792303628 sim_pfm: -448.54880676800076
episode: 180 training return: -498.03975820812315
episode: 181 training return: -495.7905560973519
episode: 182 training return: -493.65794685347055
episode: 183 training return: -498.1608863554098
epoch: 46 test_true_pfm: 368.50855276253543 sim_pfm: -447.5551186243401
episode: 184 training return: -507.5261438073903
episode: 185 training return: -491.217804011333
episode: 186 training return: -495.4779421793258
episode: 187 training return: -495.2136534325237
epoch: 47 test_true_pfm: 315.5779930078806 sim_pfm: -449.89776044168616
episode: 188 training return: -500.1151168506518
episode: 189 training return: -487.496957589642
episode: 190 training return: -504.30965879724056
episode: 191 training return: -498.92810224049555
epoch: 48 test_true_pfm: 312.0985932761794 sim_pfm: -460.7089622159436
episode: 192 training return: -489.30557127743344
episode: 193 training return: -515.3476337568409
episode: 194 training return: -513.3189039744022
episode: 195 training return: -490.42908511610864
epoch: 49 test_true_pfm: 347.63816187118204 sim_pfm: -456.16910576565846
episode: 196 training return: -501.7688271851145
episode: 197 training return: -500.57589017047496
episode: 198 training return: -499.21187740791106
episode: 199 training return: -496.0778555736859
epoch: 50 test_true_pfm: 338.96925346130894 sim_pfm: -451.50654475940837
episode: 200 training return: -490.15406760617236
episode: 201 training return: -498.00748332340896
episode: 202 training return: -490.7498751436874
episode: 203 training return: -490.255703205576
epoch: 51 test_true_pfm: 345.124975683098 sim_pfm: -446.10736850537006
episode: 204 training return: -485.08419376164966
episode: 205 training return: -495.9626743682768
episode: 206 training return: -505.0814538087206
episode: 207 training return: -490.897799127025
epoch: 52 test_true_pfm: 332.44133121804674 sim_pfm: -458.0980905693428
episode: 208 training return: -498.9296241024453
episode: 209 training return: -484.87219603257006
episode: 210 training return: -487.76461976805894
episode: 211 training return: -492.998386379206
epoch: 53 test_true_pfm: 313.7285293869281 sim_pfm: -456.3343779936344
episode: 212 training return: -494.71414326550143
episode: 213 training return: -499.9987398716914
episode: 214 training return: -501.05880425091175
episode: 215 training return: -502.59745935350924
epoch: 54 test_true_pfm: 318.601437335202 sim_pfm: -457.9429456622497
episode: 216 training return: -496.50614980255546
episode: 217 training return: -507.4403326057569
episode: 218 training return: -478.3948871464771
episode: 219 training return: -497.8196960224593
epoch: 55 test_true_pfm: 359.2914136163351 sim_pfm: -450.57675703613677
episode: 220 training return: -493.44682296405693
episode: 221 training return: -497.38705252110884
episode: 222 training return: -488.5072562660031
episode: 223 training return: -485.048945165459
epoch: 56 test_true_pfm: 349.7202486322228 sim_pfm: -443.90829903781014
episode: 224 training return: -496.932433564096
episode: 225 training return: -496.76357121156155
episode: 226 training return: -502.28407550781225
episode: 227 training return: -502.09930269972824
epoch: 57 test_true_pfm: 331.98497732345066 sim_pfm: -449.041639111919
episode: 228 training return: -496.2609979589831
episode: 229 training return: -490.68037292397736
episode: 230 training return: -489.4463635531375
episode: 231 training return: -494.5894181101874
epoch: 58 test_true_pfm: 336.55426083101673 sim_pfm: -453.5959810738793
episode: 232 training return: -489.7958760492654
episode: 233 training return: -487.5783516639451
episode: 234 training return: -487.70325302143107
episode: 235 training return: -498.9469328845342
epoch: 59 test_true_pfm: 359.5608172398864 sim_pfm: -453.46089730868226
episode: 236 training return: -497.33250883868965
episode: 237 training return: -502.15225499884747
episode: 238 training return: -504.10713679528357
episode: 239 training return: -484.53498034450655
epoch: 60 test_true_pfm: 363.46931572666017 sim_pfm: -444.93077615645547
episode: 240 training return: -507.3354189596738
episode: 241 training return: -495.6259252309911
episode: 242 training return: -492.00497783229133
episode: 243 training return: -502.34199575602315
epoch: 61 test_true_pfm: 332.52958646290193 sim_pfm: -460.21124839698973
episode: 244 training return: -488.20806340964765
episode: 245 training return: -498.96162429204657
episode: 246 training return: -500.15841541678174
episode: 247 training return: -494.3642709327547
epoch: 62 test_true_pfm: 335.35110350572745 sim_pfm: -445.46080793462585
episode: 248 training return: -479.925994900979
episode: 249 training return: -494.51251176024
episode: 250 training return: -496.95003754498026
episode: 251 training return: -499.1792543811103
epoch: 63 test_true_pfm: 342.7108163002599 sim_pfm: -447.3167909756971
episode: 252 training return: -491.8960934581999
episode: 253 training return: -492.52006655599973
episode: 254 training return: -495.7992136378625
episode: 255 training return: -505.4078264590654
epoch: 64 test_true_pfm: 346.58205366309875 sim_pfm: -458.044679417232
episode: 256 training return: -491.4197155103082
episode: 257 training return: -492.2844251556046
episode: 258 training return: -492.571573669706
episode: 259 training return: -503.30435113452205
epoch: 65 test_true_pfm: 384.1821127350304 sim_pfm: -437.81902157324095
episode: 260 training return: -491.8596563200752
episode: 261 training return: -506.3459946047318
episode: 262 training return: -494.6400240689029
episode: 263 training return: -504.43438660441467
epoch: 66 test_true_pfm: 374.0394279361947 sim_pfm: -447.7590674397233
episode: 264 training return: -496.8258269933011
episode: 265 training return: -483.2136477232885
episode: 266 training return: -491.23349079395155
episode: 267 training return: -493.33294788385956
epoch: 67 test_true_pfm: 350.8438968145363 sim_pfm: -451.9437029421329
episode: 268 training return: -487.6254077390842
episode: 269 training return: -481.9763830412328
episode: 270 training return: -485.69995281904585
episode: 271 training return: -484.3585738899567
epoch: 68 test_true_pfm: 332.1806006887606 sim_pfm: -448.7732909692607
episode: 272 training return: -485.28230280728604
episode: 273 training return: -499.7722389343701
episode: 274 training return: -486.2864603168292
episode: 275 training return: -497.6179625482794
epoch: 69 test_true_pfm: 350.9677199497135 sim_pfm: -448.0171857919509
episode: 276 training return: -491.7987579802796
episode: 277 training return: -501.7831485968972
episode: 278 training return: -489.07866720558997
episode: 279 training return: -494.7440578111214
epoch: 70 test_true_pfm: 366.70895647019614 sim_pfm: -451.09240992811993
episode: 280 training return: -485.19215605069945
episode: 281 training return: -483.5519600779521
episode: 282 training return: -479.0651755230721
episode: 283 training return: -483.4239411907259
epoch: 71 test_true_pfm: 363.1015420955125 sim_pfm: -443.6900804278894
episode: 284 training return: -489.0461887900488
episode: 285 training return: -493.88854690775776
episode: 286 training return: -475.8158054232233
episode: 287 training return: -482.5252184885563
epoch: 72 test_true_pfm: 364.29705866184986 sim_pfm: -442.3050212780866
episode: 288 training return: -481.1145673412241
episode: 289 training return: -496.28150403062716
episode: 290 training return: -479.27610852613276
episode: 291 training return: -482.7943750597284
epoch: 73 test_true_pfm: 397.6676067661048 sim_pfm: -440.24059709555837
episode: 292 training return: -493.8799937810465
episode: 293 training return: -482.9862110384257
episode: 294 training return: -476.0067372241038
episode: 295 training return: -486.38420114298566
epoch: 74 test_true_pfm: 400.43240370507004 sim_pfm: -438.49579992923185
episode: 296 training return: -486.5873371906359
episode: 297 training return: -491.7165143942487
episode: 298 training return: -482.29517863390754
episode: 299 training return: -477.0518834268333
epoch: 75 test_true_pfm: 367.21617546992366 sim_pfm: -443.6911960035464
episode: 300 training return: -489.9284910291237
episode: 301 training return: -487.75705328889654
episode: 302 training return: -494.86190836073985
episode: 303 training return: -497.4627943683963
epoch: 76 test_true_pfm: 369.6752698512286 sim_pfm: -442.0165940663082
episode: 304 training return: -488.9904514786638
episode: 305 training return: -472.37889557209604
episode: 306 training return: -490.73084537068587
episode: 307 training return: -485.42373522691605
epoch: 77 test_true_pfm: 376.92854822442405 sim_pfm: -442.59311729895563
episode: 308 training return: -491.0270607966578
episode: 309 training return: -482.7889125337035
episode: 310 training return: -485.67408190447713
episode: 311 training return: -477.7308922410295
epoch: 78 test_true_pfm: 379.66558442252636 sim_pfm: -436.6016371121363
episode: 312 training return: -493.6608805689598
episode: 313 training return: -480.92986973977867
episode: 314 training return: -486.98845019001544
episode: 315 training return: -482.75586165067534
epoch: 79 test_true_pfm: 385.0530720130998 sim_pfm: -442.9068518691529
episode: 316 training return: -494.36461092844706
episode: 317 training return: -493.87881206961174
episode: 318 training return: -495.3920690731713
episode: 319 training return: -493.98920071523236
epoch: 80 test_true_pfm: 410.47680846607983 sim_pfm: -435.7654149540263
episode: 320 training return: -481.43831061046654
episode: 321 training return: -476.0240962539384
episode: 322 training return: -497.25357527901707
episode: 323 training return: -484.73647211988236
epoch: 81 test_true_pfm: 371.8860001807928 sim_pfm: -440.72223801023
episode: 324 training return: -487.28784208234924
episode: 325 training return: -487.67593720656237
episode: 326 training return: -481.6121785262777
episode: 327 training return: -487.27284510904116
epoch: 82 test_true_pfm: 348.23824974901663 sim_pfm: -453.5968896294609
episode: 328 training return: -484.82263942298135
episode: 329 training return: -482.67189207723624
episode: 330 training return: -493.6522639391217
episode: 331 training return: -477.8686361938962
epoch: 83 test_true_pfm: 362.8458734446358 sim_pfm: -444.9887460358734
episode: 332 training return: -481.29247279931684
episode: 333 training return: -481.27202549147125
episode: 334 training return: -488.05281915959625
episode: 335 training return: -488.96118210270265
epoch: 84 test_true_pfm: 376.7789479103956 sim_pfm: -443.22795715537205
episode: 336 training return: -477.3819615531004
episode: 337 training return: -479.6810620482474
episode: 338 training return: -494.77272414380053
episode: 339 training return: -483.3280805746436
epoch: 85 test_true_pfm: 385.43564987887316 sim_pfm: -437.6277280932923
episode: 340 training return: -470.4814302824416
episode: 341 training return: -477.23629148040186
episode: 342 training return: -480.94496680761983
episode: 343 training return: -477.31210267258655
epoch: 86 test_true_pfm: 383.83673137646286 sim_pfm: -441.0799085470203
episode: 344 training return: -488.28759912466444
episode: 345 training return: -485.3977433208211
episode: 346 training return: -483.949191099354
episode: 347 training return: -472.15873075669987
epoch: 87 test_true_pfm: 393.79228166549564 sim_pfm: -430.32778323196703
episode: 348 training return: -479.36159322457104
episode: 349 training return: -479.2159740343152
episode: 350 training return: -482.5032840832272
episode: 351 training return: -491.0987497716193
epoch: 88 test_true_pfm: 379.9503522652132 sim_pfm: -443.4635972388325
episode: 352 training return: -483.37380407798355
episode: 353 training return: -483.8772362803802
episode: 354 training return: -481.7570809610713
episode: 355 training return: -478.63701265929416
epoch: 89 test_true_pfm: 369.3295450759529 sim_pfm: -440.09000273648763
episode: 356 training return: -493.618041858975
episode: 357 training return: -487.62261868915726
episode: 358 training return: -496.060648964781
episode: 359 training return: -487.200196026637
epoch: 90 test_true_pfm: 395.89390278242416 sim_pfm: -435.93955780453643
episode: 360 training return: -491.579481981335
episode: 361 training return: -473.02514789143714
episode: 362 training return: -486.85914624010053
episode: 363 training return: -482.7377962072165
epoch: 91 test_true_pfm: 421.7468803285171 sim_pfm: -424.09700840758234
episode: 364 training return: -478.79767793105003
episode: 365 training return: -478.721468651944
episode: 366 training return: -471.5907536263478
episode: 367 training return: -497.15375776981773
epoch: 92 test_true_pfm: 406.10658484465694 sim_pfm: -428.36865260289056
episode: 368 training return: -479.7927615235111
episode: 369 training return: -494.06538482384536
episode: 370 training return: -498.6593378369278
episode: 371 training return: -480.94002810237083
epoch: 93 test_true_pfm: 402.54719150274576 sim_pfm: -438.25606879512253
episode: 372 training return: -490.6062199456474
episode: 373 training return: -484.9709655977884
episode: 374 training return: -484.78044257821875
episode: 375 training return: -480.43822650080506
epoch: 94 test_true_pfm: 369.83791934416655 sim_pfm: -440.57893936796773
episode: 376 training return: -480.49116415289626
episode: 377 training return: -475.37731821606855
episode: 378 training return: -481.35467520466244
episode: 379 training return: -488.30282157993173
epoch: 95 test_true_pfm: 399.033494261974 sim_pfm: -432.17998449408833
episode: 380 training return: -494.74010567268255
episode: 381 training return: -484.5306590568844
episode: 382 training return: -483.18150290241607
episode: 383 training return: -479.0825540879555
epoch: 96 test_true_pfm: 388.833568262544 sim_pfm: -437.39330929858664
episode: 384 training return: -482.14997461801335
episode: 385 training return: -483.6485984617133
episode: 386 training return: -478.7637526853803
episode: 387 training return: -480.017865460529
epoch: 97 test_true_pfm: 369.31395755882994 sim_pfm: -446.34667867527537
episode: 388 training return: -487.49889457546004
episode: 389 training return: -490.03609063035293
episode: 390 training return: -474.62461971526324
episode: 391 training return: -481.6109235962882
epoch: 98 test_true_pfm: 373.88034567802237 sim_pfm: -443.85572766057453
episode: 392 training return: -464.3745179360395
episode: 393 training return: -474.78184417925473
episode: 394 training return: -477.389291879774
episode: 395 training return: -480.5821688380894
epoch: 99 test_true_pfm: 384.7566228130766 sim_pfm: -437.9694174299762
episode: 396 training return: -484.09338146511675
episode: 397 training return: -484.78727543256235
episode: 398 training return: -480.3487960502543
episode: 399 training return: -486.0356774252347
epoch: 100 test_true_pfm: 401.71045610659485 sim_pfm: -436.09997972075206
episode: 400 training return: -488.19691785443376
episode: 401 training return: -457.85679947441116
episode: 402 training return: -477.6691617808052
episode: 403 training return: -461.79857024087056
epoch: 101 test_true_pfm: 404.28300440377114 sim_pfm: -432.2636768865057
episode: 404 training return: -476.91532694802834
episode: 405 training return: -475.09930040117575
episode: 406 training return: -500.05293443329214
episode: 407 training return: -473.28673663456425
epoch: 102 test_true_pfm: 397.164375303789 sim_pfm: -433.15725636226034
episode: 408 training return: -479.0261184037669
episode: 409 training return: -474.28256420199057
episode: 410 training return: -486.264974962787
episode: 411 training return: -468.53525196802684
epoch: 103 test_true_pfm: 416.0099836483239 sim_pfm: -430.142175479615
episode: 412 training return: -472.4053536889756
episode: 413 training return: -478.81964330699634
episode: 414 training return: -480.8096796828531
episode: 415 training return: -474.014493046443
epoch: 104 test_true_pfm: 396.8241542652666 sim_pfm: -428.7410363230203
episode: 416 training return: -480.58281368566753
episode: 417 training return: -466.4228567727356
episode: 418 training return: -470.86672649381404
episode: 419 training return: -483.8791224036479
epoch: 105 test_true_pfm: 407.4409373866495 sim_pfm: -427.0125945809222
episode: 420 training return: -475.9106230981997
episode: 421 training return: -482.68105369099146
episode: 422 training return: -470.93949580987817
episode: 423 training return: -480.2393831784528
epoch: 106 test_true_pfm: 403.53543478612073 sim_pfm: -427.7908603285844
episode: 424 training return: -469.6545389894079
episode: 425 training return: -475.11850628047006
episode: 426 training return: -454.52473479703224
episode: 427 training return: -466.59710975092935
epoch: 107 test_true_pfm: 409.0914480713327 sim_pfm: -430.5206473864496
episode: 428 training return: -477.28948328902914
episode: 429 training return: -468.4856281064985
episode: 430 training return: -466.15858392652564
episode: 431 training return: -481.115884640701
epoch: 108 test_true_pfm: 428.6966828848056 sim_pfm: -427.44725068158476
episode: 432 training return: -468.5089560051605
episode: 433 training return: -472.69404688604953
episode: 434 training return: -467.1713531581264
episode: 435 training return: -484.6111739882629
epoch: 109 test_true_pfm: 398.7998971236313 sim_pfm: -437.03743040430237
episode: 436 training return: -472.6018636009334
episode: 437 training return: -479.20242201301204
episode: 438 training return: -468.7788614352694
episode: 439 training return: -489.58031765593745
epoch: 110 test_true_pfm: 402.98555238441054 sim_pfm: -433.4309533304777
episode: 440 training return: -494.1605257857486
episode: 441 training return: -470.98633787740096
episode: 442 training return: -465.8727133533595
episode: 443 training return: -480.71918468359627
epoch: 111 test_true_pfm: 377.931723875601 sim_pfm: -439.274156108839
episode: 444 training return: -475.96168394843
episode: 445 training return: -475.5772422411904
episode: 446 training return: -477.52249551522686
episode: 447 training return: -465.67074947875057
epoch: 112 test_true_pfm: 435.9555544134808 sim_pfm: -427.3571184325477
episode: 448 training return: -480.18124978666486
episode: 449 training return: -464.7957186541404
episode: 450 training return: -468.5197205342767
episode: 451 training return: -484.0111624766316
epoch: 113 test_true_pfm: 421.80837244293366 sim_pfm: -423.9915461565336
episode: 452 training return: -470.42757057842664
episode: 453 training return: -482.8456921644103
episode: 454 training return: -465.0750140209555
episode: 455 training return: -465.0295243267501
epoch: 114 test_true_pfm: 443.42744469988753 sim_pfm: -419.028912210837
episode: 456 training return: -464.4163399565516
episode: 457 training return: -464.4527913597448
episode: 458 training return: -457.514247629295
episode: 459 training return: -458.81287431649463
epoch: 115 test_true_pfm: 449.2906543726406 sim_pfm: -412.88490376737315
episode: 460 training return: -467.8197023569528
episode: 461 training return: -467.9007426838377
episode: 462 training return: -464.0758378070723
episode: 463 training return: -460.77150887734797
epoch: 116 test_true_pfm: 440.614402774064 sim_pfm: -415.9302768806677
episode: 464 training return: -474.5515125691904
episode: 465 training return: -468.890522336985
episode: 466 training return: -464.7117455986351
episode: 467 training return: -473.04735272467326
epoch: 117 test_true_pfm: 415.6550222024619 sim_pfm: -429.88442772704815
episode: 468 training return: -475.6013024213813
episode: 469 training return: -475.2955650139173
episode: 470 training return: -473.22837147383353
episode: 471 training return: -473.35236453786564
epoch: 118 test_true_pfm: 411.99096519103415 sim_pfm: -431.445448785868
episode: 472 training return: -470.3658035537993
episode: 473 training return: -467.477158211456
episode: 474 training return: -475.6493715014471
episode: 475 training return: -478.18462632361917
epoch: 119 test_true_pfm: 410.99277882852675 sim_pfm: -428.04700010998135
episode: 476 training return: -478.23080040728127
episode: 477 training return: -459.8033193708819
episode: 478 training return: -469.7870220963398
episode: 479 training return: -464.814235817202
epoch: 120 test_true_pfm: 431.7735212195045 sim_pfm: -422.7250787884264
episode: 480 training return: -462.9233457791583
episode: 481 training return: -484.8140793729154
episode: 482 training return: -477.8622513558237
episode: 483 training return: -478.3358289891261
epoch: 121 test_true_pfm: 463.4437868524266 sim_pfm: -415.9637869421393
episode: 484 training return: -457.4203212020144
episode: 485 training return: -481.5104692237501
episode: 486 training return: -458.394261133671
episode: 487 training return: -472.07454774247276
epoch: 122 test_true_pfm: 424.33007279625764 sim_pfm: -426.02175128371096
episode: 488 training return: -481.8009887284506
episode: 489 training return: -475.6478011114648
episode: 490 training return: -477.8900185758021
episode: 491 training return: -463.9364599309357
epoch: 123 test_true_pfm: 412.6705207739648 sim_pfm: -435.6270136648058
episode: 492 training return: -484.08582954082306
episode: 493 training return: -473.673261678257
episode: 494 training return: -478.5775282745488
episode: 495 training return: -478.3223724399443
epoch: 124 test_true_pfm: 435.92118162315046 sim_pfm: -422.55258337056324
episode: 496 training return: -470.99339206690905
episode: 497 training return: -470.9935217255991
episode: 498 training return: -481.1375577108862
episode: 499 training return: -474.5481627219291
epoch: 125 test_true_pfm: 435.84057697196084 sim_pfm: -418.95262553093676
episode: 500 training return: -471.47557960120395
episode: 501 training return: -462.6216613244673
episode: 502 training return: -463.9314753335781
episode: 503 training return: -467.5579023988694
epoch: 126 test_true_pfm: 421.2413233720894 sim_pfm: -426.1294711464786
episode: 504 training return: -462.97017407785506
episode: 505 training return: -477.57878101497283
episode: 506 training return: -472.3770877651133
episode: 507 training return: -469.0488356149436
epoch: 127 test_true_pfm: 438.9406274170897 sim_pfm: -425.8442413219512
episode: 508 training return: -466.44775654155325
episode: 509 training return: -469.3108009872655
episode: 510 training return: -469.2924118107348
episode: 511 training return: -468.8572713717248
epoch: 128 test_true_pfm: 439.62852153001126 sim_pfm: -419.19270784430006
episode: 512 training return: -463.1626978029318
episode: 513 training return: -480.169654972446
episode: 514 training return: -462.21805486340753
episode: 515 training return: -466.3500827523978
epoch: 129 test_true_pfm: 420.48406590173244 sim_pfm: -429.7421929108461
episode: 516 training return: -472.33252629980933
episode: 517 training return: -460.7739712547363
episode: 518 training return: -482.03220167156627
episode: 519 training return: -467.7048486480472
epoch: 130 test_true_pfm: 400.42696242031155 sim_pfm: -435.2573520374189
episode: 520 training return: -477.92323492712666
episode: 521 training return: -474.161328731284
episode: 522 training return: -466.5925078899821
episode: 523 training return: -465.6699342256397
epoch: 131 test_true_pfm: 412.1959772267071 sim_pfm: -428.737293909018
episode: 524 training return: -464.250064074923
episode: 525 training return: -465.5806096452225
episode: 526 training return: -486.20225653241516
episode: 527 training return: -460.9511557124895
epoch: 132 test_true_pfm: 447.59709705872564 sim_pfm: -422.08449888907234
episode: 528 training return: -471.4149482525438
episode: 529 training return: -461.28072888648256
episode: 530 training return: -463.7640205351753
episode: 531 training return: -456.4090609398925
epoch: 133 test_true_pfm: 427.84655626632457 sim_pfm: -423.42611107825337
episode: 532 training return: -476.1430811756095
episode: 533 training return: -455.784047307739
episode: 534 training return: -474.1954176857174
episode: 535 training return: -483.02587055619335
epoch: 134 test_true_pfm: 420.3787135108528 sim_pfm: -425.60188617883176
episode: 536 training return: -467.0056061241782
episode: 537 training return: -465.2144388519587
episode: 538 training return: -475.1180751667084
episode: 539 training return: -479.98314121014613
epoch: 135 test_true_pfm: 478.8609676219775 sim_pfm: -407.1949108948804
episode: 540 training return: -467.1696432578913
episode: 541 training return: -477.8474065186931
episode: 542 training return: -468.942101129585
episode: 543 training return: -470.1384477732912
epoch: 136 test_true_pfm: 456.976360844652 sim_pfm: -415.1912856667421
episode: 544 training return: -466.4352111718336
episode: 545 training return: -485.41704328182215
episode: 546 training return: -475.31838794819424
episode: 547 training return: -464.9384085272989
epoch: 137 test_true_pfm: 438.9461956471932 sim_pfm: -422.18393865330273
episode: 548 training return: -470.55261730923235
episode: 549 training return: -463.84860172278366
episode: 550 training return: -475.893209264461
episode: 551 training return: -471.1847966210509
epoch: 138 test_true_pfm: 449.1492760327226 sim_pfm: -413.5149575623964
episode: 552 training return: -472.9371873013307
episode: 553 training return: -466.0702566528563
episode: 554 training return: -465.42396333649776
episode: 555 training return: -468.8279558422822
epoch: 139 test_true_pfm: 435.2803136714435 sim_pfm: -415.9571420376603
episode: 556 training return: -464.8693727742163
episode: 557 training return: -466.37041301154545
episode: 558 training return: -460.1314913238548
episode: 559 training return: -466.6266708296045
epoch: 140 test_true_pfm: 436.0501345559953 sim_pfm: -423.83399540603403
episode: 560 training return: -475.67547071978964
episode: 561 training return: -456.20767190624264
episode: 562 training return: -465.34065841680143
episode: 563 training return: -483.52643673491133
epoch: 141 test_true_pfm: 427.34817685811544 sim_pfm: -426.16400363650365
episode: 564 training return: -479.2592660277199
episode: 565 training return: -463.37696660673816
episode: 566 training return: -464.40781821663074
episode: 567 training return: -459.8887181266679
epoch: 142 test_true_pfm: 434.1198260296278 sim_pfm: -422.8753695175808
episode: 568 training return: -477.4199624053936
episode: 569 training return: -476.0814861074226
episode: 570 training return: -474.03803941215835
episode: 571 training return: -471.12912482618026
epoch: 143 test_true_pfm: 430.9265262897497 sim_pfm: -419.0923369021885
episode: 572 training return: -477.993784088657
episode: 573 training return: -453.60447564291553
episode: 574 training return: -481.47564158816505
episode: 575 training return: -452.2546386454433
epoch: 144 test_true_pfm: 457.0587457865056 sim_pfm: -417.64469437212193
episode: 576 training return: -455.6655898577779
episode: 577 training return: -467.2319657296614
episode: 578 training return: -445.2523322708659
episode: 579 training return: -464.30312356557937
epoch: 145 test_true_pfm: 480.9026644957938 sim_pfm: -407.72395586558724
episode: 580 training return: -453.84312556739275
episode: 581 training return: -466.22002634815476
episode: 582 training return: -448.27025866217195
episode: 583 training return: -479.71201922828647
epoch: 146 test_true_pfm: 470.695217105194 sim_pfm: -408.66014950976233
episode: 584 training return: -453.9631672587081
episode: 585 training return: -460.5458404757244
episode: 586 training return: -463.65778540538054
episode: 587 training return: -462.8127092955728
epoch: 147 test_true_pfm: 500.15850033089754 sim_pfm: -409.1590777464917
episode: 588 training return: -454.2829973934732
episode: 589 training return: -468.8599048396866
episode: 590 training return: -467.9176567640819
episode: 591 training return: -451.90990814076554
epoch: 148 test_true_pfm: 452.2744736364904 sim_pfm: -416.3952999969735
episode: 592 training return: -470.3345649643813
episode: 593 training return: -475.8158583565556
episode: 594 training return: -455.13080739606994
episode: 595 training return: -464.99217261927555
epoch: 149 test_true_pfm: 500.0697600098833 sim_pfm: -401.95201358301637
episode: 596 training return: -456.3319883985703
episode: 597 training return: -461.3855558035296
episode: 598 training return: -464.8666976824312
episode: 599 training return: -475.6235694615402
epoch: 150 test_true_pfm: 428.0732317167896 sim_pfm: -425.2768503080569
