['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'uncertainty', '--traj', 'medium', '--seed', '3', '--data', '100000']
epoch: 0 training_loss 0.24449903532862663 test_loss: 0.21395521163940429
epoch: 1 training_loss 0.20668453358113767 test_loss: 0.1828382730484009
epoch: 2 training_loss 0.19507510349154472 test_loss: 0.1898030161857605
epoch: 3 training_loss 0.19305777534842491 test_loss: 0.19181238412857055
epoch: 4 training_loss 0.19127917535603045 test_loss: 0.20299916267395018
epoch: 5 training_loss 0.18860477074980736 test_loss: 0.19098230600357055
epoch: 6 training_loss 0.19211346805095672 test_loss: 0.1968219041824341
epoch: 7 training_loss 0.1804317183047533 test_loss: 0.19137685298919677
epoch: 8 training_loss 0.18879802912473678 test_loss: 0.18679651021957397
epoch: 9 training_loss 0.18886631183326244 test_loss: 0.19621341228485106
epoch: 10 training_loss 0.18165348179638385 test_loss: 0.1874160051345825
epoch: 11 training_loss 0.18583442620933055 test_loss: 0.19288411140441894
epoch: 12 training_loss 0.1929062756150961 test_loss: 0.19916436672210694
epoch: 13 training_loss 0.18358036026358604 test_loss: 0.20595593452453614
epoch: 14 training_loss 0.18334306858479976 test_loss: 0.17408442497253418
epoch: 15 training_loss 0.18224616818130016 test_loss: 0.1623460054397583
epoch: 16 training_loss 0.18210810966789723 test_loss: 0.1823035717010498
epoch: 17 training_loss 0.17612686060369015 test_loss: 0.18290302753448487
epoch: 18 training_loss 0.1928006948530674 test_loss: 0.19401391744613647
epoch: 19 training_loss 0.18515708297491074 test_loss: 0.18457176685333251
epoch: 20 training_loss 0.1869780708104372 test_loss: 0.1809351325035095
epoch: 21 training_loss 0.18192677319049835 test_loss: 0.16993410587310792
epoch: 22 training_loss 0.1779314798116684 test_loss: 0.17808425426483154
epoch: 23 training_loss 0.1821075238287449 test_loss: 0.1720173954963684
epoch: 24 training_loss 0.17998708330094815 test_loss: 0.16690423488616943
epoch: 25 training_loss 0.1834976849704981 test_loss: 0.1834876298904419
epoch: 26 training_loss 0.19207776516675948 test_loss: 0.18331626653671265
epoch: 27 training_loss 0.1837800981849432 test_loss: 0.1917004704475403
epoch: 28 training_loss 0.17692632429301738 test_loss: 0.16693116426467897
epoch: 29 training_loss 0.1760975009948015 test_loss: 0.17658003568649291
epoch: 30 training_loss 0.1749168796092272 test_loss: 0.17384166717529298
epoch: 31 training_loss 0.181083849593997 test_loss: 0.18587275743484497
epoch: 32 training_loss 0.17593511499464512 test_loss: 0.169857919216156
epoch: 33 training_loss 0.17712309204041957 test_loss: 0.18131977319717407
epoch: 34 training_loss 0.17607514306902886 test_loss: 0.18525388240814208
epoch: 35 training_loss 0.17227367497980595 test_loss: 0.1797206163406372
epoch: 36 training_loss 0.1831186357140541 test_loss: 0.17737646102905275
epoch: 37 training_loss 0.17699969843029975 test_loss: 0.19550994634628296
epoch: 38 training_loss 0.1805196326225996 test_loss: 0.16658291816711426
epoch: 39 training_loss 0.17288083486258984 test_loss: 0.16362371444702148
epoch: 40 training_loss 0.17809582471847535 test_loss: 0.17267906665802002
epoch: 41 training_loss 0.17978091537952423 test_loss: 0.17623794078826904
epoch: 42 training_loss 0.18223798610270023 test_loss: 0.17570098638534545
epoch: 43 training_loss 0.17718129739165306 test_loss: 0.18001854419708252
epoch: 44 training_loss 0.17976052813231946 test_loss: 0.18911211490631102
epoch: 45 training_loss 0.17509218357503414 test_loss: 0.18084383010864258
epoch: 46 training_loss 0.18152085952460767 test_loss: 0.1786041021347046
epoch: 47 training_loss 0.18388621978461742 test_loss: 0.1823456406593323
epoch: 48 training_loss 0.18294667482376098 test_loss: 0.15994517803192138
epoch: 49 training_loss 0.17600895684212448 test_loss: 0.17164361476898193
epoch: 50 training_loss 0.17675153002142907 test_loss: 0.18265290260314943
epoch: 51 training_loss 0.17890717677772044 test_loss: 0.18483093976974488
epoch: 52 training_loss 0.1816428676992655 test_loss: 0.19479000568389893
epoch: 53 training_loss 0.174046883136034 test_loss: 0.17791935205459594
epoch: 54 training_loss 0.18527609504759313 test_loss: 0.1711408257484436
epoch: 55 training_loss 0.18497697845101357 test_loss: 0.17857660055160524
epoch: 56 training_loss 0.17576859503984452 test_loss: 0.189080548286438
epoch: 57 training_loss 0.17797618873417378 test_loss: 0.18770642280578614
epoch: 58 training_loss 0.17887010462582112 test_loss: 0.18965861797332764
epoch: 59 training_loss 0.1730067027360201 test_loss: 0.18412865400314332
epoch: 60 training_loss 0.17362718410789968 test_loss: 0.18286718130111695
epoch: 61 training_loss 0.17555622220039369 test_loss: 0.17924708127975464
epoch: 62 training_loss 0.18097649313509465 test_loss: 0.17966821193695068
epoch: 63 training_loss 0.1726399639993906 test_loss: 0.190820574760437
epoch: 64 training_loss 0.18013499394059182 test_loss: 0.18077460527420045
epoch: 65 training_loss 0.1811522737890482 test_loss: 0.17882604598999025
epoch: 66 training_loss 0.18325749419629575 test_loss: 0.1623685121536255
epoch: 67 training_loss 0.17466160096228123 test_loss: 0.1882546663284302
epoch: 68 training_loss 0.17887325704097748 test_loss: 0.18326115608215332
epoch: 69 training_loss 0.18536063112318515 test_loss: 0.1829900860786438
epoch: 70 training_loss 0.17949385106563567 test_loss: 0.17317577600479125
epoch: 71 training_loss 0.17474553875625135 test_loss: 0.1824500560760498
epoch: 72 training_loss 0.17160818196833133 test_loss: 0.17126446962356567
epoch: 73 training_loss 0.17868855550885201 test_loss: 0.1682254672050476
epoch: 74 training_loss 0.17451219744980334 test_loss: 0.18469278812408446
epoch: 75 training_loss 0.1741729535907507 test_loss: 0.17549901008605956
epoch: 76 training_loss 0.17749243147671223 test_loss: 0.1683647871017456
epoch: 77 training_loss 0.17889416061341762 test_loss: 0.19177666902542115
epoch: 78 training_loss 0.1755550479888916 test_loss: 0.16842381954193114
epoch: 79 training_loss 0.1755138437449932 test_loss: 0.17706624269485474
epoch: 80 training_loss 0.17422099269926547 test_loss: 0.17575758695602417
epoch: 81 training_loss 0.17667592275887728 test_loss: 0.1651504397392273
epoch: 82 training_loss 0.17283146418631076 test_loss: 0.17339158058166504
epoch: 83 training_loss 0.17115382567048074 test_loss: 0.16668403148651123
epoch: 84 training_loss 0.17909891605377198 test_loss: 0.16310349702835084
epoch: 85 training_loss 0.176408401876688 test_loss: 0.1792399048805237
epoch: 86 training_loss 0.17982827216386796 test_loss: 0.18284186124801635
epoch: 87 training_loss 0.17176314562559128 test_loss: 0.17301006317138673
epoch: 88 training_loss 0.17588317193090916 test_loss: 0.17312724590301515
epoch: 89 training_loss 0.1765685247629881 test_loss: 0.18187483549118041
epoch: 90 training_loss 0.17562275856733323 test_loss: 0.1693536877632141
epoch: 91 training_loss 0.17702586956322194 test_loss: 0.17856459617614745
epoch: 92 training_loss 0.17518148489296437 test_loss: 0.18258391618728637
epoch: 93 training_loss 0.18617813996970653 test_loss: 0.17512835264205934
epoch: 94 training_loss 0.1740577568113804 test_loss: 0.183265221118927
epoch: 95 training_loss 0.1726218692213297 test_loss: 0.18539270162582397
epoch: 96 training_loss 0.17484689965844155 test_loss: 0.1788525938987732
epoch: 97 training_loss 0.1776406379789114 test_loss: 0.18541829586029052
epoch: 98 training_loss 0.1763785658031702 test_loss: 0.17732727527618408
epoch: 99 training_loss 0.18462275378406048 test_loss: 0.18310734033584594
epoch: 100 training_loss 0.17866732187569143 test_loss: 0.18415790796279907
epoch: 101 training_loss 0.17856674708425999 test_loss: 0.176377010345459
epoch: 102 training_loss 0.17578793272376061 test_loss: 0.1735498070716858
epoch: 103 training_loss 0.17501962516456843 test_loss: 0.17721468210220337
epoch: 104 training_loss 0.17778934732079507 test_loss: 0.17072784900665283
epoch: 105 training_loss 0.17155216246843338 test_loss: 0.17972251176834106
epoch: 106 training_loss 0.16329805202782155 test_loss: 0.17119889259338378
epoch: 107 training_loss 0.17328344903886317 test_loss: 0.1871195912361145
epoch: 108 training_loss 0.179367123991251 test_loss: 0.17555328607559204
epoch: 109 training_loss 0.17285846561193466 test_loss: 0.17478373050689697
epoch: 110 training_loss 0.17548456318676472 test_loss: 0.16721993684768677
epoch: 111 training_loss 0.17882304295897483 test_loss: 0.17662137746810913
epoch: 112 training_loss 0.1737281984835863 test_loss: 0.18406742811203003
epoch: 113 training_loss 0.17657600931823253 test_loss: 0.1812272310256958
epoch: 114 training_loss 0.1722195891290903 test_loss: 0.180256986618042
epoch: 115 training_loss 0.17600379541516303 test_loss: 0.15993162393569946
epoch: 116 training_loss 0.1754026620090008 test_loss: 0.16762616634368896
epoch: 117 training_loss 0.1783728825300932 test_loss: 0.1683812379837036
epoch: 118 training_loss 0.17352816928178072 test_loss: 0.17696486711502074
epoch: 119 training_loss 0.17899052053689957 test_loss: 0.18515228033065795
epoch: 120 training_loss 0.17383807431906462 test_loss: 0.1818135142326355
epoch: 121 training_loss 0.16871565639972685 test_loss: 0.17924922704696655
epoch: 122 training_loss 0.17902689501643182 test_loss: 0.1710318684577942
epoch: 123 training_loss 0.18867009408771993 test_loss: 0.20557124614715577
epoch: 124 training_loss 0.17726555712521075 test_loss: 0.16849695444107055
epoch: 125 training_loss 0.17954435788094997 test_loss: 0.18036400079727172
epoch: 126 training_loss 0.168550116494298 test_loss: 0.16222344636917113
epoch: 127 training_loss 0.17230950050055982 test_loss: 0.17217016220092773
epoch: 128 training_loss 0.16941810764372348 test_loss: 0.16542545557022095
epoch: 129 training_loss 0.17761050119996072 test_loss: 0.171073842048645
epoch: 130 training_loss 0.17909538455307483 test_loss: 0.17325247526168824
epoch: 131 training_loss 0.17229011915624143 test_loss: 0.17929303646087646
epoch: 132 training_loss 0.1782444339990616 test_loss: 0.17207047939300538
epoch: 133 training_loss 0.17080691166222095 test_loss: 0.1782848596572876
epoch: 134 training_loss 0.17279249764978885 test_loss: 0.16892385482788086
epoch: 135 training_loss 0.17329807601869107 test_loss: 0.16124315261840821
epoch: 136 training_loss 0.17497761242091656 test_loss: 0.17122706174850463
epoch: 137 training_loss 0.1829739072918892 test_loss: 0.18359938859939576
epoch: 138 training_loss 0.16852646701037885 test_loss: 0.1692452311515808
epoch: 139 training_loss 0.17103659696877002 test_loss: 0.1811181902885437
epoch: 140 training_loss 0.1732596744596958 test_loss: 0.18343178033828736
epoch: 141 training_loss 0.16876200158149005 test_loss: 0.18420050144195557
epoch: 142 training_loss 0.16771368570625783 test_loss: 0.17803176641464233
epoch: 143 training_loss 0.17627730749547482 test_loss: 0.16782846450805664
epoch: 144 training_loss 0.17405979700386523 test_loss: 0.17707905769348145
epoch: 145 training_loss 0.181985489949584 test_loss: 0.175038480758667
epoch: 146 training_loss 0.17246455498039723 test_loss: 0.17354912757873536
epoch: 147 training_loss 0.17417680189013482 test_loss: 0.1745691180229187
epoch: 148 training_loss 0.17255416594445705 test_loss: 0.1700368642807007
epoch: 149 training_loss 0.17637427181005477 test_loss: 0.1746263861656189
epoch: 0 training_loss 0.24788356266915798 test_loss: 0.21489486694335938
epoch: 1 training_loss 0.19807906322181223 test_loss: 0.18835923671722413
epoch: 2 training_loss 0.19794826313853264 test_loss: 0.1925741195678711
epoch: 3 training_loss 0.19585236497223377 test_loss: 0.20079495906829833
epoch: 4 training_loss 0.2005139148980379 test_loss: 0.21088659763336182
epoch: 5 training_loss 0.19244304053485395 test_loss: 0.20153050422668456
epoch: 6 training_loss 0.191063067689538 test_loss: 0.18413664102554322
epoch: 7 training_loss 0.19442289210855962 test_loss: 0.18001261949539185
epoch: 8 training_loss 0.1788490042090416 test_loss: 0.18393962383270263
epoch: 9 training_loss 0.18341227620840073 test_loss: 0.1755201816558838
epoch: 10 training_loss 0.19288766585290432 test_loss: 0.18620526790618896
epoch: 11 training_loss 0.18632447600364685 test_loss: 0.1876368999481201
epoch: 12 training_loss 0.19346574418246745 test_loss: 0.18063253164291382
epoch: 13 training_loss 0.18246502801775932 test_loss: 0.1810895562171936
epoch: 14 training_loss 0.19497323878109454 test_loss: 0.1786391019821167
epoch: 15 training_loss 0.181663238927722 test_loss: 0.18494464159011842
epoch: 16 training_loss 0.17700475364923476 test_loss: 0.1883367419242859
epoch: 17 training_loss 0.1909202267974615 test_loss: 0.18624168634414673
epoch: 18 training_loss 0.18778379395604133 test_loss: 0.1810534954071045
epoch: 19 training_loss 0.1860761423408985 test_loss: 0.17631216049194337
epoch: 20 training_loss 0.1769794598221779 test_loss: 0.18684475421905516
epoch: 21 training_loss 0.1732152882963419 test_loss: 0.19311187267303467
epoch: 22 training_loss 0.17807224929332732 test_loss: 0.1847675323486328
epoch: 23 training_loss 0.18829432673752308 test_loss: 0.1895941376686096
epoch: 24 training_loss 0.1930441615730524 test_loss: 0.174979567527771
epoch: 25 training_loss 0.1847393222153187 test_loss: 0.1821282386779785
epoch: 26 training_loss 0.1789130588620901 test_loss: 0.17258031368255616
epoch: 27 training_loss 0.18632125653326512 test_loss: 0.17525551319122315
epoch: 28 training_loss 0.18478285297751426 test_loss: 0.18237073421478273
epoch: 29 training_loss 0.17871748737990856 test_loss: 0.16871769428253175
epoch: 30 training_loss 0.1873637455701828 test_loss: 0.17695400714874268
epoch: 31 training_loss 0.1803220783174038 test_loss: 0.17812615633010864
epoch: 32 training_loss 0.175261387899518 test_loss: 0.17332015037536622
epoch: 33 training_loss 0.1811699665337801 test_loss: 0.16464037895202638
epoch: 34 training_loss 0.17589462488889696 test_loss: 0.1853783130645752
epoch: 35 training_loss 0.17631263867020608 test_loss: 0.19214422702789308
epoch: 36 training_loss 0.16841148898005487 test_loss: 0.17903832197189332
epoch: 37 training_loss 0.18667136549949645 test_loss: 0.2012098789215088
epoch: 38 training_loss 0.17755095921456815 test_loss: 0.1766587495803833
epoch: 39 training_loss 0.18192802250385284 test_loss: 0.17219454050064087
epoch: 40 training_loss 0.18337156541645527 test_loss: 0.19079740047454835
epoch: 41 training_loss 0.18259891219437122 test_loss: 0.18991926908493043
epoch: 42 training_loss 0.17479626454412936 test_loss: 0.17511236667633057
epoch: 43 training_loss 0.17558942809700967 test_loss: 0.18996745347976685
epoch: 44 training_loss 0.17803012661635875 test_loss: 0.1738192319869995
epoch: 45 training_loss 0.17896794266998767 test_loss: 0.1775231719017029
epoch: 46 training_loss 0.17944575600326063 test_loss: 0.18754125833511354
epoch: 47 training_loss 0.18009065829217433 test_loss: 0.190910804271698
epoch: 48 training_loss 0.1889849217236042 test_loss: 0.1877165198326111
epoch: 49 training_loss 0.1816273456811905 test_loss: 0.18702002763748168
epoch: 50 training_loss 0.18468169502913953 test_loss: 0.1755742311477661
epoch: 51 training_loss 0.1893389667570591 test_loss: 0.18488949537277222
epoch: 52 training_loss 0.1746044736355543 test_loss: 0.18556010723114014
epoch: 53 training_loss 0.17801252461969852 test_loss: 0.17386388778686523
epoch: 54 training_loss 0.17529512874782086 test_loss: 0.18935575485229492
epoch: 55 training_loss 0.18105885803699492 test_loss: 0.18090431690216063
epoch: 56 training_loss 0.17784056670963763 test_loss: 0.1749460816383362
epoch: 57 training_loss 0.1754227624833584 test_loss: 0.16382789611816406
epoch: 58 training_loss 0.1759236064553261 test_loss: 0.179023277759552
epoch: 59 training_loss 0.16926134034991264 test_loss: 0.17268015146255494
epoch: 60 training_loss 0.18633398070931434 test_loss: 0.18078399896621705
epoch: 61 training_loss 0.18104075253009796 test_loss: 0.18269318342208862
epoch: 62 training_loss 0.18732690550386905 test_loss: 0.19356683492660523
epoch: 63 training_loss 0.17616138935089112 test_loss: 0.1691728115081787
epoch: 64 training_loss 0.18410092160105707 test_loss: 0.18994059562683105
epoch: 65 training_loss 0.17921311892569064 test_loss: 0.1959701657295227
epoch: 66 training_loss 0.1767205534130335 test_loss: 0.17192416191101073
epoch: 67 training_loss 0.178153890222311 test_loss: 0.18351645469665528
epoch: 68 training_loss 0.16980372473597527 test_loss: 0.1749414920806885
epoch: 69 training_loss 0.18075635351240635 test_loss: 0.18606804609298705
epoch: 70 training_loss 0.1825227952748537 test_loss: 0.18453011512756348
epoch: 71 training_loss 0.1773562900722027 test_loss: 0.18164752721786498
epoch: 72 training_loss 0.17819691099226476 test_loss: 0.1878133773803711
epoch: 73 training_loss 0.18198179215192795 test_loss: 0.1794702172279358
epoch: 74 training_loss 0.18170315884053706 test_loss: 0.17899073362350465
epoch: 75 training_loss 0.1828776053339243 test_loss: 0.18425509929656983
epoch: 76 training_loss 0.18174339950084686 test_loss: 0.20595574378967285
epoch: 77 training_loss 0.17679327204823495 test_loss: 0.18901667594909669
epoch: 78 training_loss 0.17791776977479457 test_loss: 0.1904981851577759
epoch: 79 training_loss 0.1855404920130968 test_loss: 0.19206167459487916
epoch: 80 training_loss 0.1749016485363245 test_loss: 0.1819766044616699
epoch: 81 training_loss 0.17923036478459836 test_loss: 0.19414933919906616
epoch: 82 training_loss 0.17576899096369744 test_loss: 0.18853912353515626
epoch: 83 training_loss 0.1736023148149252 test_loss: 0.18083698749542237
epoch: 84 training_loss 0.1742617517709732 test_loss: 0.1714787721633911
epoch: 85 training_loss 0.1789430445432663 test_loss: 0.17031265497207643
epoch: 86 training_loss 0.17887777350842954 test_loss: 0.1683874249458313
epoch: 87 training_loss 0.17057073689997196 test_loss: 0.16901776790618897
epoch: 88 training_loss 0.17595559120178222 test_loss: 0.1751449704170227
epoch: 89 training_loss 0.17834442995488645 test_loss: 0.1831132173538208
epoch: 90 training_loss 0.18640437886118888 test_loss: 0.18555049896240233
epoch: 91 training_loss 0.17790857687592507 test_loss: 0.1902836799621582
epoch: 92 training_loss 0.18088383346796036 test_loss: 0.17767997980117797
epoch: 93 training_loss 0.1859320991486311 test_loss: 0.18137189149856567
epoch: 94 training_loss 0.17296592868864535 test_loss: 0.16766924858093263
epoch: 95 training_loss 0.1725713711231947 test_loss: 0.16046440601348877
epoch: 96 training_loss 0.18008942574262618 test_loss: 0.18574602603912355
epoch: 97 training_loss 0.18039048209786415 test_loss: 0.18241465091705322
epoch: 98 training_loss 0.16500729151070118 test_loss: 0.17370083332061767
epoch: 99 training_loss 0.17147733472287655 test_loss: 0.17016112804412842
epoch: 100 training_loss 0.16991233192384242 test_loss: 0.1831255078315735
epoch: 101 training_loss 0.1768386459350586 test_loss: 0.20135657787322997
epoch: 102 training_loss 0.1780398453772068 test_loss: 0.1793028950691223
epoch: 103 training_loss 0.17845393471419813 test_loss: 0.19243123531341552
epoch: 104 training_loss 0.18248903766274452 test_loss: 0.17937718629837035
epoch: 105 training_loss 0.1776901348680258 test_loss: 0.18580732345581055
epoch: 106 training_loss 0.1680970861017704 test_loss: 0.17750526666641236
epoch: 107 training_loss 0.1749139665067196 test_loss: 0.18157938718795777
epoch: 108 training_loss 0.17571235418319703 test_loss: 0.1789261817932129
epoch: 109 training_loss 0.17675184451043605 test_loss: 0.19418587684631347
epoch: 110 training_loss 0.17453488763421773 test_loss: 0.17235157489776612
epoch: 111 training_loss 0.17326539807021618 test_loss: 0.16967284679412842
epoch: 112 training_loss 0.1774507673084736 test_loss: 0.18124104738235475
epoch: 113 training_loss 0.17854230932891368 test_loss: 0.17362319231033324
epoch: 114 training_loss 0.18095545046031475 test_loss: 0.1781109929084778
epoch: 115 training_loss 0.17144495613873004 test_loss: 0.17187578678131105
epoch: 116 training_loss 0.1770741704851389 test_loss: 0.18256281614303588
epoch: 117 training_loss 0.1758488354086876 test_loss: 0.176208758354187
epoch: 118 training_loss 0.17595993019640446 test_loss: 0.17022258043289185
epoch: 119 training_loss 0.17696260958909987 test_loss: 0.19839851856231688
epoch: 120 training_loss 0.16951028749346733 test_loss: 0.1778084635734558
epoch: 121 training_loss 0.17099389351904393 test_loss: 0.17962687015533446
epoch: 122 training_loss 0.18191163741052152 test_loss: 0.17866141796112062
epoch: 123 training_loss 0.1809899678081274 test_loss: 0.19619252681732177
epoch: 124 training_loss 0.1749013701826334 test_loss: 0.16165895462036134
epoch: 125 training_loss 0.16944758087396622 test_loss: 0.18389410972595216
epoch: 126 training_loss 0.17596058517694474 test_loss: 0.181222403049469
epoch: 127 training_loss 0.18248639203608036 test_loss: 0.18716002702713014
epoch: 128 training_loss 0.1744168672710657 test_loss: 0.17115398645401
epoch: 129 training_loss 0.17193291798233987 test_loss: 0.17951712608337403
epoch: 130 training_loss 0.18040355250239373 test_loss: 0.18570280075073242
epoch: 131 training_loss 0.17437240101397036 test_loss: 0.1751462459564209
epoch: 132 training_loss 0.17505117297172545 test_loss: 0.18047062158584595
epoch: 133 training_loss 0.1697648872435093 test_loss: 0.16736714839935302
epoch: 134 training_loss 0.1798190998286009 test_loss: 0.17567672729492187
epoch: 135 training_loss 0.17656721971929074 test_loss: 0.18705726861953736
epoch: 136 training_loss 0.18332827404141427 test_loss: 0.18409147262573242
epoch: 137 training_loss 0.17380785211920738 test_loss: 0.1808754801750183
epoch: 138 training_loss 0.1753964764624834 test_loss: 0.1842210292816162
epoch: 139 training_loss 0.17532094210386276 test_loss: 0.16294091939926147
epoch: 140 training_loss 0.17822656594216824 test_loss: 0.17080588340759278
epoch: 141 training_loss 0.17061774335801602 test_loss: 0.17330316305160523
epoch: 142 training_loss 0.1762200739979744 test_loss: 0.19664279222488404
epoch: 143 training_loss 0.16963695667684078 test_loss: 0.17199000120162963
epoch: 144 training_loss 0.173787235096097 test_loss: 0.189328932762146
epoch: 145 training_loss 0.1724798110127449 test_loss: 0.17111855745315552
epoch: 146 training_loss 0.1708809606358409 test_loss: 0.17770403623580933
epoch: 147 training_loss 0.1678838887065649 test_loss: 0.18678799867630005
epoch: 148 training_loss 0.16646263509988785 test_loss: 0.19118115901947022
epoch: 149 training_loss 0.1761552295088768 test_loss: 0.1901288866996765
epoch: 0 training_loss 0.25266677543520927 test_loss: 0.20967502593994142
epoch: 1 training_loss 0.20694448694586753 test_loss: 0.1987761974334717
epoch: 2 training_loss 0.19867661997675895 test_loss: 0.19042344093322755
epoch: 3 training_loss 0.18950098127126694 test_loss: 0.195694899559021
epoch: 4 training_loss 0.20046576157212256 test_loss: 0.18012787103652955
epoch: 5 training_loss 0.19472811542451382 test_loss: 0.17120784521102905
epoch: 6 training_loss 0.18847043827176094 test_loss: 0.18936163187026978
epoch: 7 training_loss 0.1928788896650076 test_loss: 0.18632395267486573
epoch: 8 training_loss 0.18654864378273486 test_loss: 0.2043018341064453
epoch: 9 training_loss 0.19488516353070737 test_loss: 0.2069077491760254
epoch: 10 training_loss 0.1809821104258299 test_loss: 0.19033795595169067
epoch: 11 training_loss 0.18312871858477592 test_loss: 0.193565034866333
epoch: 12 training_loss 0.18583357632160186 test_loss: 0.18407176733016967
epoch: 13 training_loss 0.18276623651385307 test_loss: 0.190477192401886
epoch: 14 training_loss 0.18217798478901387 test_loss: 0.18873388767242433
epoch: 15 training_loss 0.18213085904717446 test_loss: 0.19827762842178345
epoch: 16 training_loss 0.18230236165225505 test_loss: 0.18918745517730712
epoch: 17 training_loss 0.17869404211640358 test_loss: 0.16712489128112792
epoch: 18 training_loss 0.18279993236064912 test_loss: 0.17543429136276245
epoch: 19 training_loss 0.18385984260588883 test_loss: 0.18322246074676513
epoch: 20 training_loss 0.18739551886916161 test_loss: 0.17504243850708007
epoch: 21 training_loss 0.18143655449151994 test_loss: 0.17833297252655028
epoch: 22 training_loss 0.1841213783621788 test_loss: 0.17632691860198973
epoch: 23 training_loss 0.1852363456785679 test_loss: 0.17520437240600586
epoch: 24 training_loss 0.18195637419819832 test_loss: 0.1834742546081543
epoch: 25 training_loss 0.1843998783826828 test_loss: 0.18034124374389648
epoch: 26 training_loss 0.18563464391976595 test_loss: 0.19105918407440187
epoch: 27 training_loss 0.18037973389029502 test_loss: 0.1927649736404419
epoch: 28 training_loss 0.18448555454611779 test_loss: 0.20025839805603027
epoch: 29 training_loss 0.17840731509029864 test_loss: 0.18618519306182862
epoch: 30 training_loss 0.18160538926720618 test_loss: 0.19387298822402954
epoch: 31 training_loss 0.18146744973957538 test_loss: 0.18345755338668823
epoch: 32 training_loss 0.17925785809755326 test_loss: 0.17546252012252808
epoch: 33 training_loss 0.18096769034862517 test_loss: 0.1802643656730652
epoch: 34 training_loss 0.17248870939016342 test_loss: 0.17225784063339233
epoch: 35 training_loss 0.17434051841497422 test_loss: 0.18481649160385133
epoch: 36 training_loss 0.17425541184842586 test_loss: 0.17594531774520875
epoch: 37 training_loss 0.18080111399292945 test_loss: 0.19409711360931398
epoch: 38 training_loss 0.1761377400159836 test_loss: 0.16835851669311525
epoch: 39 training_loss 0.18013256147503853 test_loss: 0.19474320411682128
epoch: 40 training_loss 0.1747013445943594 test_loss: 0.1759697437286377
epoch: 41 training_loss 0.17498296521604062 test_loss: 0.1915615200996399
epoch: 42 training_loss 0.1748339443653822 test_loss: 0.1795410394668579
epoch: 43 training_loss 0.183495614528656 test_loss: 0.1957457184791565
epoch: 44 training_loss 0.17894665755331515 test_loss: 0.19377113580703736
epoch: 45 training_loss 0.1802005646377802 test_loss: 0.19312692880630494
epoch: 46 training_loss 0.1751315962523222 test_loss: 0.16803352832794188
epoch: 47 training_loss 0.17608253829181195 test_loss: 0.19866234064102173
epoch: 48 training_loss 0.1781761607527733 test_loss: 0.1772892713546753
epoch: 49 training_loss 0.17792973298579454 test_loss: 0.1910552501678467
epoch: 50 training_loss 0.17018295906484127 test_loss: 0.19571369886398315
epoch: 51 training_loss 0.17764417484402656 test_loss: 0.1949410080909729
epoch: 52 training_loss 0.17763471119105817 test_loss: 0.17595093250274657
epoch: 53 training_loss 0.18479983665049077 test_loss: 0.17462873458862305
epoch: 54 training_loss 0.1801207186281681 test_loss: 0.19448328018188477
epoch: 55 training_loss 0.17687264703214167 test_loss: 0.183440101146698
epoch: 56 training_loss 0.1825572969764471 test_loss: 0.16598590612411498
epoch: 57 training_loss 0.1729216106981039 test_loss: 0.17499933242797852
epoch: 58 training_loss 0.1842456628382206 test_loss: 0.18317252397537231
epoch: 59 training_loss 0.18684838175773621 test_loss: 0.19275072813034058
epoch: 60 training_loss 0.1829908699542284 test_loss: 0.19523614645004272
epoch: 61 training_loss 0.17395758911967277 test_loss: 0.1896633744239807
epoch: 62 training_loss 0.17354683734476567 test_loss: 0.19675179719924926
epoch: 63 training_loss 0.18074386194348335 test_loss: 0.1799611806869507
epoch: 64 training_loss 0.18036156184971333 test_loss: 0.1946812629699707
epoch: 65 training_loss 0.18713476568460463 test_loss: 0.17274713516235352
epoch: 66 training_loss 0.17584413342177868 test_loss: 0.18721767663955688
epoch: 67 training_loss 0.18097145013511182 test_loss: 0.17353309392929078
epoch: 68 training_loss 0.17943129025399684 test_loss: 0.1861395239830017
epoch: 69 training_loss 0.17719702996313572 test_loss: 0.19922010898590087
epoch: 70 training_loss 0.17717254865914583 test_loss: 0.1771615743637085
epoch: 71 training_loss 0.17622603833675385 test_loss: 0.16309847831726074
epoch: 72 training_loss 0.17570292294025422 test_loss: 0.17827506065368653
epoch: 73 training_loss 0.18755714066326618 test_loss: 0.1618087649345398
epoch: 74 training_loss 0.17516046836972238 test_loss: 0.18759583234786986
epoch: 75 training_loss 0.17681189894676208 test_loss: 0.1968745231628418
epoch: 76 training_loss 0.1830895518511534 test_loss: 0.18052533864974976
epoch: 77 training_loss 0.17852092191576957 test_loss: 0.1697268605232239
epoch: 78 training_loss 0.17879325665533544 test_loss: 0.18824974298477173
epoch: 79 training_loss 0.17720287144184113 test_loss: 0.1865070343017578
epoch: 80 training_loss 0.17736180424690245 test_loss: 0.19004418849945068
epoch: 81 training_loss 0.17959898114204406 test_loss: 0.18744204044342042
epoch: 82 training_loss 0.18514641128480436 test_loss: 0.19436559677124024
epoch: 83 training_loss 0.17693592436611652 test_loss: 0.17901074886322021
epoch: 84 training_loss 0.1759327220916748 test_loss: 0.1942991018295288
epoch: 85 training_loss 0.1747054335474968 test_loss: 0.18550877571105956
epoch: 86 training_loss 0.17935762591660023 test_loss: 0.17782933712005616
epoch: 87 training_loss 0.17404139570891858 test_loss: 0.171013343334198
epoch: 88 training_loss 0.17414365895092487 test_loss: 0.1832785964012146
epoch: 89 training_loss 0.18518523164093495 test_loss: 0.17875710725784302
epoch: 90 training_loss 0.17227124892175197 test_loss: 0.1680346965789795
epoch: 91 training_loss 0.17416349813342094 test_loss: 0.20543255805969238
epoch: 92 training_loss 0.17637883126735687 test_loss: 0.16641123294830323
epoch: 93 training_loss 0.17544894017279147 test_loss: 0.1656605124473572
epoch: 94 training_loss 0.1832193124294281 test_loss: 0.18388644456863404
epoch: 95 training_loss 0.17316601768136025 test_loss: 0.17257381677627565
epoch: 96 training_loss 0.17219422899186612 test_loss: 0.196918523311615
epoch: 97 training_loss 0.18036863192915917 test_loss: 0.19209216833114623
epoch: 98 training_loss 0.17696826562285423 test_loss: 0.17638177871704103
epoch: 99 training_loss 0.1755312530696392 test_loss: 0.17943646907806396
epoch: 100 training_loss 0.1806263041496277 test_loss: 0.1726965069770813
epoch: 101 training_loss 0.1717704490944743 test_loss: 0.1890636444091797
epoch: 102 training_loss 0.16946285881102086 test_loss: 0.19398027658462524
epoch: 103 training_loss 0.17120147056877613 test_loss: 0.19256529808044434
epoch: 104 training_loss 0.1805937807261944 test_loss: 0.18854418992996216
epoch: 105 training_loss 0.17834200732409955 test_loss: 0.19275228977203368
epoch: 106 training_loss 0.1725173383951187 test_loss: 0.17534260749816893
epoch: 107 training_loss 0.18064700976014136 test_loss: 0.1868648648262024
epoch: 108 training_loss 0.18066649943590163 test_loss: 0.20012290477752687
epoch: 109 training_loss 0.17869048818945885 test_loss: 0.1909603476524353
epoch: 110 training_loss 0.17909744679927825 test_loss: 0.1824067234992981
epoch: 111 training_loss 0.1763594526797533 test_loss: 0.17583136558532714
epoch: 112 training_loss 0.17746139399707317 test_loss: 0.1781698942184448
epoch: 113 training_loss 0.172127301171422 test_loss: 0.1747783303260803
epoch: 114 training_loss 0.17659247875213624 test_loss: 0.18348883390426635
epoch: 115 training_loss 0.17456147506833075 test_loss: 0.18282910585403442
epoch: 116 training_loss 0.17547338418662548 test_loss: 0.18736574649810792
epoch: 117 training_loss 0.17352032378315926 test_loss: 0.1824023962020874
epoch: 118 training_loss 0.17766372933983804 test_loss: 0.18970440626144408
epoch: 119 training_loss 0.17757324911653996 test_loss: 0.18209595680236818
epoch: 120 training_loss 0.18303762823343278 test_loss: 0.17183549404144288
epoch: 121 training_loss 0.17869741916656495 test_loss: 0.20799074172973633
epoch: 122 training_loss 0.17072883911430836 test_loss: 0.18154076337814332
epoch: 123 training_loss 0.1801702105998993 test_loss: 0.18827859163284302
epoch: 124 training_loss 0.1803626672923565 test_loss: 0.18648262023925782
epoch: 125 training_loss 0.17163885332643986 test_loss: 0.1743688702583313
epoch: 126 training_loss 0.17411150455474853 test_loss: 0.17694467306137085
epoch: 127 training_loss 0.17766424037516118 test_loss: 0.1998201608657837
epoch: 128 training_loss 0.17567522078752518 test_loss: 0.19212466478347778
epoch: 129 training_loss 0.18192098788917066 test_loss: 0.18156262636184692
epoch: 130 training_loss 0.1737187161296606 test_loss: 0.177087664604187
epoch: 131 training_loss 0.17537521123886107 test_loss: 0.18152300119400025
epoch: 132 training_loss 0.17194874323904513 test_loss: 0.19893152713775636
epoch: 133 training_loss 0.1752312656864524 test_loss: 0.19992469549179076
epoch: 134 training_loss 0.17688586629927158 test_loss: 0.18679566383361818
epoch: 135 training_loss 0.17784556813538074 test_loss: 0.17967673540115356
epoch: 136 training_loss 0.17641602508723736 test_loss: 0.18749783039093018
epoch: 137 training_loss 0.17701516956090926 test_loss: 0.18490685224533082
epoch: 138 training_loss 0.17472099795937537 test_loss: 0.17311465740203857
epoch: 139 training_loss 0.18397844597697258 test_loss: 0.17934917211532592
epoch: 140 training_loss 0.17631882414221764 test_loss: 0.1727311372756958
epoch: 141 training_loss 0.17199636332690715 test_loss: 0.1856032967567444
epoch: 142 training_loss 0.17484861463308335 test_loss: 0.17563689947128297
epoch: 143 training_loss 0.17462068915367127 test_loss: 0.18846700191497803
epoch: 144 training_loss 0.1762654447555542 test_loss: 0.18091222047805786
epoch: 145 training_loss 0.17227313339710235 test_loss: 0.1860877752304077
epoch: 146 training_loss 0.1702539637684822 test_loss: 0.17787889242172242
epoch: 147 training_loss 0.1812197246402502 test_loss: 0.19760533571243286
epoch: 148 training_loss 0.18024754770100115 test_loss: 0.18268038034439088
epoch: 149 training_loss 0.17551310531795025 test_loss: 0.17589776515960692
epoch: 0 training_loss 0.2620193169265985 test_loss: 0.2234480381011963
epoch: 1 training_loss 0.20031697899103165 test_loss: 0.1881768822669983
epoch: 2 training_loss 0.19650267884135247 test_loss: 0.18784135580062866
epoch: 3 training_loss 0.18637610003352165 test_loss: 0.21118390560150146
epoch: 4 training_loss 0.19593165844678878 test_loss: 0.18809293508529662
epoch: 5 training_loss 0.19699612461030483 test_loss: 0.19014776945114137
epoch: 6 training_loss 0.18158055745065213 test_loss: 0.1831204652786255
epoch: 7 training_loss 0.19095366850495338 test_loss: 0.1905280590057373
epoch: 8 training_loss 0.18231447923928498 test_loss: 0.1848652720451355
epoch: 9 training_loss 0.19881234489381314 test_loss: 0.2017282724380493
epoch: 10 training_loss 0.1873053228110075 test_loss: 0.2080019235610962
epoch: 11 training_loss 0.178780769482255 test_loss: 0.18917176723480225
epoch: 12 training_loss 0.17694411404430865 test_loss: 0.1813364267349243
epoch: 13 training_loss 0.1885188467055559 test_loss: 0.18040452003479004
epoch: 14 training_loss 0.18467373207211493 test_loss: 0.17818024158477783
epoch: 15 training_loss 0.1860922907292843 test_loss: 0.1980717420578003
epoch: 16 training_loss 0.18957627326250076 test_loss: 0.1824595332145691
epoch: 17 training_loss 0.1875194574147463 test_loss: 0.2095630645751953
epoch: 18 training_loss 0.18572616308927536 test_loss: 0.18852373361587524
epoch: 19 training_loss 0.18395193353295325 test_loss: 0.18770546913146974
epoch: 20 training_loss 0.18773720107972622 test_loss: 0.18689132928848268
epoch: 21 training_loss 0.18403994485735894 test_loss: 0.19177684783935547
epoch: 22 training_loss 0.18262077778577804 test_loss: 0.1984463930130005
epoch: 23 training_loss 0.18461592055857182 test_loss: 0.16671792268753052
epoch: 24 training_loss 0.18781711496412754 test_loss: 0.16288518905639648
epoch: 25 training_loss 0.1855591529607773 test_loss: 0.17798984050750732
epoch: 26 training_loss 0.19232620865106584 test_loss: 0.18905856609344482
epoch: 27 training_loss 0.18639054246246814 test_loss: 0.1829628825187683
epoch: 28 training_loss 0.1872146228700876 test_loss: 0.20084631443023682
epoch: 29 training_loss 0.18567820571362972 test_loss: 0.18364057540893555
epoch: 30 training_loss 0.1795790580660105 test_loss: 0.2016695499420166
epoch: 31 training_loss 0.18744580678641795 test_loss: 0.17799092531204225
epoch: 32 training_loss 0.1838070785999298 test_loss: 0.18294237852096557
epoch: 33 training_loss 0.1823598885536194 test_loss: 0.1656561851501465
epoch: 34 training_loss 0.19517248786985875 test_loss: 0.17736728191375734
epoch: 35 training_loss 0.19211691677570342 test_loss: 0.19534488916397094
epoch: 36 training_loss 0.1825661101937294 test_loss: 0.17599084377288818
epoch: 37 training_loss 0.1826252606511116 test_loss: 0.1865459203720093
epoch: 38 training_loss 0.18988807015120984 test_loss: 0.1723881483078003
epoch: 39 training_loss 0.18221800155937673 test_loss: 0.1791369080543518
epoch: 40 training_loss 0.18054552167654037 test_loss: 0.17612251043319702
epoch: 41 training_loss 0.1869200701266527 test_loss: 0.17229905128479003
epoch: 42 training_loss 0.1744432682543993 test_loss: 0.1671999454498291
epoch: 43 training_loss 0.18433641694486141 test_loss: 0.1744328737258911
epoch: 44 training_loss 0.18689980454742908 test_loss: 0.18330212831497192
epoch: 45 training_loss 0.18150461591780187 test_loss: 0.19244737625122071
epoch: 46 training_loss 0.17956779673695564 test_loss: 0.18270906209945678
epoch: 47 training_loss 0.18438877254724503 test_loss: 0.18608133792877196
epoch: 48 training_loss 0.17531676605343818 test_loss: 0.18251752853393555
epoch: 49 training_loss 0.18370331563055514 test_loss: 0.18987569808959961
epoch: 50 training_loss 0.185547446757555 test_loss: 0.18455312252044678
epoch: 51 training_loss 0.1789311046898365 test_loss: 0.20060625076293945
epoch: 52 training_loss 0.18968428444117308 test_loss: 0.1796732783317566
epoch: 53 training_loss 0.17728133842349053 test_loss: 0.16649699211120605
epoch: 54 training_loss 0.17787223659455775 test_loss: 0.17054386138916017
epoch: 55 training_loss 0.1821852322667837 test_loss: 0.1767396330833435
epoch: 56 training_loss 0.18207876160740852 test_loss: 0.1829116940498352
epoch: 57 training_loss 0.1758999067544937 test_loss: 0.18874702453613282
epoch: 58 training_loss 0.17327739290893077 test_loss: 0.18605539798736573
epoch: 59 training_loss 0.17490840919315814 test_loss: 0.18090412616729737
epoch: 60 training_loss 0.17863936662673952 test_loss: 0.19913872480392455
epoch: 61 training_loss 0.17632934845983983 test_loss: 0.17633528709411622
epoch: 62 training_loss 0.17946761183440685 test_loss: 0.18439626693725586
epoch: 63 training_loss 0.17922552272677422 test_loss: 0.18400921821594238
epoch: 64 training_loss 0.17205213412642478 test_loss: 0.17064758539199829
epoch: 65 training_loss 0.18166221730411053 test_loss: 0.19545003175735473
epoch: 66 training_loss 0.1786798855662346 test_loss: 0.18672677278518676
epoch: 67 training_loss 0.17992882952094078 test_loss: 0.1794027328491211
epoch: 68 training_loss 0.18162765726447105 test_loss: 0.19577041864395142
epoch: 69 training_loss 0.1797681614756584 test_loss: 0.18396586179733276
epoch: 70 training_loss 0.17538792289793492 test_loss: 0.16887675523757933
epoch: 71 training_loss 0.17706943221390248 test_loss: 0.18717175722122192
epoch: 72 training_loss 0.17825504284352064 test_loss: 0.19037444591522218
epoch: 73 training_loss 0.17803812958300114 test_loss: 0.1873753547668457
epoch: 74 training_loss 0.1778894156217575 test_loss: 0.17581028938293458
epoch: 75 training_loss 0.18068780705332757 test_loss: 0.17666703462600708
epoch: 76 training_loss 0.17702286690473557 test_loss: 0.178610897064209
epoch: 77 training_loss 0.1768759048730135 test_loss: 0.1661153793334961
epoch: 78 training_loss 0.18163056880235673 test_loss: 0.17363697290420532
epoch: 79 training_loss 0.17886702165007592 test_loss: 0.1730988621711731
epoch: 80 training_loss 0.18419036127626895 test_loss: 0.16559765338897706
epoch: 81 training_loss 0.18573737762868403 test_loss: 0.18968348503112792
epoch: 82 training_loss 0.1713789275288582 test_loss: 0.20090651512145996
epoch: 83 training_loss 0.17889376625418663 test_loss: 0.17652183771133423
epoch: 84 training_loss 0.17900570102035998 test_loss: 0.17830814123153688
epoch: 85 training_loss 0.1777178993821144 test_loss: 0.16837024688720703
epoch: 86 training_loss 0.17834100559353827 test_loss: 0.18120285272598266
epoch: 87 training_loss 0.18331203997135162 test_loss: 0.17274945974349976
epoch: 88 training_loss 0.1845449125766754 test_loss: 0.18847384452819824
epoch: 89 training_loss 0.1824221307784319 test_loss: 0.17610089778900145
epoch: 90 training_loss 0.18436988756060602 test_loss: 0.18295596837997435
epoch: 91 training_loss 0.17903846107423305 test_loss: 0.16893998384475709
epoch: 92 training_loss 0.17280984573066235 test_loss: 0.17488398551940917
epoch: 93 training_loss 0.18343280091881753 test_loss: 0.19093890190124513
epoch: 94 training_loss 0.18420978382229805 test_loss: 0.19221380949020386
epoch: 95 training_loss 0.18171110026538373 test_loss: 0.18699036836624144
epoch: 96 training_loss 0.17655856348574162 test_loss: 0.18418198823928833
epoch: 97 training_loss 0.17370373465120792 test_loss: 0.17849670648574828
epoch: 98 training_loss 0.18191635221242905 test_loss: 0.18497376441955565
epoch: 99 training_loss 0.18194043017923833 test_loss: 0.1848415732383728
epoch: 100 training_loss 0.17803616896271707 test_loss: 0.18606637716293334
epoch: 101 training_loss 0.185177858248353 test_loss: 0.1578995704650879
epoch: 102 training_loss 0.17925091303884982 test_loss: 0.1897166609764099
epoch: 103 training_loss 0.17888134211301804 test_loss: 0.1776189088821411
epoch: 104 training_loss 0.1814456868171692 test_loss: 0.18976227045059205
epoch: 105 training_loss 0.1749649702012539 test_loss: 0.18166646957397461
epoch: 106 training_loss 0.1824753760546446 test_loss: 0.18094277381896973
epoch: 107 training_loss 0.17717170976102353 test_loss: 0.17238394021987916
epoch: 108 training_loss 0.1844089502841234 test_loss: 0.1762067198753357
epoch: 109 training_loss 0.18450175426900386 test_loss: 0.17843602895736693
epoch: 110 training_loss 0.1804093911498785 test_loss: 0.18375084400177003
epoch: 111 training_loss 0.18173902295529842 test_loss: 0.18630951642990112
epoch: 112 training_loss 0.17483952082693577 test_loss: 0.19372572898864746
epoch: 113 training_loss 0.18281066544353963 test_loss: 0.19245758056640624
epoch: 114 training_loss 0.17505658201873303 test_loss: 0.18865803480148316
epoch: 115 training_loss 0.18206814646720887 test_loss: 0.20105125904083251
epoch: 116 training_loss 0.17928643427789212 test_loss: 0.17576783895492554
epoch: 117 training_loss 0.1715138953179121 test_loss: 0.168851101398468
epoch: 118 training_loss 0.1719659810513258 test_loss: 0.19543426036834716
epoch: 119 training_loss 0.1753554219380021 test_loss: 0.1791943073272705
epoch: 120 training_loss 0.1775248856842518 test_loss: 0.1828552007675171
epoch: 121 training_loss 0.17869420073926448 test_loss: 0.18573557138442992
epoch: 122 training_loss 0.1811756207048893 test_loss: 0.1786580801010132
epoch: 123 training_loss 0.17078267611563205 test_loss: 0.1710703492164612
epoch: 124 training_loss 0.19114050410687924 test_loss: 0.19165592193603515
epoch: 125 training_loss 0.17851210243999957 test_loss: 0.16376746892929078
epoch: 126 training_loss 0.18010811634361745 test_loss: 0.173595654964447
epoch: 127 training_loss 0.17905129350721835 test_loss: 0.16170079708099366
epoch: 128 training_loss 0.1945207041501999 test_loss: 0.18320685625076294
epoch: 129 training_loss 0.17725951068103313 test_loss: 0.17200833559036255
epoch: 130 training_loss 0.17912191152572632 test_loss: 0.1784733772277832
epoch: 131 training_loss 0.185483031347394 test_loss: 0.1797136187553406
epoch: 132 training_loss 0.17940559320151805 test_loss: 0.17729085683822632
epoch: 133 training_loss 0.18124218240380288 test_loss: 0.18795278072357177
epoch: 134 training_loss 0.17162979751825333 test_loss: 0.1809612274169922
epoch: 135 training_loss 0.1775217664986849 test_loss: 0.1983112573623657
epoch: 136 training_loss 0.17679818466305733 test_loss: 0.17056515216827392
epoch: 137 training_loss 0.17859344456344842 test_loss: 0.1811972975730896
epoch: 138 training_loss 0.17916236348450185 test_loss: 0.1753668189048767
epoch: 139 training_loss 0.1785733537375927 test_loss: 0.16959619522094727
epoch: 140 training_loss 0.17114901378750802 test_loss: 0.16424529552459716
epoch: 141 training_loss 0.17378486469388008 test_loss: 0.18243693113327025
epoch: 142 training_loss 0.17833311662077903 test_loss: 0.17883496284484862
epoch: 143 training_loss 0.17666043154895306 test_loss: 0.19085863828659058
epoch: 144 training_loss 0.18153552211821078 test_loss: 0.1713519811630249
epoch: 145 training_loss 0.1716188133507967 test_loss: 0.1856829047203064
epoch: 146 training_loss 0.17960611328482629 test_loss: 0.18262150287628173
epoch: 147 training_loss 0.17717534400522708 test_loss: 0.1836305022239685
epoch: 148 training_loss 0.1699916750192642 test_loss: 0.18694379329681396
epoch: 149 training_loss 0.1771638108044863 test_loss: 0.19492819309234619
episode: 0 training return: -805.6681164271849
episode: 1 training return: -827.7921865381367
episode: 2 training return: -1036.68731367775
episode: 3 training return: -1057.6760024400464
epoch: 1 test_true_pfm: 252.62459061613393 sim_pfm: -849.6883012620336
episode: 4 training return: -755.1260343336739
episode: 5 training return: -802.6913255490557
episode: 6 training return: -713.2753688384344
episode: 7 training return: -871.2756565616937
epoch: 2 test_true_pfm: 221.51879107801702 sim_pfm: -759.8099265239667
episode: 8 training return: -982.9564838443758
episode: 9 training return: -780.4440210651434
episode: 10 training return: -807.2872949322492
episode: 11 training return: -834.1869809712867
epoch: 3 test_true_pfm: 193.99740150648026 sim_pfm: -877.4934048893541
episode: 12 training return: -727.4031805355654
episode: 13 training return: -710.8333314073263
episode: 14 training return: -720.5910496582298
episode: 15 training return: -711.3734864180567
epoch: 4 test_true_pfm: 141.01025917325717 sim_pfm: -700.6021291291548
episode: 16 training return: -690.2454200014097
episode: 17 training return: -703.4662893782194
episode: 18 training return: -666.2664116089214
episode: 19 training return: -666.8294905284077
epoch: 5 test_true_pfm: 213.17069414472647 sim_pfm: -611.6715574038291
episode: 20 training return: -647.7641250938677
episode: 21 training return: -657.8921085956019
episode: 22 training return: -601.3391966058543
episode: 23 training return: -727.3041852768954
epoch: 6 test_true_pfm: 172.70718464369483 sim_pfm: -577.8089999210075
episode: 24 training return: -598.9258367419172
episode: 25 training return: -594.7754920761356
episode: 26 training return: -743.1137609415031
episode: 27 training return: -573.3556276644574
epoch: 7 test_true_pfm: 164.33136311057328 sim_pfm: -599.6891942557144
episode: 28 training return: -569.3350862496533
episode: 29 training return: -584.6179509465185
episode: 30 training return: -601.4771857411736
episode: 31 training return: -539.170562536138
epoch: 8 test_true_pfm: 272.70254795636055 sim_pfm: -745.8006583623552
episode: 32 training return: -760.866230080769
episode: 33 training return: -528.2310860377412
episode: 34 training return: -752.5506671587021
episode: 35 training return: -785.0049425661759
epoch: 9 test_true_pfm: 283.4165909461869 sim_pfm: -723.1351247151377
episode: 36 training return: -762.833949240545
episode: 37 training return: -583.6838155213178
episode: 38 training return: -602.3099443407923
episode: 39 training return: -770.4809318500104
epoch: 10 test_true_pfm: 203.19594380847002 sim_pfm: -557.7072700479338
episode: 40 training return: -625.0793835579448
episode: 41 training return: -586.1930015338575
episode: 42 training return: -600.9768389170126
episode: 43 training return: -587.1980678082488
epoch: 11 test_true_pfm: 224.00948923115982 sim_pfm: -555.9431840523345
episode: 44 training return: -580.7906188445832
episode: 45 training return: -593.3861288585842
episode: 46 training return: -575.8372568389141
episode: 47 training return: -581.1074462104035
epoch: 12 test_true_pfm: 249.38261594553327 sim_pfm: -548.643426921651
episode: 48 training return: -571.5254571648322
episode: 49 training return: -589.4885651735214
episode: 50 training return: -587.3872919161847
episode: 51 training return: -592.2358294659638
epoch: 13 test_true_pfm: 247.95915307991459 sim_pfm: -538.8003678266979
episode: 52 training return: -584.3370115412802
episode: 53 training return: -571.8179727984894
episode: 54 training return: -574.6612908727423
episode: 55 training return: -578.5501805068752
epoch: 14 test_true_pfm: 255.471965672249 sim_pfm: -542.7022407689734
episode: 56 training return: -577.2287840867596
episode: 57 training return: -583.1206802100227
episode: 58 training return: -566.6912755308449
episode: 59 training return: -571.9120285420108
epoch: 15 test_true_pfm: 272.20956826987776 sim_pfm: -531.4132618565933
episode: 60 training return: -562.9238163126937
episode: 61 training return: -559.4991005471709
episode: 62 training return: -579.6453745790176
episode: 63 training return: -564.1879199091712
epoch: 16 test_true_pfm: 280.6714022145329 sim_pfm: -522.1422274526935
episode: 64 training return: -553.58070512475
episode: 65 training return: -565.0260826849357
episode: 66 training return: -556.6400187591765
episode: 67 training return: -549.7223393156827
epoch: 17 test_true_pfm: 275.1166723533168 sim_pfm: -516.7923686794034
episode: 68 training return: -546.080867440582
episode: 69 training return: -568.5258708253649
episode: 70 training return: -548.4385017610433
episode: 71 training return: -559.5095388150388
epoch: 18 test_true_pfm: 262.19095924678123 sim_pfm: -523.1162409926783
episode: 72 training return: -551.4669012111844
episode: 73 training return: -552.478270407963
episode: 74 training return: -553.5346764714037
episode: 75 training return: -552.7168269546086
epoch: 19 test_true_pfm: 289.37734700955735 sim_pfm: -506.6102515349124
episode: 76 training return: -561.6526067299503
episode: 77 training return: -544.0453008481144
episode: 78 training return: -537.8697276943533
episode: 79 training return: -538.1062390375524
epoch: 20 test_true_pfm: 325.522761930798 sim_pfm: -501.3364626841323
episode: 80 training return: -553.1653927934113
episode: 81 training return: -554.5051361622304
episode: 82 training return: -537.4200529404379
episode: 83 training return: -534.8679377357082
epoch: 21 test_true_pfm: 321.7534471020461 sim_pfm: -496.6106898692063
episode: 84 training return: -522.382897640775
episode: 85 training return: -538.3900424431433
episode: 86 training return: -545.2267251931175
episode: 87 training return: -555.25581354387
epoch: 22 test_true_pfm: 351.49111560032264 sim_pfm: -492.3716952185771
episode: 88 training return: -547.1470553351026
episode: 89 training return: -546.651334945185
episode: 90 training return: -520.1510313568317
episode: 91 training return: -531.0337753604342
epoch: 23 test_true_pfm: 358.07112067483564 sim_pfm: -486.6776336347568
episode: 92 training return: -502.5253727506164
episode: 93 training return: -542.126993778663
episode: 94 training return: -525.6968298828907
episode: 95 training return: -538.49246697792
epoch: 24 test_true_pfm: 365.22115846879 sim_pfm: -493.1953047896922
episode: 96 training return: -528.1999956760261
episode: 97 training return: -531.2693682704914
episode: 98 training return: -532.7327996334152
episode: 99 training return: -530.6388568674356
epoch: 25 test_true_pfm: 348.4632746817053 sim_pfm: -492.1867177914737
episode: 100 training return: -535.5844034749216
episode: 101 training return: -531.7004454427362
episode: 102 training return: -517.8361724516888
episode: 103 training return: -514.4884076498612
epoch: 26 test_true_pfm: 331.73117237277916 sim_pfm: -490.1617028808644
episode: 104 training return: -516.5264519571319
episode: 105 training return: -523.2605155627773
episode: 106 training return: -533.8538076704186
episode: 107 training return: -522.456828416759
epoch: 27 test_true_pfm: 363.12125225134145 sim_pfm: -492.1136277940706
episode: 108 training return: -519.0470655936341
episode: 109 training return: -550.1466018846913
episode: 110 training return: -531.355196100047
episode: 111 training return: -531.9966125531705
epoch: 28 test_true_pfm: 390.64362877213233 sim_pfm: -478.3426972593315
episode: 112 training return: -520.5616341494759
episode: 113 training return: -523.3938877834431
episode: 114 training return: -509.6231351781617
episode: 115 training return: -506.8170236446078
epoch: 29 test_true_pfm: 370.5197721055902 sim_pfm: -478.30098004700056
episode: 116 training return: -523.3211645476963
episode: 117 training return: -558.3703363851478
episode: 118 training return: -531.9865339621535
episode: 119 training return: -511.246793902527
epoch: 30 test_true_pfm: 342.8450668133882 sim_pfm: -488.89768154627245
episode: 120 training return: -526.250055614437
episode: 121 training return: -513.3885633422875
episode: 122 training return: -521.4808328975947
episode: 123 training return: -525.3036318362539
epoch: 31 test_true_pfm: 360.04405435435484 sim_pfm: -474.5077951926228
episode: 124 training return: -521.3325722356531
episode: 125 training return: -508.21909637801264
episode: 126 training return: -521.087141820528
episode: 127 training return: -530.0429381300763
epoch: 32 test_true_pfm: 321.32981224598257 sim_pfm: -493.7052216794304
episode: 128 training return: -514.0455248138495
episode: 129 training return: -518.8684690733273
episode: 130 training return: -523.2558026221996
episode: 131 training return: -526.1260688888985
epoch: 33 test_true_pfm: 362.8050402809022 sim_pfm: -481.4288621556437
episode: 132 training return: -512.460860473229
episode: 133 training return: -513.0009299530009
episode: 134 training return: -527.5712532390692
episode: 135 training return: -517.1781193241831
epoch: 34 test_true_pfm: 368.8674747677342 sim_pfm: -476.9824926127214
episode: 136 training return: -520.379838440421
episode: 137 training return: -519.8679296874619
episode: 138 training return: -525.481126826996
episode: 139 training return: -529.6054449913497
epoch: 35 test_true_pfm: 347.25761886384 sim_pfm: -482.49201610013796
episode: 140 training return: -516.1736815097098
episode: 141 training return: -521.7174957259657
episode: 142 training return: -524.9644071920916
episode: 143 training return: -536.6202218692431
epoch: 36 test_true_pfm: 354.3666582897053 sim_pfm: -477.09750205882716
episode: 144 training return: -517.9299660722266
episode: 145 training return: -513.2554428009754
episode: 146 training return: -525.4011365392486
episode: 147 training return: -523.9957309346577
epoch: 37 test_true_pfm: 357.29213915093743 sim_pfm: -482.2817381587115
episode: 148 training return: -506.78650483069293
episode: 149 training return: -537.5258768987188
episode: 150 training return: -513.1758702809624
episode: 151 training return: -523.0411523531968
epoch: 38 test_true_pfm: 374.7762913250046 sim_pfm: -478.1135257417761
episode: 152 training return: -523.2352657435534
episode: 153 training return: -525.6630780591937
episode: 154 training return: -515.384111941414
episode: 155 training return: -527.660521523935
epoch: 39 test_true_pfm: 334.9295684037714 sim_pfm: -484.8810888600142
episode: 156 training return: -516.2709745840323
episode: 157 training return: -524.3741350391813
episode: 158 training return: -521.7159379384934
episode: 159 training return: -513.4342193609252
epoch: 40 test_true_pfm: 356.1395889541368 sim_pfm: -478.42080412897434
episode: 160 training return: -519.5165686525228
episode: 161 training return: -517.4864984692646
episode: 162 training return: -531.0235957402591
episode: 163 training return: -509.4525133284186
epoch: 41 test_true_pfm: 361.2948225550383 sim_pfm: -479.57652549946846
episode: 164 training return: -514.6032645772973
episode: 165 training return: -522.6908128091859
episode: 166 training return: -515.3641476382378
episode: 167 training return: -512.2226609309533
epoch: 42 test_true_pfm: 372.22123816208824 sim_pfm: -472.32849298098995
episode: 168 training return: -530.3180990628971
episode: 169 training return: -511.54301167603865
episode: 170 training return: -510.2347524282663
episode: 171 training return: -513.238195688325
epoch: 43 test_true_pfm: 353.9720470690018 sim_pfm: -466.72957502588207
episode: 172 training return: -514.0889267501846
episode: 173 training return: -510.6084874456792
episode: 174 training return: -522.6948686526415
episode: 175 training return: -510.150615519239
epoch: 44 test_true_pfm: 345.9657951613085 sim_pfm: -476.89497366098493
episode: 176 training return: -515.5031252668549
episode: 177 training return: -508.5132547587109
episode: 178 training return: -509.6947381067637
episode: 179 training return: -521.3268946255833
epoch: 45 test_true_pfm: 374.01430407808874 sim_pfm: -475.53119763930295
episode: 180 training return: -518.5165740901334
episode: 181 training return: -515.1768276150766
episode: 182 training return: -518.0744225205892
episode: 183 training return: -534.5917949347767
epoch: 46 test_true_pfm: 354.1453014641354 sim_pfm: -474.86051203784064
episode: 184 training return: -513.8114455015505
episode: 185 training return: -508.61232203185335
episode: 186 training return: -520.1943629472639
episode: 187 training return: -500.55374980066193
epoch: 47 test_true_pfm: 363.5401007295615 sim_pfm: -477.74831694057275
episode: 188 training return: -524.5665409223334
episode: 189 training return: -514.5608890569944
episode: 190 training return: -522.1539103623736
episode: 191 training return: -518.1672950282739
epoch: 48 test_true_pfm: 353.01556560495777 sim_pfm: -474.4965968547677
episode: 192 training return: -514.8514553629902
episode: 193 training return: -509.2233609901475
episode: 194 training return: -518.6223619391751
episode: 195 training return: -505.3746916622835
epoch: 49 test_true_pfm: 362.6030312055923 sim_pfm: -465.02820896663474
episode: 196 training return: -508.21777525175094
episode: 197 training return: -507.71183375854827
episode: 198 training return: -502.76672231389153
episode: 199 training return: -501.77189594529676
epoch: 50 test_true_pfm: 331.1875113609523 sim_pfm: -467.2485276749373
episode: 200 training return: -514.2351306589751
episode: 201 training return: -508.8635446344999
episode: 202 training return: -500.64709403311224
episode: 203 training return: -513.1944544397784
epoch: 51 test_true_pfm: 335.0420636311914 sim_pfm: -480.8508917269148
episode: 204 training return: -514.9729959124509
episode: 205 training return: -508.5028339790256
episode: 206 training return: -508.30202536508875
episode: 207 training return: -517.6669803057412
epoch: 52 test_true_pfm: 371.1176156540671 sim_pfm: -463.2001908803395
episode: 208 training return: -518.6898507880132
episode: 209 training return: -513.5108990138081
episode: 210 training return: -519.9348390323106
episode: 211 training return: -519.5979006215373
epoch: 53 test_true_pfm: 355.72089429137196 sim_pfm: -460.79991116209135
episode: 212 training return: -510.97649172548927
episode: 213 training return: -511.2673184626699
episode: 214 training return: -501.10546950243815
episode: 215 training return: -497.70229047425016
epoch: 54 test_true_pfm: 362.4070541061735 sim_pfm: -467.50824998125535
episode: 216 training return: -521.9536688893596
episode: 217 training return: -503.6208447863044
episode: 218 training return: -515.3404000367341
episode: 219 training return: -514.0553407231747
epoch: 55 test_true_pfm: 351.34012252486855 sim_pfm: -477.44832499610743
episode: 220 training return: -517.8292018608298
episode: 221 training return: -506.3269399604835
episode: 222 training return: -506.6777231083919
episode: 223 training return: -505.1393215332486
epoch: 56 test_true_pfm: 337.99551020610863 sim_pfm: -476.709747621012
episode: 224 training return: -516.2621982420426
episode: 225 training return: -513.2335134648202
episode: 226 training return: -507.71499763732254
episode: 227 training return: -505.7566972798397
epoch: 57 test_true_pfm: 353.8517088620158 sim_pfm: -459.12471702853867
episode: 228 training return: -504.2661578346273
episode: 229 training return: -503.1512858832184
episode: 230 training return: -506.7853489814704
episode: 231 training return: -503.58378058594377
epoch: 58 test_true_pfm: 379.1244411031965 sim_pfm: -465.8943579697687
episode: 232 training return: -499.8580568072776
episode: 233 training return: -496.9310086025387
episode: 234 training return: -508.2075205675404
episode: 235 training return: -511.29692620492375
epoch: 59 test_true_pfm: 339.4763686679822 sim_pfm: -471.2134318163707
episode: 236 training return: -503.8751114270972
episode: 237 training return: -495.6301794251232
episode: 238 training return: -505.6542613256504
episode: 239 training return: -510.6306204838517
epoch: 60 test_true_pfm: 355.8760242186925 sim_pfm: -458.48002208498195
episode: 240 training return: -512.2020447354502
episode: 241 training return: -506.9374664931163
episode: 242 training return: -495.5029667234596
episode: 243 training return: -501.1721667916331
epoch: 61 test_true_pfm: 397.47317180094007 sim_pfm: -445.5866848403244
episode: 244 training return: -499.75337499326395
episode: 245 training return: -507.24598194885095
episode: 246 training return: -505.137709973068
episode: 247 training return: -500.7012797899633
epoch: 62 test_true_pfm: 343.1049230632024 sim_pfm: -467.2591205877712
episode: 248 training return: -501.4418724445454
episode: 249 training return: -499.5883831539414
episode: 250 training return: -504.729891368922
episode: 251 training return: -518.9193744973163
epoch: 63 test_true_pfm: 374.0508227665964 sim_pfm: -459.3845071958103
episode: 252 training return: -513.0975981837845
episode: 253 training return: -512.6292564783986
episode: 254 training return: -517.4163205239124
episode: 255 training return: -512.4463653426512
epoch: 64 test_true_pfm: 350.2564632168613 sim_pfm: -462.4499458182422
episode: 256 training return: -506.3389533328197
episode: 257 training return: -489.5090244618305
episode: 258 training return: -501.98161113870395
episode: 259 training return: -501.4120447202649
epoch: 65 test_true_pfm: 366.43893738725274 sim_pfm: -458.92487581371574
episode: 260 training return: -508.9345648456577
episode: 261 training return: -493.21410570187135
episode: 262 training return: -508.373535679724
episode: 263 training return: -509.5171890796329
epoch: 66 test_true_pfm: 368.4637627156799 sim_pfm: -456.02323248665806
episode: 264 training return: -500.9925631088769
episode: 265 training return: -507.59381311720995
episode: 266 training return: -500.53338260295027
episode: 267 training return: -504.05297627349904
epoch: 67 test_true_pfm: 377.547421551048 sim_pfm: -450.9634957114854
episode: 268 training return: -504.7676848589989
episode: 269 training return: -502.82212310978207
episode: 270 training return: -500.2685372041496
episode: 271 training return: -515.8185675847485
epoch: 68 test_true_pfm: 363.9094099287915 sim_pfm: -453.83143700308216
episode: 272 training return: -520.3628338334069
episode: 273 training return: -500.75037939558587
episode: 274 training return: -499.3128269816142
episode: 275 training return: -506.5674177250155
epoch: 69 test_true_pfm: 354.2648633806978 sim_pfm: -457.22003623172213
episode: 276 training return: -496.4983332075673
episode: 277 training return: -501.0363160962077
episode: 278 training return: -499.6640551380014
episode: 279 training return: -502.47044109803807
epoch: 70 test_true_pfm: 379.7393640331159 sim_pfm: -451.31873843267704
episode: 280 training return: -482.1790569946304
episode: 281 training return: -502.4317681437579
episode: 282 training return: -496.59421602978523
episode: 283 training return: -497.0852900570737
epoch: 71 test_true_pfm: 353.2730483280273 sim_pfm: -452.05686363197384
episode: 284 training return: -497.28913667771695
episode: 285 training return: -507.7293304552028
episode: 286 training return: -500.1365249871656
episode: 287 training return: -496.2603017445169
epoch: 72 test_true_pfm: 356.67426882765926 sim_pfm: -454.7768847535867
episode: 288 training return: -501.8710954941868
episode: 289 training return: -479.0901024510159
episode: 290 training return: -515.4777577382918
episode: 291 training return: -509.91166443073
epoch: 73 test_true_pfm: 360.2223719508849 sim_pfm: -458.1112651092588
episode: 292 training return: -506.248328895013
episode: 293 training return: -496.56348981973724
episode: 294 training return: -512.5332154695409
episode: 295 training return: -501.7305204502678
epoch: 74 test_true_pfm: 369.1787666495636 sim_pfm: -453.5773385585259
episode: 296 training return: -505.9055533043721
episode: 297 training return: -491.7627350588667
episode: 298 training return: -499.56750465224803
episode: 299 training return: -488.44741399105175
epoch: 75 test_true_pfm: 358.8084850935004 sim_pfm: -446.19739583641086
episode: 300 training return: -494.4736225970736
episode: 301 training return: -496.0060093145766
episode: 302 training return: -511.75627402458286
episode: 303 training return: -489.65156688459444
epoch: 76 test_true_pfm: 385.40508372985323 sim_pfm: -445.83696730526157
episode: 304 training return: -498.08709046292375
episode: 305 training return: -499.0716736149557
episode: 306 training return: -510.42859508015357
episode: 307 training return: -507.6890820786313
epoch: 77 test_true_pfm: 380.5108303902236 sim_pfm: -449.1783986929992
episode: 308 training return: -488.87639978543206
episode: 309 training return: -497.74050714519024
episode: 310 training return: -478.88330400593634
episode: 311 training return: -521.5238170266782
epoch: 78 test_true_pfm: 367.04570406129227 sim_pfm: -441.9258313458611
episode: 312 training return: -495.94964420472496
episode: 313 training return: -488.7465391475401
episode: 314 training return: -491.654550112254
episode: 315 training return: -493.1268547400846
epoch: 79 test_true_pfm: 381.58570810388125 sim_pfm: -440.91186204737886
episode: 316 training return: -494.2806925816769
episode: 317 training return: -497.20097391053247
episode: 318 training return: -490.2808108756204
episode: 319 training return: -495.49672868984516
epoch: 80 test_true_pfm: 381.31582195547213 sim_pfm: -439.2625493934854
episode: 320 training return: -499.29757160446775
episode: 321 training return: -491.327285051847
episode: 322 training return: -484.1467291902173
episode: 323 training return: -484.4806355324631
epoch: 81 test_true_pfm: 376.0242401015842 sim_pfm: -446.81633900169726
episode: 324 training return: -498.27924269450045
episode: 325 training return: -499.5631726443093
episode: 326 training return: -493.24763248157944
episode: 327 training return: -499.0761511921151
epoch: 82 test_true_pfm: 334.63100729958063 sim_pfm: -454.768033917174
episode: 328 training return: -488.06706559959395
episode: 329 training return: -503.6914966577751
episode: 330 training return: -505.0283138415087
episode: 331 training return: -502.44596607914764
epoch: 83 test_true_pfm: 384.85028363045996 sim_pfm: -439.52914028381616
episode: 332 training return: -488.4197059145325
episode: 333 training return: -493.020769892541
episode: 334 training return: -503.0430070947291
episode: 335 training return: -492.5963445785265
epoch: 84 test_true_pfm: 372.93537410280305 sim_pfm: -447.3898057279777
episode: 336 training return: -498.91505126737167
episode: 337 training return: -511.13212015665886
episode: 338 training return: -497.9130336680249
episode: 339 training return: -491.32613533554115
epoch: 85 test_true_pfm: 371.9965963579905 sim_pfm: -448.9060848487824
episode: 340 training return: -472.4715135673511
episode: 341 training return: -479.3293782444277
episode: 342 training return: -500.45415652796555
episode: 343 training return: -494.7433786866626
epoch: 86 test_true_pfm: 365.58146939192557 sim_pfm: -448.1249827702782
episode: 344 training return: -489.30326203047053
episode: 345 training return: -469.7055075076903
episode: 346 training return: -483.05044207633034
episode: 347 training return: -492.4069499021025
epoch: 87 test_true_pfm: 360.72046100880266 sim_pfm: -445.8161920336279
episode: 348 training return: -496.76904969787324
episode: 349 training return: -499.4703097482752
episode: 350 training return: -506.0925020516686
episode: 351 training return: -488.9154347066208
epoch: 88 test_true_pfm: 366.99330716764354 sim_pfm: -446.96399169463535
episode: 352 training return: -493.7308174370303
episode: 353 training return: -495.5444103628375
episode: 354 training return: -501.1425258500314
episode: 355 training return: -492.198970372065
epoch: 89 test_true_pfm: 337.35094236356537 sim_pfm: -449.2765075043815
episode: 356 training return: -493.6473217018785
episode: 357 training return: -490.13981715569446
episode: 358 training return: -478.4963563549833
episode: 359 training return: -482.4392644340368
epoch: 90 test_true_pfm: 379.1418158391009 sim_pfm: -442.3878267206057
episode: 360 training return: -498.05976914542373
episode: 361 training return: -489.92057571112844
episode: 362 training return: -494.63604962779664
episode: 363 training return: -490.06743098441893
epoch: 91 test_true_pfm: 413.4750413431416 sim_pfm: -432.32262427702744
episode: 364 training return: -497.86750012269516
episode: 365 training return: -475.246791918286
episode: 366 training return: -472.9403818209103
episode: 367 training return: -488.6519802166837
epoch: 92 test_true_pfm: 401.40019487250333 sim_pfm: -440.73957863399596
episode: 368 training return: -481.6190333911691
episode: 369 training return: -477.01136808501883
episode: 370 training return: -499.8516320076665
episode: 371 training return: -491.9398584964558
epoch: 93 test_true_pfm: 381.558946886608 sim_pfm: -439.97250867150206
episode: 372 training return: -495.24514547513036
episode: 373 training return: -480.26904237643356
episode: 374 training return: -495.36698103281964
episode: 375 training return: -476.48589524405435
epoch: 94 test_true_pfm: 378.66697274012296 sim_pfm: -434.3344524834524
episode: 376 training return: -499.2533837486139
episode: 377 training return: -478.5507082006795
episode: 378 training return: -481.4797836715988
episode: 379 training return: -478.4073038209767
epoch: 95 test_true_pfm: 375.0646246393464 sim_pfm: -437.3220974037234
episode: 380 training return: -493.1458536316098
episode: 381 training return: -483.16533182973757
episode: 382 training return: -489.00469716309107
episode: 383 training return: -490.7912656284538
epoch: 96 test_true_pfm: 393.1261464593587 sim_pfm: -432.6322888869137
episode: 384 training return: -474.2788430357264
episode: 385 training return: -489.0233400579752
episode: 386 training return: -471.39864128977473
episode: 387 training return: -475.75765751509414
epoch: 97 test_true_pfm: 411.6747770309116 sim_pfm: -432.1617809728743
episode: 388 training return: -466.33058037767375
episode: 389 training return: -476.7784777395666
episode: 390 training return: -468.43414300613364
episode: 391 training return: -481.0959521565867
epoch: 98 test_true_pfm: 401.5503944921338 sim_pfm: -433.1561989051117
episode: 392 training return: -485.2614431125607
episode: 393 training return: -484.42454522457314
episode: 394 training return: -480.53079248587323
episode: 395 training return: -465.36440759825814
epoch: 99 test_true_pfm: 409.5499596779473 sim_pfm: -431.0530697752265
episode: 396 training return: -481.5260660693088
episode: 397 training return: -485.93958500115394
episode: 398 training return: -482.99501857629343
episode: 399 training return: -480.3380348761166
epoch: 100 test_true_pfm: 399.88342863368234 sim_pfm: -432.9964568341664
episode: 400 training return: -489.57457294180915
episode: 401 training return: -474.75165862686623
episode: 402 training return: -487.8252660530985
episode: 403 training return: -468.73117310246766
epoch: 101 test_true_pfm: 429.6066816619247 sim_pfm: -421.2912977150642
episode: 404 training return: -466.87425394153865
episode: 405 training return: -480.46691799306114
episode: 406 training return: -491.1198292288522
episode: 407 training return: -472.014402522569
epoch: 102 test_true_pfm: 409.06029757439 sim_pfm: -423.20917468113225
episode: 408 training return: -480.83120449320774
episode: 409 training return: -489.00694121293645
episode: 410 training return: -469.35485735595444
episode: 411 training return: -475.0185989087318
epoch: 103 test_true_pfm: 425.9565788876561 sim_pfm: -414.24751737927045
episode: 412 training return: -482.01035049558436
episode: 413 training return: -479.03182926000744
episode: 414 training return: -480.6214611524586
episode: 415 training return: -468.7628116165145
epoch: 104 test_true_pfm: 390.7865969263391 sim_pfm: -429.25617410486774
episode: 416 training return: -468.1926537591113
episode: 417 training return: -478.52276342971373
episode: 418 training return: -458.0485204854253
episode: 419 training return: -487.73962070492814
epoch: 105 test_true_pfm: 437.9824420499133 sim_pfm: -419.1803881056724
episode: 420 training return: -475.77206006780904
episode: 421 training return: -480.12479570861296
episode: 422 training return: -467.74067223582944
episode: 423 training return: -468.64066900667416
epoch: 106 test_true_pfm: 413.76215005654404 sim_pfm: -418.82565354812704
episode: 424 training return: -478.6064054350285
episode: 425 training return: -477.49505904911547
episode: 426 training return: -488.9423177657226
episode: 427 training return: -486.9515742686675
epoch: 107 test_true_pfm: 444.1841114828708 sim_pfm: -416.079275851233
episode: 428 training return: -486.85218988281036
episode: 429 training return: -471.2418324652463
episode: 430 training return: -489.42342861376716
episode: 431 training return: -478.5556388085141
epoch: 108 test_true_pfm: 403.81737797452416 sim_pfm: -430.5412726793017
episode: 432 training return: -481.1222957213299
episode: 433 training return: -476.51500090399395
episode: 434 training return: -479.31532499332206
episode: 435 training return: -462.46363487139706
epoch: 109 test_true_pfm: 434.33631939669834 sim_pfm: -426.27778119939893
episode: 436 training return: -473.08257017873507
episode: 437 training return: -483.2651628744824
episode: 438 training return: -472.7754071618827
episode: 439 training return: -478.6234217273335
epoch: 110 test_true_pfm: 502.27234893960167 sim_pfm: -396.09845673632185
episode: 440 training return: -476.0900181596142
episode: 441 training return: -493.99164460318156
episode: 442 training return: -483.83628275410854
episode: 443 training return: -474.49659290341464
epoch: 111 test_true_pfm: 418.44508940643027 sim_pfm: -430.52012589974214
episode: 444 training return: -482.93193399581736
episode: 445 training return: -483.45870775191634
episode: 446 training return: -477.0167617524737
episode: 447 training return: -472.8173054434387
epoch: 112 test_true_pfm: 426.89059481146415 sim_pfm: -418.83714847313973
episode: 448 training return: -469.8399115872254
episode: 449 training return: -482.15184003112523
episode: 450 training return: -484.73964469731305
episode: 451 training return: -488.3395838445765
epoch: 113 test_true_pfm: 410.84779206875277 sim_pfm: -427.4125133365328
episode: 452 training return: -482.53543699528524
episode: 453 training return: -492.5707520933233
episode: 454 training return: -480.1823266214218
episode: 455 training return: -476.95524082493614
epoch: 114 test_true_pfm: 417.5409402759383 sim_pfm: -422.71941645519405
episode: 456 training return: -475.12776520108196
episode: 457 training return: -457.2860826375125
episode: 458 training return: -483.76329940809734
episode: 459 training return: -474.59770036017096
epoch: 115 test_true_pfm: 475.28727316988767 sim_pfm: -408.49082771635466
episode: 460 training return: -471.7669196308042
episode: 461 training return: -470.1106492260858
episode: 462 training return: -474.51451996295975
episode: 463 training return: -453.1010053845567
epoch: 116 test_true_pfm: 460.42455840079805 sim_pfm: -409.02618141917134
episode: 464 training return: -467.6407876468086
episode: 465 training return: -464.9649377966137
episode: 466 training return: -469.94626797374514
episode: 467 training return: -476.25482358969003
epoch: 117 test_true_pfm: 468.71404321391924 sim_pfm: -412.96324359188475
episode: 468 training return: -500.1076249787296
episode: 469 training return: -470.731223980602
episode: 470 training return: -464.79713285038605
episode: 471 training return: -464.7390362277506
epoch: 118 test_true_pfm: 490.94822062220527 sim_pfm: -400.21753736596065
episode: 472 training return: -468.730298314877
episode: 473 training return: -477.30257203964953
episode: 474 training return: -464.1747128620884
episode: 475 training return: -464.03567391134766
epoch: 119 test_true_pfm: 475.10582958038094 sim_pfm: -402.51916475516146
episode: 476 training return: -463.6859917572483
episode: 477 training return: -479.3451216015567
episode: 478 training return: -464.6443197524773
episode: 479 training return: -469.3057370913926
epoch: 120 test_true_pfm: 470.07656037298307 sim_pfm: -406.0239944281722
episode: 480 training return: -477.37549061436465
episode: 481 training return: -468.59700228361106
episode: 482 training return: -436.15474359944454
episode: 483 training return: -459.43018050293557
epoch: 121 test_true_pfm: 463.82212339509624 sim_pfm: -403.15945582956056
episode: 484 training return: -459.07651369737204
episode: 485 training return: -474.99795058530555
episode: 486 training return: -486.89199030766343
episode: 487 training return: -475.7625291880957
epoch: 122 test_true_pfm: 462.0445090596031 sim_pfm: -407.5972940994704
episode: 488 training return: -480.359747840211
episode: 489 training return: -469.5498031347062
episode: 490 training return: -466.8897293382678
episode: 491 training return: -463.1518910035881
epoch: 123 test_true_pfm: 521.2612044876585 sim_pfm: -389.8062539416795
episode: 492 training return: -463.6461585486515
episode: 493 training return: -468.46909686467245
episode: 494 training return: -454.8740189273116
episode: 495 training return: -477.5241148339738
epoch: 124 test_true_pfm: 474.135200483653 sim_pfm: -408.143314096837
episode: 496 training return: -470.05896524909264
episode: 497 training return: -468.48731100668704
episode: 498 training return: -475.08287476725894
episode: 499 training return: -487.05362836154166
epoch: 125 test_true_pfm: 488.945856002169 sim_pfm: -412.5887590759468
episode: 500 training return: -452.18400109624
episode: 501 training return: -457.7589287406351
episode: 502 training return: -460.44384003764185
episode: 503 training return: -463.57990574660647
epoch: 126 test_true_pfm: 461.77099612603405 sim_pfm: -406.214426353572
episode: 504 training return: -472.4509604361001
episode: 505 training return: -444.666397123227
episode: 506 training return: -447.40667020513473
episode: 507 training return: -474.8460977229012
epoch: 127 test_true_pfm: 485.92635627913154 sim_pfm: -396.27383204043025
episode: 508 training return: -449.1270998089394
episode: 509 training return: -458.01856476373837
episode: 510 training return: -449.31985759764757
episode: 511 training return: -475.98901936649185
epoch: 128 test_true_pfm: 476.53430140129007 sim_pfm: -397.5640580068178
episode: 512 training return: -471.48886719433233
episode: 513 training return: -446.30643088737247
episode: 514 training return: -449.69030895350653
episode: 515 training return: -454.7433572741515
epoch: 129 test_true_pfm: 474.461260761703 sim_pfm: -403.97592724813944
episode: 516 training return: -431.02630602954116
episode: 517 training return: -445.2617320443682
episode: 518 training return: -458.4630551332964
episode: 519 training return: -477.6895499397584
epoch: 130 test_true_pfm: 475.6811097466104 sim_pfm: -400.3981600323694
episode: 520 training return: -458.850172386631
episode: 521 training return: -449.99801806364195
episode: 522 training return: -455.9935546796427
episode: 523 training return: -458.24051538728065
epoch: 131 test_true_pfm: 500.70482863439605 sim_pfm: -406.15473006661335
episode: 524 training return: -467.25782388168733
episode: 525 training return: -446.28930855940825
episode: 526 training return: -457.96154500371654
episode: 527 training return: -446.25694234101826
epoch: 132 test_true_pfm: 483.9662781808506 sim_pfm: -408.23536020438206
episode: 528 training return: -446.2375323481717
episode: 529 training return: -445.66312223282875
episode: 530 training return: -446.1377732456902
episode: 531 training return: -444.6748764698562
epoch: 133 test_true_pfm: 513.0686889153777 sim_pfm: -387.50688129127883
episode: 532 training return: -443.072516104535
episode: 533 training return: -460.80544416079323
episode: 534 training return: -448.091283775197
episode: 535 training return: -443.2435285043976
epoch: 134 test_true_pfm: 519.4049305584562 sim_pfm: -393.01084156303006
episode: 536 training return: -442.6857585962502
episode: 537 training return: -438.2605427567415
episode: 538 training return: -445.17445092289154
episode: 539 training return: -442.10945398082987
epoch: 135 test_true_pfm: 491.32399985454884 sim_pfm: -394.29316046874084
episode: 540 training return: -465.68129276834463
episode: 541 training return: -446.50774831251914
episode: 542 training return: -433.37908636050867
episode: 543 training return: -422.4996227861518
epoch: 136 test_true_pfm: 530.38038972068 sim_pfm: -382.1843674533158
episode: 544 training return: -424.7659160571564
episode: 545 training return: -452.1404917758598
episode: 546 training return: -445.8065198167905
episode: 547 training return: -441.0264277165903
epoch: 137 test_true_pfm: 514.9684972225774 sim_pfm: -389.87420230944826
episode: 548 training return: -439.06642439817506
episode: 549 training return: -418.4666535382717
episode: 550 training return: -452.383697310803
episode: 551 training return: -432.9547441280422
epoch: 138 test_true_pfm: 574.6362029861208 sim_pfm: -367.1527425406527
episode: 552 training return: -446.1621659257358
episode: 553 training return: -426.0598663668418
episode: 554 training return: -442.93285922852044
episode: 555 training return: -426.88552127099814
epoch: 139 test_true_pfm: 491.25790274342165 sim_pfm: -400.8336626036059
episode: 556 training return: -452.3150351148062
episode: 557 training return: -428.95505584585055
episode: 558 training return: -441.71829845641594
episode: 559 training return: -444.6138555495005
epoch: 140 test_true_pfm: 509.32322751473936 sim_pfm: -400.62976644294366
episode: 560 training return: -446.0535300696659
episode: 561 training return: -453.5031384005914
episode: 562 training return: -436.2747287709947
episode: 563 training return: -438.8913201432553
epoch: 141 test_true_pfm: 526.656073486192 sim_pfm: -396.2522844029666
episode: 564 training return: -443.69035961466346
episode: 565 training return: -448.6403940859207
episode: 566 training return: -417.86349149027427
episode: 567 training return: -431.5296386308327
epoch: 142 test_true_pfm: 521.9414610409008 sim_pfm: -390.57613310504047
episode: 568 training return: -451.8216355390183
episode: 569 training return: -439.48029864868084
episode: 570 training return: -431.87490920693307
episode: 571 training return: -443.3171770812837
epoch: 143 test_true_pfm: 552.4338338110175 sim_pfm: -389.8691569535233
episode: 572 training return: -431.7186360514221
episode: 573 training return: -430.5375641594109
episode: 574 training return: -431.6689158997389
episode: 575 training return: -422.031585736827
epoch: 144 test_true_pfm: 613.3698267967033 sim_pfm: -355.44952064059015
episode: 576 training return: -433.40218423364416
episode: 577 training return: -440.68531467901147
episode: 578 training return: -450.5260749879481
episode: 579 training return: -425.65324984644104
epoch: 145 test_true_pfm: 537.6653548913555 sim_pfm: -381.49204470579593
episode: 580 training return: -441.7728911284503
episode: 581 training return: -439.936744470166
episode: 582 training return: -415.3307996544222
episode: 583 training return: -418.9827224210181
epoch: 146 test_true_pfm: 574.8226369691229 sim_pfm: -375.1886183848383
episode: 584 training return: -429.94096533557075
episode: 585 training return: -441.07267285047436
episode: 586 training return: -431.56224960995814
episode: 587 training return: -440.3038848318122
epoch: 147 test_true_pfm: 572.3376134346986 sim_pfm: -367.49043531132224
episode: 588 training return: -442.7465990545191
episode: 589 training return: -410.37608795947125
episode: 590 training return: -432.9863877617637
episode: 591 training return: -446.20593125039056
epoch: 148 test_true_pfm: 573.7779188948697 sim_pfm: -374.27552936642877
episode: 592 training return: -436.6460069669667
episode: 593 training return: -442.80773866248154
episode: 594 training return: -416.5207720589032
episode: 595 training return: -420.77993249546074
epoch: 149 test_true_pfm: 534.6850163915213 sim_pfm: -391.82487921001024
episode: 596 training return: -441.5704494707833
episode: 597 training return: -437.28491836505526
episode: 598 training return: -424.7631591564179
episode: 599 training return: -420.1422145027814
epoch: 150 test_true_pfm: 636.6997273941537 sim_pfm: -359.7513700160235
