['--alg', 'sac', '--env', 'Swimmer-v3', '--learn', 'uncertainty', '--traj', 'medium', '--seed', '3', '--data', '100000']
epoch: 0 training_loss 0.4443686081469059 test_loss: 0.3903231620788574
epoch: 1 training_loss 0.2784924118220806 test_loss: 0.2641135215759277
epoch: 2 training_loss 0.24778229981660843 test_loss: 0.22717616558074952
epoch: 3 training_loss 0.24243497103452682 test_loss: 0.2405562400817871
epoch: 4 training_loss 0.24443819329142571 test_loss: 0.2688551902770996
epoch: 5 training_loss 0.22765666112303734 test_loss: 0.23187835216522218
epoch: 6 training_loss 0.23313975647091867 test_loss: 0.24356889724731445
epoch: 7 training_loss 0.22504258587956427 test_loss: 0.21823031902313234
epoch: 8 training_loss 0.21719496876001357 test_loss: 0.23794329166412354
epoch: 9 training_loss 0.2171672113239765 test_loss: 0.22672319412231445
epoch: 10 training_loss 0.21644547149538995 test_loss: 0.215181565284729
epoch: 11 training_loss 0.21254765555262567 test_loss: 0.24387171268463134
epoch: 12 training_loss 0.20915176190435886 test_loss: 0.21362245082855225
epoch: 13 training_loss 0.22099042601883412 test_loss: 0.2142717123031616
epoch: 14 training_loss 0.21966527678072453 test_loss: 0.20753800868988037
epoch: 15 training_loss 0.20616288788616657 test_loss: 0.19908870458602906
epoch: 16 training_loss 0.2031757504492998 test_loss: 0.21702094078063966
epoch: 17 training_loss 0.2015729670971632 test_loss: 0.21358139514923097
epoch: 18 training_loss 0.20903011314570905 test_loss: 0.21892695426940917
epoch: 19 training_loss 0.2074557552486658 test_loss: 0.25284671783447266
epoch: 20 training_loss 0.20389200031757354 test_loss: 0.19735840559005738
epoch: 21 training_loss 0.19696288265287876 test_loss: 0.20473427772521974
epoch: 22 training_loss 0.20472948685288428 test_loss: 0.23552265167236328
epoch: 23 training_loss 0.1976852322369814 test_loss: 0.2008045196533203
epoch: 24 training_loss 0.1957980539649725 test_loss: 0.19340628385543823
epoch: 25 training_loss 0.19595814757049085 test_loss: 0.2144514799118042
epoch: 26 training_loss 0.19265399023890495 test_loss: 0.22076170444488524
epoch: 27 training_loss 0.19192379839718343 test_loss: 0.1975494384765625
epoch: 28 training_loss 0.1987037777900696 test_loss: 0.2520554542541504
epoch: 29 training_loss 0.1952114224433899 test_loss: 0.20799553394317627
epoch: 30 training_loss 0.19651534050703048 test_loss: 0.1991441011428833
epoch: 31 training_loss 0.19631757728755475 test_loss: 0.20612828731536864
epoch: 32 training_loss 0.1930054137855768 test_loss: 0.19800906181335448
epoch: 33 training_loss 0.19846658803522588 test_loss: 0.21717677116394044
epoch: 34 training_loss 0.1943975619226694 test_loss: 0.1854928970336914
epoch: 35 training_loss 0.1927529952675104 test_loss: 0.2048792839050293
epoch: 36 training_loss 0.2013506692647934 test_loss: 0.1947467803955078
epoch: 37 training_loss 0.1928365583717823 test_loss: 0.19992753267288207
epoch: 38 training_loss 0.18909310184419156 test_loss: 0.19423927068710328
epoch: 39 training_loss 0.18855478048324584 test_loss: 0.202789568901062
epoch: 40 training_loss 0.1887252540141344 test_loss: 0.20009191036224366
epoch: 41 training_loss 0.19159082904458047 test_loss: 0.2099675178527832
epoch: 42 training_loss 0.19041272707283496 test_loss: 0.21514420509338378
epoch: 43 training_loss 0.19322124496102333 test_loss: 0.19457249641418456
epoch: 44 training_loss 0.18883510261774064 test_loss: 0.20277884006500244
epoch: 45 training_loss 0.1915835228562355 test_loss: 0.18292454481124878
epoch: 46 training_loss 0.18587780639529228 test_loss: 0.20013468265533446
epoch: 47 training_loss 0.18635672599077224 test_loss: 0.21633329391479492
epoch: 48 training_loss 0.19265637792646884 test_loss: 0.20574402809143066
epoch: 49 training_loss 0.19219194836914538 test_loss: 0.20943927764892578
epoch: 50 training_loss 0.19450343757867813 test_loss: 0.20660052299499512
epoch: 51 training_loss 0.18615029409527778 test_loss: 0.20204715728759765
epoch: 52 training_loss 0.18508993715047836 test_loss: 0.2063584566116333
epoch: 53 training_loss 0.18735857978463172 test_loss: 0.18763400316238404
epoch: 54 training_loss 0.18559495352208613 test_loss: 0.1999600648880005
epoch: 55 training_loss 0.1844978791475296 test_loss: 0.1988987445831299
epoch: 56 training_loss 0.19268697075545788 test_loss: 0.1979019045829773
epoch: 57 training_loss 0.18868704743683337 test_loss: 0.2336580276489258
epoch: 58 training_loss 0.1870287261903286 test_loss: 0.19058582782745362
epoch: 59 training_loss 0.18350678145885468 test_loss: 0.18733468055725097
epoch: 60 training_loss 0.18149473451077938 test_loss: 0.18895018100738525
epoch: 61 training_loss 0.1847505781799555 test_loss: 0.188231897354126
epoch: 62 training_loss 0.18462787359952926 test_loss: 0.20320672988891603
epoch: 63 training_loss 0.18985109522938728 test_loss: 0.2149747371673584
epoch: 64 training_loss 0.19027988851070404 test_loss: 0.18131164312362671
epoch: 65 training_loss 0.18626300394535064 test_loss: 0.19468846321105956
epoch: 66 training_loss 0.19086233772337435 test_loss: 0.19774497747421266
epoch: 67 training_loss 0.18689993396401405 test_loss: 0.20598716735839845
epoch: 68 training_loss 0.18481077909469604 test_loss: 0.18862160444259643
epoch: 69 training_loss 0.18973691545426846 test_loss: 0.20556023120880126
epoch: 70 training_loss 0.1921501249819994 test_loss: 0.19978621006011962
epoch: 71 training_loss 0.1841497664153576 test_loss: 0.20307116508483886
epoch: 72 training_loss 0.186272886171937 test_loss: 0.21444578170776368
epoch: 73 training_loss 0.18841062046587467 test_loss: 0.19101825952529908
epoch: 74 training_loss 0.18573569051921368 test_loss: 0.192820143699646
epoch: 75 training_loss 0.18183621197938918 test_loss: 0.21457338333129883
epoch: 76 training_loss 0.18337330795824527 test_loss: 0.19723066091537475
epoch: 77 training_loss 0.19052118472754956 test_loss: 0.19314686059951783
epoch: 78 training_loss 0.18348268799483777 test_loss: 0.18828604221343995
epoch: 79 training_loss 0.18201217226684094 test_loss: 0.23067991733551024
epoch: 80 training_loss 0.18516805425286292 test_loss: 0.1944229483604431
epoch: 81 training_loss 0.18749042168259622 test_loss: 0.18962615728378296
epoch: 82 training_loss 0.17861197762191294 test_loss: 0.18949803113937377
epoch: 83 training_loss 0.18284919887781143 test_loss: 0.20074374675750734
epoch: 84 training_loss 0.1832085243612528 test_loss: 0.17968488931655885
epoch: 85 training_loss 0.18122462034225464 test_loss: 0.17967861890792847
epoch: 86 training_loss 0.1911791240423918 test_loss: 0.19185235500335693
epoch: 87 training_loss 0.1799240606278181 test_loss: 0.19520869255065917
epoch: 88 training_loss 0.1871796278655529 test_loss: 0.19204355478286744
epoch: 89 training_loss 0.18292883783578873 test_loss: 0.19146785736083985
epoch: 90 training_loss 0.18586328938603403 test_loss: 0.20019025802612306
epoch: 91 training_loss 0.18412873841822147 test_loss: 0.18238368034362792
epoch: 92 training_loss 0.17776934124529362 test_loss: 0.18447757959365846
epoch: 93 training_loss 0.18264729611575603 test_loss: 0.19312235116958618
epoch: 94 training_loss 0.18077035136520864 test_loss: 0.18886339664459229
epoch: 95 training_loss 0.18772112317383288 test_loss: 0.1907716155052185
epoch: 96 training_loss 0.1800823812931776 test_loss: 0.19472628831863403
epoch: 97 training_loss 0.175147610232234 test_loss: 0.1892522931098938
epoch: 98 training_loss 0.17887009598314763 test_loss: 0.17997130155563354
epoch: 99 training_loss 0.1814081793278456 test_loss: 0.20921428203582765
epoch: 100 training_loss 0.17985955789685248 test_loss: 0.18607674837112426
epoch: 101 training_loss 0.17381314881145954 test_loss: 0.2077241897583008
epoch: 102 training_loss 0.1846280438452959 test_loss: 0.19191770553588866
epoch: 103 training_loss 0.1845655596256256 test_loss: 0.194427227973938
epoch: 104 training_loss 0.18356518238782882 test_loss: 0.19774527549743653
epoch: 105 training_loss 0.17973277904093266 test_loss: 0.18809971809387208
epoch: 106 training_loss 0.1771560351550579 test_loss: 0.1950403332710266
epoch: 107 training_loss 0.17901663206517696 test_loss: 0.19942007064819336
epoch: 108 training_loss 0.17729820758104325 test_loss: 0.19371050596237183
epoch: 109 training_loss 0.18355230249464513 test_loss: 0.2061302423477173
epoch: 110 training_loss 0.175831925496459 test_loss: 0.17854901552200317
epoch: 111 training_loss 0.17590468659996986 test_loss: 0.19162062406539918
epoch: 112 training_loss 0.18553348772227765 test_loss: 0.185901939868927
epoch: 113 training_loss 0.1786734476685524 test_loss: 0.18753643035888673
epoch: 114 training_loss 0.1808676543086767 test_loss: 0.19036413431167604
epoch: 115 training_loss 0.18019529789686203 test_loss: 0.19682916402816772
epoch: 116 training_loss 0.17875769250094892 test_loss: 0.19211851358413695
epoch: 117 training_loss 0.17924540713429452 test_loss: 0.1811475157737732
epoch: 118 training_loss 0.18069525852799415 test_loss: 0.20732469558715821
epoch: 119 training_loss 0.17081304788589477 test_loss: 0.19622865915298462
epoch: 120 training_loss 0.17535368457436562 test_loss: 0.20757222175598145
epoch: 121 training_loss 0.17940668053925038 test_loss: 0.18506405353546143
epoch: 122 training_loss 0.17878147803246974 test_loss: 0.1981031894683838
epoch: 123 training_loss 0.1820224481076002 test_loss: 0.2014155864715576
epoch: 124 training_loss 0.18315985910594462 test_loss: 0.19957860708236694
epoch: 125 training_loss 0.17678585819900036 test_loss: 0.20418853759765626
epoch: 126 training_loss 0.17855534180998803 test_loss: 0.20459287166595458
epoch: 127 training_loss 0.17766727969050408 test_loss: 0.2429929256439209
epoch: 128 training_loss 0.18373916134238244 test_loss: 0.18566082715988158
epoch: 129 training_loss 0.1771721263229847 test_loss: 0.17952886819839478
epoch: 130 training_loss 0.1748703922331333 test_loss: 0.18579750061035155
epoch: 131 training_loss 0.18260130263864993 test_loss: 0.18314855098724364
epoch: 132 training_loss 0.18183517001569272 test_loss: 0.1960547685623169
epoch: 133 training_loss 0.17792648687958718 test_loss: 0.1891941547393799
epoch: 134 training_loss 0.1795910844951868 test_loss: 0.213968825340271
epoch: 135 training_loss 0.1799246409535408 test_loss: 0.18381012678146363
epoch: 136 training_loss 0.17475649587810038 test_loss: 0.20247046947479247
epoch: 137 training_loss 0.17216423697769642 test_loss: 0.17921382188796997
epoch: 138 training_loss 0.18081850983202458 test_loss: 0.19369326829910277
epoch: 139 training_loss 0.17997681975364685 test_loss: 0.18415240049362183
epoch: 140 training_loss 0.18275831423699856 test_loss: 0.19901307821273803
epoch: 141 training_loss 0.1715076694637537 test_loss: 0.18682886362075807
epoch: 142 training_loss 0.18489001736044883 test_loss: 0.19741939306259154
epoch: 143 training_loss 0.1746157042682171 test_loss: 0.17477953433990479
epoch: 144 training_loss 0.17269521214067937 test_loss: 0.18555389642715453
epoch: 145 training_loss 0.1773687256872654 test_loss: 0.18590140342712402
epoch: 146 training_loss 0.18139796689152718 test_loss: 0.1904246211051941
epoch: 147 training_loss 0.18236547596752645 test_loss: 0.17383261919021606
epoch: 148 training_loss 0.1821737948805094 test_loss: 0.18590359687805175
epoch: 149 training_loss 0.1730935351550579 test_loss: 0.1863308310508728
epoch: 0 training_loss 0.44519063413143156 test_loss: 0.3413149118423462
epoch: 1 training_loss 0.3020724065601826 test_loss: 0.26688418388366697
epoch: 2 training_loss 0.26706877067685125 test_loss: 0.2490105390548706
epoch: 3 training_loss 0.24798501551151275 test_loss: 0.2483668327331543
epoch: 4 training_loss 0.23887899577617644 test_loss: 0.28228714466094973
epoch: 5 training_loss 0.22983112975955008 test_loss: 0.21150436401367187
epoch: 6 training_loss 0.2346430279314518 test_loss: 0.25465731620788573
epoch: 7 training_loss 0.23035664677619935 test_loss: 0.25397138595581054
epoch: 8 training_loss 0.22645934119820596 test_loss: 0.22885847091674805
epoch: 9 training_loss 0.22226212546229362 test_loss: 0.23989577293395997
epoch: 10 training_loss 0.2198767302930355 test_loss: 0.22069804668426513
epoch: 11 training_loss 0.22690931648015977 test_loss: 0.2142038583755493
epoch: 12 training_loss 0.22170473709702493 test_loss: 0.207450795173645
epoch: 13 training_loss 0.20744372725486757 test_loss: 0.20911846160888672
epoch: 14 training_loss 0.21484532944858073 test_loss: 0.22386744022369384
epoch: 15 training_loss 0.21508900597691535 test_loss: 0.21003978252410888
epoch: 16 training_loss 0.21137946613132955 test_loss: 0.1962180733680725
epoch: 17 training_loss 0.21420801758766175 test_loss: 0.21340081691741944
epoch: 18 training_loss 0.2070109474658966 test_loss: 0.2328392505645752
epoch: 19 training_loss 0.21688495382666587 test_loss: 0.2053462505340576
epoch: 20 training_loss 0.21536209277808666 test_loss: 0.2144308567047119
epoch: 21 training_loss 0.20314149804413317 test_loss: 0.18946926593780516
epoch: 22 training_loss 0.204570921510458 test_loss: 0.19973547458648683
epoch: 23 training_loss 0.1961969704926014 test_loss: 0.19258240461349488
epoch: 24 training_loss 0.20623195849359036 test_loss: 0.20308349132537842
epoch: 25 training_loss 0.2135569654405117 test_loss: 0.21995441913604735
epoch: 26 training_loss 0.201398598998785 test_loss: 0.21463263034820557
epoch: 27 training_loss 0.21316147454082965 test_loss: 0.20947792530059814
epoch: 28 training_loss 0.20078104808926583 test_loss: 0.2078310251235962
epoch: 29 training_loss 0.20024858236312867 test_loss: 0.20570619106292726
epoch: 30 training_loss 0.20168811433017253 test_loss: 0.20417914390563965
epoch: 31 training_loss 0.20369251817464828 test_loss: 0.19769437313079835
epoch: 32 training_loss 0.20531569577753545 test_loss: 0.21017839908599853
epoch: 33 training_loss 0.2005439157038927 test_loss: 0.20564937591552734
epoch: 34 training_loss 0.2003129506856203 test_loss: 0.22460606098175048
epoch: 35 training_loss 0.19609487026929856 test_loss: 0.20277597904205322
epoch: 36 training_loss 0.19801429174840451 test_loss: 0.17923465967178345
epoch: 37 training_loss 0.2019414196163416 test_loss: 0.22324459552764891
epoch: 38 training_loss 0.19287347756326198 test_loss: 0.18335703611373902
epoch: 39 training_loss 0.19638180814683437 test_loss: 0.20618200302124023
epoch: 40 training_loss 0.195463295802474 test_loss: 0.19591833353042604
epoch: 41 training_loss 0.19599399894475936 test_loss: 0.19192905426025392
epoch: 42 training_loss 0.20451654687523843 test_loss: 0.1969693660736084
epoch: 43 training_loss 0.1951248950511217 test_loss: 0.19384948015213013
epoch: 44 training_loss 0.19600992895662783 test_loss: 0.18775306940078734
epoch: 45 training_loss 0.20258252523839473 test_loss: 0.2754462480545044
epoch: 46 training_loss 0.19066061303019524 test_loss: 0.19142664670944215
epoch: 47 training_loss 0.19148421175777913 test_loss: 0.18167445659637452
epoch: 48 training_loss 0.1938346766680479 test_loss: 0.18172025680541992
epoch: 49 training_loss 0.19542690485715866 test_loss: 0.17331703901290893
epoch: 50 training_loss 0.19563349068164826 test_loss: 0.18268308639526368
epoch: 51 training_loss 0.19686073116958142 test_loss: 0.19591312408447265
epoch: 52 training_loss 0.1947861085832119 test_loss: 0.20142390727996826
epoch: 53 training_loss 0.18773469671607018 test_loss: 0.17802565097808837
epoch: 54 training_loss 0.19759038016200065 test_loss: 0.19008302688598633
epoch: 55 training_loss 0.1956254844367504 test_loss: 0.19941110610961915
epoch: 56 training_loss 0.19373139046132565 test_loss: 0.1935258388519287
epoch: 57 training_loss 0.18722080662846566 test_loss: 0.19434010982513428
epoch: 58 training_loss 0.1902863562107086 test_loss: 0.2065263271331787
epoch: 59 training_loss 0.19114405818283559 test_loss: 0.1860104203224182
epoch: 60 training_loss 0.187607334703207 test_loss: 0.18319822549819947
epoch: 61 training_loss 0.18374769985675812 test_loss: 0.18746961355209352
epoch: 62 training_loss 0.19260342940688133 test_loss: 0.17472474575042723
epoch: 63 training_loss 0.18330684810876846 test_loss: 0.18676130771636962
epoch: 64 training_loss 0.193067914173007 test_loss: 0.18773080110549928
epoch: 65 training_loss 0.18676536850631237 test_loss: 0.18806498050689696
epoch: 66 training_loss 0.1867169389128685 test_loss: 0.1790194869041443
epoch: 67 training_loss 0.18253917209804058 test_loss: 0.19382960796356202
epoch: 68 training_loss 0.18790624126791955 test_loss: 0.18843891620635986
epoch: 69 training_loss 0.1885138053447008 test_loss: 0.18088669776916505
epoch: 70 training_loss 0.18435099527239798 test_loss: 0.18418970108032226
epoch: 71 training_loss 0.18746560238301754 test_loss: 0.19618761539459229
epoch: 72 training_loss 0.18103631548583507 test_loss: 0.18989965915679932
epoch: 73 training_loss 0.18952572479844093 test_loss: 0.17514053583145142
epoch: 74 training_loss 0.18593981996178627 test_loss: 0.18808262348175048
epoch: 75 training_loss 0.18208535589277744 test_loss: 0.18477693796157837
epoch: 76 training_loss 0.18770309664309026 test_loss: 0.21834611892700195
epoch: 77 training_loss 0.19127767726778985 test_loss: 0.17316612005233764
epoch: 78 training_loss 0.18838717617094516 test_loss: 0.20532631874084473
epoch: 79 training_loss 0.1932638332992792 test_loss: 0.19483506679534912
epoch: 80 training_loss 0.19139302134513855 test_loss: 0.1827075481414795
epoch: 81 training_loss 0.17370669141411782 test_loss: 0.1848917841911316
epoch: 82 training_loss 0.18800471283495426 test_loss: 0.19412978887557983
epoch: 83 training_loss 0.18148401871323586 test_loss: 0.20222694873809816
epoch: 84 training_loss 0.1828646744042635 test_loss: 0.1859954833984375
epoch: 85 training_loss 0.18294426307082176 test_loss: 0.195354688167572
epoch: 86 training_loss 0.18951348133385182 test_loss: 0.19775127172470092
epoch: 87 training_loss 0.18405090719461442 test_loss: 0.18449153900146484
epoch: 88 training_loss 0.18632029853761195 test_loss: 0.17826191186904908
epoch: 89 training_loss 0.19028330527245998 test_loss: 0.18509701490402222
epoch: 90 training_loss 0.1844655703753233 test_loss: 0.19047666788101197
epoch: 91 training_loss 0.18672952465713025 test_loss: 0.18723121881484986
epoch: 92 training_loss 0.180089380890131 test_loss: 0.191891872882843
epoch: 93 training_loss 0.17659265704452992 test_loss: 0.18815414905548095
epoch: 94 training_loss 0.18947710141539573 test_loss: 0.19779808521270753
epoch: 95 training_loss 0.18302591606974603 test_loss: 0.18629086017608643
epoch: 96 training_loss 0.182653306722641 test_loss: 0.18822720050811767
epoch: 97 training_loss 0.1831309352815151 test_loss: 0.17677406072616578
epoch: 98 training_loss 0.18217897400259972 test_loss: 0.2010399580001831
epoch: 99 training_loss 0.18223096594214439 test_loss: 0.18748328685760499
epoch: 100 training_loss 0.18303855173289776 test_loss: 0.19425952434539795
epoch: 101 training_loss 0.18084855310618878 test_loss: 0.1898712158203125
epoch: 102 training_loss 0.1870273780822754 test_loss: 0.17931634187698364
epoch: 103 training_loss 0.1811734990030527 test_loss: 0.18834775686264038
epoch: 104 training_loss 0.1862834931910038 test_loss: 0.185472571849823
epoch: 105 training_loss 0.18681238956749438 test_loss: 0.1694311499595642
epoch: 106 training_loss 0.1830750934034586 test_loss: 0.18448824882507325
epoch: 107 training_loss 0.18273538433015346 test_loss: 0.18923356533050537
epoch: 108 training_loss 0.18362709306180477 test_loss: 0.18615782260894775
epoch: 109 training_loss 0.1827423397451639 test_loss: 0.1847473382949829
epoch: 110 training_loss 0.18444370560348033 test_loss: 0.18080254793167114
epoch: 111 training_loss 0.17081809408962725 test_loss: 0.19709467887878418
epoch: 112 training_loss 0.17384638607501984 test_loss: 0.18680121898651122
epoch: 113 training_loss 0.17467665836215018 test_loss: 0.1880321502685547
epoch: 114 training_loss 0.18482875794172288 test_loss: 0.1925378441810608
epoch: 115 training_loss 0.17957272484898568 test_loss: 0.18317686319351195
epoch: 116 training_loss 0.1805276133865118 test_loss: 0.19179768562316896
epoch: 117 training_loss 0.18187743186950683 test_loss: 0.17910841703414918
epoch: 118 training_loss 0.18046399377286435 test_loss: 0.19869728088378907
epoch: 119 training_loss 0.17734322540462016 test_loss: 0.18106087446212768
epoch: 120 training_loss 0.18619782470166682 test_loss: 0.18300751447677613
epoch: 121 training_loss 0.1865430211275816 test_loss: 0.18518537282943726
epoch: 122 training_loss 0.17746330916881561 test_loss: 0.18707894086837767
epoch: 123 training_loss 0.17750620737671852 test_loss: 0.1922774314880371
epoch: 124 training_loss 0.18096638791263103 test_loss: 0.17113783359527587
epoch: 125 training_loss 0.17770106874406338 test_loss: 0.19040840864181519
epoch: 126 training_loss 0.18903068669140338 test_loss: 0.17370803356170655
epoch: 127 training_loss 0.1717854329198599 test_loss: 0.17934030294418335
epoch: 128 training_loss 0.181823772713542 test_loss: 0.17925223112106323
epoch: 129 training_loss 0.17525203429162503 test_loss: 0.17446545362472535
epoch: 130 training_loss 0.1812714782357216 test_loss: 0.18726277351379395
epoch: 131 training_loss 0.17943820118904114 test_loss: 0.175832998752594
epoch: 132 training_loss 0.18138475097715856 test_loss: 0.17657878398895263
epoch: 133 training_loss 0.1808880476653576 test_loss: 0.1727474093437195
epoch: 134 training_loss 0.18455763325095176 test_loss: 0.17460191249847412
epoch: 135 training_loss 0.18611239403486252 test_loss: 0.18846936225891114
epoch: 136 training_loss 0.1844647294282913 test_loss: 0.17650187015533447
epoch: 137 training_loss 0.18167383551597596 test_loss: 0.18443403244018555
epoch: 138 training_loss 0.18030922941863536 test_loss: 0.18733901977539064
epoch: 139 training_loss 0.18415191806852818 test_loss: 0.19250013828277587
epoch: 140 training_loss 0.17365656815469266 test_loss: 0.18224787712097168
epoch: 141 training_loss 0.17630151443183423 test_loss: 0.1841334581375122
epoch: 142 training_loss 0.1799915289878845 test_loss: 0.1910607933998108
epoch: 143 training_loss 0.17540543712675571 test_loss: 0.167676842212677
epoch: 144 training_loss 0.17588139638304712 test_loss: 0.16964268684387207
epoch: 145 training_loss 0.1770270726829767 test_loss: 0.17936919927597045
epoch: 146 training_loss 0.1826720803976059 test_loss: 0.18554760217666627
epoch: 147 training_loss 0.18646577857434748 test_loss: 0.18062589168548585
epoch: 148 training_loss 0.17066269181668758 test_loss: 0.17876145839691163
epoch: 149 training_loss 0.16989513397216796 test_loss: 0.19984042644500732
epoch: 0 training_loss 0.4477575808763504 test_loss: 0.3443312644958496
epoch: 1 training_loss 0.3068469016253948 test_loss: 0.2692975282669067
epoch: 2 training_loss 0.26368624657392503 test_loss: 0.2601747989654541
epoch: 3 training_loss 0.23975742161273955 test_loss: 0.22276651859283447
epoch: 4 training_loss 0.24061536073684692 test_loss: 0.2196507215499878
epoch: 5 training_loss 0.22647080212831497 test_loss: 0.22427234649658204
epoch: 6 training_loss 0.22260391026735304 test_loss: 0.21226949691772462
epoch: 7 training_loss 0.22207777038216592 test_loss: 0.20887718200683594
epoch: 8 training_loss 0.2288664209097624 test_loss: 0.24450421333312988
epoch: 9 training_loss 0.22797913804650308 test_loss: 0.2206723213195801
epoch: 10 training_loss 0.2250451922416687 test_loss: 0.21632416248321534
epoch: 11 training_loss 0.21608146369457246 test_loss: 0.22180461883544922
epoch: 12 training_loss 0.20675691150128842 test_loss: 0.20779197216033934
epoch: 13 training_loss 0.21180559791624545 test_loss: 0.20089421272277833
epoch: 14 training_loss 0.2198143018037081 test_loss: 0.1980873703956604
epoch: 15 training_loss 0.21655235439538956 test_loss: 0.19667049646377563
epoch: 16 training_loss 0.2041909232735634 test_loss: 0.20159559249877929
epoch: 17 training_loss 0.20810094974935056 test_loss: 0.2581170082092285
epoch: 18 training_loss 0.20279958367347717 test_loss: 0.21212170124053956
epoch: 19 training_loss 0.2040891233086586 test_loss: 0.21317245960235595
epoch: 20 training_loss 0.20842693008482457 test_loss: 0.21247689723968505
epoch: 21 training_loss 0.197148744687438 test_loss: 0.2627716541290283
epoch: 22 training_loss 0.20571399375796318 test_loss: 0.18984302282333373
epoch: 23 training_loss 0.19558849841356277 test_loss: 0.2034191370010376
epoch: 24 training_loss 0.20421242699027062 test_loss: 0.19337955713272095
epoch: 25 training_loss 0.20570524409413338 test_loss: 0.2377080202102661
epoch: 26 training_loss 0.22239515021443368 test_loss: 0.209070086479187
epoch: 27 training_loss 0.1955567268282175 test_loss: 0.20177905559539794
epoch: 28 training_loss 0.2049088090658188 test_loss: 0.1994755744934082
epoch: 29 training_loss 0.20208709746599196 test_loss: 0.18991459608078004
epoch: 30 training_loss 0.20229847535490988 test_loss: 0.19301943778991698
epoch: 31 training_loss 0.19592364855110644 test_loss: 0.22378768920898437
epoch: 32 training_loss 0.18802458062767982 test_loss: 0.19400264024734498
epoch: 33 training_loss 0.1954393482208252 test_loss: 0.19100208282470704
epoch: 34 training_loss 0.20047755017876626 test_loss: 0.2105482816696167
epoch: 35 training_loss 0.1865642161667347 test_loss: 0.18838300704956054
epoch: 36 training_loss 0.19696160957217215 test_loss: 0.18002160787582397
epoch: 37 training_loss 0.1994583798199892 test_loss: 0.20216407775878906
epoch: 38 training_loss 0.19329204320907592 test_loss: 0.20717141628265381
epoch: 39 training_loss 0.19672986805438997 test_loss: 0.20427732467651366
epoch: 40 training_loss 0.19553108610212802 test_loss: 0.18842527866363526
epoch: 41 training_loss 0.1895025274157524 test_loss: 0.20000102519989013
epoch: 42 training_loss 0.1966229570657015 test_loss: 0.20866403579711915
epoch: 43 training_loss 0.19536943174898624 test_loss: 0.20357198715209962
epoch: 44 training_loss 0.18776436507701874 test_loss: 0.2075287103652954
epoch: 45 training_loss 0.19154750898480416 test_loss: 0.20508294105529784
epoch: 46 training_loss 0.19446063615381717 test_loss: 0.20662758350372315
epoch: 47 training_loss 0.19276296928524972 test_loss: 0.20025339126586914
epoch: 48 training_loss 0.19378141224384307 test_loss: 0.19090604782104492
epoch: 49 training_loss 0.1818846645206213 test_loss: 0.20955953598022461
epoch: 50 training_loss 0.18938263453543186 test_loss: 0.19506665468215942
epoch: 51 training_loss 0.19291657090187073 test_loss: 0.20229103565216064
epoch: 52 training_loss 0.1877285136282444 test_loss: 0.19129128456115724
epoch: 53 training_loss 0.1953259954601526 test_loss: 0.19141618013381959
epoch: 54 training_loss 0.1913527551293373 test_loss: 0.20702545642852782
epoch: 55 training_loss 0.19266783937811852 test_loss: 0.20203893184661864
epoch: 56 training_loss 0.18553176656365394 test_loss: 0.19766663312911986
epoch: 57 training_loss 0.186901775598526 test_loss: 0.19630380868911743
epoch: 58 training_loss 0.18510462194681168 test_loss: 0.17838045358657836
epoch: 59 training_loss 0.18257134191691876 test_loss: 0.19429749250411987
epoch: 60 training_loss 0.18498043082654475 test_loss: 0.19519346952438354
epoch: 61 training_loss 0.18220672972500324 test_loss: 0.1915316939353943
epoch: 62 training_loss 0.1882840883731842 test_loss: 0.1844583511352539
epoch: 63 training_loss 0.19156234949827194 test_loss: 0.23163762092590331
epoch: 64 training_loss 0.19268544472754 test_loss: 0.18569287061691284
epoch: 65 training_loss 0.18402816630899907 test_loss: 0.1847892165184021
epoch: 66 training_loss 0.18064437858760357 test_loss: 0.19688738584518434
epoch: 67 training_loss 0.19563601098954678 test_loss: 0.18702932596206664
epoch: 68 training_loss 0.1818961262702942 test_loss: 0.20192084312438965
epoch: 69 training_loss 0.18455830007791518 test_loss: 0.22060766220092773
epoch: 70 training_loss 0.1899088304489851 test_loss: 0.18600517511367798
epoch: 71 training_loss 0.1854718490689993 test_loss: 0.18585672378540039
epoch: 72 training_loss 0.18757486209273339 test_loss: 0.20902400016784667
epoch: 73 training_loss 0.19156606808304788 test_loss: 0.18927563428878785
epoch: 74 training_loss 0.17928880870342254 test_loss: 0.18440917730331421
epoch: 75 training_loss 0.1889447271823883 test_loss: 0.1894185423851013
epoch: 76 training_loss 0.18587131671607493 test_loss: 0.19277197122573853
epoch: 77 training_loss 0.1910282552242279 test_loss: 0.19925850629806519
epoch: 78 training_loss 0.18234268307685852 test_loss: 0.20116655826568602
epoch: 79 training_loss 0.19028457753360273 test_loss: 0.2095708131790161
epoch: 80 training_loss 0.18709943346679211 test_loss: 0.20213563442230226
epoch: 81 training_loss 0.18727405436336994 test_loss: 0.1910151481628418
epoch: 82 training_loss 0.18466196253895759 test_loss: 0.19144177436828613
epoch: 83 training_loss 0.18220639035105704 test_loss: 0.1897613525390625
epoch: 84 training_loss 0.18975592404603958 test_loss: 0.18272615671157838
epoch: 85 training_loss 0.1854364751279354 test_loss: 0.20276257991790772
epoch: 86 training_loss 0.1870010942965746 test_loss: 0.19866195917129517
epoch: 87 training_loss 0.18671735920011998 test_loss: 0.18121592998504638
epoch: 88 training_loss 0.1891032739728689 test_loss: 0.1810571074485779
epoch: 89 training_loss 0.1807595156133175 test_loss: 0.18492381572723388
epoch: 90 training_loss 0.1857578104734421 test_loss: 0.19886374473571777
epoch: 91 training_loss 0.1847923331707716 test_loss: 0.2034400224685669
epoch: 92 training_loss 0.18473229184746742 test_loss: 0.19024120569229125
epoch: 93 training_loss 0.1839093153178692 test_loss: 0.17918522357940675
epoch: 94 training_loss 0.18287074245512486 test_loss: 0.19236452579498292
epoch: 95 training_loss 0.1832319578528404 test_loss: 0.20958876609802246
epoch: 96 training_loss 0.17830261960625648 test_loss: 0.18418409824371337
epoch: 97 training_loss 0.18768094167113303 test_loss: 0.18883341550827026
epoch: 98 training_loss 0.17681240610778332 test_loss: 0.17511351108551027
epoch: 99 training_loss 0.1778611133247614 test_loss: 0.19503272771835328
epoch: 100 training_loss 0.18400526732206346 test_loss: 0.18353041410446166
epoch: 101 training_loss 0.18575604394078254 test_loss: 0.19434118270874023
epoch: 102 training_loss 0.18048873409628868 test_loss: 0.17833960056304932
epoch: 103 training_loss 0.18688041031360625 test_loss: 0.203167724609375
epoch: 104 training_loss 0.18180370852351188 test_loss: 0.18019663095474242
epoch: 105 training_loss 0.195302142649889 test_loss: 0.20812692642211914
epoch: 106 training_loss 0.18241474136710167 test_loss: 0.18402870893478393
epoch: 107 training_loss 0.1858224055916071 test_loss: 0.20309548377990722
epoch: 108 training_loss 0.1771595550328493 test_loss: 0.20667145252227784
epoch: 109 training_loss 0.17605093464255334 test_loss: 0.17507858276367189
epoch: 110 training_loss 0.17812867052853107 test_loss: 0.18202906847000122
epoch: 111 training_loss 0.17778525464236736 test_loss: 0.1848589539527893
epoch: 112 training_loss 0.17820263490080834 test_loss: 0.1779157042503357
epoch: 113 training_loss 0.18077318981289864 test_loss: 0.1936175584793091
epoch: 114 training_loss 0.17476628094911575 test_loss: 0.19915558099746705
epoch: 115 training_loss 0.18825022242963313 test_loss: 0.18444844484329223
epoch: 116 training_loss 0.17438146635890006 test_loss: 0.1946605324745178
epoch: 117 training_loss 0.17866583697497845 test_loss: 0.17819908857345582
epoch: 118 training_loss 0.17814300052821636 test_loss: 0.18686667680740357
epoch: 119 training_loss 0.17609217002987862 test_loss: 0.1829383611679077
epoch: 120 training_loss 0.18666100151836873 test_loss: 0.17181185483932496
epoch: 121 training_loss 0.18499234311282634 test_loss: 0.23407199382781982
epoch: 122 training_loss 0.18053508132696153 test_loss: 0.19533830881118774
epoch: 123 training_loss 0.17896220788359643 test_loss: 0.2033608913421631
epoch: 124 training_loss 0.17892026513814926 test_loss: 0.19003459215164184
epoch: 125 training_loss 0.19062840476632117 test_loss: 0.17611353397369384
epoch: 126 training_loss 0.1767757725715637 test_loss: 0.19468270540237426
epoch: 127 training_loss 0.17473504841327667 test_loss: 0.18932275772094725
epoch: 128 training_loss 0.17523202367126942 test_loss: 0.17671754360198974
epoch: 129 training_loss 0.17876401416957377 test_loss: 0.17480456829071045
epoch: 130 training_loss 0.17853858187794686 test_loss: 0.18773494958877562
epoch: 131 training_loss 0.18173290193080902 test_loss: 0.1825554132461548
epoch: 132 training_loss 0.18243412591516972 test_loss: 0.1973053216934204
epoch: 133 training_loss 0.17665337301790715 test_loss: 0.19244266748428346
epoch: 134 training_loss 0.17782930597662927 test_loss: 0.18846663236618041
epoch: 135 training_loss 0.17968956045806408 test_loss: 0.19248287677764891
epoch: 136 training_loss 0.1826789329946041 test_loss: 0.1889543890953064
epoch: 137 training_loss 0.1777022720128298 test_loss: 0.19169268608093262
epoch: 138 training_loss 0.17371599048376082 test_loss: 0.18139944076538086
epoch: 139 training_loss 0.17899667121469975 test_loss: 0.1909183144569397
epoch: 140 training_loss 0.17702772684395313 test_loss: 0.18472450971603394
epoch: 141 training_loss 0.17528512202203272 test_loss: 0.17767943143844606
epoch: 142 training_loss 0.17579529277980327 test_loss: 0.18810441493988037
epoch: 143 training_loss 0.17496716015040875 test_loss: 0.18934292793273927
epoch: 144 training_loss 0.17636481180787086 test_loss: 0.1793319344520569
epoch: 145 training_loss 0.17493305563926698 test_loss: 0.19211336374282836
epoch: 146 training_loss 0.17444428503513337 test_loss: 0.18447319269180298
epoch: 147 training_loss 0.17593709021806717 test_loss: 0.1854029655456543
epoch: 148 training_loss 0.1858503633737564 test_loss: 0.1840474247932434
epoch: 149 training_loss 0.17824492342770099 test_loss: 0.18097056150436402
epoch: 0 training_loss 0.4126024928689003 test_loss: 0.30456483364105225
epoch: 1 training_loss 0.2921559411287308 test_loss: 0.27805314064025877
epoch: 2 training_loss 0.2618567341566086 test_loss: 0.2512115716934204
epoch: 3 training_loss 0.25926864132285116 test_loss: 0.24647140502929688
epoch: 4 training_loss 0.2494938799738884 test_loss: 0.219045090675354
epoch: 5 training_loss 0.2330279244482517 test_loss: 0.22427847385406494
epoch: 6 training_loss 0.22750733666121958 test_loss: 0.2311004877090454
epoch: 7 training_loss 0.2332121217995882 test_loss: 0.1951930284500122
epoch: 8 training_loss 0.22293899334967138 test_loss: 0.21985280513763428
epoch: 9 training_loss 0.21973427161574363 test_loss: 0.19996415376663207
epoch: 10 training_loss 0.21885486468672752 test_loss: 0.20333325862884521
epoch: 11 training_loss 0.23694319248199464 test_loss: 0.24007599353790282
epoch: 12 training_loss 0.22702516719698906 test_loss: 0.21435601711273194
epoch: 13 training_loss 0.2113538195192814 test_loss: 0.20285072326660156
epoch: 14 training_loss 0.21276812121272087 test_loss: 0.22353992462158204
epoch: 15 training_loss 0.2142094437777996 test_loss: 0.20010986328125
epoch: 16 training_loss 0.21151454821228982 test_loss: 0.20424036979675292
epoch: 17 training_loss 0.2109265847504139 test_loss: 0.20435047149658203
epoch: 18 training_loss 0.21335802756249905 test_loss: 0.20492589473724365
epoch: 19 training_loss 0.21078105330467223 test_loss: 0.21007912158966063
epoch: 20 training_loss 0.2118361721932888 test_loss: 0.2333092212677002
epoch: 21 training_loss 0.21330738127231597 test_loss: 0.21129791736602782
epoch: 22 training_loss 0.20504607602953911 test_loss: 0.22510454654693604
epoch: 23 training_loss 0.20537225507199763 test_loss: 0.19598698616027832
epoch: 24 training_loss 0.2041998515278101 test_loss: 0.2027205467224121
epoch: 25 training_loss 0.20495569169521333 test_loss: 0.21164982318878173
epoch: 26 training_loss 0.20655644953250885 test_loss: 0.206328821182251
epoch: 27 training_loss 0.20677658699452878 test_loss: 0.1979507565498352
epoch: 28 training_loss 0.20277005225419997 test_loss: 0.20355708599090577
epoch: 29 training_loss 0.2032368701696396 test_loss: 0.1918772578239441
epoch: 30 training_loss 0.19962031200528144 test_loss: 0.19933133125305175
epoch: 31 training_loss 0.19876720570027828 test_loss: 0.1940685272216797
epoch: 32 training_loss 0.20466301046311855 test_loss: 0.213336181640625
epoch: 33 training_loss 0.1974997541308403 test_loss: 0.1937330484390259
epoch: 34 training_loss 0.20140308350324632 test_loss: 0.1792745590209961
epoch: 35 training_loss 0.19952739641070366 test_loss: 0.19366369247436524
epoch: 36 training_loss 0.2021616443246603 test_loss: 0.20048809051513672
epoch: 37 training_loss 0.19746990390121938 test_loss: 0.2044133424758911
epoch: 38 training_loss 0.19794374704360962 test_loss: 0.22194039821624756
epoch: 39 training_loss 0.20180303134024144 test_loss: 0.18701297044754028
epoch: 40 training_loss 0.206307076215744 test_loss: 0.18374786376953126
epoch: 41 training_loss 0.19069540344178676 test_loss: 0.21000823974609376
epoch: 42 training_loss 0.19499267540872098 test_loss: 0.2103814125061035
epoch: 43 training_loss 0.18993863642215728 test_loss: 0.21031839847564698
epoch: 44 training_loss 0.20375716269016267 test_loss: 0.20575788021087646
epoch: 45 training_loss 0.20424988895654678 test_loss: 0.1955691933631897
epoch: 46 training_loss 0.19386133745312692 test_loss: 0.19383713006973266
epoch: 47 training_loss 0.20119814552366733 test_loss: 0.19112303256988525
epoch: 48 training_loss 0.19246527083218098 test_loss: 0.1841271162033081
epoch: 49 training_loss 0.19387051589787008 test_loss: 0.19824626445770263
epoch: 50 training_loss 0.1910260968655348 test_loss: 0.20809381008148192
epoch: 51 training_loss 0.1901180625706911 test_loss: 0.18726783990859985
epoch: 52 training_loss 0.19356531754136086 test_loss: 0.2021016836166382
epoch: 53 training_loss 0.20039291724562644 test_loss: 0.21543471813201903
epoch: 54 training_loss 0.20367962643504142 test_loss: 0.19601513147354127
epoch: 55 training_loss 0.20340998858213424 test_loss: 0.18962820768356323
epoch: 56 training_loss 0.20312743812799453 test_loss: 0.18815536499023439
epoch: 57 training_loss 0.1923077054321766 test_loss: 0.1858738422393799
epoch: 58 training_loss 0.18655065946280958 test_loss: 0.17871588468551636
epoch: 59 training_loss 0.18692788653075695 test_loss: 0.1808483839035034
epoch: 60 training_loss 0.19500096768140793 test_loss: 0.17684783935546874
epoch: 61 training_loss 0.19969350412487985 test_loss: 0.20830941200256348
epoch: 62 training_loss 0.19335700064897537 test_loss: 0.18988988399505616
epoch: 63 training_loss 0.19103329211473466 test_loss: 0.20337164402008057
epoch: 64 training_loss 0.20125320889055728 test_loss: 0.19918088912963866
epoch: 65 training_loss 0.19478283248841763 test_loss: 0.2132124662399292
epoch: 66 training_loss 0.19280733458697796 test_loss: 0.19581198692321777
epoch: 67 training_loss 0.18567949816584586 test_loss: 0.19381136894226075
epoch: 68 training_loss 0.18992717668414116 test_loss: 0.17909013032913207
epoch: 69 training_loss 0.19467036351561545 test_loss: 0.1874705195426941
epoch: 70 training_loss 0.19004653193056584 test_loss: 0.19692660570144654
epoch: 71 training_loss 0.20054585054516794 test_loss: 0.18622671365737914
epoch: 72 training_loss 0.1866234339773655 test_loss: 0.1908488392829895
epoch: 73 training_loss 0.19463645428419113 test_loss: 0.21089601516723633
epoch: 74 training_loss 0.19376415997743607 test_loss: 0.1842407464981079
epoch: 75 training_loss 0.1956610032171011 test_loss: 0.1966505765914917
epoch: 76 training_loss 0.19258309960365294 test_loss: 0.19118766784667968
epoch: 77 training_loss 0.19526034846901894 test_loss: 0.24093663692474365
epoch: 78 training_loss 0.18697655856609344 test_loss: 0.1887104630470276
epoch: 79 training_loss 0.1947168893367052 test_loss: 0.18454850912094117
epoch: 80 training_loss 0.189223827496171 test_loss: 0.1780884861946106
epoch: 81 training_loss 0.1974557690322399 test_loss: 0.2010734796524048
epoch: 82 training_loss 0.188579912930727 test_loss: 0.19000594615936278
epoch: 83 training_loss 0.18790126964449883 test_loss: 0.19496952295303344
epoch: 84 training_loss 0.19203631199896334 test_loss: 0.20917394161224365
epoch: 85 training_loss 0.1865252061933279 test_loss: 0.21121463775634766
epoch: 86 training_loss 0.1865724268555641 test_loss: 0.16231653690338135
epoch: 87 training_loss 0.1882724890857935 test_loss: 0.19974327087402344
epoch: 88 training_loss 0.18849656715989113 test_loss: 0.18676737546920777
epoch: 89 training_loss 0.19082046784460543 test_loss: 0.19208664894104005
epoch: 90 training_loss 0.18518381781876087 test_loss: 0.1984972596168518
epoch: 91 training_loss 0.184689938724041 test_loss: 0.20674116611480714
epoch: 92 training_loss 0.1880615907162428 test_loss: 0.18762401342391968
epoch: 93 training_loss 0.1832239179313183 test_loss: 0.18373454809188844
epoch: 94 training_loss 0.1808965366333723 test_loss: 0.18918458223342896
epoch: 95 training_loss 0.20356692396104337 test_loss: 0.2015066385269165
epoch: 96 training_loss 0.19215925380587578 test_loss: 0.18713085651397704
epoch: 97 training_loss 0.18727999716997146 test_loss: 0.20823442935943604
epoch: 98 training_loss 0.18596263714134692 test_loss: 0.1972786545753479
epoch: 99 training_loss 0.17701726153492928 test_loss: 0.19225116968154907
epoch: 100 training_loss 0.18400696150958537 test_loss: 0.20406930446624755
epoch: 101 training_loss 0.18121412672102452 test_loss: 0.18498692512512208
epoch: 102 training_loss 0.19658989012241362 test_loss: 0.19028329849243164
epoch: 103 training_loss 0.1844721807539463 test_loss: 0.17652508020401
epoch: 104 training_loss 0.18964269489049912 test_loss: 0.19198557138442993
epoch: 105 training_loss 0.18819310411810874 test_loss: 0.20368752479553223
epoch: 106 training_loss 0.17876871109008788 test_loss: 0.1926223397254944
epoch: 107 training_loss 0.18993785753846168 test_loss: 0.18068475723266603
epoch: 108 training_loss 0.18731248393654823 test_loss: 0.18093968629837037
epoch: 109 training_loss 0.18717856027185917 test_loss: 0.20588386058807373
epoch: 110 training_loss 0.18438203364610672 test_loss: 0.19322718381881715
epoch: 111 training_loss 0.1916030889004469 test_loss: 0.2009488582611084
epoch: 112 training_loss 0.1829087071865797 test_loss: 0.22025933265686035
epoch: 113 training_loss 0.17337259329855442 test_loss: 0.19527435302734375
epoch: 114 training_loss 0.18798480212688445 test_loss: 0.18387742042541505
epoch: 115 training_loss 0.18158920139074325 test_loss: 0.16947386264801026
epoch: 116 training_loss 0.18300439171493055 test_loss: 0.20373234748840333
epoch: 117 training_loss 0.19099495023489 test_loss: 0.1880669116973877
epoch: 118 training_loss 0.1867104259878397 test_loss: 0.1839374542236328
epoch: 119 training_loss 0.18354490898549558 test_loss: 0.2004019260406494
epoch: 120 training_loss 0.19059382662177085 test_loss: 0.1829781174659729
epoch: 121 training_loss 0.1751422466337681 test_loss: 0.1841273069381714
epoch: 122 training_loss 0.17884863771498202 test_loss: 0.19610532522201538
epoch: 123 training_loss 0.17964147225022317 test_loss: 0.20437448024749755
epoch: 124 training_loss 0.178869795948267 test_loss: 0.18010584115982056
epoch: 125 training_loss 0.18178378455340863 test_loss: 0.18092131614685059
epoch: 126 training_loss 0.19142862379550935 test_loss: 0.1726192593574524
epoch: 127 training_loss 0.18631408900022506 test_loss: 0.19211078882217408
epoch: 128 training_loss 0.18265905015170575 test_loss: 0.19842602014541627
epoch: 129 training_loss 0.18345208562910556 test_loss: 0.17434674501419067
epoch: 130 training_loss 0.1794814746081829 test_loss: 0.19753390550613403
epoch: 131 training_loss 0.17982450768351554 test_loss: 0.1875041127204895
epoch: 132 training_loss 0.1796628411859274 test_loss: 0.18377926349639892
epoch: 133 training_loss 0.1779714449495077 test_loss: 0.18235816955566406
epoch: 134 training_loss 0.18443661443889142 test_loss: 0.2153252363204956
epoch: 135 training_loss 0.19412135303020478 test_loss: 0.15793771743774415
epoch: 136 training_loss 0.1834560838341713 test_loss: 0.20151710510253906
epoch: 137 training_loss 0.18223450906574726 test_loss: 0.18553450107574462
epoch: 138 training_loss 0.17430336348712444 test_loss: 0.1848239541053772
epoch: 139 training_loss 0.17819627396762372 test_loss: 0.188205349445343
epoch: 140 training_loss 0.18054778270423413 test_loss: 0.2041707754135132
epoch: 141 training_loss 0.18106221072375775 test_loss: 0.17989355325698853
epoch: 142 training_loss 0.18259096302092076 test_loss: 0.19735896587371826
epoch: 143 training_loss 0.185419529825449 test_loss: 0.18221182823181153
epoch: 144 training_loss 0.17309744067490102 test_loss: 0.18826463222503662
epoch: 145 training_loss 0.17935318931937216 test_loss: 0.1915681004524231
epoch: 146 training_loss 0.18425651922821998 test_loss: 0.17505018711090087
epoch: 147 training_loss 0.1809141205996275 test_loss: 0.19515280723571776
epoch: 148 training_loss 0.18496106415987015 test_loss: 0.17801738977432252
epoch: 149 training_loss 0.17982589595019818 test_loss: 0.18539037704467773
episode: 0 training return: -382.3487826470188
episode: 1 training return: -523.7066654585168
episode: 2 training return: -543.7124974491765
episode: 3 training return: -419.69115250422936
epoch: 1 test_true_pfm: 24.744467366604695 sim_pfm: -24.229087883671088
episode: 4 training return: -596.2197821495902
episode: 5 training return: -474.5924046742177
episode: 6 training return: -594.0335471711783
episode: 7 training return: -483.2749518248616
epoch: 2 test_true_pfm: 36.50898512225941 sim_pfm: -40.625253168090524
episode: 8 training return: -548.1682801176872
episode: 9 training return: -477.8842791683371
episode: 10 training return: -161.75116069103896
episode: 11 training return: -25.87138743318968
epoch: 3 test_true_pfm: 36.97117095325036 sim_pfm: -112.02498902061761
episode: 12 training return: -130.33771622719055
episode: 13 training return: 21.994284332164245
episode: 14 training return: -70.2638133854464
episode: 15 training return: 265.06273809876416
epoch: 4 test_true_pfm: 27.42613095851509 sim_pfm: 477.4595397562371
episode: 16 training return: 288.24054983557494
episode: 17 training return: 291.7544231563797
episode: 18 training return: 270.88643319449056
episode: 19 training return: -176.23026758761534
epoch: 5 test_true_pfm: 50.57435160426589 sim_pfm: 449.3238850137099
episode: 20 training return: 340.1283334984014
episode: 21 training return: 295.6748683086556
episode: 22 training return: 335.41677985762595
episode: 23 training return: 314.80325402950535
epoch: 6 test_true_pfm: 44.6700577350803 sim_pfm: 434.62200211449334
episode: 24 training return: 334.8576772683992
episode: 25 training return: 307.6894110023534
episode: 26 training return: 325.46806634891954
episode: 27 training return: 252.30298738315327
epoch: 7 test_true_pfm: 39.14024889799499 sim_pfm: 433.51774037798924
episode: 28 training return: 347.9116863566365
episode: 29 training return: 268.4885187225354
episode: 30 training return: 332.51525428258805
episode: 31 training return: 381.2360649335754
epoch: 8 test_true_pfm: 44.98216570739865 sim_pfm: 498.0930126858457
episode: 32 training return: 394.65237610566356
episode: 33 training return: 367.2663652305955
episode: 34 training return: 416.29686331002057
episode: 35 training return: 400.5238621253171
epoch: 9 test_true_pfm: 48.500678581657105 sim_pfm: 504.6919932169952
episode: 36 training return: 436.79702814012626
episode: 37 training return: 429.7571825653277
episode: 38 training return: 426.68482643738463
episode: 39 training return: 413.0735806957752
epoch: 10 test_true_pfm: 49.21238458775476 sim_pfm: 501.801510367176
episode: 40 training return: 423.665807084498
episode: 41 training return: 444.7601023034
episode: 42 training return: 432.73405575492524
episode: 43 training return: 423.29726913446143
epoch: 11 test_true_pfm: 50.79774530399924 sim_pfm: 521.6192487967409
episode: 44 training return: 428.97263119298765
episode: 45 training return: 427.21873526329165
episode: 46 training return: 401.49460236178373
episode: 47 training return: 435.67975878801616
epoch: 12 test_true_pfm: 44.73375943165951 sim_pfm: 508.17853812942786
episode: 48 training return: 437.45593992993196
episode: 49 training return: 415.81207052449565
episode: 50 training return: 431.30255541165616
episode: 51 training return: 422.0714906745373
epoch: 13 test_true_pfm: 53.98887282639089 sim_pfm: 538.9852852526776
episode: 52 training return: 422.66832065083287
episode: 53 training return: 436.4223151886783
episode: 54 training return: 418.37019957654394
episode: 55 training return: 428.66811283943133
epoch: 14 test_true_pfm: 51.74325592567103 sim_pfm: 520.372482948579
episode: 56 training return: 428.3536015463532
episode: 57 training return: 419.542128228149
episode: 58 training return: 441.8759454933362
episode: 59 training return: 451.50756780315004
epoch: 15 test_true_pfm: 58.51404995158771 sim_pfm: 520.6785402220974
episode: 60 training return: 429.7619809292209
episode: 61 training return: 437.11264996768097
episode: 62 training return: 417.7032830353744
episode: 63 training return: 426.97081157203166
epoch: 16 test_true_pfm: 53.50104115297966 sim_pfm: 502.3506607218444
episode: 64 training return: 424.29556447764685
episode: 65 training return: 436.75335948254076
episode: 66 training return: 434.93905834021547
episode: 67 training return: 435.53083810653453
epoch: 17 test_true_pfm: 58.96468546813335 sim_pfm: 523.3201626859797
episode: 68 training return: 431.58566083260723
episode: 69 training return: 431.8488496439453
episode: 70 training return: 418.3401975539535
episode: 71 training return: 414.64339610254234
epoch: 18 test_true_pfm: 58.534020175590236 sim_pfm: 504.399670159394
episode: 72 training return: 410.04245162023744
episode: 73 training return: 436.6177510687543
episode: 74 training return: 422.57354718314906
episode: 75 training return: 409.5745133208413
epoch: 19 test_true_pfm: 57.51047366577078 sim_pfm: 551.0777565748871
episode: 76 training return: 429.2626941716305
episode: 77 training return: 435.74217491897343
episode: 78 training return: 447.23204937781674
episode: 79 training return: 447.5250801861848
epoch: 20 test_true_pfm: 54.14107116430076 sim_pfm: 501.8399457901026
episode: 80 training return: 423.20988734758856
episode: 81 training return: 442.14345157948134
episode: 82 training return: 410.86661347149334
episode: 83 training return: 418.5942127116479
epoch: 21 test_true_pfm: 62.67154114757424 sim_pfm: 534.7191719642004
episode: 84 training return: 443.52686364954553
episode: 85 training return: 443.1385817507991
episode: 86 training return: 446.2985639387744
episode: 87 training return: 438.0663991984655
epoch: 22 test_true_pfm: 68.10811805888473 sim_pfm: 534.5057876135071
episode: 88 training return: 429.30149476807946
episode: 89 training return: 442.0471343377357
episode: 90 training return: 448.73695026430255
episode: 91 training return: 445.2546095966991
epoch: 23 test_true_pfm: 53.20004429725256 sim_pfm: 496.4421464644738
episode: 92 training return: 455.2323037106949
episode: 93 training return: 433.37846455149554
episode: 94 training return: 441.6134498376768
episode: 95 training return: 439.7759086269369
epoch: 24 test_true_pfm: 64.65980740600405 sim_pfm: 558.8104524970481
episode: 96 training return: 433.7626900567466
episode: 97 training return: 440.14642940921556
episode: 98 training return: 432.727825945538
episode: 99 training return: 421.33463593338416
epoch: 25 test_true_pfm: 61.64332950377392 sim_pfm: 548.5239822942231
episode: 100 training return: 458.7745591960229
episode: 101 training return: 440.16927207394036
episode: 102 training return: 444.77959882995356
episode: 103 training return: 427.8959771986911
epoch: 26 test_true_pfm: 67.054595277905 sim_pfm: 537.5390211768972
episode: 104 training return: 435.4558079214082
episode: 105 training return: 460.2057861158963
episode: 106 training return: 445.5040834444974
episode: 107 training return: 446.1846110819276
epoch: 27 test_true_pfm: 67.25074841748554 sim_pfm: 527.8884290642
episode: 108 training return: 430.92951817597066
episode: 109 training return: 439.29373993516253
episode: 110 training return: 455.4917943988701
episode: 111 training return: 446.1053182720685
epoch: 28 test_true_pfm: 59.161843737473426 sim_pfm: 527.8531469367501
episode: 112 training return: 443.51082971895124
episode: 113 training return: 413.88721943076297
episode: 114 training return: 446.3328550003114
episode: 115 training return: 447.394309173058
epoch: 29 test_true_pfm: 67.05256072471995 sim_pfm: 541.7350207178829
episode: 116 training return: 425.5091806706438
episode: 117 training return: 460.2019452973368
episode: 118 training return: 462.70806861457015
episode: 119 training return: 435.50296622924463
epoch: 30 test_true_pfm: 73.12522233396992 sim_pfm: 551.1439574417071
episode: 120 training return: 462.50471864077025
episode: 121 training return: 453.49811116510574
episode: 122 training return: 437.72938129695893
episode: 123 training return: 445.136340765811
epoch: 31 test_true_pfm: 67.98620271734204 sim_pfm: 551.2966747515464
episode: 124 training return: 448.96096449710984
episode: 125 training return: 451.23849700839776
episode: 126 training return: 452.39728787738983
episode: 127 training return: 443.83399183186543
epoch: 32 test_true_pfm: 61.3264494705275 sim_pfm: 533.3907640116985
episode: 128 training return: 439.31503344295896
episode: 129 training return: 462.70327175259325
episode: 130 training return: 449.37347238840084
episode: 131 training return: 454.72096400406423
epoch: 33 test_true_pfm: 64.56606840432825 sim_pfm: 523.89343122343
episode: 132 training return: 430.6105247323371
episode: 133 training return: 446.8699796140201
episode: 134 training return: 461.6692299976431
episode: 135 training return: 438.7804352345162
epoch: 34 test_true_pfm: 67.09398321817095 sim_pfm: 553.8071905483795
episode: 136 training return: 455.1846921710338
episode: 137 training return: 454.33093277059754
episode: 138 training return: 426.59482260564266
episode: 139 training return: 448.52084999545093
epoch: 35 test_true_pfm: 58.06043559813497 sim_pfm: 518.4601092022169
episode: 140 training return: 424.0382957859169
episode: 141 training return: 440.48626404772887
episode: 142 training return: 452.65856915469743
episode: 143 training return: 425.61135758858273
epoch: 36 test_true_pfm: 63.19117951783581 sim_pfm: 527.7395973166113
episode: 144 training return: 449.23232350589217
episode: 145 training return: 453.1414181655403
episode: 146 training return: 447.12396606960255
episode: 147 training return: 446.2587090123999
epoch: 37 test_true_pfm: 68.37978352165159 sim_pfm: 545.2447055735935
episode: 148 training return: 439.37420529441505
episode: 149 training return: 472.0030775094725
episode: 150 training return: 440.1511488758372
episode: 151 training return: 432.38267360806526
epoch: 38 test_true_pfm: 57.58160020558338 sim_pfm: 528.3366294576166
episode: 152 training return: 446.8742369168332
episode: 153 training return: 442.91793872056076
episode: 154 training return: 446.22482457167047
episode: 155 training return: 441.91778438458397
epoch: 39 test_true_pfm: 62.86364249279021 sim_pfm: 537.8301239949272
episode: 156 training return: 459.8719064346724
episode: 157 training return: 453.7259372770532
episode: 158 training return: 440.3847296424476
episode: 159 training return: 436.89403488264446
epoch: 40 test_true_pfm: 63.15988245092323 sim_pfm: 555.4587897739242
episode: 160 training return: 453.86676794210666
episode: 161 training return: 449.28268279725387
episode: 162 training return: 438.15132992861953
episode: 163 training return: 451.20397715256337
epoch: 41 test_true_pfm: 63.40930875457472 sim_pfm: 543.1197926303457
episode: 164 training return: 453.69254084494344
episode: 165 training return: 434.58869720988207
episode: 166 training return: 427.53696324332924
episode: 167 training return: 446.98032010224324
epoch: 42 test_true_pfm: 69.59704166417829 sim_pfm: 552.6951396461316
episode: 168 training return: 431.08792662552383
episode: 169 training return: 435.53689244337903
episode: 170 training return: 439.5864235282995
episode: 171 training return: 469.0083874188564
epoch: 43 test_true_pfm: 66.8395168538178 sim_pfm: 559.472701318053
episode: 172 training return: 447.3009579353475
episode: 173 training return: 464.7587240516805
episode: 174 training return: 445.1592825727453
episode: 175 training return: 435.4443998157691
epoch: 44 test_true_pfm: 70.73351451940611 sim_pfm: 561.7594290511308
episode: 176 training return: 457.2582677381952
episode: 177 training return: 438.2976695558299
episode: 178 training return: 432.48496486619507
episode: 179 training return: 454.3365249978608
epoch: 45 test_true_pfm: 68.78152645404207 sim_pfm: 556.898415485793
episode: 180 training return: 442.01665755440786
episode: 181 training return: 451.36496475645333
episode: 182 training return: 460.85547440416275
episode: 183 training return: 449.75596122299834
epoch: 46 test_true_pfm: 65.11921657156346 sim_pfm: 547.7679546331282
episode: 184 training return: 459.59718514727643
episode: 185 training return: 442.6047702657787
episode: 186 training return: 425.8905338703893
episode: 187 training return: 436.6957161886992
epoch: 47 test_true_pfm: 59.81369413613326 sim_pfm: 547.9892254878615
episode: 188 training return: 439.6346971371384
episode: 189 training return: 456.3099308801115
episode: 190 training return: 445.7309874624131
episode: 191 training return: 444.0962786498753
epoch: 48 test_true_pfm: 56.99259717434948 sim_pfm: 522.4410577506284
episode: 192 training return: 448.6053798970323
episode: 193 training return: 453.9319356792909
episode: 194 training return: 442.089933192475
episode: 195 training return: 456.2482993820315
epoch: 49 test_true_pfm: 63.0977322100617 sim_pfm: 539.2353683856778
episode: 196 training return: 453.28228688228353
episode: 197 training return: 443.557667621572
episode: 198 training return: 435.4921981163237
episode: 199 training return: 439.09495341333866
epoch: 50 test_true_pfm: 60.44043005412558 sim_pfm: 541.8719930181022
episode: 200 training return: 453.5053292511223
episode: 201 training return: 451.04601250530743
episode: 202 training return: 449.22221523283275
episode: 203 training return: 462.99594114563297
epoch: 51 test_true_pfm: 70.77109486988654 sim_pfm: 566.7051914161361
episode: 204 training return: 440.44077012392114
episode: 205 training return: 445.94815906677434
episode: 206 training return: 455.5863403763465
episode: 207 training return: 440.03367613667916
epoch: 52 test_true_pfm: 64.41171285655163 sim_pfm: 534.308345644928
episode: 208 training return: 447.9652973122372
episode: 209 training return: 441.91882484506147
episode: 210 training return: 449.09992411002327
episode: 211 training return: 442.65738823081466
epoch: 53 test_true_pfm: 56.56645360025942 sim_pfm: 552.9213635509268
episode: 212 training return: 437.06477219233494
episode: 213 training return: 453.3807025088371
episode: 214 training return: 453.11456976695064
episode: 215 training return: 452.3727526654661
epoch: 54 test_true_pfm: 54.382329279467605 sim_pfm: 541.3994833090298
episode: 216 training return: 462.81709921055625
episode: 217 training return: 443.66115259386436
episode: 218 training return: 451.48096353154034
episode: 219 training return: 452.59241795072296
epoch: 55 test_true_pfm: 59.21330779097315 sim_pfm: 534.2330263124398
episode: 220 training return: 435.05340460548797
episode: 221 training return: 434.903888466608
episode: 222 training return: 454.9752863404444
episode: 223 training return: 438.55975295799016
epoch: 56 test_true_pfm: 68.4024023413471 sim_pfm: 559.014316982069
episode: 224 training return: 433.5145817467955
episode: 225 training return: 442.0923220166836
episode: 226 training return: 462.82791440509783
episode: 227 training return: 430.1425239382482
epoch: 57 test_true_pfm: 63.00874472171689 sim_pfm: 547.398798699362
episode: 228 training return: 454.9436345301726
episode: 229 training return: 460.5935058099234
episode: 230 training return: 427.6358563640866
episode: 231 training return: 441.684101718951
epoch: 58 test_true_pfm: 56.175875330353016 sim_pfm: 539.0274030142663
episode: 232 training return: 442.2682359825728
episode: 233 training return: 470.7858427524391
episode: 234 training return: 451.1934180691739
episode: 235 training return: 426.98619280860777
epoch: 59 test_true_pfm: 59.410037105754895 sim_pfm: 547.5252603510218
episode: 236 training return: 463.7690218530434
episode: 237 training return: 437.0070749324275
episode: 238 training return: 459.3729211115878
episode: 239 training return: 432.3516113161115
epoch: 60 test_true_pfm: 65.8805440088942 sim_pfm: 536.2191274547548
episode: 240 training return: 442.41073847138705
episode: 241 training return: 430.5981421432161
episode: 242 training return: 441.91239710686017
episode: 243 training return: 452.74370631815873
epoch: 61 test_true_pfm: 58.86546486515574 sim_pfm: 549.6610362481275
episode: 244 training return: 461.99010927155945
episode: 245 training return: 432.77043287407537
episode: 246 training return: 443.0603679591133
episode: 247 training return: 439.43286984135
epoch: 62 test_true_pfm: 61.184550361063735 sim_pfm: 533.0787854504948
episode: 248 training return: 452.164769331246
episode: 249 training return: 466.22608531969234
episode: 250 training return: 431.19455206681926
episode: 251 training return: 448.064132077972
epoch: 63 test_true_pfm: 59.256990637890965 sim_pfm: 526.4379733733748
episode: 252 training return: 451.72721276392224
episode: 253 training return: 437.5871049176918
episode: 254 training return: 448.98013401053214
episode: 255 training return: 460.89973606460455
epoch: 64 test_true_pfm: 59.07709657553291 sim_pfm: 554.7100519880287
episode: 256 training return: 450.93405969613445
episode: 257 training return: 454.03289921500993
episode: 258 training return: 450.98869813211326
episode: 259 training return: 460.2490668506739
epoch: 65 test_true_pfm: 59.7863227527184 sim_pfm: 545.8730209313708
episode: 260 training return: 441.85396343530596
episode: 261 training return: 439.42031087226184
episode: 262 training return: 449.9396307174253
episode: 263 training return: 439.48651008108703
epoch: 66 test_true_pfm: 61.840595260368595 sim_pfm: 539.5525702159354
episode: 264 training return: 419.9461713238009
episode: 265 training return: 461.0463717886762
episode: 266 training return: 443.64033303349265
episode: 267 training return: 452.45811254485574
epoch: 67 test_true_pfm: 61.84242374679665 sim_pfm: 542.1588002472959
episode: 268 training return: 454.9406768053555
episode: 269 training return: 457.3616774005563
episode: 270 training return: 453.4539663944796
episode: 271 training return: 459.27700988195755
epoch: 68 test_true_pfm: 62.36186950651946 sim_pfm: 548.26049772575
episode: 272 training return: 461.546690523614
episode: 273 training return: 435.38213932426146
episode: 274 training return: 443.0369470372688
episode: 275 training return: 448.76794143191887
epoch: 69 test_true_pfm: 56.49165940173081 sim_pfm: 538.0173709588188
episode: 276 training return: 480.4144031349293
episode: 277 training return: 464.84450305912225
episode: 278 training return: 452.28503548774387
episode: 279 training return: 452.7628068215868
epoch: 70 test_true_pfm: 65.3679750473894 sim_pfm: 558.9765390131765
episode: 280 training return: 447.60637340412455
episode: 281 training return: 468.23600699405597
episode: 282 training return: 449.8168146698072
episode: 283 training return: 444.2026042023051
epoch: 71 test_true_pfm: 64.64107514354014 sim_pfm: 550.5039888621087
episode: 284 training return: 471.7016588393933
episode: 285 training return: 440.94308506445094
episode: 286 training return: 446.9456972691537
episode: 287 training return: 450.2392356899412
epoch: 72 test_true_pfm: 61.33347104212576 sim_pfm: 538.72099202946
episode: 288 training return: 458.6387035219394
episode: 289 training return: 453.85220415554284
episode: 290 training return: 454.10620764616317
episode: 291 training return: 465.6547662352805
epoch: 73 test_true_pfm: 62.12436317880383 sim_pfm: 536.4378099966167
episode: 292 training return: 463.886861483034
episode: 293 training return: 457.8069537852251
episode: 294 training return: 476.449669847662
episode: 295 training return: 466.4625239769436
epoch: 74 test_true_pfm: 59.69458746360888 sim_pfm: 536.1886639264757
episode: 296 training return: 466.31834919735394
episode: 297 training return: 466.4983124941551
episode: 298 training return: 455.7329462455561
episode: 299 training return: 446.29221064685413
epoch: 75 test_true_pfm: 65.68990385220069 sim_pfm: 553.4878015463975
episode: 300 training return: 446.7342774307106
episode: 301 training return: 458.14482828852385
episode: 302 training return: 464.66596265366115
episode: 303 training return: 442.34621739770336
epoch: 76 test_true_pfm: 66.1078239774812 sim_pfm: 550.0187027696546
episode: 304 training return: 446.25827759365114
episode: 305 training return: 447.956287319647
episode: 306 training return: 459.42877848286446
episode: 307 training return: 462.9235292093038
epoch: 77 test_true_pfm: 66.27053781165795 sim_pfm: 557.2893093403723
episode: 308 training return: 469.49558297079204
episode: 309 training return: 448.76892142364636
episode: 310 training return: 450.25743891361793
episode: 311 training return: 456.09810797604445
epoch: 78 test_true_pfm: 58.51729817251357 sim_pfm: 556.2778450213199
episode: 312 training return: 464.11412339413954
episode: 313 training return: 468.34850120674076
episode: 314 training return: 420.4792106635007
episode: 315 training return: 447.0777520990317
epoch: 79 test_true_pfm: 67.65545139103641 sim_pfm: 557.7175387080015
episode: 316 training return: 446.8815316695101
episode: 317 training return: 442.9654209856352
episode: 318 training return: 438.4747121682737
episode: 319 training return: 438.9838440957173
epoch: 80 test_true_pfm: 67.6113068130412 sim_pfm: 548.6541453574999
episode: 320 training return: 441.4288451097906
episode: 321 training return: 439.60682398924337
episode: 322 training return: 449.60791100836377
episode: 323 training return: 456.62382975614395
epoch: 81 test_true_pfm: 60.93531148329382 sim_pfm: 557.3815094258747
episode: 324 training return: 445.1810072620997
episode: 325 training return: 475.6059475521744
episode: 326 training return: 447.0627178151196
episode: 327 training return: 450.16113402035836
epoch: 82 test_true_pfm: 63.26903850003461 sim_pfm: 550.8472639753649
episode: 328 training return: 466.81380749271875
episode: 329 training return: 439.621679002282
episode: 330 training return: 450.6647937273098
episode: 331 training return: 450.16525686645826
epoch: 83 test_true_pfm: 65.10877030586043 sim_pfm: 545.1790341203089
episode: 332 training return: 449.89083638752805
episode: 333 training return: 461.53660803220043
episode: 334 training return: 465.44507170513253
episode: 335 training return: 454.70039185932274
epoch: 84 test_true_pfm: 55.71241835397112 sim_pfm: 545.0386500928072
episode: 336 training return: 460.83899336379227
episode: 337 training return: 447.63824766790304
episode: 338 training return: 476.7346880984123
episode: 339 training return: 455.9654942003981
epoch: 85 test_true_pfm: 61.90600041853974 sim_pfm: 556.4127215650295
episode: 340 training return: 459.5681059889456
episode: 341 training return: 446.77813652951835
episode: 342 training return: 450.6524430553403
episode: 343 training return: 451.3025729595688
epoch: 86 test_true_pfm: 61.108036477348044 sim_pfm: 553.4588132075822
episode: 344 training return: 435.1635741612748
episode: 345 training return: 460.8845368376281
episode: 346 training return: 462.8545431056244
episode: 347 training return: 456.5154251312725
epoch: 87 test_true_pfm: 62.44751112776819 sim_pfm: 541.0192803455838
episode: 348 training return: 477.58803401614136
episode: 349 training return: 444.81239211566265
episode: 350 training return: 451.41313100590065
episode: 351 training return: 471.2900728195049
epoch: 88 test_true_pfm: 58.697051903455474 sim_pfm: 540.9343708140192
episode: 352 training return: 443.52193976181036
episode: 353 training return: 447.0343917779317
episode: 354 training return: 435.10567143480597
episode: 355 training return: 452.0987293687711
epoch: 89 test_true_pfm: 67.23286776975972 sim_pfm: 550.7387936839687
episode: 356 training return: 461.58920578126225
episode: 357 training return: 456.2280591185193
episode: 358 training return: 450.46619713131935
episode: 359 training return: 470.14783866584935
epoch: 90 test_true_pfm: 64.81952762998644 sim_pfm: 552.1071175833478
episode: 360 training return: 448.9452808050442
episode: 361 training return: 467.25591649411234
episode: 362 training return: 450.45364129832626
episode: 363 training return: 454.43613086460033
epoch: 91 test_true_pfm: 66.85994234267079 sim_pfm: 540.9471316125856
episode: 364 training return: 467.71502424488904
episode: 365 training return: 456.4266157617602
episode: 366 training return: 446.8859549731713
episode: 367 training return: 438.64791048170696
epoch: 92 test_true_pfm: 59.31340073406238 sim_pfm: 546.2079368175679
episode: 368 training return: 448.73253905831746
episode: 369 training return: 451.1553453272596
episode: 370 training return: 471.6436347152671
episode: 371 training return: 449.93399536173797
epoch: 93 test_true_pfm: 57.90905546540997 sim_pfm: 542.0309519570569
episode: 372 training return: 473.88666512917996
episode: 373 training return: 437.1571582042305
episode: 374 training return: 459.1103401066123
episode: 375 training return: 461.48949697838754
epoch: 94 test_true_pfm: 65.64638559742484 sim_pfm: 556.4401907272912
episode: 376 training return: 471.7707989747182
episode: 377 training return: 447.488946290718
episode: 378 training return: 441.28517208056417
episode: 379 training return: 461.39162799329864
epoch: 95 test_true_pfm: 68.36933991262237 sim_pfm: 544.4386436717939
episode: 380 training return: 482.8260046453236
episode: 381 training return: 468.52036887380865
episode: 382 training return: 456.00702624280206
episode: 383 training return: 468.48294502809966
epoch: 96 test_true_pfm: 66.85446827139562 sim_pfm: 562.7725261297395
episode: 384 training return: 459.1376265823596
episode: 385 training return: 452.1594884943701
episode: 386 training return: 451.74253591428817
episode: 387 training return: 468.438597713556
epoch: 97 test_true_pfm: 73.66253165427952 sim_pfm: 555.5995898443091
episode: 388 training return: 465.75005249937186
episode: 389 training return: 426.0928897812173
episode: 390 training return: 459.98520040730904
episode: 391 training return: 460.86884058904855
epoch: 98 test_true_pfm: 59.86847170591829 sim_pfm: 553.1005320966261
episode: 392 training return: 464.4511904231878
episode: 393 training return: 452.78721110615174
episode: 394 training return: 457.0162010196518
episode: 395 training return: 450.2747901393744
epoch: 99 test_true_pfm: 60.52475087889197 sim_pfm: 536.6440114755973
episode: 396 training return: 443.8498386727441
episode: 397 training return: 469.9195370416268
episode: 398 training return: 455.4633057301819
episode: 399 training return: 450.92654783473125
epoch: 100 test_true_pfm: 65.90002093637584 sim_pfm: 547.5667869283708
episode: 400 training return: 448.2335109591109
episode: 401 training return: 448.62725934154025
episode: 402 training return: 465.1266350222326
episode: 403 training return: 454.9895286979735
epoch: 101 test_true_pfm: 68.43368763029298 sim_pfm: 548.1340723493757
episode: 404 training return: 451.4673448559583
episode: 405 training return: 456.0190722300889
episode: 406 training return: 461.02661800622326
episode: 407 training return: 453.97383498870806
epoch: 102 test_true_pfm: 65.27236443417502 sim_pfm: 560.4606410796124
episode: 408 training return: 466.4708123624698
episode: 409 training return: 450.67546225280216
episode: 410 training return: 444.14395321528957
episode: 411 training return: 464.6770631295341
epoch: 103 test_true_pfm: 63.62567020031518 sim_pfm: 545.1364007974145
episode: 412 training return: 456.031729336741
episode: 413 training return: 443.55345289542623
episode: 414 training return: 452.1516671322345
episode: 415 training return: 474.4397645038038
epoch: 104 test_true_pfm: 59.53671257619029 sim_pfm: 548.0214970469647
episode: 416 training return: 454.7779006715447
episode: 417 training return: 462.13494647279055
episode: 418 training return: 455.40349560156756
episode: 419 training return: 440.95718014404207
epoch: 105 test_true_pfm: 71.95906093171766 sim_pfm: 562.1754142193754
episode: 420 training return: 462.8390274212664
episode: 421 training return: 452.5157281789179
episode: 422 training return: 467.3775849855996
episode: 423 training return: 453.42998210448576
epoch: 106 test_true_pfm: 71.46161555123301 sim_pfm: 566.8929797045524
episode: 424 training return: 449.7633532967573
episode: 425 training return: 443.1005358867547
episode: 426 training return: 452.0867039220383
episode: 427 training return: 454.12021109637783
epoch: 107 test_true_pfm: 70.79848880798012 sim_pfm: 560.6454844291297
episode: 428 training return: 447.7662718682583
episode: 429 training return: 440.4354854250056
episode: 430 training return: 462.6203353994172
episode: 431 training return: 466.62403857216776
epoch: 108 test_true_pfm: 57.35518313484969 sim_pfm: 538.2080647818357
episode: 432 training return: 464.25930736488186
episode: 433 training return: 459.57412861276686
episode: 434 training return: 450.9931857459426
episode: 435 training return: 460.4849259144948
epoch: 109 test_true_pfm: 67.86686420684153 sim_pfm: 564.0381919304778
episode: 436 training return: 473.18639042949064
episode: 437 training return: 462.72464229027946
episode: 438 training return: 469.97682180617386
episode: 439 training return: 463.187495153119
epoch: 110 test_true_pfm: 62.973216035681496 sim_pfm: 535.7606335165
episode: 440 training return: 455.3077437682081
episode: 441 training return: 448.32163039326383
episode: 442 training return: 462.2259249150687
episode: 443 training return: 454.9425743397366
epoch: 111 test_true_pfm: 67.63385334096512 sim_pfm: 554.4006990756302
episode: 444 training return: 455.7534892802084
episode: 445 training return: 453.7507330761924
episode: 446 training return: 472.71238088124795
episode: 447 training return: 463.6134375080358
epoch: 112 test_true_pfm: 64.54808610776573 sim_pfm: 557.1319978836013
episode: 448 training return: 467.74000283467535
episode: 449 training return: 452.07017238946787
episode: 450 training return: 453.68212494877315
episode: 451 training return: 461.22568123961264
epoch: 113 test_true_pfm: 62.05511073821316 sim_pfm: 551.0216229016941
episode: 452 training return: 458.91739735534134
episode: 453 training return: 465.10575158122305
episode: 454 training return: 450.21766212861183
episode: 455 training return: 472.05328868596047
epoch: 114 test_true_pfm: 71.75832397781836 sim_pfm: 543.0421373595008
episode: 456 training return: 457.0663076404607
episode: 457 training return: 464.2643980189974
episode: 458 training return: 456.9529098025767
episode: 459 training return: 457.46541429888777
epoch: 115 test_true_pfm: 69.18565694930697 sim_pfm: 554.7054601545087
episode: 460 training return: 466.5647189272916
episode: 461 training return: 465.23473669035434
episode: 462 training return: 465.80792204097435
episode: 463 training return: 474.06997081711273
epoch: 116 test_true_pfm: 67.35350180616359 sim_pfm: 556.7698817322495
episode: 464 training return: 467.74292600792216
episode: 465 training return: 464.9482662781903
episode: 466 training return: 444.31632168449204
episode: 467 training return: 449.21852362615334
epoch: 117 test_true_pfm: 72.98643294652811 sim_pfm: 563.7593095293805
episode: 468 training return: 466.494963444884
episode: 469 training return: 459.1584196459834
episode: 470 training return: 457.8446768770498
episode: 471 training return: 459.3158470324562
epoch: 118 test_true_pfm: 65.25257993829672 sim_pfm: 541.612763465139
episode: 472 training return: 459.5936604194619
episode: 473 training return: 459.22570080709136
episode: 474 training return: 451.1320209060224
episode: 475 training return: 457.73948611818184
epoch: 119 test_true_pfm: 71.22784594130157 sim_pfm: 559.7673592394468
episode: 476 training return: 453.8537272503211
episode: 477 training return: 455.778310441253
episode: 478 training return: 472.2280465336293
episode: 479 training return: 465.22709866616464
epoch: 120 test_true_pfm: 65.33317635416576 sim_pfm: 551.2949193127324
episode: 480 training return: 466.3857222171229
episode: 481 training return: 459.9755495262871
episode: 482 training return: 448.00696016122987
episode: 483 training return: 456.9566639514376
epoch: 121 test_true_pfm: 67.37783701896127 sim_pfm: 563.1675720751614
episode: 484 training return: 457.9846358645925
episode: 485 training return: 468.3681309178112
episode: 486 training return: 488.6043007278227
episode: 487 training return: 465.33610383323327
epoch: 122 test_true_pfm: 62.153102908289135 sim_pfm: 563.5160391725145
episode: 488 training return: 458.81947126969703
episode: 489 training return: 461.3982730698727
episode: 490 training return: 478.0993667868907
episode: 491 training return: 472.94966585741116
epoch: 123 test_true_pfm: 70.44603361907014 sim_pfm: 563.6116179074247
episode: 492 training return: 456.57010867976805
episode: 493 training return: 452.4426508590003
episode: 494 training return: 471.0214451343745
episode: 495 training return: 436.0521942971353
epoch: 124 test_true_pfm: 61.78685440288817 sim_pfm: 552.5239550868749
episode: 496 training return: 463.16258640975826
episode: 497 training return: 450.70493526389015
episode: 498 training return: 456.1250747812851
episode: 499 training return: 450.77517945842106
epoch: 125 test_true_pfm: 61.877123656376035 sim_pfm: 536.4459058490256
episode: 500 training return: 457.98701763041595
episode: 501 training return: 463.17703876312055
episode: 502 training return: 463.5621549091524
episode: 503 training return: 460.01931342842306
epoch: 126 test_true_pfm: 67.61518770930127 sim_pfm: 551.8488522162111
episode: 504 training return: 474.13160130760076
episode: 505 training return: 464.8939383228362
episode: 506 training return: 467.01128323783286
episode: 507 training return: 460.44866773071055
epoch: 127 test_true_pfm: 72.81313109525469 sim_pfm: 542.5596161167953
episode: 508 training return: 461.9567899470173
episode: 509 training return: 475.29286782330405
episode: 510 training return: 462.7802007484863
episode: 511 training return: 448.6633956375846
epoch: 128 test_true_pfm: 67.82305514314288 sim_pfm: 553.2952126828098
episode: 512 training return: 456.9044277213507
episode: 513 training return: 465.4642831228704
episode: 514 training return: 447.7755134794464
episode: 515 training return: 469.52971124761956
epoch: 129 test_true_pfm: 70.61720062036507 sim_pfm: 560.4190979344185
episode: 516 training return: 460.6962149117808
episode: 517 training return: 491.4291545732816
episode: 518 training return: 449.70736957908883
episode: 519 training return: 460.61354209177506
epoch: 130 test_true_pfm: 67.50681926011616 sim_pfm: 562.983648794582
episode: 520 training return: 470.58267246190223
episode: 521 training return: 465.6537350425236
episode: 522 training return: 455.3464695438627
episode: 523 training return: 461.70597221148716
epoch: 131 test_true_pfm: 63.19109673525387 sim_pfm: 546.6167750414318
episode: 524 training return: 463.6999405546576
episode: 525 training return: 448.4358864173776
episode: 526 training return: 458.37178344740795
episode: 527 training return: 460.5592532405213
epoch: 132 test_true_pfm: 64.59020878809612 sim_pfm: 557.1332916939203
episode: 528 training return: 468.7032557267651
episode: 529 training return: 471.7720947357958
episode: 530 training return: 462.56932401994527
episode: 531 training return: 475.504139671553
epoch: 133 test_true_pfm: 67.61030543072806 sim_pfm: 549.562120625645
episode: 532 training return: 466.6218437229815
episode: 533 training return: 465.615748446602
episode: 534 training return: 461.9864091373915
episode: 535 training return: 457.3519386758424
epoch: 134 test_true_pfm: 65.03498376749637 sim_pfm: 567.3201288792346
episode: 536 training return: 454.26471087783017
episode: 537 training return: 460.3753837332383
episode: 538 training return: 461.6732831992397
episode: 539 training return: 461.9030300648897
epoch: 135 test_true_pfm: 58.920962555715676 sim_pfm: 551.8331733097299
episode: 540 training return: 459.5890734286082
episode: 541 training return: 476.8003800019895
episode: 542 training return: 492.0197907390473
episode: 543 training return: 463.6993791957605
epoch: 136 test_true_pfm: 69.72888697195151 sim_pfm: 566.9484975973019
episode: 544 training return: 454.2644886493669
episode: 545 training return: 474.4620654047811
episode: 546 training return: 457.71609575235743
episode: 547 training return: 453.05138128859363
epoch: 137 test_true_pfm: 72.6540632112537 sim_pfm: 564.0206895529025
episode: 548 training return: 467.0669726844261
episode: 549 training return: 476.660801675366
episode: 550 training return: 471.21210448021804
episode: 551 training return: 456.72185466584494
epoch: 138 test_true_pfm: 71.51787500634452 sim_pfm: 573.471094936206
episode: 552 training return: 445.37285705167756
episode: 553 training return: 450.292890645184
episode: 554 training return: 452.52873977050535
episode: 555 training return: 472.65296797515305
epoch: 139 test_true_pfm: 59.4015209391506 sim_pfm: 540.2848246821418
episode: 556 training return: 455.10485640465123
episode: 557 training return: 477.76602828606497
episode: 558 training return: 469.67759007391254
episode: 559 training return: 448.6428553599287
epoch: 140 test_true_pfm: 65.32931829906829 sim_pfm: 557.9142994579092
episode: 560 training return: 441.27377382970843
episode: 561 training return: 468.2568485680893
episode: 562 training return: 470.74956525628005
episode: 563 training return: 478.36011235758644
epoch: 141 test_true_pfm: 69.77079266182136 sim_pfm: 570.7189819635129
episode: 564 training return: 482.9287805178498
episode: 565 training return: 484.9582153484522
episode: 566 training return: 471.1388926223538
episode: 567 training return: 459.4943040483673
epoch: 142 test_true_pfm: 67.0711658383302 sim_pfm: 570.0238985359517
episode: 568 training return: 473.25134279508507
episode: 569 training return: 459.1451447047243
episode: 570 training return: 468.67545138618937
episode: 571 training return: 479.1564611964435
epoch: 143 test_true_pfm: 65.235109765568 sim_pfm: 550.4070932615813
episode: 572 training return: 469.2459870713286
episode: 573 training return: 469.6837507277563
episode: 574 training return: 472.81682845468026
episode: 575 training return: 473.58167886155246
epoch: 144 test_true_pfm: 69.22565144878553 sim_pfm: 553.5240356117625
episode: 576 training return: 473.338023822091
episode: 577 training return: 468.6051015212966
episode: 578 training return: 476.05468640810443
episode: 579 training return: 462.3897665202398
epoch: 145 test_true_pfm: 61.043638891398025 sim_pfm: 552.5867527422891
episode: 580 training return: 464.20868908320267
episode: 581 training return: 479.85648481814576
episode: 582 training return: 471.64326780286143
episode: 583 training return: 461.88338515584707
epoch: 146 test_true_pfm: 69.62508757989339 sim_pfm: 554.8090807583255
episode: 584 training return: 478.8584052460178
episode: 585 training return: 451.5361817673881
episode: 586 training return: 467.50355283251685
episode: 587 training return: 472.5122434859486
epoch: 147 test_true_pfm: 70.71768624565571 sim_pfm: 560.7905403932441
episode: 588 training return: 461.70871077895134
episode: 589 training return: 469.14662739005036
episode: 590 training return: 459.26809020243155
episode: 591 training return: 454.5953853495023
epoch: 148 test_true_pfm: 66.4900004152178 sim_pfm: 557.8445374211159
episode: 592 training return: 483.3028846971457
episode: 593 training return: 478.0162113395918
episode: 594 training return: 460.55481845534854
episode: 595 training return: 458.29981191197027
epoch: 149 test_true_pfm: 60.434321011727604 sim_pfm: 548.4416981767741
episode: 596 training return: 464.21379954793935
episode: 597 training return: 460.5411879315189
episode: 598 training return: 469.92875169108186
episode: 599 training return: 472.5511128189383
epoch: 150 test_true_pfm: 67.58745911933893 sim_pfm: 559.9382980104311
