['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'uncertainty', '--traj', 'medium', '--seed', '2', '--data', '100000']
epoch: 0 training_loss 0.2545356212556362 test_loss: 0.21266558170318603
epoch: 1 training_loss 0.1997889007627964 test_loss: 0.19736068248748778
epoch: 2 training_loss 0.1866308582574129 test_loss: 0.18815816640853883
epoch: 3 training_loss 0.1856032754480839 test_loss: 0.20804715156555176
epoch: 4 training_loss 0.19393361650407315 test_loss: 0.18916845321655273
epoch: 5 training_loss 0.1881404109299183 test_loss: 0.19809455871582032
epoch: 6 training_loss 0.18061164744198321 test_loss: 0.18827213048934938
epoch: 7 training_loss 0.19054999336600303 test_loss: 0.20378313064575196
epoch: 8 training_loss 0.1828840833902359 test_loss: 0.17169342041015626
epoch: 9 training_loss 0.17945574212819337 test_loss: 0.19332411289215087
epoch: 10 training_loss 0.18901850104331971 test_loss: 0.1776704430580139
epoch: 11 training_loss 0.18755364358425142 test_loss: 0.17538238763809205
epoch: 12 training_loss 0.18370379365980624 test_loss: 0.1690871000289917
epoch: 13 training_loss 0.18705233156681061 test_loss: 0.18124722242355346
epoch: 14 training_loss 0.18157280132174491 test_loss: 0.1901712417602539
epoch: 15 training_loss 0.1821338041126728 test_loss: 0.19269057512283325
epoch: 16 training_loss 0.17743219777941704 test_loss: 0.1650591254234314
epoch: 17 training_loss 0.18854652509093284 test_loss: 0.19229357242584227
epoch: 18 training_loss 0.1910380530357361 test_loss: 0.18143632411956787
epoch: 19 training_loss 0.18737190812826157 test_loss: 0.1765156865119934
epoch: 20 training_loss 0.1776776646822691 test_loss: 0.1734676957130432
epoch: 21 training_loss 0.1822974469512701 test_loss: 0.17999529838562012
epoch: 22 training_loss 0.18404821872711183 test_loss: 0.16404762268066406
epoch: 23 training_loss 0.1826826498657465 test_loss: 0.176249098777771
epoch: 24 training_loss 0.18258910275995732 test_loss: 0.1822216749191284
epoch: 25 training_loss 0.17399774737656115 test_loss: 0.18454993963241578
epoch: 26 training_loss 0.1883370267599821 test_loss: 0.16929452419281005
epoch: 27 training_loss 0.17817279800772667 test_loss: 0.17925022840499877
epoch: 28 training_loss 0.17265188917517663 test_loss: 0.18316632509231567
epoch: 29 training_loss 0.17519015192985535 test_loss: 0.18375909328460693
epoch: 30 training_loss 0.17821420766413212 test_loss: 0.17268919944763184
epoch: 31 training_loss 0.1802199799567461 test_loss: 0.17724558115005493
epoch: 32 training_loss 0.17570950094610452 test_loss: 0.16682841777801513
epoch: 33 training_loss 0.18002311989665032 test_loss: 0.1750720500946045
epoch: 34 training_loss 0.1865995679423213 test_loss: 0.18627049922943115
epoch: 35 training_loss 0.17911133289337158 test_loss: 0.17401012182235717
epoch: 36 training_loss 0.1729478868842125 test_loss: 0.17478721141815184
epoch: 37 training_loss 0.17772652640938758 test_loss: 0.19079735279083251
epoch: 38 training_loss 0.17238146662712098 test_loss: 0.18091945648193358
epoch: 39 training_loss 0.1795828864723444 test_loss: 0.16499335765838624
epoch: 40 training_loss 0.18707135267555713 test_loss: 0.1749216675758362
epoch: 41 training_loss 0.17811563715338707 test_loss: 0.17270179986953735
epoch: 42 training_loss 0.18722272157669068 test_loss: 0.17706166505813598
epoch: 43 training_loss 0.17850574545562267 test_loss: 0.17462257146835328
epoch: 44 training_loss 0.18087580561637878 test_loss: 0.16272495985031127
epoch: 45 training_loss 0.1845895491540432 test_loss: 0.17303035259246827
epoch: 46 training_loss 0.1813945747166872 test_loss: 0.17989908456802367
epoch: 47 training_loss 0.17459859110414983 test_loss: 0.16562604904174805
epoch: 48 training_loss 0.17913638025522233 test_loss: 0.15799920558929442
epoch: 49 training_loss 0.17483990713953973 test_loss: 0.16967358589172363
epoch: 50 training_loss 0.1802560868114233 test_loss: 0.18333064317703246
epoch: 51 training_loss 0.18369786374270916 test_loss: 0.16565020084381105
epoch: 52 training_loss 0.18376496389508248 test_loss: 0.19416615962982178
epoch: 53 training_loss 0.1769797644764185 test_loss: 0.19209998846054077
epoch: 54 training_loss 0.18359265401959418 test_loss: 0.17092292308807372
epoch: 55 training_loss 0.17275904055684804 test_loss: 0.17873270511627198
epoch: 56 training_loss 0.17377110838890075 test_loss: 0.1832632303237915
epoch: 57 training_loss 0.18574035994708538 test_loss: 0.18611745834350585
epoch: 58 training_loss 0.17587992206215858 test_loss: 0.17382230758666992
epoch: 59 training_loss 0.18172853119671345 test_loss: 0.15846377611160278
epoch: 60 training_loss 0.17163190633058548 test_loss: 0.18482846021652222
epoch: 61 training_loss 0.1807549936696887 test_loss: 0.18522284030914307
epoch: 62 training_loss 0.18136463068425657 test_loss: 0.18171766996383668
epoch: 63 training_loss 0.17611415542662143 test_loss: 0.16659902334213256
epoch: 64 training_loss 0.1808818231523037 test_loss: 0.1647215485572815
epoch: 65 training_loss 0.176418684348464 test_loss: 0.16811654567718506
epoch: 66 training_loss 0.17599566109478473 test_loss: 0.17031605243682862
epoch: 67 training_loss 0.1765943521261215 test_loss: 0.16041959524154664
epoch: 68 training_loss 0.1809613148868084 test_loss: 0.15305358171463013
epoch: 69 training_loss 0.18177933268249036 test_loss: 0.18451813459396363
epoch: 70 training_loss 0.18018255777657033 test_loss: 0.18271822929382325
epoch: 71 training_loss 0.18492514632642268 test_loss: 0.18336203098297119
epoch: 72 training_loss 0.18287472277879716 test_loss: 0.16856273412704467
epoch: 73 training_loss 0.1807274390757084 test_loss: 0.17305299043655395
epoch: 74 training_loss 0.1759906978905201 test_loss: 0.18162475824356078
epoch: 75 training_loss 0.17712346196174622 test_loss: 0.17173162698745728
epoch: 76 training_loss 0.18288655072450638 test_loss: 0.17438510656356812
epoch: 77 training_loss 0.17658221304416657 test_loss: 0.16730518341064454
epoch: 78 training_loss 0.1793590186536312 test_loss: 0.1597541093826294
epoch: 79 training_loss 0.17760830089449883 test_loss: 0.16695233583450317
epoch: 80 training_loss 0.17316934034228326 test_loss: 0.16648517847061156
epoch: 81 training_loss 0.18826481506228446 test_loss: 0.1762912631034851
epoch: 82 training_loss 0.17256031930446625 test_loss: 0.18101218938827515
epoch: 83 training_loss 0.17188403844833375 test_loss: 0.18103021383285522
epoch: 84 training_loss 0.17393154300749303 test_loss: 0.17820883989334108
epoch: 85 training_loss 0.18031230300664902 test_loss: 0.18120119571685792
epoch: 86 training_loss 0.16816859088838101 test_loss: 0.1667713165283203
epoch: 87 training_loss 0.18355150669813156 test_loss: 0.16405539512634276
epoch: 88 training_loss 0.18091000251471998 test_loss: 0.18684444427490235
epoch: 89 training_loss 0.17557529293000698 test_loss: 0.16406280994415284
epoch: 90 training_loss 0.1785213377326727 test_loss: 0.1642922043800354
epoch: 91 training_loss 0.175198160931468 test_loss: 0.18544306755065917
epoch: 92 training_loss 0.17159967377781868 test_loss: 0.18044276237487794
epoch: 93 training_loss 0.16777707792818547 test_loss: 0.1671340823173523
epoch: 94 training_loss 0.17782492443919182 test_loss: 0.1693244457244873
epoch: 95 training_loss 0.1818026527762413 test_loss: 0.1717892050743103
epoch: 96 training_loss 0.16646542750298976 test_loss: 0.17646346092224122
epoch: 97 training_loss 0.18068843193352221 test_loss: 0.19370481967926026
epoch: 98 training_loss 0.17523859553039073 test_loss: 0.17266974449157715
epoch: 99 training_loss 0.18412127666175365 test_loss: 0.17768523693084717
epoch: 100 training_loss 0.17067162051796914 test_loss: 0.16759551763534547
epoch: 101 training_loss 0.17406073540449143 test_loss: 0.1744952917098999
epoch: 102 training_loss 0.17283090621232985 test_loss: 0.162513267993927
epoch: 103 training_loss 0.17453540802001954 test_loss: 0.17069621086120607
epoch: 104 training_loss 0.17292032092809678 test_loss: 0.18725658655166627
epoch: 105 training_loss 0.18304864794015885 test_loss: 0.1724791169166565
epoch: 106 training_loss 0.17057327575981618 test_loss: 0.18512481451034546
epoch: 107 training_loss 0.17318750396370888 test_loss: 0.17599170207977294
epoch: 108 training_loss 0.17748075902462004 test_loss: 0.1724765419960022
epoch: 109 training_loss 0.17472665566951037 test_loss: 0.16772012710571288
epoch: 110 training_loss 0.17831023462116719 test_loss: 0.17478171586990357
epoch: 111 training_loss 0.18027240604162217 test_loss: 0.17391798496246338
epoch: 112 training_loss 0.17507689528167247 test_loss: 0.16778439283370972
epoch: 113 training_loss 0.173901779204607 test_loss: 0.16092357635498047
epoch: 114 training_loss 0.17762698993086815 test_loss: 0.1768284797668457
epoch: 115 training_loss 0.17149956129491328 test_loss: 0.18355741500854492
epoch: 116 training_loss 0.1781519089639187 test_loss: 0.17055389881134034
epoch: 117 training_loss 0.17741567254066468 test_loss: 0.16913304328918458
epoch: 118 training_loss 0.18560153648257255 test_loss: 0.19009919166564943
epoch: 119 training_loss 0.17871452711522579 test_loss: 0.17433611154556275
epoch: 120 training_loss 0.18131985768675804 test_loss: 0.1941433310508728
epoch: 121 training_loss 0.1757374119758606 test_loss: 0.18302468061447144
epoch: 122 training_loss 0.1667394085228443 test_loss: 0.1727353572845459
epoch: 123 training_loss 0.18268796548247337 test_loss: 0.1934753656387329
epoch: 124 training_loss 0.16938403837382793 test_loss: 0.17359404563903807
epoch: 125 training_loss 0.17058826565742494 test_loss: 0.16849550008773803
epoch: 126 training_loss 0.17863082140684128 test_loss: 0.16681028604507447
epoch: 127 training_loss 0.16577921383082866 test_loss: 0.16594395637512208
epoch: 128 training_loss 0.17561834938824178 test_loss: 0.1725004196166992
epoch: 129 training_loss 0.17890988871455193 test_loss: 0.1868635058403015
epoch: 130 training_loss 0.17005314864218235 test_loss: 0.16514937877655028
epoch: 131 training_loss 0.1697383540868759 test_loss: 0.156801974773407
epoch: 132 training_loss 0.17131831973791123 test_loss: 0.17590792179107667
epoch: 133 training_loss 0.1736056686937809 test_loss: 0.17527185678482055
epoch: 134 training_loss 0.1796837592124939 test_loss: 0.16841930150985718
epoch: 135 training_loss 0.17134232215583325 test_loss: 0.1678129553794861
epoch: 136 training_loss 0.1721460212767124 test_loss: 0.18428242206573486
epoch: 137 training_loss 0.176251764819026 test_loss: 0.17682840824127197
epoch: 138 training_loss 0.17116835467517377 test_loss: 0.1673983335494995
epoch: 139 training_loss 0.17154437161982058 test_loss: 0.1790866255760193
epoch: 140 training_loss 0.17698793292045592 test_loss: 0.17352304458618165
epoch: 141 training_loss 0.1722428909689188 test_loss: 0.1887788414955139
epoch: 142 training_loss 0.17971956431865693 test_loss: 0.17397892475128174
epoch: 143 training_loss 0.1795340184867382 test_loss: 0.16144726276397706
epoch: 144 training_loss 0.17791651651263238 test_loss: 0.195586097240448
epoch: 145 training_loss 0.17622153870761395 test_loss: 0.18428871631622315
epoch: 146 training_loss 0.1695616899430752 test_loss: 0.176985502243042
epoch: 147 training_loss 0.183653436973691 test_loss: 0.15980464220046997
epoch: 148 training_loss 0.1748861350119114 test_loss: 0.1709758758544922
epoch: 149 training_loss 0.17852976091206074 test_loss: 0.1626789927482605
epoch: 0 training_loss 0.2603851614892483 test_loss: 0.21745386123657226
epoch: 1 training_loss 0.20299248546361923 test_loss: 0.1971146821975708
epoch: 2 training_loss 0.2009224745631218 test_loss: 0.21060316562652587
epoch: 3 training_loss 0.19638547338545323 test_loss: 0.2190962553024292
epoch: 4 training_loss 0.1925370692461729 test_loss: 0.18320575952529908
epoch: 5 training_loss 0.18944463059306144 test_loss: 0.19059956073760986
epoch: 6 training_loss 0.18800923623144628 test_loss: 0.1962205648422241
epoch: 7 training_loss 0.18457829490303992 test_loss: 0.1809058427810669
epoch: 8 training_loss 0.19246284037828446 test_loss: 0.17182964086532593
epoch: 9 training_loss 0.19074211694300175 test_loss: 0.1851615309715271
epoch: 10 training_loss 0.18832687102258205 test_loss: 0.18843353986740113
epoch: 11 training_loss 0.18473575949668886 test_loss: 0.18090295791625977
epoch: 12 training_loss 0.17915886409580709 test_loss: 0.1922812581062317
epoch: 13 training_loss 0.17725785203278066 test_loss: 0.18673721551895142
epoch: 14 training_loss 0.19145710334181787 test_loss: 0.1818651080131531
epoch: 15 training_loss 0.1831126344949007 test_loss: 0.18980224132537843
epoch: 16 training_loss 0.18349529087543487 test_loss: 0.17388712167739867
epoch: 17 training_loss 0.1875658144056797 test_loss: 0.19392924308776854
epoch: 18 training_loss 0.18045604534447193 test_loss: 0.20296378135681153
epoch: 19 training_loss 0.19193384252488613 test_loss: 0.2026745080947876
epoch: 20 training_loss 0.1840697331726551 test_loss: 0.17715011835098265
epoch: 21 training_loss 0.18210023552179336 test_loss: 0.19199094772338868
epoch: 22 training_loss 0.18087594605982305 test_loss: 0.1879827380180359
epoch: 23 training_loss 0.1798316916823387 test_loss: 0.19785257577896118
epoch: 24 training_loss 0.18504860885441304 test_loss: 0.17137168645858764
epoch: 25 training_loss 0.18617718167603015 test_loss: 0.19076311588287354
epoch: 26 training_loss 0.18205325022339822 test_loss: 0.1906840205192566
epoch: 27 training_loss 0.1855166970193386 test_loss: 0.1829623579978943
epoch: 28 training_loss 0.18927347302436828 test_loss: 0.19892046451568604
epoch: 29 training_loss 0.18499337442219257 test_loss: 0.1862494945526123
epoch: 30 training_loss 0.18207551896572113 test_loss: 0.1969762325286865
epoch: 31 training_loss 0.1816905003041029 test_loss: 0.18782594203948974
epoch: 32 training_loss 0.1760718832910061 test_loss: 0.18263521194458007
epoch: 33 training_loss 0.1827836384624243 test_loss: 0.2109748363494873
epoch: 34 training_loss 0.18197811134159564 test_loss: 0.19001554250717162
epoch: 35 training_loss 0.1857720237225294 test_loss: 0.1856430172920227
epoch: 36 training_loss 0.17626920379698277 test_loss: 0.18738495111465453
epoch: 37 training_loss 0.18871618181467056 test_loss: 0.18369160890579223
epoch: 38 training_loss 0.17352590411901475 test_loss: 0.17561278343200684
epoch: 39 training_loss 0.17751090168952943 test_loss: 0.1754983425140381
epoch: 40 training_loss 0.17899029672145844 test_loss: 0.18089991807937622
epoch: 41 training_loss 0.1813869834691286 test_loss: 0.18416855335235596
epoch: 42 training_loss 0.18356248937547207 test_loss: 0.18701415061950682
epoch: 43 training_loss 0.1794272933155298 test_loss: 0.18790639638900758
epoch: 44 training_loss 0.1803345612436533 test_loss: 0.17549294233322144
epoch: 45 training_loss 0.17781859248876572 test_loss: 0.17835627794265746
epoch: 46 training_loss 0.1862230384349823 test_loss: 0.19402071237564086
epoch: 47 training_loss 0.18301004953682423 test_loss: 0.18654866218566896
epoch: 48 training_loss 0.17334425657987595 test_loss: 0.18123363256454467
epoch: 49 training_loss 0.17618333254009486 test_loss: 0.1954021453857422
epoch: 50 training_loss 0.1882862366735935 test_loss: 0.177990186214447
epoch: 51 training_loss 0.17738000802695753 test_loss: 0.18635979890823365
epoch: 52 training_loss 0.18292855232954025 test_loss: 0.1963193655014038
epoch: 53 training_loss 0.17703590027987956 test_loss: 0.18450163602828978
epoch: 54 training_loss 0.1745359105616808 test_loss: 0.17955341339111328
epoch: 55 training_loss 0.18030974693596363 test_loss: 0.19168436527252197
epoch: 56 training_loss 0.18304311737418175 test_loss: 0.1792318344116211
epoch: 57 training_loss 0.17857003942131996 test_loss: 0.19635869264602662
epoch: 58 training_loss 0.17908433958888054 test_loss: 0.19404889345169068
epoch: 59 training_loss 0.17364607505500318 test_loss: 0.18082184791564943
epoch: 60 training_loss 0.17924267143011094 test_loss: 0.18713401556015014
epoch: 61 training_loss 0.18259762309491634 test_loss: 0.1730726718902588
epoch: 62 training_loss 0.1776556245982647 test_loss: 0.17593631744384766
epoch: 63 training_loss 0.17897916868329047 test_loss: 0.17826987504959108
epoch: 64 training_loss 0.17850405402481556 test_loss: 0.16425050497055055
epoch: 65 training_loss 0.1723632488399744 test_loss: 0.19185832738876343
epoch: 66 training_loss 0.17753828465938568 test_loss: 0.19532262086868285
epoch: 67 training_loss 0.17458627447485925 test_loss: 0.179740309715271
epoch: 68 training_loss 0.17693333514034748 test_loss: 0.18780299425125122
epoch: 69 training_loss 0.172180987149477 test_loss: 0.17701926231384277
epoch: 70 training_loss 0.1855876086652279 test_loss: 0.18023649454116822
epoch: 71 training_loss 0.17044300228357315 test_loss: 0.18824369907379152
epoch: 72 training_loss 0.17355123974382877 test_loss: 0.18946752548217774
epoch: 73 training_loss 0.17535468865185977 test_loss: 0.20224378108978272
epoch: 74 training_loss 0.17295588284730912 test_loss: 0.17382904291152954
epoch: 75 training_loss 0.1769032047688961 test_loss: 0.1733124852180481
epoch: 76 training_loss 0.18021725222468377 test_loss: 0.17344754934310913
epoch: 77 training_loss 0.18880778789520264 test_loss: 0.18819495439529418
epoch: 78 training_loss 0.17583818607032298 test_loss: 0.1813764214515686
epoch: 79 training_loss 0.1749560761451721 test_loss: 0.1996374249458313
epoch: 80 training_loss 0.18586313992738723 test_loss: 0.18390049934387206
epoch: 81 training_loss 0.16692075580358506 test_loss: 0.17961328029632567
epoch: 82 training_loss 0.17116399466991425 test_loss: 0.17787762880325317
epoch: 83 training_loss 0.17220651656389235 test_loss: 0.18231306076049805
epoch: 84 training_loss 0.17617537923157214 test_loss: 0.17746225595474244
epoch: 85 training_loss 0.1785782789438963 test_loss: 0.17339510917663575
epoch: 86 training_loss 0.1781269647181034 test_loss: 0.19600362777709962
epoch: 87 training_loss 0.1772657885402441 test_loss: 0.16760896444320678
epoch: 88 training_loss 0.1784739900380373 test_loss: 0.19111608266830443
epoch: 89 training_loss 0.1742622197419405 test_loss: 0.1910974144935608
epoch: 90 training_loss 0.17230697758495808 test_loss: 0.18233914375305177
epoch: 91 training_loss 0.1758860792219639 test_loss: 0.1948543071746826
epoch: 92 training_loss 0.1787316683679819 test_loss: 0.19353866577148438
epoch: 93 training_loss 0.17406007550656796 test_loss: 0.18592526912689208
epoch: 94 training_loss 0.17999511398375034 test_loss: 0.18174487352371216
epoch: 95 training_loss 0.18117835983633995 test_loss: 0.1782555103302002
epoch: 96 training_loss 0.17905281752347946 test_loss: 0.18692231178283691
epoch: 97 training_loss 0.17746766164898872 test_loss: 0.18348548412322999
epoch: 98 training_loss 0.1731078988686204 test_loss: 0.19111040830612183
epoch: 99 training_loss 0.17250813059508802 test_loss: 0.16517809629440308
epoch: 100 training_loss 0.17752206213772298 test_loss: 0.1906881093978882
epoch: 101 training_loss 0.17010653905570508 test_loss: 0.17977066040039064
epoch: 102 training_loss 0.17492528572678567 test_loss: 0.17767026424407958
epoch: 103 training_loss 0.17616311825811862 test_loss: 0.16980832815170288
epoch: 104 training_loss 0.18310524247586726 test_loss: 0.18643065690994262
epoch: 105 training_loss 0.17820554547011852 test_loss: 0.18524996042251587
epoch: 106 training_loss 0.1890688182413578 test_loss: 0.1856168746948242
epoch: 107 training_loss 0.173938497826457 test_loss: 0.19480818510055542
epoch: 108 training_loss 0.18175728164613247 test_loss: 0.1857423424720764
epoch: 109 training_loss 0.17931034721434116 test_loss: 0.19942405223846435
epoch: 110 training_loss 0.17226687140762806 test_loss: 0.18551543951034546
epoch: 111 training_loss 0.16861019447445869 test_loss: 0.18787012100219727
epoch: 112 training_loss 0.17394039690494537 test_loss: 0.1939738631248474
epoch: 113 training_loss 0.17493440166115762 test_loss: 0.1872130036354065
epoch: 114 training_loss 0.17243026912212372 test_loss: 0.1800834059715271
epoch: 115 training_loss 0.1702359139174223 test_loss: 0.18492339849472045
epoch: 116 training_loss 0.17828985184431076 test_loss: 0.1732733130455017
epoch: 117 training_loss 0.1748102381080389 test_loss: 0.1693453907966614
epoch: 118 training_loss 0.17250586479902266 test_loss: 0.1895768404006958
epoch: 119 training_loss 0.18023268930613995 test_loss: 0.1897178053855896
epoch: 120 training_loss 0.17992912732064725 test_loss: 0.18020986318588256
epoch: 121 training_loss 0.18237782277166845 test_loss: 0.204986834526062
epoch: 122 training_loss 0.174748545140028 test_loss: 0.17348407506942748
epoch: 123 training_loss 0.16485635176301003 test_loss: 0.17910631895065307
epoch: 124 training_loss 0.17863106846809387 test_loss: 0.1805114269256592
epoch: 125 training_loss 0.17810878701508046 test_loss: 0.18217481374740602
epoch: 126 training_loss 0.17380071453750134 test_loss: 0.18694807291030885
epoch: 127 training_loss 0.17722609505057335 test_loss: 0.17151360511779784
epoch: 128 training_loss 0.1768553540855646 test_loss: 0.18090170621871948
epoch: 129 training_loss 0.16421528480947017 test_loss: 0.18265732526779174
epoch: 130 training_loss 0.1692836035043001 test_loss: 0.1847735285758972
epoch: 131 training_loss 0.17381634220480918 test_loss: 0.1794121742248535
epoch: 132 training_loss 0.17633692488074304 test_loss: 0.1801397681236267
epoch: 133 training_loss 0.17649420186877252 test_loss: 0.17818059921264648
epoch: 134 training_loss 0.1677868089824915 test_loss: 0.1833645820617676
epoch: 135 training_loss 0.16364532746374608 test_loss: 0.1833522915840149
epoch: 136 training_loss 0.16884664192795754 test_loss: 0.17079838514328002
epoch: 137 training_loss 0.17443959064781667 test_loss: 0.1803501009941101
epoch: 138 training_loss 0.16522589817643166 test_loss: 0.16885111331939698
epoch: 139 training_loss 0.16568700291216373 test_loss: 0.1747589349746704
epoch: 140 training_loss 0.17622656024992467 test_loss: 0.18918577432632447
epoch: 141 training_loss 0.16891807578504087 test_loss: 0.1754723906517029
epoch: 142 training_loss 0.18269480600953103 test_loss: 0.19466930627822876
epoch: 143 training_loss 0.17458657100796698 test_loss: 0.1764741063117981
epoch: 144 training_loss 0.17674927160143852 test_loss: 0.1922170639038086
epoch: 145 training_loss 0.1709918325394392 test_loss: 0.17643840312957765
epoch: 146 training_loss 0.1807337599992752 test_loss: 0.192448627948761
epoch: 147 training_loss 0.17212961591780185 test_loss: 0.188265597820282
epoch: 148 training_loss 0.16957594238221646 test_loss: 0.20652506351470948
epoch: 149 training_loss 0.18159598216414452 test_loss: 0.1818292737007141
epoch: 0 training_loss 0.283806442245841 test_loss: 0.21807246208190917
epoch: 1 training_loss 0.2060881443321705 test_loss: 0.20223600864410402
epoch: 2 training_loss 0.19994841396808624 test_loss: 0.1911097764968872
epoch: 3 training_loss 0.19056341037154198 test_loss: 0.19524919986724854
epoch: 4 training_loss 0.20634813129901886 test_loss: 0.18663244247436522
epoch: 5 training_loss 0.19680798634886743 test_loss: 0.19884727001190186
epoch: 6 training_loss 0.201234760209918 test_loss: 0.19171597957611083
epoch: 7 training_loss 0.1900411655008793 test_loss: 0.18101174831390382
epoch: 8 training_loss 0.18683128394186496 test_loss: 0.21451833248138427
epoch: 9 training_loss 0.18087412718683482 test_loss: 0.18361057043075563
epoch: 10 training_loss 0.18817362003028393 test_loss: 0.1890340566635132
epoch: 11 training_loss 0.1838860222697258 test_loss: 0.17372807264328002
epoch: 12 training_loss 0.17901827797293662 test_loss: 0.18904398679733275
epoch: 13 training_loss 0.1893147300928831 test_loss: 0.18478951454162598
epoch: 14 training_loss 0.18984967403113842 test_loss: 0.19692622423171996
epoch: 15 training_loss 0.18739574022591113 test_loss: 0.18900954723358154
epoch: 16 training_loss 0.18843135476112366 test_loss: 0.19057470560073853
epoch: 17 training_loss 0.18432341046631337 test_loss: 0.1760816216468811
epoch: 18 training_loss 0.18777889475226403 test_loss: 0.1855862855911255
epoch: 19 training_loss 0.17639070585370065 test_loss: 0.17491098642349243
epoch: 20 training_loss 0.17970162384212018 test_loss: 0.20121705532073975
epoch: 21 training_loss 0.17824960686266422 test_loss: 0.17129024267196655
epoch: 22 training_loss 0.1837965665012598 test_loss: 0.20724718570709227
epoch: 23 training_loss 0.18379140175879002 test_loss: 0.187871253490448
epoch: 24 training_loss 0.18593302443623544 test_loss: 0.17726399898529052
epoch: 25 training_loss 0.1763248211890459 test_loss: 0.1879719853401184
epoch: 26 training_loss 0.17383951656520366 test_loss: 0.18134539127349852
epoch: 27 training_loss 0.1824877668172121 test_loss: 0.17680431604385377
epoch: 28 training_loss 0.1829379614442587 test_loss: 0.19413045644760132
epoch: 29 training_loss 0.18594783790409564 test_loss: 0.19491630792617798
epoch: 30 training_loss 0.1821959675103426 test_loss: 0.17430691719055175
epoch: 31 training_loss 0.17723955564200877 test_loss: 0.17322280406951904
epoch: 32 training_loss 0.18313218779861928 test_loss: 0.20105407238006592
epoch: 33 training_loss 0.17773753367364406 test_loss: 0.1793155312538147
epoch: 34 training_loss 0.18712904877960682 test_loss: 0.1930155873298645
epoch: 35 training_loss 0.1776190436631441 test_loss: 0.17283040285110474
epoch: 36 training_loss 0.17918472323566675 test_loss: 0.17520697116851808
epoch: 37 training_loss 0.18318556919693946 test_loss: 0.19246314764022826
epoch: 38 training_loss 0.1791061394661665 test_loss: 0.17250922918319703
epoch: 39 training_loss 0.18041027452796699 test_loss: 0.19056257009506225
epoch: 40 training_loss 0.17671860232949257 test_loss: 0.18404282331466676
epoch: 41 training_loss 0.18502852879464626 test_loss: 0.18301455974578856
epoch: 42 training_loss 0.17702810510993003 test_loss: 0.16734800338745118
epoch: 43 training_loss 0.18900953128933906 test_loss: 0.187962543964386
epoch: 44 training_loss 0.18124344252049923 test_loss: 0.1947809100151062
epoch: 45 training_loss 0.18641909778118135 test_loss: 0.17102243900299072
epoch: 46 training_loss 0.17353193916380405 test_loss: 0.18852490186691284
epoch: 47 training_loss 0.181935520991683 test_loss: 0.180556321144104
epoch: 48 training_loss 0.17863310486078263 test_loss: 0.18673089742660523
epoch: 49 training_loss 0.17934281200170518 test_loss: 0.17427653074264526
epoch: 50 training_loss 0.18506261333823204 test_loss: 0.19347857236862182
epoch: 51 training_loss 0.17770729459822177 test_loss: 0.18167474269866943
epoch: 52 training_loss 0.1826558965444565 test_loss: 0.1840086340904236
epoch: 53 training_loss 0.1826097760349512 test_loss: 0.1832751989364624
epoch: 54 training_loss 0.17319911535829305 test_loss: 0.17732800245285035
epoch: 55 training_loss 0.18389583215117455 test_loss: 0.17666686773300172
epoch: 56 training_loss 0.174972615018487 test_loss: 0.1714526653289795
epoch: 57 training_loss 0.17986702747642994 test_loss: 0.1851911425590515
epoch: 58 training_loss 0.16720964584499598 test_loss: 0.18092867136001586
epoch: 59 training_loss 0.17828230671584605 test_loss: 0.1916460394859314
epoch: 60 training_loss 0.17917864583432674 test_loss: 0.17483923435211182
epoch: 61 training_loss 0.17383243031799794 test_loss: 0.2113027334213257
epoch: 62 training_loss 0.17717403769493104 test_loss: 0.18536243438720704
epoch: 63 training_loss 0.17948821730911732 test_loss: 0.19702693223953247
epoch: 64 training_loss 0.17594831094145774 test_loss: 0.1891019821166992
epoch: 65 training_loss 0.17851367026567458 test_loss: 0.18562395572662355
epoch: 66 training_loss 0.17924664758145809 test_loss: 0.18619073629379274
epoch: 67 training_loss 0.18137111745774745 test_loss: 0.1830148220062256
epoch: 68 training_loss 0.1840279270708561 test_loss: 0.17760875225067138
epoch: 69 training_loss 0.17838107757270336 test_loss: 0.17851520776748658
epoch: 70 training_loss 0.1702717911452055 test_loss: 0.18046773672103883
epoch: 71 training_loss 0.18226267635822296 test_loss: 0.19043782949447632
epoch: 72 training_loss 0.1800218526273966 test_loss: 0.1733243703842163
epoch: 73 training_loss 0.17724274508655072 test_loss: 0.18775103092193604
epoch: 74 training_loss 0.17815438590943813 test_loss: 0.17365192174911498
epoch: 75 training_loss 0.1872087500244379 test_loss: 0.17682579755783082
epoch: 76 training_loss 0.17606404714286328 test_loss: 0.1880391240119934
epoch: 77 training_loss 0.17408221565186976 test_loss: 0.1757799506187439
epoch: 78 training_loss 0.18672999113798142 test_loss: 0.17713167667388915
epoch: 79 training_loss 0.17888384602963925 test_loss: 0.1771469831466675
epoch: 80 training_loss 0.17715778544545174 test_loss: 0.1762887120246887
epoch: 81 training_loss 0.18580938391387464 test_loss: 0.18736953735351564
epoch: 82 training_loss 0.1746086246147752 test_loss: 0.18402031660079957
epoch: 83 training_loss 0.17521218508481978 test_loss: 0.1898650646209717
epoch: 84 training_loss 0.18312208756804466 test_loss: 0.16969310045242308
epoch: 85 training_loss 0.1778806123137474 test_loss: 0.19214584827423095
epoch: 86 training_loss 0.1731843027472496 test_loss: 0.178059983253479
epoch: 87 training_loss 0.1789470051974058 test_loss: 0.19119168519973756
epoch: 88 training_loss 0.16715847097337247 test_loss: 0.18524482250213622
epoch: 89 training_loss 0.1723296381533146 test_loss: 0.1854741930961609
epoch: 90 training_loss 0.174028835631907 test_loss: 0.20076210498809816
epoch: 91 training_loss 0.1769292353838682 test_loss: 0.18146432638168336
epoch: 92 training_loss 0.17635222498327494 test_loss: 0.19183460474014283
epoch: 93 training_loss 0.17821133255958557 test_loss: 0.1801150321960449
epoch: 94 training_loss 0.176295730471611 test_loss: 0.1743864059448242
epoch: 95 training_loss 0.18125516690313817 test_loss: 0.18452463150024415
epoch: 96 training_loss 0.18117101319134235 test_loss: 0.1704237937927246
epoch: 97 training_loss 0.17383543275296687 test_loss: 0.18886709213256836
epoch: 98 training_loss 0.17396991573274134 test_loss: 0.17209455966949463
epoch: 99 training_loss 0.17835110280662775 test_loss: 0.17449865341186524
epoch: 100 training_loss 0.18417102202773095 test_loss: 0.1775974988937378
epoch: 101 training_loss 0.17757869191467762 test_loss: 0.19637527465820312
epoch: 102 training_loss 0.1848249863833189 test_loss: 0.17078739404678345
epoch: 103 training_loss 0.17019496388733388 test_loss: 0.18590524196624755
epoch: 104 training_loss 0.17380798637866973 test_loss: 0.17217905521392823
epoch: 105 training_loss 0.17547442451119422 test_loss: 0.18704214096069335
epoch: 106 training_loss 0.17337829940021038 test_loss: 0.17693028450012208
epoch: 107 training_loss 0.1735884579271078 test_loss: 0.17497485876083374
epoch: 108 training_loss 0.1704085735231638 test_loss: 0.1871638059616089
epoch: 109 training_loss 0.17462857872247695 test_loss: 0.18445172309875488
epoch: 110 training_loss 0.17299720846116542 test_loss: 0.17441489696502685
epoch: 111 training_loss 0.17406116269528865 test_loss: 0.18559105396270753
epoch: 112 training_loss 0.1741508051007986 test_loss: 0.18606375455856322
epoch: 113 training_loss 0.17258262299001217 test_loss: 0.19183316230773925
epoch: 114 training_loss 0.17233481980860232 test_loss: 0.1800416111946106
epoch: 115 training_loss 0.1740639815479517 test_loss: 0.177886438369751
epoch: 116 training_loss 0.17855156756937504 test_loss: 0.17565858364105225
epoch: 117 training_loss 0.17809260837733745 test_loss: 0.18578861951828002
epoch: 118 training_loss 0.17997532203793526 test_loss: 0.16124887466430665
epoch: 119 training_loss 0.17394755311310292 test_loss: 0.16817182302474976
epoch: 120 training_loss 0.17535002656280996 test_loss: 0.18750327825546265
epoch: 121 training_loss 0.18026174128055572 test_loss: 0.18832942247390747
epoch: 122 training_loss 0.17167483381927012 test_loss: 0.20446176528930665
epoch: 123 training_loss 0.17298628930002452 test_loss: 0.19054050445556642
epoch: 124 training_loss 0.16912800580263138 test_loss: 0.19328449964523314
epoch: 125 training_loss 0.16986812926828862 test_loss: 0.20016121864318848
epoch: 126 training_loss 0.16646937623620034 test_loss: 0.18092883825302125
epoch: 127 training_loss 0.1801315937191248 test_loss: 0.18695663213729857
epoch: 128 training_loss 0.1720407658070326 test_loss: 0.18094110488891602
epoch: 129 training_loss 0.17163912400603296 test_loss: 0.18023719787597656
epoch: 130 training_loss 0.17964129142463206 test_loss: 0.18767061233520507
epoch: 131 training_loss 0.17706906326115132 test_loss: 0.1748746871948242
epoch: 132 training_loss 0.17425235971808434 test_loss: 0.1926036834716797
epoch: 133 training_loss 0.17997046068310737 test_loss: 0.18944902420043946
epoch: 134 training_loss 0.17675642661750315 test_loss: 0.20003280639648438
epoch: 135 training_loss 0.17188889138400554 test_loss: 0.19467250108718873
epoch: 136 training_loss 0.17510149508714676 test_loss: 0.18425171375274657
epoch: 137 training_loss 0.17752550065517425 test_loss: 0.17688511610031127
epoch: 138 training_loss 0.17547563523054122 test_loss: 0.16933587789535523
epoch: 139 training_loss 0.17274534799158572 test_loss: 0.18046973943710326
epoch: 140 training_loss 0.17999864257872106 test_loss: 0.17780388593673707
epoch: 141 training_loss 0.17389366239309312 test_loss: 0.18580267429351807
epoch: 142 training_loss 0.18045553930103778 test_loss: 0.16857682466506957
epoch: 143 training_loss 0.17563000924885272 test_loss: 0.16629058122634888
epoch: 144 training_loss 0.175401793345809 test_loss: 0.17676383256912231
epoch: 145 training_loss 0.17204995416104793 test_loss: 0.17730554342269897
epoch: 146 training_loss 0.17849754109978677 test_loss: 0.17127984762191772
epoch: 147 training_loss 0.18583181917667388 test_loss: 0.17228572368621825
epoch: 148 training_loss 0.17981220789253713 test_loss: 0.18753447532653808
epoch: 149 training_loss 0.17747252531349658 test_loss: 0.19566999673843383
epoch: 0 training_loss 0.26053822964429857 test_loss: 0.21191606521606446
epoch: 1 training_loss 0.20177359908819198 test_loss: 0.19661643505096435
epoch: 2 training_loss 0.1967292994260788 test_loss: 0.19355452060699463
epoch: 3 training_loss 0.1995870751142502 test_loss: 0.1833338499069214
epoch: 4 training_loss 0.1958438678085804 test_loss: 0.19627926349639893
epoch: 5 training_loss 0.1934664338827133 test_loss: 0.17793041467666626
epoch: 6 training_loss 0.18573779113590716 test_loss: 0.19924726486206054
epoch: 7 training_loss 0.19300869591534137 test_loss: 0.1770563006401062
epoch: 8 training_loss 0.19593912348151207 test_loss: 0.19867687225341796
epoch: 9 training_loss 0.1913623471558094 test_loss: 0.18663227558135986
epoch: 10 training_loss 0.18756078161299228 test_loss: 0.18349485397338866
epoch: 11 training_loss 0.18919655494391918 test_loss: 0.1843355417251587
epoch: 12 training_loss 0.1915274301916361 test_loss: 0.19827158451080323
epoch: 13 training_loss 0.19455797664821148 test_loss: 0.17174479961395264
epoch: 14 training_loss 0.18796441435813904 test_loss: 0.18208107948303223
epoch: 15 training_loss 0.18755446150898933 test_loss: 0.19835256338119506
epoch: 16 training_loss 0.18647694148123264 test_loss: 0.18040286302566527
epoch: 17 training_loss 0.1845622880756855 test_loss: 0.2000204563140869
epoch: 18 training_loss 0.19424361579120158 test_loss: 0.18971891403198243
epoch: 19 training_loss 0.19170226700603962 test_loss: 0.17499585151672364
epoch: 20 training_loss 0.17878284461796284 test_loss: 0.18751835823059082
epoch: 21 training_loss 0.18593291237950324 test_loss: 0.18130807876586913
epoch: 22 training_loss 0.1910280368477106 test_loss: 0.18621554374694824
epoch: 23 training_loss 0.18151572093367577 test_loss: 0.19077614545822144
epoch: 24 training_loss 0.182786378338933 test_loss: 0.1961950421333313
epoch: 25 training_loss 0.1763980884104967 test_loss: 0.18921703100204468
epoch: 26 training_loss 0.18295607939362526 test_loss: 0.18475399017333985
epoch: 27 training_loss 0.1921135250478983 test_loss: 0.19236043691635132
epoch: 28 training_loss 0.18506739512085915 test_loss: 0.1746617555618286
epoch: 29 training_loss 0.17533698454499244 test_loss: 0.17859992980957032
epoch: 30 training_loss 0.18514490455389024 test_loss: 0.18546664714813232
epoch: 31 training_loss 0.18283542156219482 test_loss: 0.1871517062187195
epoch: 32 training_loss 0.18707433730363845 test_loss: 0.20099823474884032
epoch: 33 training_loss 0.18823492117226123 test_loss: 0.18978163003921508
epoch: 34 training_loss 0.1741787824034691 test_loss: 0.18890296220779418
epoch: 35 training_loss 0.1816547767072916 test_loss: 0.1854511022567749
epoch: 36 training_loss 0.17857472531497479 test_loss: 0.1903180956840515
epoch: 37 training_loss 0.18390625670552255 test_loss: 0.18921847343444825
epoch: 38 training_loss 0.18136477001011372 test_loss: 0.19112143516540528
epoch: 39 training_loss 0.18454312212765217 test_loss: 0.17339398860931396
epoch: 40 training_loss 0.18673624373972417 test_loss: 0.17727161645889283
epoch: 41 training_loss 0.18092838503420353 test_loss: 0.1971782326698303
epoch: 42 training_loss 0.18755183771252631 test_loss: 0.1876688838005066
epoch: 43 training_loss 0.1809976254403591 test_loss: 0.1740660548210144
epoch: 44 training_loss 0.17966594435274602 test_loss: 0.18351095914840698
epoch: 45 training_loss 0.19375400446355343 test_loss: 0.1695953130722046
epoch: 46 training_loss 0.18143326569348572 test_loss: 0.1941673755645752
epoch: 47 training_loss 0.17905885770916938 test_loss: 0.1801878333091736
epoch: 48 training_loss 0.1827092569321394 test_loss: 0.19705328941345215
epoch: 49 training_loss 0.18182221807539464 test_loss: 0.18530737161636351
epoch: 50 training_loss 0.17952427498996257 test_loss: 0.17908977270126342
epoch: 51 training_loss 0.18202459394931794 test_loss: 0.1916631579399109
epoch: 52 training_loss 0.178879292011261 test_loss: 0.17717461585998534
epoch: 53 training_loss 0.18450385957956314 test_loss: 0.1827650785446167
epoch: 54 training_loss 0.1802185922116041 test_loss: 0.1883646845817566
epoch: 55 training_loss 0.18247286319732667 test_loss: 0.1819943070411682
epoch: 56 training_loss 0.1807450530678034 test_loss: 0.1786929726600647
epoch: 57 training_loss 0.1807590813189745 test_loss: 0.1906925082206726
epoch: 58 training_loss 0.18507323712110518 test_loss: 0.19098949432373047
epoch: 59 training_loss 0.1724295174330473 test_loss: 0.18142447471618653
epoch: 60 training_loss 0.17918568149209022 test_loss: 0.18083797693252562
epoch: 61 training_loss 0.17777948766946794 test_loss: 0.18295249938964844
epoch: 62 training_loss 0.1767485707998276 test_loss: 0.18862197399139405
epoch: 63 training_loss 0.1839047983288765 test_loss: 0.19285157918930054
epoch: 64 training_loss 0.17887230232357978 test_loss: 0.1884640097618103
epoch: 65 training_loss 0.18186705075204374 test_loss: 0.18518413305282594
epoch: 66 training_loss 0.17660755693912505 test_loss: 0.18638020753860474
epoch: 67 training_loss 0.179602787271142 test_loss: 0.1708038330078125
epoch: 68 training_loss 0.17799785166978835 test_loss: 0.17139483690261842
epoch: 69 training_loss 0.1839644368737936 test_loss: 0.18385372161865235
epoch: 70 training_loss 0.1736428814381361 test_loss: 0.1866464138031006
epoch: 71 training_loss 0.17901658669114112 test_loss: 0.19880763292312623
epoch: 72 training_loss 0.17745751067996024 test_loss: 0.18283106088638307
epoch: 73 training_loss 0.17660317912697793 test_loss: 0.18770619630813598
epoch: 74 training_loss 0.18223291903734207 test_loss: 0.16877212524414062
epoch: 75 training_loss 0.17809662900865078 test_loss: 0.18211506605148314
epoch: 76 training_loss 0.18030010864138604 test_loss: 0.19309769868850707
epoch: 77 training_loss 0.17951106563210487 test_loss: 0.1846112608909607
epoch: 78 training_loss 0.1797666621953249 test_loss: 0.1846992254257202
epoch: 79 training_loss 0.1793674888461828 test_loss: 0.18573850393295288
epoch: 80 training_loss 0.17653107546269894 test_loss: 0.1961544394493103
epoch: 81 training_loss 0.17645870633423327 test_loss: 0.18496285676956176
epoch: 82 training_loss 0.1819961229711771 test_loss: 0.18629701137542726
epoch: 83 training_loss 0.1756136192381382 test_loss: 0.1815088987350464
epoch: 84 training_loss 0.18267361663281917 test_loss: 0.1906401038169861
epoch: 85 training_loss 0.17385271720588208 test_loss: 0.1722988486289978
epoch: 86 training_loss 0.18243762902915478 test_loss: 0.16689850091934205
epoch: 87 training_loss 0.1802229118347168 test_loss: 0.17872608900070192
epoch: 88 training_loss 0.1775181993097067 test_loss: 0.16543208360671996
epoch: 89 training_loss 0.17401396989822387 test_loss: 0.17557218074798583
epoch: 90 training_loss 0.18142472364008427 test_loss: 0.18635324239730836
epoch: 91 training_loss 0.17632694941014052 test_loss: 0.18562755584716797
epoch: 92 training_loss 0.17581424057483674 test_loss: 0.17873250246047973
epoch: 93 training_loss 0.17407021060585975 test_loss: 0.18259835243225098
epoch: 94 training_loss 0.167677745744586 test_loss: 0.18659697771072387
epoch: 95 training_loss 0.18050096571445465 test_loss: 0.17850555181503297
epoch: 96 training_loss 0.17245124518871308 test_loss: 0.17866406440734864
epoch: 97 training_loss 0.18940975196659565 test_loss: 0.17561261653900145
epoch: 98 training_loss 0.17700138553977013 test_loss: 0.17515414953231812
epoch: 99 training_loss 0.1838189246505499 test_loss: 0.17657703161239624
epoch: 100 training_loss 0.18407491430640222 test_loss: 0.17999093532562255
epoch: 101 training_loss 0.17706215985119342 test_loss: 0.18963545560836792
epoch: 102 training_loss 0.17748960509896278 test_loss: 0.17680102586746216
epoch: 103 training_loss 0.17863679628819226 test_loss: 0.18439836502075196
epoch: 104 training_loss 0.1853722631931305 test_loss: 0.18098962306976318
epoch: 105 training_loss 0.18703118331730365 test_loss: 0.17116602659225463
epoch: 106 training_loss 0.18428005859255792 test_loss: 0.1738442063331604
epoch: 107 training_loss 0.18074289344251157 test_loss: 0.16791102886199952
epoch: 108 training_loss 0.17368834443390369 test_loss: 0.17606642246246337
epoch: 109 training_loss 0.18116648346185685 test_loss: 0.17469300031661988
epoch: 110 training_loss 0.1750571621209383 test_loss: 0.1605607748031616
epoch: 111 training_loss 0.17700898788869382 test_loss: 0.18188323974609374
epoch: 112 training_loss 0.17639955811202526 test_loss: 0.17237864732742308
epoch: 113 training_loss 0.17744856864213943 test_loss: 0.17708834409713745
epoch: 114 training_loss 0.17708493538200856 test_loss: 0.16953145265579223
epoch: 115 training_loss 0.1797355667501688 test_loss: 0.1856682777404785
epoch: 116 training_loss 0.1804985650628805 test_loss: 0.17092690467834473
epoch: 117 training_loss 0.1737955144047737 test_loss: 0.18804700374603273
epoch: 118 training_loss 0.17387551479041577 test_loss: 0.17662723064422609
epoch: 119 training_loss 0.181846514493227 test_loss: 0.17225531339645386
epoch: 120 training_loss 0.17724983170628547 test_loss: 0.19217936992645263
epoch: 121 training_loss 0.1743035550415516 test_loss: 0.19705803394317628
epoch: 122 training_loss 0.17395401440560818 test_loss: 0.17911816835403443
epoch: 123 training_loss 0.1815585409104824 test_loss: 0.19067091941833497
epoch: 124 training_loss 0.17263549119234084 test_loss: 0.16429296731948853
epoch: 125 training_loss 0.16900151774287223 test_loss: 0.16651344299316406
epoch: 126 training_loss 0.173997705578804 test_loss: 0.18972896337509154
epoch: 127 training_loss 0.17660680040717125 test_loss: 0.17927225828170776
epoch: 128 training_loss 0.17230347380042077 test_loss: 0.1929938793182373
epoch: 129 training_loss 0.17349347658455372 test_loss: 0.19487998485565186
epoch: 130 training_loss 0.16997016295790673 test_loss: 0.19293676614761351
epoch: 131 training_loss 0.17530442886054515 test_loss: 0.19012413024902344
epoch: 132 training_loss 0.18122520439326764 test_loss: 0.17444268465042115
epoch: 133 training_loss 0.17764543198049068 test_loss: 0.1713080048561096
epoch: 134 training_loss 0.173340677395463 test_loss: 0.18412163257598876
epoch: 135 training_loss 0.1815268152952194 test_loss: 0.17003870010375977
epoch: 136 training_loss 0.18099350482225418 test_loss: 0.1848319172859192
epoch: 137 training_loss 0.17203327372670174 test_loss: 0.16565558910369874
epoch: 138 training_loss 0.1805651877820492 test_loss: 0.186784029006958
epoch: 139 training_loss 0.1753137645125389 test_loss: 0.1708039164543152
epoch: 140 training_loss 0.1752404873818159 test_loss: 0.17668665647506715
epoch: 141 training_loss 0.16850662887096404 test_loss: 0.1788240432739258
epoch: 142 training_loss 0.17660690374672414 test_loss: 0.1885865569114685
epoch: 143 training_loss 0.17274815954267977 test_loss: 0.18270905017852784
epoch: 144 training_loss 0.17290272869169712 test_loss: 0.18500980138778686
epoch: 145 training_loss 0.17618125364184378 test_loss: 0.18194596767425536
epoch: 146 training_loss 0.1776035327464342 test_loss: 0.1717849612236023
epoch: 147 training_loss 0.1797228892147541 test_loss: 0.17425904273986817
epoch: 148 training_loss 0.17447870306670665 test_loss: 0.1760489821434021
epoch: 149 training_loss 0.17380886517465113 test_loss: 0.1855144500732422
episode: 0 training return: -602.2524225770094
episode: 1 training return: -550.6904584730626
episode: 2 training return: -782.3330593775744
episode: 3 training return: -590.3399671963915
epoch: 1 test_true_pfm: -106.00461157332263 sim_pfm: -632.3105761259318
episode: 4 training return: -579.8001328831665
episode: 5 training return: -899.0355299386495
episode: 6 training return: -560.2060050511478
episode: 7 training return: -868.3675382045003
epoch: 2 test_true_pfm: 310.2532256653903 sim_pfm: -852.6649997581289
episode: 8 training return: -850.8473839802915
episode: 9 training return: -569.3717599315983
episode: 10 training return: -868.4974558560825
episode: 11 training return: -881.4021327649097
epoch: 3 test_true_pfm: 216.62176323446525 sim_pfm: -887.9777808844173
episode: 12 training return: -845.1433590426972
episode: 13 training return: -717.5433357313985
episode: 14 training return: -867.2258743422715
episode: 15 training return: -778.3718935295338
epoch: 4 test_true_pfm: 303.89916433184345 sim_pfm: -814.7608293850532
episode: 16 training return: -683.2698249075079
episode: 17 training return: -591.9999838442973
episode: 18 training return: -445.3236455371326
episode: 19 training return: -469.7615853133806
epoch: 5 test_true_pfm: 138.26531149931938 sim_pfm: -628.8894998430727
episode: 20 training return: -531.9268089322351
episode: 21 training return: -626.2159519683165
episode: 22 training return: -452.99428418292786
episode: 23 training return: -591.2968351621256
epoch: 6 test_true_pfm: 129.84338118133186 sim_pfm: -631.7322712719475
episode: 24 training return: -445.40132960132405
episode: 25 training return: -513.562426090753
episode: 26 training return: -554.0916576378889
episode: 27 training return: -314.22794189944386
epoch: 7 test_true_pfm: -49.967538861051224 sim_pfm: -397.17107025415413
episode: 28 training return: -429.7468162147898
episode: 29 training return: -303.9127202307165
episode: 30 training return: -429.8046589243811
episode: 31 training return: -409.196395586036
epoch: 8 test_true_pfm: -46.540996461972036 sim_pfm: -383.10359188192
episode: 32 training return: -414.0448431710319
episode: 33 training return: -453.7687799362251
episode: 34 training return: -576.1446077027884
episode: 35 training return: -633.144795604815
epoch: 9 test_true_pfm: -113.19178326196241 sim_pfm: -483.2188334992864
episode: 36 training return: -633.3135329274579
episode: 37 training return: -472.28800938920074
episode: 38 training return: -574.179450156142
episode: 39 training return: -479.53887107787483
epoch: 10 test_true_pfm: 123.05512207782066 sim_pfm: -522.7258393552739
episode: 40 training return: -572.3582028979878
episode: 41 training return: -556.1420368647981
episode: 42 training return: -492.6667314379258
episode: 43 training return: -444.6367493593972
epoch: 11 test_true_pfm: 334.6582011067999 sim_pfm: -740.3554628890419
episode: 44 training return: -648.4926377963515
episode: 45 training return: -398.1235717870865
episode: 46 training return: -846.8273268723556
episode: 47 training return: -829.9867739201503
epoch: 12 test_true_pfm: 254.1455727224774 sim_pfm: -857.3452931103087
episode: 48 training return: -852.5839162122585
episode: 49 training return: -830.9024303561883
episode: 50 training return: -837.3482993146232
episode: 51 training return: -792.4541540949402
epoch: 13 test_true_pfm: 289.40752494949146 sim_pfm: -810.2508527448681
episode: 52 training return: -779.1988567639303
episode: 53 training return: -627.8461210926044
episode: 54 training return: -548.0480425048119
episode: 55 training return: -538.3317086662995
epoch: 14 test_true_pfm: 414.2365411925557 sim_pfm: -380.48840736890816
episode: 56 training return: -378.7943556703488
episode: 57 training return: -412.6525783085807
episode: 58 training return: -380.99899672180595
episode: 59 training return: -377.1578995829992
epoch: 15 test_true_pfm: 369.98746244909154 sim_pfm: -430.4196213727735
episode: 60 training return: -431.04440542020154
episode: 61 training return: -463.5671957163642
episode: 62 training return: -325.17424979077913
episode: 63 training return: -384.52862433548916
epoch: 16 test_true_pfm: 504.47530570067283 sim_pfm: -317.92161861235206
episode: 64 training return: -346.3540029389562
episode: 65 training return: -445.52479021321517
episode: 66 training return: -310.946682529192
episode: 67 training return: -415.80097363278986
epoch: 17 test_true_pfm: 440.8396380513058 sim_pfm: -342.75204132959306
episode: 68 training return: -376.2945513565214
episode: 69 training return: -413.7709753070925
episode: 70 training return: -387.54879707644056
episode: 71 training return: -400.01839013852407
epoch: 18 test_true_pfm: 634.7685143705472 sim_pfm: -302.12724719422
episode: 72 training return: -372.07926461041353
episode: 73 training return: -362.08895096789024
episode: 74 training return: -387.3107319912424
episode: 75 training return: -373.29551253321205
epoch: 19 test_true_pfm: 578.4055938048209 sim_pfm: -293.92405773782843
episode: 76 training return: -372.0134341691455
episode: 77 training return: -360.0981903110431
episode: 78 training return: -329.7233235450644
episode: 79 training return: -341.0663693101805
epoch: 20 test_true_pfm: 626.3031308231307 sim_pfm: -269.5335574549542
episode: 80 training return: -350.06639606041324
episode: 81 training return: -360.72176098273576
episode: 82 training return: -302.1755015352019
episode: 83 training return: -257.42318871637667
epoch: 21 test_true_pfm: 558.0944985657857 sim_pfm: -294.78299779603265
episode: 84 training return: -386.6404859295066
episode: 85 training return: -300.6540844988386
episode: 86 training return: -409.0035028779655
episode: 87 training return: -412.6125539528979
epoch: 22 test_true_pfm: 773.9287683832335 sim_pfm: -137.19298636314556
episode: 88 training return: -378.86980230494527
episode: 89 training return: -318.20737534759735
episode: 90 training return: -339.4908095418824
episode: 91 training return: -341.1312508115164
epoch: 23 test_true_pfm: 757.024776804637 sim_pfm: -170.270205716707
episode: 92 training return: -321.1027896370574
episode: 93 training return: -342.9402737735702
episode: 94 training return: -270.41674485815446
episode: 95 training return: -285.6355296971147
epoch: 24 test_true_pfm: 721.1189612279708 sim_pfm: -185.70761511961004
episode: 96 training return: -327.2689145914427
episode: 97 training return: -271.7702648061486
episode: 98 training return: -263.86824098747246
episode: 99 training return: -290.070815941955
epoch: 25 test_true_pfm: 668.3913939010986 sim_pfm: -239.50845405777534
episode: 100 training return: -303.5122523981336
episode: 101 training return: -248.72127095475045
episode: 102 training return: -288.6304066084424
episode: 103 training return: -321.7802655498389
epoch: 26 test_true_pfm: 756.8697985729467 sim_pfm: -173.2530423473805
episode: 104 training return: -254.43169183357654
episode: 105 training return: -245.11364732563501
episode: 106 training return: -272.28792158415814
episode: 107 training return: -282.35715379059536
epoch: 27 test_true_pfm: 793.7330071926193 sim_pfm: -153.19932429075467
episode: 108 training return: -267.1846215431441
episode: 109 training return: -281.3105499187704
episode: 110 training return: -248.50621161572982
episode: 111 training return: -247.47917619268955
epoch: 28 test_true_pfm: 814.6686963227367 sim_pfm: -114.7284591849055
episode: 112 training return: -285.29980421528575
episode: 113 training return: -303.09195070619126
episode: 114 training return: -257.659946868141
episode: 115 training return: -192.17806075425514
epoch: 29 test_true_pfm: 973.1227483576298 sim_pfm: -85.39308590811105
episode: 116 training return: -218.0559732456557
episode: 117 training return: -240.4038126886661
episode: 118 training return: -167.4893410124767
episode: 119 training return: -244.3447398164349
epoch: 30 test_true_pfm: 882.227856126307 sim_pfm: -108.5297905835534
episode: 120 training return: -172.86105480199404
episode: 121 training return: -215.03468200960722
episode: 122 training return: -244.11225629734776
episode: 123 training return: -238.68978129922127
epoch: 31 test_true_pfm: 844.5128728319891 sim_pfm: -119.58859645549519
episode: 124 training return: -154.0907172099686
episode: 125 training return: -174.93111841022167
episode: 126 training return: -182.99965061366808
episode: 127 training return: -133.07326249345428
epoch: 32 test_true_pfm: 1068.438683828271 sim_pfm: -17.8045508384096
episode: 128 training return: -153.4046833074212
episode: 129 training return: -140.90940682308639
episode: 130 training return: -175.6062510755613
episode: 131 training return: -159.91289681243268
epoch: 33 test_true_pfm: 879.6744525249934 sim_pfm: -80.38072335613636
episode: 132 training return: -166.59666850744384
episode: 133 training return: -172.89868921242697
episode: 134 training return: -169.4686898523934
episode: 135 training return: -142.66786908748944
epoch: 34 test_true_pfm: 1213.1480530319689 sim_pfm: 22.025153184767603
episode: 136 training return: -84.50483923464108
episode: 137 training return: -95.66203068959311
episode: 138 training return: -167.2502018458969
episode: 139 training return: -107.06904083550121
epoch: 35 test_true_pfm: 1019.4501138554269 sim_pfm: -48.43746648032232
episode: 140 training return: -123.05989217948053
episode: 141 training return: -121.52104083899576
episode: 142 training return: -158.7233887940432
episode: 143 training return: -128.5579673858632
epoch: 36 test_true_pfm: 1185.8511939287714 sim_pfm: 26.349543539486138
episode: 144 training return: -145.6556626356786
episode: 145 training return: -113.42616356884011
episode: 146 training return: -139.58347273341158
episode: 147 training return: -78.84202861862258
epoch: 37 test_true_pfm: 1103.9446433472788 sim_pfm: 15.772601870077528
episode: 148 training return: -111.95851909405718
episode: 149 training return: -133.60311345468384
episode: 150 training return: -102.50942428236613
episode: 151 training return: -111.23310147489424
epoch: 38 test_true_pfm: 1240.4732097130998 sim_pfm: 27.06153270850018
episode: 152 training return: -88.9805485355223
episode: 153 training return: -96.04585931062427
episode: 154 training return: -118.89966049424413
episode: 155 training return: -141.9089500667376
epoch: 39 test_true_pfm: 1075.5437362486255 sim_pfm: -11.87027736951054
episode: 156 training return: -103.69406871129756
episode: 157 training return: -168.54705028006583
episode: 158 training return: -35.72469167304955
episode: 159 training return: -65.71447490382101
epoch: 40 test_true_pfm: 1238.6872985954421 sim_pfm: 41.61416459973589
episode: 160 training return: -31.600289605598665
episode: 161 training return: -78.92581653020956
episode: 162 training return: -121.88322514574672
episode: 163 training return: -54.347009408056344
epoch: 41 test_true_pfm: 1295.6613995755024 sim_pfm: 35.712402887766174
episode: 164 training return: -53.82341548943403
episode: 165 training return: -17.155137952364214
episode: 166 training return: -95.57029906316247
episode: 167 training return: -20.82947399288891
epoch: 42 test_true_pfm: 1091.2798567344091 sim_pfm: -14.296710818538623
episode: 168 training return: -68.1082004813898
episode: 169 training return: -103.28975584171184
episode: 170 training return: -80.34704310343756
episode: 171 training return: -65.43963656880514
epoch: 43 test_true_pfm: 1410.9478190184127 sim_pfm: 53.90294060903207
episode: 172 training return: -72.78509060590648
episode: 173 training return: -7.137699242037274
episode: 174 training return: -64.21168269876469
episode: 175 training return: 49.445656663330084
epoch: 44 test_true_pfm: 1324.9632601101555 sim_pfm: 53.08069121904234
episode: 176 training return: -71.73341346962894
episode: 177 training return: -141.4562768815236
episode: 178 training return: -35.80596580719253
episode: 179 training return: -82.52735767401028
epoch: 45 test_true_pfm: 1348.2844035263045 sim_pfm: 58.546677124060274
episode: 180 training return: -48.532621539826984
episode: 181 training return: 27.01773431290126
episode: 182 training return: -50.263206612976354
episode: 183 training return: -80.35960924571424
epoch: 46 test_true_pfm: 1391.4984123501824 sim_pfm: 46.83690101410329
episode: 184 training return: -75.14941390513093
episode: 185 training return: -31.623469918704835
episode: 186 training return: -20.0420597459061
episode: 187 training return: -35.64932588362312
epoch: 47 test_true_pfm: 1407.2519047919322 sim_pfm: 81.32597758837962
episode: 188 training return: -68.39612510938613
episode: 189 training return: -63.601508099270646
episode: 190 training return: -52.18963531583105
episode: 191 training return: -13.075467866277684
epoch: 48 test_true_pfm: 1458.513180444483 sim_pfm: 92.58336040648948
episode: 192 training return: 24.026452080329257
episode: 193 training return: 9.078388141473837
episode: 194 training return: -40.406558594648686
episode: 195 training return: -94.07359673830199
epoch: 49 test_true_pfm: 1346.910716678626 sim_pfm: 108.43235682788503
episode: 196 training return: -31.41394405665314
episode: 197 training return: 8.654089651654582
episode: 198 training return: -35.13012805961616
episode: 199 training return: -34.64910200778172
epoch: 50 test_true_pfm: 1367.7757188679723 sim_pfm: 54.042999702541174
episode: 200 training return: 58.071883632647996
episode: 201 training return: 18.88441356381772
episode: 202 training return: -10.382712083603238
episode: 203 training return: -64.52047427938552
epoch: 51 test_true_pfm: 1374.2630652527732 sim_pfm: 59.39132034357311
episode: 204 training return: 8.997622562086782
episode: 205 training return: 58.578935171656376
episode: 206 training return: 22.82639032573952
episode: 207 training return: -111.52861725283412
epoch: 52 test_true_pfm: 1504.270783078413 sim_pfm: 117.46865878176385
episode: 208 training return: -93.72973414380459
episode: 209 training return: -91.8332974752406
episode: 210 training return: 9.016297928805344
episode: 211 training return: -32.49923443613201
epoch: 53 test_true_pfm: 1640.952309225733 sim_pfm: 140.26667126138872
episode: 212 training return: -109.61318937454814
episode: 213 training return: -27.252291900472027
episode: 214 training return: -54.43651893224926
episode: 215 training return: -13.652074173352988
epoch: 54 test_true_pfm: 1339.6451712679534 sim_pfm: 66.21057538590374
episode: 216 training return: 26.743829729015886
episode: 217 training return: -66.67953893556496
episode: 218 training return: -0.0968339329448411
episode: 219 training return: -63.294638011446665
epoch: 55 test_true_pfm: 1352.0221494805548 sim_pfm: 72.53132019389689
episode: 220 training return: -37.788571899968005
episode: 221 training return: -22.064080623510964
episode: 222 training return: -12.353205874261937
episode: 223 training return: 0.3628614394887249
epoch: 56 test_true_pfm: 1284.7530677216428 sim_pfm: 62.68140348594884
episode: 224 training return: 3.010192836210759
episode: 225 training return: 50.59575923133596
episode: 226 training return: -20.161652174593318
episode: 227 training return: -44.851615937441174
epoch: 57 test_true_pfm: 1215.9949347432348 sim_pfm: 35.25440838347856
episode: 228 training return: 10.045022350755428
episode: 229 training return: -12.2579864623149
episode: 230 training return: -20.85101602780906
episode: 231 training return: -29.351049285999608
epoch: 58 test_true_pfm: 1365.6397121127764 sim_pfm: 62.82505391492757
episode: 232 training return: -72.1620943589969
episode: 233 training return: -37.14404162198103
episode: 234 training return: -33.56891675435711
episode: 235 training return: -78.86163718814684
epoch: 59 test_true_pfm: 1650.4452190393831 sim_pfm: 157.6928130677098
episode: 236 training return: -37.861193898143874
episode: 237 training return: 20.056481814857186
episode: 238 training return: -47.628081687315174
episode: 239 training return: -86.18340008681487
epoch: 60 test_true_pfm: 1444.7578105386413 sim_pfm: 125.59960871202826
episode: 240 training return: 3.243102523803569
episode: 241 training return: -81.72739068785953
episode: 242 training return: -57.120053535558085
episode: 243 training return: 88.80289933576518
epoch: 61 test_true_pfm: 1462.756199030636 sim_pfm: 91.27164814018779
episode: 244 training return: 32.90883432112965
episode: 245 training return: -5.4160995002018915
episode: 246 training return: 1.4148011326258663
episode: 247 training return: 22.206434230614157
epoch: 62 test_true_pfm: 1495.0265909165053 sim_pfm: 61.42513347540555
episode: 248 training return: 3.4983878552068526
episode: 249 training return: 22.72018094339477
episode: 250 training return: -76.36930716556543
episode: 251 training return: 25.654230111243805
epoch: 63 test_true_pfm: 1560.7131321816726 sim_pfm: 141.46926801728713
episode: 252 training return: 17.981904022561423
episode: 253 training return: -24.873284922324768
episode: 254 training return: -13.741681640675498
episode: 255 training return: 5.456845672687205
epoch: 64 test_true_pfm: 1330.9587413072582 sim_pfm: 69.94851087664745
episode: 256 training return: -36.4546507809981
episode: 257 training return: 39.56828757076789
episode: 258 training return: 103.29388062679014
episode: 259 training return: -52.209131909194426
epoch: 65 test_true_pfm: 1488.9110810203772 sim_pfm: 125.5048759784093
episode: 260 training return: 53.107142031290905
episode: 261 training return: 47.77688740463809
episode: 262 training return: -48.89793215006258
episode: 263 training return: 28.856712576819994
epoch: 66 test_true_pfm: 1497.192372183239 sim_pfm: 98.67475002775852
episode: 264 training return: 70.4648487394604
episode: 265 training return: -37.89170406075924
episode: 266 training return: -18.581990864546903
episode: 267 training return: -18.646208667142908
epoch: 67 test_true_pfm: 1766.2842541540547 sim_pfm: 185.93155220447684
episode: 268 training return: 41.83701317053536
episode: 269 training return: -15.638499463343766
episode: 270 training return: -72.37129958813615
episode: 271 training return: 12.650145185058674
epoch: 68 test_true_pfm: 1568.1986217075425 sim_pfm: 129.5862023738142
episode: 272 training return: 58.412245103753776
episode: 273 training return: 32.43384496719478
episode: 274 training return: 13.0941864396526
episode: 275 training return: 58.59836422647386
epoch: 69 test_true_pfm: 1227.5033010115033 sim_pfm: 36.0724017461256
episode: 276 training return: 2.5194233440697413
episode: 277 training return: 49.73508752826561
episode: 278 training return: -38.24162028607066
episode: 279 training return: 32.0647306159141
epoch: 70 test_true_pfm: 1465.2813912783186 sim_pfm: 33.513042990389295
episode: 280 training return: 46.748137923713784
episode: 281 training return: 70.18245052919384
episode: 282 training return: -21.520427669982396
episode: 283 training return: 55.39370696561158
epoch: 71 test_true_pfm: 1547.825715703362 sim_pfm: 94.04768991268865
episode: 284 training return: 58.06510658783686
episode: 285 training return: 20.541503716739175
episode: 286 training return: 57.27374530208584
episode: 287 training return: 59.83757916027723
epoch: 72 test_true_pfm: 1608.962551777396 sim_pfm: 140.2148508222281
episode: 288 training return: 55.121178611295825
episode: 289 training return: -20.67956447734086
episode: 290 training return: 82.99463148979301
episode: 291 training return: -11.540989682258434
epoch: 73 test_true_pfm: 1704.313636768414 sim_pfm: 172.81581127525843
episode: 292 training return: 88.64522183171675
episode: 293 training return: -37.28922362123309
episode: 294 training return: 31.31220550783583
episode: 295 training return: 125.51514779593396
epoch: 74 test_true_pfm: 1536.0288026215849 sim_pfm: 132.63735262723083
episode: 296 training return: 97.50737929691118
episode: 297 training return: 22.66744143793988
episode: 298 training return: 57.81557664139966
episode: 299 training return: 40.66513745077188
epoch: 75 test_true_pfm: 1770.8778592894657 sim_pfm: 185.60966908762023
episode: 300 training return: 17.80565829779473
episode: 301 training return: 21.184115151158174
episode: 302 training return: 58.917907539310136
episode: 303 training return: 122.71809663964557
epoch: 76 test_true_pfm: 1720.3679900164568 sim_pfm: 208.19427302915267
episode: 304 training return: 30.43699505285623
episode: 305 training return: 71.89252385480025
episode: 306 training return: 55.24454566154112
episode: 307 training return: -23.015210940037388
epoch: 77 test_true_pfm: 1749.1788390991242 sim_pfm: 168.91169083519284
episode: 308 training return: 79.90592019236625
episode: 309 training return: 67.11063013084724
episode: 310 training return: 38.846679006004614
episode: 311 training return: 110.8507420636425
epoch: 78 test_true_pfm: 1698.2967754411695 sim_pfm: 187.62794203959535
episode: 312 training return: 76.6589716444782
episode: 313 training return: 16.112239523879293
episode: 314 training return: 35.604794088870065
episode: 315 training return: 86.86009536923068
epoch: 79 test_true_pfm: 1598.8001387553898 sim_pfm: 119.6022312169202
episode: 316 training return: 38.49225445699847
episode: 317 training return: 37.03051433221667
episode: 318 training return: 43.49999095495023
episode: 319 training return: 58.61779559309029
epoch: 80 test_true_pfm: 1700.8848549683744 sim_pfm: 157.6688013334722
episode: 320 training return: 122.000991533824
episode: 321 training return: 54.154135409963146
episode: 322 training return: 44.08155713990644
episode: 323 training return: 62.65641870345086
epoch: 81 test_true_pfm: 1749.187554754437 sim_pfm: 160.5877323780008
episode: 324 training return: 44.167607129209465
episode: 325 training return: 130.6827518900943
episode: 326 training return: 76.81731222365306
episode: 327 training return: 73.97673213376456
epoch: 82 test_true_pfm: 1919.783622485011 sim_pfm: 160.0907987915506
episode: 328 training return: -9.398273214169931
episode: 329 training return: 23.944549584421075
episode: 330 training return: 29.18755939742198
episode: 331 training return: 79.0612925315905
epoch: 83 test_true_pfm: 1748.3059161841677 sim_pfm: 182.09175070607523
episode: 332 training return: 79.69816710623765
episode: 333 training return: 57.374208424996574
episode: 334 training return: 62.095928035062315
episode: 335 training return: 121.56460184203935
epoch: 84 test_true_pfm: 1535.7958428394597 sim_pfm: 132.22898019268973
episode: 336 training return: 27.002211324049444
episode: 337 training return: 77.93930691185693
episode: 338 training return: -8.354824990026923
episode: 339 training return: 30.171400688490145
epoch: 85 test_true_pfm: 1782.3340510880269 sim_pfm: 182.10285555802656
episode: 340 training return: 58.452981159568246
episode: 341 training return: 52.3065780558637
episode: 342 training return: -2.0476198991965875
episode: 343 training return: 62.19276688454529
epoch: 86 test_true_pfm: 1774.342897272439 sim_pfm: 210.12232092540515
episode: 344 training return: 66.29566298632545
episode: 345 training return: 31.53413285226967
episode: 346 training return: 117.13910946287059
episode: 347 training return: 48.990354929058896
epoch: 87 test_true_pfm: 1803.15534778192 sim_pfm: 224.11894942092158
episode: 348 training return: 52.8085737852452
episode: 349 training return: 49.27749039861693
episode: 350 training return: 36.86817948776748
episode: 351 training return: 3.820224834713506
epoch: 88 test_true_pfm: 1794.761424326587 sim_pfm: 194.45013056388248
episode: 352 training return: 84.20266369886016
episode: 353 training return: 89.26961138678082
episode: 354 training return: -21.156284875789982
episode: 355 training return: 89.3499107937493
epoch: 89 test_true_pfm: 1585.0897511162937 sim_pfm: 106.89504296459347
episode: 356 training return: 42.581938205983015
episode: 357 training return: 57.96126527914525
episode: 358 training return: 21.016680415805723
episode: 359 training return: 73.68203796208911
epoch: 90 test_true_pfm: 1629.5544099678627 sim_pfm: 165.1503044239661
episode: 360 training return: 62.34410617702102
episode: 361 training return: 114.68982706992219
episode: 362 training return: 111.95438576797146
episode: 363 training return: 81.17551800255978
epoch: 91 test_true_pfm: 1768.073918343911 sim_pfm: 203.5960886159373
episode: 364 training return: 131.84686611539433
episode: 365 training return: 80.19908067578372
episode: 366 training return: 100.74978298526581
episode: 367 training return: 148.80193426746462
epoch: 92 test_true_pfm: 1797.2258781287792 sim_pfm: 184.15683175093
episode: 368 training return: 20.429228559519427
episode: 369 training return: 37.14265064168985
episode: 370 training return: 64.36784838121977
episode: 371 training return: 110.31443043076246
epoch: 93 test_true_pfm: 1661.7884870494345 sim_pfm: 177.1985597979233
episode: 372 training return: 73.98774816010956
episode: 373 training return: 67.9179587831366
episode: 374 training return: 30.77554754769024
episode: 375 training return: 65.30524704591447
epoch: 94 test_true_pfm: 1654.6070618814965 sim_pfm: 173.6271632403435
episode: 376 training return: 125.17384892293842
episode: 377 training return: -17.34367129104578
episode: 378 training return: 55.1595108457235
episode: 379 training return: 114.79103391135486
epoch: 95 test_true_pfm: 1710.9743311749392 sim_pfm: 192.07613975206422
episode: 380 training return: 72.14670850434456
episode: 381 training return: 54.785229799398145
episode: 382 training return: 101.27765693478766
episode: 383 training return: 78.40204293987155
epoch: 96 test_true_pfm: 1772.323138310414 sim_pfm: 208.5393864305315
episode: 384 training return: 104.96120244955839
episode: 385 training return: 153.11567516007213
episode: 386 training return: 92.94221926812381
episode: 387 training return: 47.79383165357549
epoch: 97 test_true_pfm: 1793.0752986147681 sim_pfm: 189.32612585939765
episode: 388 training return: 83.5077094604341
episode: 389 training return: 51.67885017505426
episode: 390 training return: -9.511097402624843
episode: 391 training return: 55.39736053386119
epoch: 98 test_true_pfm: 1800.7535442171602 sim_pfm: 200.41963649667142
episode: 392 training return: 73.8063258895036
episode: 393 training return: 52.834954838534976
episode: 394 training return: 93.04113977870037
episode: 395 training return: 95.23155922837415
epoch: 99 test_true_pfm: 1798.3957579425307 sim_pfm: 217.35037771983582
episode: 396 training return: 120.50528551048839
episode: 397 training return: -1.6261202223445101
episode: 398 training return: 104.81250901330068
episode: 399 training return: 62.64868264219861
epoch: 100 test_true_pfm: 1803.1706561273024 sim_pfm: 205.31518252259374
episode: 400 training return: 65.99569310953184
episode: 401 training return: 47.776944196967655
episode: 402 training return: 69.801361350561
episode: 403 training return: 140.26637709772336
epoch: 101 test_true_pfm: 1836.8137724307326 sim_pfm: 214.39926075968023
episode: 404 training return: 56.025103513660454
episode: 405 training return: 70.12140493044534
episode: 406 training return: 63.514947328450965
episode: 407 training return: 86.75959126119622
epoch: 102 test_true_pfm: 1829.4958602552163 sim_pfm: 209.9996590087641
episode: 408 training return: 98.18907194535582
episode: 409 training return: 81.91731393207195
episode: 410 training return: 150.57500451545127
episode: 411 training return: 100.32927328368064
epoch: 103 test_true_pfm: 1833.1045251266644 sim_pfm: 236.3646897708244
episode: 412 training return: 64.79745562925721
episode: 413 training return: 61.16419754691596
episode: 414 training return: 59.10794461372445
episode: 415 training return: 73.86782784396189
epoch: 104 test_true_pfm: 1919.0255865324432 sim_pfm: 219.25963944237023
episode: 416 training return: 124.2422301330368
episode: 417 training return: 64.09305681315767
episode: 418 training return: 66.85250757832746
episode: 419 training return: 105.09315331381141
epoch: 105 test_true_pfm: 1804.4437699580446 sim_pfm: 196.82030535174667
episode: 420 training return: 88.57780104010673
episode: 421 training return: 128.2865781310705
episode: 422 training return: 46.144684396231106
episode: 423 training return: 56.21228397778394
epoch: 106 test_true_pfm: 1836.3384473211038 sim_pfm: 222.44313875575213
episode: 424 training return: 92.31847603082154
episode: 425 training return: 22.475337937590226
episode: 426 training return: 86.5241699866411
episode: 427 training return: 99.79739747478968
epoch: 107 test_true_pfm: 1627.9689188929026 sim_pfm: 193.29328178433101
episode: 428 training return: 53.03425518511275
episode: 429 training return: -10.3720690077509
episode: 430 training return: 41.98467104999561
episode: 431 training return: 91.69919097027662
epoch: 108 test_true_pfm: 1590.7289826603471 sim_pfm: 195.8769128326319
episode: 432 training return: 112.4622865548762
episode: 433 training return: 108.38228615225069
episode: 434 training return: 52.50343418216503
episode: 435 training return: 95.31307188398618
epoch: 109 test_true_pfm: 1666.0955946970614 sim_pfm: 210.6179393330098
episode: 436 training return: 69.34875226067429
episode: 437 training return: 24.599196265909594
episode: 438 training return: 119.24678320349543
episode: 439 training return: 47.2980209182667
epoch: 110 test_true_pfm: 1758.6830199351934 sim_pfm: 205.7681894507267
episode: 440 training return: 35.218732559515175
episode: 441 training return: 22.998107749167254
episode: 442 training return: 96.71110175652653
episode: 443 training return: 43.060538620969844
epoch: 111 test_true_pfm: 1638.6900463758318 sim_pfm: 193.99504324786417
episode: 444 training return: 47.39733579607221
episode: 445 training return: 135.45923294327844
episode: 446 training return: 58.42696174461713
episode: 447 training return: 77.61060715516506
epoch: 112 test_true_pfm: 1654.8616238030945 sim_pfm: 137.57597479133497
episode: 448 training return: 38.27565831316694
episode: 449 training return: 71.44258662308414
episode: 450 training return: 92.60223493066373
episode: 451 training return: 27.78414906864976
epoch: 113 test_true_pfm: 1671.0669100331434 sim_pfm: 160.55587051875952
episode: 452 training return: 66.55840814715292
episode: 453 training return: 95.72487751046414
episode: 454 training return: 129.54880505712433
episode: 455 training return: 145.462130077537
epoch: 114 test_true_pfm: 1890.5173325870257 sim_pfm: 206.13794123256335
episode: 456 training return: 116.14951442151653
episode: 457 training return: 150.42755470383452
episode: 458 training return: 125.629131119578
episode: 459 training return: 116.85604638061379
epoch: 115 test_true_pfm: 1720.3701376765196 sim_pfm: 183.58584905300654
episode: 460 training return: 115.02016458025177
episode: 461 training return: 12.741578691693977
episode: 462 training return: 116.49113335787716
episode: 463 training return: 41.76208117573701
epoch: 116 test_true_pfm: 1745.9279879757678 sim_pfm: 201.85349636245178
episode: 464 training return: 59.092769478217484
episode: 465 training return: 143.8537679429115
episode: 466 training return: 105.83725441144327
episode: 467 training return: 106.96820546874211
epoch: 117 test_true_pfm: 1806.9504563784383 sim_pfm: 221.5375636397995
episode: 468 training return: 70.12451851221601
episode: 469 training return: 95.57172350656889
episode: 470 training return: 145.6222132983416
episode: 471 training return: 140.11074180537088
epoch: 118 test_true_pfm: 1768.9422451726161 sim_pfm: 202.69436431292863
episode: 472 training return: 96.90931501464145
episode: 473 training return: 78.68953002224445
episode: 474 training return: 134.54565231879124
episode: 475 training return: 110.30684196280828
epoch: 119 test_true_pfm: 1730.4503034382803 sim_pfm: 179.75356204771026
episode: 476 training return: 67.7889763106587
episode: 477 training return: 114.44301569501448
episode: 478 training return: 65.96145097359359
episode: 479 training return: 65.57488342399337
epoch: 120 test_true_pfm: 1847.8602233999127 sim_pfm: 216.7941617382534
episode: 480 training return: 138.39777180412878
episode: 481 training return: 146.92805033063962
episode: 482 training return: 97.10398713061862
episode: 483 training return: 91.27006233918497
epoch: 121 test_true_pfm: 1728.5289160236473 sim_pfm: 193.1927887621996
episode: 484 training return: 168.50063745840944
episode: 485 training return: 101.29627425857736
episode: 486 training return: 30.653430740032956
episode: 487 training return: 107.47587926992593
epoch: 122 test_true_pfm: 1819.9897850119987 sim_pfm: 186.13829590919204
episode: 488 training return: 130.65889856262527
episode: 489 training return: 91.44523589063253
episode: 490 training return: 142.47783689027216
episode: 491 training return: 158.1414987672314
epoch: 123 test_true_pfm: 1630.0813751086162 sim_pfm: 109.39157163106786
episode: 492 training return: 109.04326891737786
episode: 493 training return: 102.20930371827316
episode: 494 training return: 59.697723315072
episode: 495 training return: 84.10634496501248
epoch: 124 test_true_pfm: 1878.741670851031 sim_pfm: 254.15205563260506
episode: 496 training return: 137.11746076149325
episode: 497 training return: 99.18747649588362
episode: 498 training return: 89.44073740414191
episode: 499 training return: 80.35240982964416
epoch: 125 test_true_pfm: 1699.8554614841353 sim_pfm: 184.78480471640782
episode: 500 training return: 104.61856066673901
episode: 501 training return: 76.21937039208053
episode: 502 training return: 109.80673944634947
episode: 503 training return: 36.77091978037407
epoch: 126 test_true_pfm: 1753.7455822604945 sim_pfm: 198.1038647976768
episode: 504 training return: 83.76517290603432
episode: 505 training return: 98.32995629114411
episode: 506 training return: 65.36499177516208
episode: 507 training return: 65.4423767079444
epoch: 127 test_true_pfm: 1789.2912881469565 sim_pfm: 228.95906928802867
episode: 508 training return: 105.87011401092057
episode: 509 training return: 112.17759239357886
episode: 510 training return: 58.63987957639196
episode: 511 training return: 100.81947218066017
epoch: 128 test_true_pfm: 1883.4193064848057 sim_pfm: 249.56799571102067
episode: 512 training return: 64.01106227213151
episode: 513 training return: 104.84647927329056
episode: 514 training return: 81.30543055909962
episode: 515 training return: 114.96714324958337
epoch: 129 test_true_pfm: 1928.450937270377 sim_pfm: 279.49468206002683
episode: 516 training return: 65.62397001526541
episode: 517 training return: 126.44819701881232
episode: 518 training return: 153.0726761424677
episode: 519 training return: 128.30519494418388
epoch: 130 test_true_pfm: 1791.3726366003948 sim_pfm: 215.81311076557384
episode: 520 training return: 144.07808475146575
episode: 521 training return: 189.08976336862986
episode: 522 training return: 88.15821208913643
episode: 523 training return: 96.52078051526104
epoch: 131 test_true_pfm: 1748.531340446424 sim_pfm: 226.78187094384657
episode: 524 training return: 55.770773415632696
episode: 525 training return: 54.93108449086321
episode: 526 training return: 73.83053574982246
episode: 527 training return: 151.56886719649054
epoch: 132 test_true_pfm: 1918.5300662560705 sim_pfm: 238.6228219403296
episode: 528 training return: 115.39882428761184
episode: 529 training return: 100.8460028580877
episode: 530 training return: 64.8085182676478
episode: 531 training return: 118.2232885437323
epoch: 133 test_true_pfm: 1869.8258177769212 sim_pfm: 261.9518275801899
episode: 532 training return: 89.76328610652799
episode: 533 training return: 133.7864542306296
episode: 534 training return: 134.02847920237159
episode: 535 training return: 106.41021905580494
epoch: 134 test_true_pfm: 1711.5420059388143 sim_pfm: 174.94238871964066
episode: 536 training return: 65.48831073385772
episode: 537 training return: 141.4060121752963
episode: 538 training return: 36.27155615888063
episode: 539 training return: 112.44306190571362
epoch: 135 test_true_pfm: 1820.5544339414876 sim_pfm: 207.26607457232566
episode: 540 training return: 156.3886881853435
episode: 541 training return: 49.03396092426246
episode: 542 training return: 112.57676203517984
episode: 543 training return: 155.85254260242874
epoch: 136 test_true_pfm: 1927.8332742655457 sim_pfm: 257.91146063067725
episode: 544 training return: 142.99184611907313
episode: 545 training return: 165.22447038930983
episode: 546 training return: 76.98561725729975
episode: 547 training return: 111.06557330235427
epoch: 137 test_true_pfm: 1823.2590932375242 sim_pfm: 221.4032222039033
episode: 548 training return: 89.280999937823
episode: 549 training return: 124.26342425965196
episode: 550 training return: 72.57786524381967
episode: 551 training return: -25.983283454254234
epoch: 138 test_true_pfm: 1994.1734836093383 sim_pfm: 274.3433292340579
episode: 552 training return: 67.21199700678348
episode: 553 training return: 72.04769510031181
episode: 554 training return: 146.69231081253037
episode: 555 training return: 67.71168687155784
epoch: 139 test_true_pfm: 1926.8273191795386 sim_pfm: 250.77265287163027
episode: 556 training return: 39.71025379559687
episode: 557 training return: 80.18087684945627
episode: 558 training return: 129.22581050309358
episode: 559 training return: 159.47565590975674
epoch: 140 test_true_pfm: 1743.0411586240746 sim_pfm: 201.36814463579648
episode: 560 training return: 133.98457782259942
episode: 561 training return: 91.4934735868627
episode: 562 training return: 128.3307636360654
episode: 563 training return: 136.87890390951375
epoch: 141 test_true_pfm: 1756.831107959235 sim_pfm: 188.67331598574438
episode: 564 training return: 111.24234658332587
episode: 565 training return: 109.03717367839538
episode: 566 training return: 174.53126037765418
episode: 567 training return: 145.54986731404986
epoch: 142 test_true_pfm: 1865.5908668952459 sim_pfm: 220.89123152139996
episode: 568 training return: 61.48131623162915
episode: 569 training return: 52.089972742645045
episode: 570 training return: 85.74224323377534
episode: 571 training return: 71.82722392993816
epoch: 143 test_true_pfm: 1703.388493856496 sim_pfm: 166.6278492405509
episode: 572 training return: 161.77227399416032
episode: 573 training return: 124.12348562140973
episode: 574 training return: 151.32053352363727
episode: 575 training return: 165.8157358728218
epoch: 144 test_true_pfm: 1785.3343454876338 sim_pfm: 218.25792412592355
episode: 576 training return: 128.5703788116717
episode: 577 training return: 82.7317917886306
episode: 578 training return: 99.8822671314085
episode: 579 training return: 93.79973739505091
epoch: 145 test_true_pfm: 1800.7999966056248 sim_pfm: 235.16839132542327
episode: 580 training return: 118.68433402381031
episode: 581 training return: 71.1243654606399
episode: 582 training return: 120.31308887725059
episode: 583 training return: 67.1817429674968
epoch: 146 test_true_pfm: 1848.629751900571 sim_pfm: 251.4124955068138
episode: 584 training return: 161.0931559429999
episode: 585 training return: 93.64705802885099
episode: 586 training return: 65.47396687378165
episode: 587 training return: 126.1182586905239
epoch: 147 test_true_pfm: 1842.2065434812812 sim_pfm: 223.1946448602138
episode: 588 training return: 57.80579165605766
episode: 589 training return: 117.92640468927253
episode: 590 training return: 121.18680854565194
episode: 591 training return: 133.81328928298956
epoch: 148 test_true_pfm: 1949.51168871614 sim_pfm: 262.8677103218599
episode: 592 training return: 141.0268582527739
episode: 593 training return: 117.07365624448552
episode: 594 training return: 129.0527004968422
episode: 595 training return: 74.10950181242167
epoch: 149 test_true_pfm: 1988.2540090689934 sim_pfm: 269.0786999724922
episode: 596 training return: 102.44644483986391
episode: 597 training return: 157.80644599982242
episode: 598 training return: 110.47296858618208
episode: 599 training return: 115.87284472111368
epoch: 150 test_true_pfm: 1994.4118712981126 sim_pfm: 252.86077185751162
