['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'uncertainty', '--traj', 'medium', '--seed', '1', '--data', '100000']
epoch: 0 training_loss 0.2504583301395178 test_loss: 0.1933349370956421
epoch: 1 training_loss 0.20487461514770985 test_loss: 0.18141907453536987
epoch: 2 training_loss 0.20817745819687844 test_loss: 0.19203351736068724
epoch: 3 training_loss 0.20512510873377324 test_loss: 0.18992910385131836
epoch: 4 training_loss 0.20077240280807018 test_loss: 0.1966405987739563
epoch: 5 training_loss 0.19353240147233008 test_loss: 0.17857880592346193
epoch: 6 training_loss 0.1964918849617243 test_loss: 0.1992499828338623
epoch: 7 training_loss 0.19258537985384463 test_loss: 0.17308081388473512
epoch: 8 training_loss 0.18946513891220093 test_loss: 0.17813944816589355
epoch: 9 training_loss 0.18897972390055656 test_loss: 0.18832322359085082
epoch: 10 training_loss 0.19150449365377425 test_loss: 0.18057167530059814
epoch: 11 training_loss 0.1906119403988123 test_loss: 0.1999804735183716
epoch: 12 training_loss 0.1934745793417096 test_loss: 0.18790698051452637
epoch: 13 training_loss 0.18531814619898795 test_loss: 0.17603110074996947
epoch: 14 training_loss 0.18266180150210856 test_loss: 0.19442546367645264
epoch: 15 training_loss 0.18303250771015883 test_loss: 0.18229262828826903
epoch: 16 training_loss 0.18413588725030422 test_loss: 0.18414851427078247
epoch: 17 training_loss 0.1828341072052717 test_loss: 0.19202243089675902
epoch: 18 training_loss 0.18169710256159305 test_loss: 0.18074650764465333
epoch: 19 training_loss 0.182751534730196 test_loss: 0.18014470338821412
epoch: 20 training_loss 0.1901225356757641 test_loss: 0.18453288078308105
epoch: 21 training_loss 0.179875021725893 test_loss: 0.19443658590316773
epoch: 22 training_loss 0.1828918842971325 test_loss: 0.18753316402435302
epoch: 23 training_loss 0.1803877818584442 test_loss: 0.17343961000442504
epoch: 24 training_loss 0.18152768604457378 test_loss: 0.17966244220733643
epoch: 25 training_loss 0.1867587822675705 test_loss: 0.172829008102417
epoch: 26 training_loss 0.18481199115514754 test_loss: 0.18772751092910767
epoch: 27 training_loss 0.18141394779086112 test_loss: 0.18797279596328736
epoch: 28 training_loss 0.17388755917549134 test_loss: 0.164205002784729
epoch: 29 training_loss 0.17283579602837562 test_loss: 0.18640604019165039
epoch: 30 training_loss 0.17746924705803394 test_loss: 0.18073099851608276
epoch: 31 training_loss 0.18205561071634294 test_loss: 0.1784292459487915
epoch: 32 training_loss 0.1798632863163948 test_loss: 0.18190438747406007
epoch: 33 training_loss 0.1736974909901619 test_loss: 0.19462021589279174
epoch: 34 training_loss 0.19165384329855442 test_loss: 0.18110228776931764
epoch: 35 training_loss 0.1824867581576109 test_loss: 0.18437448740005494
epoch: 36 training_loss 0.1858811952918768 test_loss: 0.1814647674560547
epoch: 37 training_loss 0.18444995082914828 test_loss: 0.183352267742157
epoch: 38 training_loss 0.184121308401227 test_loss: 0.16487005949020386
epoch: 39 training_loss 0.18879848584532738 test_loss: 0.18100283145904542
epoch: 40 training_loss 0.1931167896836996 test_loss: 0.18866817951202391
epoch: 41 training_loss 0.18233082316815852 test_loss: 0.17066538333892822
epoch: 42 training_loss 0.18420529440045358 test_loss: 0.17951271533966065
epoch: 43 training_loss 0.18200959987938403 test_loss: 0.17771849632263184
epoch: 44 training_loss 0.1820216777548194 test_loss: 0.17080655097961425
epoch: 45 training_loss 0.18550680816173554 test_loss: 0.1762342095375061
epoch: 46 training_loss 0.18209606602787973 test_loss: 0.1808363080024719
epoch: 47 training_loss 0.17919332198798657 test_loss: 0.18802878856658936
epoch: 48 training_loss 0.17981589369475842 test_loss: 0.17108761072158812
epoch: 49 training_loss 0.18457171089947225 test_loss: 0.17624564170837403
epoch: 50 training_loss 0.1731564239412546 test_loss: 0.1595914363861084
epoch: 51 training_loss 0.18085322059690953 test_loss: 0.18426225185394288
epoch: 52 training_loss 0.18527727216482162 test_loss: 0.1766292929649353
epoch: 53 training_loss 0.18248874217271804 test_loss: 0.19116154909133912
epoch: 54 training_loss 0.17952303901314737 test_loss: 0.18734856843948364
epoch: 55 training_loss 0.17711048074066638 test_loss: 0.17543680667877198
epoch: 56 training_loss 0.17566015020012857 test_loss: 0.17322659492492676
epoch: 57 training_loss 0.17148416817188264 test_loss: 0.1887549042701721
epoch: 58 training_loss 0.17714260876178742 test_loss: 0.1908562183380127
epoch: 59 training_loss 0.1752545414119959 test_loss: 0.1854674696922302
epoch: 60 training_loss 0.1791199417412281 test_loss: 0.16596771478652955
epoch: 61 training_loss 0.17637957885861397 test_loss: 0.17848955392837523
epoch: 62 training_loss 0.18301772736012936 test_loss: 0.18349039554595947
epoch: 63 training_loss 0.16950136117637157 test_loss: 0.17524032592773436
epoch: 64 training_loss 0.17699346922338008 test_loss: 0.16764626502990723
epoch: 65 training_loss 0.17971460048109292 test_loss: 0.15522180795669555
epoch: 66 training_loss 0.17334652669727801 test_loss: 0.18633342981338502
epoch: 67 training_loss 0.18357726380228997 test_loss: 0.17374880313873292
epoch: 68 training_loss 0.18148053549230098 test_loss: 0.18497087955474853
epoch: 69 training_loss 0.18017168886959553 test_loss: 0.1677287459373474
epoch: 70 training_loss 0.17159235544502735 test_loss: 0.16890316009521483
epoch: 71 training_loss 0.17947703920304775 test_loss: 0.18033268451690673
epoch: 72 training_loss 0.17587470345199108 test_loss: 0.18333736658096314
epoch: 73 training_loss 0.17983024142682552 test_loss: 0.17904219627380372
epoch: 74 training_loss 0.17134626559913158 test_loss: 0.16762905120849608
epoch: 75 training_loss 0.17508282452821733 test_loss: 0.17481211423873902
epoch: 76 training_loss 0.1812196297943592 test_loss: 0.18866225481033325
epoch: 77 training_loss 0.17949489705264568 test_loss: 0.1770087242126465
epoch: 78 training_loss 0.1745244838297367 test_loss: 0.17724074125289918
epoch: 79 training_loss 0.1780443713068962 test_loss: 0.17374267578125
epoch: 80 training_loss 0.17266748145222663 test_loss: 0.17542704343795776
epoch: 81 training_loss 0.18017949394881724 test_loss: 0.16998358964920043
epoch: 82 training_loss 0.17605503417551518 test_loss: 0.18232084512710572
epoch: 83 training_loss 0.17740524716675282 test_loss: 0.18648041486740113
epoch: 84 training_loss 0.17794757388532162 test_loss: 0.185155189037323
epoch: 85 training_loss 0.18224840149283408 test_loss: 0.17236791849136351
epoch: 86 training_loss 0.1769665402173996 test_loss: 0.1421388268470764
epoch: 87 training_loss 0.1790028290450573 test_loss: 0.1857662320137024
epoch: 88 training_loss 0.1711442892253399 test_loss: 0.1835313320159912
epoch: 89 training_loss 0.17779422365128994 test_loss: 0.17002108097076415
epoch: 90 training_loss 0.17699169285595417 test_loss: 0.17465416193008423
epoch: 91 training_loss 0.17448471024632453 test_loss: 0.19377351999282838
epoch: 92 training_loss 0.1777441806346178 test_loss: 0.1792113423347473
epoch: 93 training_loss 0.1777538013458252 test_loss: 0.17814109325408936
epoch: 94 training_loss 0.18066668950021267 test_loss: 0.18178437948226928
epoch: 95 training_loss 0.1765249989181757 test_loss: 0.18214339017868042
epoch: 96 training_loss 0.18153758712112902 test_loss: 0.18111275434494017
epoch: 97 training_loss 0.17764164745807648 test_loss: 0.17981868982315063
epoch: 98 training_loss 0.18246846958994867 test_loss: 0.16415889263153077
epoch: 99 training_loss 0.17245520547032356 test_loss: 0.1839862585067749
epoch: 100 training_loss 0.17305574662983417 test_loss: 0.16896352767944336
epoch: 101 training_loss 0.18336879886686802 test_loss: 0.18571552038192748
epoch: 102 training_loss 0.1706751224398613 test_loss: 0.1794920563697815
epoch: 103 training_loss 0.1719267711043358 test_loss: 0.1735769748687744
epoch: 104 training_loss 0.17843867070972919 test_loss: 0.17424447536468507
epoch: 105 training_loss 0.177967534288764 test_loss: 0.17703380584716796
epoch: 106 training_loss 0.17949733644723892 test_loss: 0.17663848400115967
epoch: 107 training_loss 0.1694007221609354 test_loss: 0.17302350997924804
epoch: 108 training_loss 0.1691192726790905 test_loss: 0.18949513435363768
epoch: 109 training_loss 0.17307995103299617 test_loss: 0.1593951940536499
epoch: 110 training_loss 0.17813893742859363 test_loss: 0.17595688104629517
epoch: 111 training_loss 0.1741142801195383 test_loss: 0.17130826711654662
epoch: 112 training_loss 0.18209042377769946 test_loss: 0.16807234287261963
epoch: 113 training_loss 0.17885497465729713 test_loss: 0.17523086071014404
epoch: 114 training_loss 0.16775657281279563 test_loss: 0.1789361357688904
epoch: 115 training_loss 0.17138096414506435 test_loss: 0.18388025760650634
epoch: 116 training_loss 0.1817126114666462 test_loss: 0.17707761526107788
epoch: 117 training_loss 0.17485294997692108 test_loss: 0.18547669649124146
epoch: 118 training_loss 0.18189089350402354 test_loss: 0.16537747383117676
epoch: 119 training_loss 0.17949059687554836 test_loss: 0.1697800874710083
epoch: 120 training_loss 0.17201401449739934 test_loss: 0.17643309831619264
epoch: 121 training_loss 0.16947672687470913 test_loss: 0.17210943698883058
epoch: 122 training_loss 0.18143104039132596 test_loss: 0.16624867916107178
epoch: 123 training_loss 0.17881045304238796 test_loss: 0.16414110660552977
epoch: 124 training_loss 0.17314857058227062 test_loss: 0.18516955375671387
epoch: 125 training_loss 0.17686059057712555 test_loss: 0.174649178981781
epoch: 126 training_loss 0.16685452926903963 test_loss: 0.17137460708618163
epoch: 127 training_loss 0.17327706903219223 test_loss: 0.16893749237060546
epoch: 128 training_loss 0.17229710027575493 test_loss: 0.157578444480896
epoch: 129 training_loss 0.1775080556422472 test_loss: 0.16789529323577881
epoch: 130 training_loss 0.17856310367584227 test_loss: 0.17373037338256836
epoch: 131 training_loss 0.17718554295599462 test_loss: 0.1675704836845398
epoch: 132 training_loss 0.17124341279268265 test_loss: 0.1856772780418396
epoch: 133 training_loss 0.18219843707978725 test_loss: 0.15628131628036498
epoch: 134 training_loss 0.17229840368032456 test_loss: 0.17563718557357788
epoch: 135 training_loss 0.16937520198523998 test_loss: 0.16907293796539308
epoch: 136 training_loss 0.181630686968565 test_loss: 0.17853571176528932
epoch: 137 training_loss 0.17659237295389174 test_loss: 0.16541446447372438
epoch: 138 training_loss 0.1770004177838564 test_loss: 0.17570111751556397
epoch: 139 training_loss 0.1769868206232786 test_loss: 0.16665273904800415
epoch: 140 training_loss 0.18169512175023556 test_loss: 0.19406571388244628
epoch: 141 training_loss 0.17657567419111728 test_loss: 0.17932192087173462
epoch: 142 training_loss 0.18238464809954166 test_loss: 0.1575542449951172
epoch: 143 training_loss 0.1785990209877491 test_loss: 0.17349303960800172
epoch: 144 training_loss 0.1796902085095644 test_loss: 0.16972650289535524
epoch: 145 training_loss 0.17769158132374285 test_loss: 0.17396692037582398
epoch: 146 training_loss 0.1755667858570814 test_loss: 0.17946943044662475
epoch: 147 training_loss 0.17739775732159616 test_loss: 0.1737615704536438
epoch: 148 training_loss 0.17633694529533386 test_loss: 0.1809876561164856
epoch: 149 training_loss 0.17540514416992664 test_loss: 0.17234280109405517
epoch: 0 training_loss 0.2429280461370945 test_loss: 0.20703706741333008
epoch: 1 training_loss 0.20330728203058243 test_loss: 0.18084067106246948
epoch: 2 training_loss 0.1982712996006012 test_loss: 0.19923425912857057
epoch: 3 training_loss 0.2087201826274395 test_loss: 0.18709884881973265
epoch: 4 training_loss 0.1926093762367964 test_loss: 0.18922594785690308
epoch: 5 training_loss 0.1895757443457842 test_loss: 0.18813127279281616
epoch: 6 training_loss 0.18821939185261727 test_loss: 0.19329015016555787
epoch: 7 training_loss 0.19092375963926314 test_loss: 0.18279236555099487
epoch: 8 training_loss 0.18976275376975538 test_loss: 0.1799416422843933
epoch: 9 training_loss 0.18465730398893357 test_loss: 0.18149945735931397
epoch: 10 training_loss 0.1952078188955784 test_loss: 0.19311968088150025
epoch: 11 training_loss 0.1873177619278431 test_loss: 0.17840948104858398
epoch: 12 training_loss 0.19509637258946896 test_loss: 0.1868860602378845
epoch: 13 training_loss 0.18297971077263356 test_loss: 0.19394984245300292
epoch: 14 training_loss 0.18048831358551978 test_loss: 0.1818867564201355
epoch: 15 training_loss 0.18034246638417245 test_loss: 0.19483035802841187
epoch: 16 training_loss 0.18327203206717968 test_loss: 0.164814293384552
epoch: 17 training_loss 0.18596665509045124 test_loss: 0.1870507001876831
epoch: 18 training_loss 0.1813057355582714 test_loss: 0.16446571350097655
epoch: 19 training_loss 0.18199967570602893 test_loss: 0.18933048248291015
epoch: 20 training_loss 0.18571927361190319 test_loss: 0.18093663454055786
epoch: 21 training_loss 0.18054675243794918 test_loss: 0.18123302459716797
epoch: 22 training_loss 0.1857894466817379 test_loss: 0.1857256531715393
epoch: 23 training_loss 0.1828473825007677 test_loss: 0.18981649875640869
epoch: 24 training_loss 0.18889424696564674 test_loss: 0.16061140298843385
epoch: 25 training_loss 0.17712651833891868 test_loss: 0.18399778604507447
epoch: 26 training_loss 0.1861634434759617 test_loss: 0.18206362724304198
epoch: 27 training_loss 0.18517376422882081 test_loss: 0.1822482943534851
epoch: 28 training_loss 0.18446237035095692 test_loss: 0.17878828048706055
epoch: 29 training_loss 0.1897388457506895 test_loss: 0.17633808851242067
epoch: 30 training_loss 0.18054234877228736 test_loss: 0.1776909351348877
epoch: 31 training_loss 0.17746568121016026 test_loss: 0.17833964824676513
epoch: 32 training_loss 0.18491815626621247 test_loss: 0.18946667909622192
epoch: 33 training_loss 0.182692926004529 test_loss: 0.18055762052536012
epoch: 34 training_loss 0.18456792883574963 test_loss: 0.17295119762420655
epoch: 35 training_loss 0.18959581583738327 test_loss: 0.1793321490287781
epoch: 36 training_loss 0.183476689606905 test_loss: 0.19765700101852418
epoch: 37 training_loss 0.18435783356428145 test_loss: 0.18491913080215455
epoch: 38 training_loss 0.17744718700647355 test_loss: 0.17330073118209838
epoch: 39 training_loss 0.17991427540779115 test_loss: 0.17325133085250854
epoch: 40 training_loss 0.174263164550066 test_loss: 0.17170412540435792
epoch: 41 training_loss 0.18557127550244332 test_loss: 0.18813589811325074
epoch: 42 training_loss 0.18092642202973366 test_loss: 0.17444931268692015
epoch: 43 training_loss 0.179859424456954 test_loss: 0.1645138144493103
epoch: 44 training_loss 0.1797218181192875 test_loss: 0.17694700956344606
epoch: 45 training_loss 0.18041574582457542 test_loss: 0.17316426038742067
epoch: 46 training_loss 0.17987753629684447 test_loss: 0.18213545083999633
epoch: 47 training_loss 0.18186996214091777 test_loss: 0.1876197099685669
epoch: 48 training_loss 0.18190445214509965 test_loss: 0.17434498071670532
epoch: 49 training_loss 0.18488369025290013 test_loss: 0.16699305772781373
epoch: 50 training_loss 0.1775811779871583 test_loss: 0.16436606645584106
epoch: 51 training_loss 0.18009916111826896 test_loss: 0.17652937173843383
epoch: 52 training_loss 0.17829434432089328 test_loss: 0.17027409076690675
epoch: 53 training_loss 0.17802345991134644 test_loss: 0.17995988130569457
epoch: 54 training_loss 0.17793828405439854 test_loss: 0.17562893629074097
epoch: 55 training_loss 0.1878294610232115 test_loss: 0.1856548547744751
epoch: 56 training_loss 0.17907078839838506 test_loss: 0.17124017477035522
epoch: 57 training_loss 0.1766051649302244 test_loss: 0.1826128363609314
epoch: 58 training_loss 0.170482734516263 test_loss: 0.17435504198074342
epoch: 59 training_loss 0.17377497516572477 test_loss: 0.19904485940933228
epoch: 60 training_loss 0.17795438416302203 test_loss: 0.17563835382461548
epoch: 61 training_loss 0.17997401274740696 test_loss: 0.1819211721420288
epoch: 62 training_loss 0.1755950426310301 test_loss: 0.2024374008178711
epoch: 63 training_loss 0.17779302820563317 test_loss: 0.18571031093597412
epoch: 64 training_loss 0.175303570702672 test_loss: 0.1745213747024536
epoch: 65 training_loss 0.1744909043237567 test_loss: 0.18304817676544188
epoch: 66 training_loss 0.1740017291903496 test_loss: 0.1803969621658325
epoch: 67 training_loss 0.17754765041172504 test_loss: 0.18023829460144042
epoch: 68 training_loss 0.1758526711165905 test_loss: 0.17980421781539918
epoch: 69 training_loss 0.18111278288066388 test_loss: 0.17296190261840821
epoch: 70 training_loss 0.17822722233831884 test_loss: 0.18705356121063232
epoch: 71 training_loss 0.1821734468638897 test_loss: 0.17976701259613037
epoch: 72 training_loss 0.1783060822635889 test_loss: 0.18443819284439086
epoch: 73 training_loss 0.18564480900764466 test_loss: 0.18148919343948364
epoch: 74 training_loss 0.17017667286098004 test_loss: 0.17610899209976197
epoch: 75 training_loss 0.19007720433175565 test_loss: 0.16639119386672974
epoch: 76 training_loss 0.17526170447468759 test_loss: 0.18204566240310668
epoch: 77 training_loss 0.1763384857773781 test_loss: 0.16152894496917725
epoch: 78 training_loss 0.18946794979274273 test_loss: 0.18776689767837523
epoch: 79 training_loss 0.1735677047818899 test_loss: 0.17208366394042968
epoch: 80 training_loss 0.17828139521181582 test_loss: 0.17372504472732545
epoch: 81 training_loss 0.18402389712631703 test_loss: 0.18728845119476317
epoch: 82 training_loss 0.17294316772371532 test_loss: 0.17262277603149415
epoch: 83 training_loss 0.17501362472772597 test_loss: 0.18564611673355103
epoch: 84 training_loss 0.1848027503490448 test_loss: 0.17802205085754394
epoch: 85 training_loss 0.17404693447053432 test_loss: 0.18588372468948364
epoch: 86 training_loss 0.17405557401478292 test_loss: 0.1976525902748108
epoch: 87 training_loss 0.17891401886940003 test_loss: 0.16749807596206664
epoch: 88 training_loss 0.17733473919332027 test_loss: 0.18235139846801757
epoch: 89 training_loss 0.1835555264353752 test_loss: 0.18641589879989623
epoch: 90 training_loss 0.1792284331470728 test_loss: 0.1843823552131653
epoch: 91 training_loss 0.16815298669040202 test_loss: 0.16563743352890015
epoch: 92 training_loss 0.18176663115620614 test_loss: 0.1923656702041626
epoch: 93 training_loss 0.1868130039423704 test_loss: 0.16990808248519898
epoch: 94 training_loss 0.17280700460076331 test_loss: 0.17690615653991698
epoch: 95 training_loss 0.18024891071021557 test_loss: 0.18995825052261353
epoch: 96 training_loss 0.17918963015079498 test_loss: 0.18084851503372193
epoch: 97 training_loss 0.17846454627811908 test_loss: 0.17578336000442504
epoch: 98 training_loss 0.1772446320950985 test_loss: 0.1767457365989685
epoch: 99 training_loss 0.17877796582877636 test_loss: 0.17331758737564087
epoch: 100 training_loss 0.17593463741242885 test_loss: 0.1644013047218323
epoch: 101 training_loss 0.17711471065878867 test_loss: 0.17402466535568237
epoch: 102 training_loss 0.1780761668086052 test_loss: 0.17091653347015381
epoch: 103 training_loss 0.1754858196899295 test_loss: 0.1788006067276001
epoch: 104 training_loss 0.1753653959929943 test_loss: 0.17229585647583007
epoch: 105 training_loss 0.18267287202179433 test_loss: 0.17675309181213378
epoch: 106 training_loss 0.17166351467370988 test_loss: 0.17269223928451538
epoch: 107 training_loss 0.16939567647874354 test_loss: 0.17525756359100342
epoch: 108 training_loss 0.18568812265992166 test_loss: 0.18381824493408203
epoch: 109 training_loss 0.17745549261569976 test_loss: 0.19106003046035766
epoch: 110 training_loss 0.17502799421548842 test_loss: 0.17449601888656616
epoch: 111 training_loss 0.1839015806466341 test_loss: 0.16268908977508545
epoch: 112 training_loss 0.17773654088377952 test_loss: 0.16557348966598512
epoch: 113 training_loss 0.16655781719833612 test_loss: 0.1799532651901245
epoch: 114 training_loss 0.17370803885161876 test_loss: 0.1849069118499756
epoch: 115 training_loss 0.1755584356188774 test_loss: 0.17956910133361817
epoch: 116 training_loss 0.18173973590135575 test_loss: 0.16468387842178345
epoch: 117 training_loss 0.18181224584579467 test_loss: 0.18067290782928466
epoch: 118 training_loss 0.18225751727819442 test_loss: 0.18578619956970216
epoch: 119 training_loss 0.1722811146825552 test_loss: 0.16860358715057372
epoch: 120 training_loss 0.176183073669672 test_loss: 0.17236597537994386
epoch: 121 training_loss 0.1810718461126089 test_loss: 0.18008666038513182
epoch: 122 training_loss 0.1822534640878439 test_loss: 0.17613375186920166
epoch: 123 training_loss 0.1809844221174717 test_loss: 0.1884205937385559
epoch: 124 training_loss 0.17044942155480386 test_loss: 0.18637806177139282
epoch: 125 training_loss 0.1817355827242136 test_loss: 0.17789101600646973
epoch: 126 training_loss 0.17444635152816773 test_loss: 0.17733378410339357
epoch: 127 training_loss 0.17649409741163255 test_loss: 0.16429564952850342
epoch: 128 training_loss 0.17377460092306138 test_loss: 0.18981485366821288
epoch: 129 training_loss 0.17539833746850492 test_loss: 0.17958779335021974
epoch: 130 training_loss 0.17840482316911221 test_loss: 0.16718498468399048
epoch: 131 training_loss 0.17938797794282435 test_loss: 0.16092073917388916
epoch: 132 training_loss 0.18243273481726646 test_loss: 0.18684546947479247
epoch: 133 training_loss 0.17576729647815229 test_loss: 0.17705376148223878
epoch: 134 training_loss 0.17125872224569322 test_loss: 0.1691335082054138
epoch: 135 training_loss 0.17596031934022904 test_loss: 0.18642644882202147
epoch: 136 training_loss 0.18075758084654808 test_loss: 0.17992854118347168
epoch: 137 training_loss 0.17218153312802315 test_loss: 0.166494619846344
epoch: 138 training_loss 0.16964842326939106 test_loss: 0.17190757989883423
epoch: 139 training_loss 0.17666910491883756 test_loss: 0.17324901819229127
epoch: 140 training_loss 0.1735431455820799 test_loss: 0.17821457386016845
epoch: 141 training_loss 0.18050215676426887 test_loss: 0.1663862109184265
epoch: 142 training_loss 0.17806598253548145 test_loss: 0.17282845973968505
epoch: 143 training_loss 0.17779478803277016 test_loss: 0.17782827615737914
epoch: 144 training_loss 0.17817116983234882 test_loss: 0.16485626697540284
epoch: 145 training_loss 0.18279177166521549 test_loss: 0.1710737705230713
epoch: 146 training_loss 0.17298026949167253 test_loss: 0.16510384082794188
epoch: 147 training_loss 0.17423240903764964 test_loss: 0.1886332392692566
epoch: 148 training_loss 0.17576025538146495 test_loss: 0.17988826036453248
epoch: 149 training_loss 0.17764667473733425 test_loss: 0.16710572242736815
epoch: 0 training_loss 0.25539048224687577 test_loss: 0.2248534679412842
epoch: 1 training_loss 0.20572320751845838 test_loss: 0.20732874870300294
epoch: 2 training_loss 0.19463566832244397 test_loss: 0.21189432144165038
epoch: 3 training_loss 0.1975380440056324 test_loss: 0.20085465908050537
epoch: 4 training_loss 0.18448470756411553 test_loss: 0.21209452152252198
epoch: 5 training_loss 0.18102388478815556 test_loss: 0.1982287049293518
epoch: 6 training_loss 0.18831791020929814 test_loss: 0.2065964460372925
epoch: 7 training_loss 0.18987995713949204 test_loss: 0.19508554935455322
epoch: 8 training_loss 0.1842641594260931 test_loss: 0.21440091133117675
epoch: 9 training_loss 0.17963881425559522 test_loss: 0.1874389171600342
epoch: 10 training_loss 0.1867630122601986 test_loss: 0.18483521938323974
epoch: 11 training_loss 0.1902075383812189 test_loss: 0.1740654706954956
epoch: 12 training_loss 0.18568955413997174 test_loss: 0.19749437570571898
epoch: 13 training_loss 0.18400431670248507 test_loss: 0.19523640871047973
epoch: 14 training_loss 0.17272809065878392 test_loss: 0.1809360146522522
epoch: 15 training_loss 0.181953012496233 test_loss: 0.19677501916885376
epoch: 16 training_loss 0.1849961167573929 test_loss: 0.22299129962921144
epoch: 17 training_loss 0.18862026996910572 test_loss: 0.17536749839782714
epoch: 18 training_loss 0.18572469174861908 test_loss: 0.19560205936431885
epoch: 19 training_loss 0.18200758554041385 test_loss: 0.19897260665893554
epoch: 20 training_loss 0.1799280484765768 test_loss: 0.1876670241355896
epoch: 21 training_loss 0.17383418612182142 test_loss: 0.19738143682479858
epoch: 22 training_loss 0.17755180515348912 test_loss: 0.19735921621322633
epoch: 23 training_loss 0.17875490769743918 test_loss: 0.20299036502838136
epoch: 24 training_loss 0.17434151463210582 test_loss: 0.19859750270843507
epoch: 25 training_loss 0.1800303743034601 test_loss: 0.18562071323394774
epoch: 26 training_loss 0.17925006225705148 test_loss: 0.1925273895263672
epoch: 27 training_loss 0.1721904318779707 test_loss: 0.18739445209503175
epoch: 28 training_loss 0.17102469690144062 test_loss: 0.189391028881073
epoch: 29 training_loss 0.1751132571697235 test_loss: 0.17983356714248658
epoch: 30 training_loss 0.1768140245974064 test_loss: 0.20705151557922363
epoch: 31 training_loss 0.16660845518112183 test_loss: 0.1902977466583252
epoch: 32 training_loss 0.17552835173904896 test_loss: 0.206893253326416
epoch: 33 training_loss 0.1728370174765587 test_loss: 0.19050838947296142
epoch: 34 training_loss 0.16881897836923598 test_loss: 0.192634916305542
epoch: 35 training_loss 0.1683966864645481 test_loss: 0.18208156824111937
epoch: 36 training_loss 0.1778549285233021 test_loss: 0.2032305955886841
epoch: 37 training_loss 0.17764374293386936 test_loss: 0.2021834135055542
epoch: 38 training_loss 0.17080709166824817 test_loss: 0.18810486793518066
epoch: 39 training_loss 0.17359377082437277 test_loss: 0.19674209356307984
epoch: 40 training_loss 0.18172280080616474 test_loss: 0.19300071001052857
epoch: 41 training_loss 0.17963718466460704 test_loss: 0.1954064965248108
epoch: 42 training_loss 0.17348770417273043 test_loss: 0.19219021797180175
epoch: 43 training_loss 0.17495944194495677 test_loss: 0.19419777393341064
epoch: 44 training_loss 0.17539682023227215 test_loss: 0.179619300365448
epoch: 45 training_loss 0.17642903245985508 test_loss: 0.19333478212356567
epoch: 46 training_loss 0.17732783541083336 test_loss: 0.18516849279403685
epoch: 47 training_loss 0.17282734125852584 test_loss: 0.1848365068435669
epoch: 48 training_loss 0.17294567115604878 test_loss: 0.19944144487380983
epoch: 49 training_loss 0.17148438572883606 test_loss: 0.20787520408630372
epoch: 50 training_loss 0.17586861655116082 test_loss: 0.20484297275543212
epoch: 51 training_loss 0.1788583417236805 test_loss: 0.18977702856063844
epoch: 52 training_loss 0.17283092010766268 test_loss: 0.18593194484710693
epoch: 53 training_loss 0.1748633948713541 test_loss: 0.19944554567337036
epoch: 54 training_loss 0.17701562792062758 test_loss: 0.1951622486114502
epoch: 55 training_loss 0.1692023900896311 test_loss: 0.20091500282287597
epoch: 56 training_loss 0.18756063163280487 test_loss: 0.19334193468093872
epoch: 57 training_loss 0.17792252533137798 test_loss: 0.1960223913192749
epoch: 58 training_loss 0.17109477572143078 test_loss: 0.1971462368965149
epoch: 59 training_loss 0.18352058209478855 test_loss: 0.19318339824676514
epoch: 60 training_loss 0.17164384290575982 test_loss: 0.18930327892303467
epoch: 61 training_loss 0.1767301271110773 test_loss: 0.2039644241333008
epoch: 62 training_loss 0.17877754151821137 test_loss: 0.1865756630897522
epoch: 63 training_loss 0.17732278719544411 test_loss: 0.19793323278427125
epoch: 64 training_loss 0.17847443886101247 test_loss: 0.20082261562347412
epoch: 65 training_loss 0.17470474824309348 test_loss: 0.1908199667930603
epoch: 66 training_loss 0.17156868360936642 test_loss: 0.18258000612258912
epoch: 67 training_loss 0.17102760061621666 test_loss: 0.1869825839996338
epoch: 68 training_loss 0.1868429559469223 test_loss: 0.2109922409057617
epoch: 69 training_loss 0.1771065989136696 test_loss: 0.19687222242355346
epoch: 70 training_loss 0.16934613034129142 test_loss: 0.2100152015686035
epoch: 71 training_loss 0.17231798633933068 test_loss: 0.18629149198532105
epoch: 72 training_loss 0.16758005902171136 test_loss: 0.18886064291000365
epoch: 73 training_loss 0.17238942183554173 test_loss: 0.19001145362854005
epoch: 74 training_loss 0.1713806815445423 test_loss: 0.17086513042449952
epoch: 75 training_loss 0.1710768860578537 test_loss: 0.19739762544631959
epoch: 76 training_loss 0.17657887913286685 test_loss: 0.19322797060012817
epoch: 77 training_loss 0.17785107247531415 test_loss: 0.1872715950012207
epoch: 78 training_loss 0.17454453077167273 test_loss: 0.19644917249679567
epoch: 79 training_loss 0.17742068015038967 test_loss: 0.1916547179222107
epoch: 80 training_loss 0.17450108453631402 test_loss: 0.1838515281677246
epoch: 81 training_loss 0.1849438413977623 test_loss: 0.18949146270751954
epoch: 82 training_loss 0.17820657826960087 test_loss: 0.187559175491333
epoch: 83 training_loss 0.17141101114451884 test_loss: 0.18098926544189453
epoch: 84 training_loss 0.1733523601293564 test_loss: 0.1950794816017151
epoch: 85 training_loss 0.1797198773920536 test_loss: 0.18660740852355956
epoch: 86 training_loss 0.17316429294645785 test_loss: 0.20043535232543946
epoch: 87 training_loss 0.17820839293301105 test_loss: 0.18525513410568237
epoch: 88 training_loss 0.18442073106765747 test_loss: 0.18887755870819092
epoch: 89 training_loss 0.17687875114381313 test_loss: 0.1832814931869507
epoch: 90 training_loss 0.17659414306282997 test_loss: 0.1927591323852539
epoch: 91 training_loss 0.18270305156707764 test_loss: 0.19061259031295777
epoch: 92 training_loss 0.17378000505268573 test_loss: 0.20313854217529298
epoch: 93 training_loss 0.1707062139362097 test_loss: 0.19632846117019653
epoch: 94 training_loss 0.17576292172074318 test_loss: 0.19634054899215697
epoch: 95 training_loss 0.17499050632119179 test_loss: 0.18689790964126587
epoch: 96 training_loss 0.17589415431022645 test_loss: 0.17534469366073607
epoch: 97 training_loss 0.17719215169548988 test_loss: 0.18744901418685914
epoch: 98 training_loss 0.17621429532766342 test_loss: 0.19285550117492675
epoch: 99 training_loss 0.17935784555971623 test_loss: 0.19279854297637938
epoch: 100 training_loss 0.17321159288287163 test_loss: 0.19142367839813232
epoch: 101 training_loss 0.16279653817415238 test_loss: 0.1793859839439392
epoch: 102 training_loss 0.17283834710717202 test_loss: 0.18669328689575196
epoch: 103 training_loss 0.1693314504623413 test_loss: 0.1910439610481262
epoch: 104 training_loss 0.17848944634199143 test_loss: 0.19004676342010499
epoch: 105 training_loss 0.17371365122497082 test_loss: 0.1903631329536438
epoch: 106 training_loss 0.17540140673518181 test_loss: 0.19884519577026366
epoch: 107 training_loss 0.17435308821499348 test_loss: 0.20305564403533935
epoch: 108 training_loss 0.17536443091928958 test_loss: 0.1955675959587097
epoch: 109 training_loss 0.16572378136217594 test_loss: 0.19941952228546142
epoch: 110 training_loss 0.17953050561249256 test_loss: 0.2046353578567505
epoch: 111 training_loss 0.16702761441469194 test_loss: 0.20628836154937744
epoch: 112 training_loss 0.1724376468360424 test_loss: 0.1930508494377136
epoch: 113 training_loss 0.17701974354684352 test_loss: 0.18892388343811034
epoch: 114 training_loss 0.18226421795785427 test_loss: 0.19214779138565063
epoch: 115 training_loss 0.16977463610470295 test_loss: 0.1803126096725464
epoch: 116 training_loss 0.16784611515700817 test_loss: 0.17708852291107177
epoch: 117 training_loss 0.17107670173048972 test_loss: 0.18934546709060668
epoch: 118 training_loss 0.17681732028722763 test_loss: 0.1768894076347351
epoch: 119 training_loss 0.17963290609419347 test_loss: 0.1842230439186096
epoch: 120 training_loss 0.17186319649219514 test_loss: 0.18619319200515747
epoch: 121 training_loss 0.17252884812653066 test_loss: 0.21156902313232423
epoch: 122 training_loss 0.177773400247097 test_loss: 0.18945285081863403
epoch: 123 training_loss 0.16718028597533702 test_loss: 0.19568585157394408
epoch: 124 training_loss 0.17251243405044078 test_loss: 0.17514660358428955
epoch: 125 training_loss 0.17528976164758206 test_loss: 0.19168306589126588
epoch: 126 training_loss 0.1706058458983898 test_loss: 0.18650590181350707
epoch: 127 training_loss 0.17170538157224655 test_loss: 0.20718307495117189
epoch: 128 training_loss 0.17500957123935224 test_loss: 0.18419771194458007
epoch: 129 training_loss 0.15877054357901216 test_loss: 0.1953342914581299
epoch: 130 training_loss 0.17873926125466824 test_loss: 0.19608089923858643
epoch: 131 training_loss 0.18148474529385566 test_loss: 0.20397768020629883
epoch: 132 training_loss 0.17622215256094934 test_loss: 0.19210869073867798
epoch: 133 training_loss 0.17513313017785548 test_loss: 0.19810914993286133
epoch: 134 training_loss 0.17812749348580836 test_loss: 0.19017845392227173
epoch: 135 training_loss 0.16737225256860255 test_loss: 0.17654626369476317
epoch: 136 training_loss 0.17595523163676263 test_loss: 0.19814836978912354
epoch: 137 training_loss 0.17353639520704747 test_loss: 0.1885601043701172
epoch: 138 training_loss 0.17415291532874108 test_loss: 0.18529716730117798
epoch: 139 training_loss 0.16820419192314148 test_loss: 0.1852593183517456
epoch: 140 training_loss 0.1634467749297619 test_loss: 0.18363975286483764
epoch: 141 training_loss 0.17677598737180233 test_loss: 0.1750162124633789
epoch: 142 training_loss 0.17843292243778705 test_loss: 0.19511430263519286
epoch: 143 training_loss 0.17223118260502815 test_loss: 0.191849684715271
epoch: 144 training_loss 0.1768031044304371 test_loss: 0.18975673913955687
epoch: 145 training_loss 0.1727467167377472 test_loss: 0.19352598190307618
epoch: 146 training_loss 0.1730970374494791 test_loss: 0.19592223167419434
epoch: 147 training_loss 0.16885911539196968 test_loss: 0.1890395164489746
epoch: 148 training_loss 0.1666110223531723 test_loss: 0.18372745513916017
epoch: 149 training_loss 0.17812350153923034 test_loss: 0.1877158284187317
epoch: 0 training_loss 0.2825925151258707 test_loss: 0.21504437923431396
epoch: 1 training_loss 0.2066019332408905 test_loss: 0.2191338300704956
epoch: 2 training_loss 0.19637382932007313 test_loss: 0.19556957483291626
epoch: 3 training_loss 0.1996779926121235 test_loss: 0.2016918182373047
epoch: 4 training_loss 0.19753324203193187 test_loss: 0.1838128685951233
epoch: 5 training_loss 0.19604291073977947 test_loss: 0.18614057302474976
epoch: 6 training_loss 0.18946993485093117 test_loss: 0.20920469760894775
epoch: 7 training_loss 0.18396476954221724 test_loss: 0.1971517324447632
epoch: 8 training_loss 0.1906507520377636 test_loss: 0.19402576684951783
epoch: 9 training_loss 0.19174604438245296 test_loss: 0.19130932092666625
epoch: 10 training_loss 0.19523736342787743 test_loss: 0.18842331171035767
epoch: 11 training_loss 0.1957086543738842 test_loss: 0.18967431783676147
epoch: 12 training_loss 0.1928446390479803 test_loss: 0.19002461433410645
epoch: 13 training_loss 0.1872154662758112 test_loss: 0.19417542219161987
epoch: 14 training_loss 0.18742053374648093 test_loss: 0.18831822872161866
epoch: 15 training_loss 0.19245520994067192 test_loss: 0.1961047887802124
epoch: 16 training_loss 0.1887187184393406 test_loss: 0.19857351779937743
epoch: 17 training_loss 0.18679825134575367 test_loss: 0.18822576999664306
epoch: 18 training_loss 0.18623647801578044 test_loss: 0.19018522500991822
epoch: 19 training_loss 0.18952286295592785 test_loss: 0.18569347858428956
epoch: 20 training_loss 0.1845584912598133 test_loss: 0.19223439693450928
epoch: 21 training_loss 0.18638654485344885 test_loss: 0.17759357690811156
epoch: 22 training_loss 0.18231713958084583 test_loss: 0.18310176134109496
epoch: 23 training_loss 0.18287392444908618 test_loss: 0.1857048511505127
epoch: 24 training_loss 0.1848778611421585 test_loss: 0.19861897230148315
epoch: 25 training_loss 0.18387260526418686 test_loss: 0.1806178331375122
epoch: 26 training_loss 0.18506229780614375 test_loss: 0.19738224744796753
epoch: 27 training_loss 0.1806221977621317 test_loss: 0.19557427167892455
epoch: 28 training_loss 0.1850450821965933 test_loss: 0.17133618593215943
epoch: 29 training_loss 0.18641782097518445 test_loss: 0.18300656080245972
epoch: 30 training_loss 0.17986516170203687 test_loss: 0.180400812625885
epoch: 31 training_loss 0.18146514512598513 test_loss: 0.1938373327255249
epoch: 32 training_loss 0.18013240166008473 test_loss: 0.17852760553359986
epoch: 33 training_loss 0.18352901242673397 test_loss: 0.19772495031356813
epoch: 34 training_loss 0.18248321495950223 test_loss: 0.17624282836914062
epoch: 35 training_loss 0.17794326804578303 test_loss: 0.16190927028656005
epoch: 36 training_loss 0.18873122446238993 test_loss: 0.17649878263473512
epoch: 37 training_loss 0.1821964454650879 test_loss: 0.18752413988113403
epoch: 38 training_loss 0.1931112640351057 test_loss: 0.19747567176818848
epoch: 39 training_loss 0.18531993247568607 test_loss: 0.18766531944274903
epoch: 40 training_loss 0.18727431952953338 test_loss: 0.1820913791656494
epoch: 41 training_loss 0.1833417858928442 test_loss: 0.18399689197540284
epoch: 42 training_loss 0.18168248891830444 test_loss: 0.1852169632911682
epoch: 43 training_loss 0.18024570547044277 test_loss: 0.18664425611495972
epoch: 44 training_loss 0.18554695941507815 test_loss: 0.185744571685791
epoch: 45 training_loss 0.18659126050770283 test_loss: 0.18732203245162965
epoch: 46 training_loss 0.1782247641682625 test_loss: 0.18388079404830932
epoch: 47 training_loss 0.18881613694131374 test_loss: 0.19138420820236207
epoch: 48 training_loss 0.17440725985914468 test_loss: 0.17844690084457399
epoch: 49 training_loss 0.17771439261734487 test_loss: 0.17992310523986815
epoch: 50 training_loss 0.17950774408876896 test_loss: 0.1892891049385071
epoch: 51 training_loss 0.18433393366634845 test_loss: 0.18634601831436157
epoch: 52 training_loss 0.18587930098176003 test_loss: 0.19429813623428344
epoch: 53 training_loss 0.18425045490264894 test_loss: 0.18575419187545777
epoch: 54 training_loss 0.18018225848674774 test_loss: 0.19377644062042237
epoch: 55 training_loss 0.18076608210802078 test_loss: 0.18061110973358155
epoch: 56 training_loss 0.17945863507688045 test_loss: 0.17363061904907226
epoch: 57 training_loss 0.18155482798814773 test_loss: 0.1740636110305786
epoch: 58 training_loss 0.1787212909013033 test_loss: 0.175143039226532
epoch: 59 training_loss 0.1932257179915905 test_loss: 0.18599749803543092
epoch: 60 training_loss 0.17692643657326698 test_loss: 0.19033483266830445
epoch: 61 training_loss 0.17621698819100856 test_loss: 0.18129165172576905
epoch: 62 training_loss 0.18342240072786808 test_loss: 0.19674420356750488
epoch: 63 training_loss 0.1833623545616865 test_loss: 0.1788737654685974
epoch: 64 training_loss 0.18073896862566471 test_loss: 0.18224313259124755
epoch: 65 training_loss 0.17389550518244504 test_loss: 0.1777659296989441
epoch: 66 training_loss 0.18504065312445164 test_loss: 0.18010097742080688
epoch: 67 training_loss 0.1779127975180745 test_loss: 0.1867367148399353
epoch: 68 training_loss 0.1812663020938635 test_loss: 0.1894576907157898
epoch: 69 training_loss 0.17950551860034467 test_loss: 0.1939847230911255
epoch: 70 training_loss 0.17786249294877052 test_loss: 0.16016669273376466
epoch: 71 training_loss 0.18122687511146068 test_loss: 0.18094795942306519
epoch: 72 training_loss 0.17515665113925935 test_loss: 0.1722743034362793
epoch: 73 training_loss 0.18233639776706695 test_loss: 0.18375391960144044
epoch: 74 training_loss 0.1740999050438404 test_loss: 0.17007904052734374
epoch: 75 training_loss 0.18183213844895363 test_loss: 0.1728852868080139
epoch: 76 training_loss 0.17415361911058425 test_loss: 0.17298234701156617
epoch: 77 training_loss 0.18678365349769593 test_loss: 0.19531742334365845
epoch: 78 training_loss 0.17627229772508143 test_loss: 0.18397363424301147
epoch: 79 training_loss 0.17409625858068467 test_loss: 0.18018574714660646
epoch: 80 training_loss 0.17330054104328155 test_loss: 0.18122341632843017
epoch: 81 training_loss 0.1738935276865959 test_loss: 0.20082108974456786
epoch: 82 training_loss 0.1752148724347353 test_loss: 0.17027156352996825
epoch: 83 training_loss 0.18542053021490573 test_loss: 0.17921496629714967
epoch: 84 training_loss 0.1819039895385504 test_loss: 0.1840839147567749
epoch: 85 training_loss 0.1794315751641989 test_loss: 0.18127839565277098
epoch: 86 training_loss 0.17635124184191228 test_loss: 0.1759117364883423
epoch: 87 training_loss 0.1804280575364828 test_loss: 0.17870975732803346
epoch: 88 training_loss 0.17225272141397 test_loss: 0.17277387380599976
epoch: 89 training_loss 0.18100420787930488 test_loss: 0.18290529251098633
epoch: 90 training_loss 0.17995031893253327 test_loss: 0.17392925024032593
epoch: 91 training_loss 0.18307775743305682 test_loss: 0.18055646419525145
epoch: 92 training_loss 0.1795166314393282 test_loss: 0.17293496131896974
epoch: 93 training_loss 0.17862121760845184 test_loss: 0.19141603708267213
epoch: 94 training_loss 0.1798519092798233 test_loss: 0.18397554159164428
epoch: 95 training_loss 0.1770899474620819 test_loss: 0.16564563512802125
epoch: 96 training_loss 0.17334098242223261 test_loss: 0.17508059740066528
epoch: 97 training_loss 0.18131314761936665 test_loss: 0.19147863388061523
epoch: 98 training_loss 0.18201672367751598 test_loss: 0.16775325536727906
epoch: 99 training_loss 0.18818768322467805 test_loss: 0.17059634923934935
epoch: 100 training_loss 0.18403055138885974 test_loss: 0.18996549844741822
epoch: 101 training_loss 0.17358254697173833 test_loss: 0.17445080280303954
epoch: 102 training_loss 0.16898670449852943 test_loss: 0.17659422159194946
epoch: 103 training_loss 0.17994803484529256 test_loss: 0.18037767410278321
epoch: 104 training_loss 0.18064845591783524 test_loss: 0.16205918788909912
epoch: 105 training_loss 0.16982519052922726 test_loss: 0.18763757944107057
epoch: 106 training_loss 0.17518975503742695 test_loss: 0.17488218545913697
epoch: 107 training_loss 0.18235461369156838 test_loss: 0.1686745524406433
epoch: 108 training_loss 0.17647571943700313 test_loss: 0.17570858001708983
epoch: 109 training_loss 0.1822991342842579 test_loss: 0.17673008441925048
epoch: 110 training_loss 0.17536565586924552 test_loss: 0.18505439758300782
epoch: 111 training_loss 0.17248215839266778 test_loss: 0.1696267008781433
epoch: 112 training_loss 0.1823351814597845 test_loss: 0.19751635789871216
epoch: 113 training_loss 0.17898560732603072 test_loss: 0.18156943321228028
epoch: 114 training_loss 0.17322359532117843 test_loss: 0.17068339586257936
epoch: 115 training_loss 0.16773482531309128 test_loss: 0.18482714891433716
epoch: 116 training_loss 0.17820181705057622 test_loss: 0.18474526405334474
epoch: 117 training_loss 0.17355223439633846 test_loss: 0.18951038122177125
epoch: 118 training_loss 0.1701970164477825 test_loss: 0.1693871259689331
epoch: 119 training_loss 0.17335904154926537 test_loss: 0.19790083169937134
epoch: 120 training_loss 0.17720310330390932 test_loss: 0.15512187480926515
epoch: 121 training_loss 0.18494917765259744 test_loss: 0.198544180393219
epoch: 122 training_loss 0.17657672971487046 test_loss: 0.1899079203605652
epoch: 123 training_loss 0.17657376728951932 test_loss: 0.16451597213745117
epoch: 124 training_loss 0.18281837150454522 test_loss: 0.18192152976989745
epoch: 125 training_loss 0.18467329762876034 test_loss: 0.1792409062385559
epoch: 126 training_loss 0.1730189709365368 test_loss: 0.176552677154541
epoch: 127 training_loss 0.17958681993186473 test_loss: 0.17661552429199218
epoch: 128 training_loss 0.17648987263441085 test_loss: 0.1929112195968628
epoch: 129 training_loss 0.17595357783138751 test_loss: 0.18135640621185303
epoch: 130 training_loss 0.18775259867310523 test_loss: 0.16119381189346313
epoch: 131 training_loss 0.1685865853726864 test_loss: 0.17672923803329468
epoch: 132 training_loss 0.17845155909657479 test_loss: 0.18448266983032227
epoch: 133 training_loss 0.1755088347941637 test_loss: 0.19727599620819092
epoch: 134 training_loss 0.17412881627678872 test_loss: 0.17670021057128907
epoch: 135 training_loss 0.17410993717610837 test_loss: 0.16430264711380005
epoch: 136 training_loss 0.17690712310373782 test_loss: 0.16693035364151002
epoch: 137 training_loss 0.18036310449242593 test_loss: 0.18885904550552368
epoch: 138 training_loss 0.18214328736066818 test_loss: 0.17653242349624634
epoch: 139 training_loss 0.1790070443600416 test_loss: 0.18139634132385254
epoch: 140 training_loss 0.17735577143728734 test_loss: 0.19893442392349242
epoch: 141 training_loss 0.17805233057588338 test_loss: 0.1702183961868286
epoch: 142 training_loss 0.1813552551716566 test_loss: 0.18101742267608642
epoch: 143 training_loss 0.17462926425039768 test_loss: 0.18227323293685913
epoch: 144 training_loss 0.17729402005672454 test_loss: 0.19084272384643555
epoch: 145 training_loss 0.17887999415397643 test_loss: 0.18532792329788209
epoch: 146 training_loss 0.17906563997268676 test_loss: 0.18001242876052856
epoch: 147 training_loss 0.1787012753635645 test_loss: 0.17871519327163696
epoch: 148 training_loss 0.1712555381655693 test_loss: 0.1688866972923279
epoch: 149 training_loss 0.17764823608100413 test_loss: 0.18707945346832275
episode: 0 training return: -1493.5723666638287
episode: 1 training return: -1742.5222780628353
episode: 2 training return: -951.8704333543419
episode: 3 training return: -1101.7462957980558
epoch: 1 test_true_pfm: 229.4801365041986 sim_pfm: -777.7492531294209
episode: 4 training return: -1573.1316334475769
episode: 5 training return: -1487.116114287378
episode: 6 training return: -1923.344246667626
episode: 7 training return: -983.0688117388102
epoch: 2 test_true_pfm: 220.81415285375658 sim_pfm: -829.0536293751428
episode: 8 training return: -1351.339434275627
episode: 9 training return: -1489.2642828920818
episode: 10 training return: -792.4347933596234
episode: 11 training return: -844.0482465605924
epoch: 3 test_true_pfm: 223.3796674886356 sim_pfm: -833.5016409456252
episode: 12 training return: -746.6356241946299
episode: 13 training return: -736.4868917084208
episode: 14 training return: -684.5350037011303
episode: 15 training return: -678.9825906216196
epoch: 4 test_true_pfm: 232.41455039877474 sim_pfm: -772.262337248835
episode: 16 training return: -648.8204308803472
episode: 17 training return: -622.7294792729457
episode: 18 training return: -599.590639123499
episode: 19 training return: -599.8244477760555
epoch: 5 test_true_pfm: 316.8131753109115 sim_pfm: -567.1951171105493
episode: 20 training return: -595.3003380208963
episode: 21 training return: -585.259273345314
episode: 22 training return: -583.0977140778365
episode: 23 training return: -602.6731863806473
epoch: 6 test_true_pfm: 270.40139639902117 sim_pfm: -597.7591552152443
episode: 24 training return: -586.7868487319997
episode: 25 training return: -545.7649219090518
episode: 26 training return: -578.6622301498807
episode: 27 training return: -538.7458277622892
epoch: 7 test_true_pfm: 423.7954647983208 sim_pfm: -526.9674224567053
episode: 28 training return: -565.0423693682052
episode: 29 training return: -526.8621520584403
episode: 30 training return: -508.41969044042025
episode: 31 training return: -530.5536018354272
epoch: 8 test_true_pfm: 583.5974034748456 sim_pfm: -485.2380047222382
episode: 32 training return: -530.1531129110102
episode: 33 training return: -533.5892598289433
episode: 34 training return: -542.391772403141
episode: 35 training return: -528.6729486510288
epoch: 9 test_true_pfm: 611.0184130319534 sim_pfm: -430.6069205136021
episode: 36 training return: -517.8637051505214
episode: 37 training return: -475.3897343924699
episode: 38 training return: -500.97272914162136
episode: 39 training return: -498.4262879575208
epoch: 10 test_true_pfm: 613.663152243791 sim_pfm: -442.5364708445516
episode: 40 training return: -464.1384560585337
episode: 41 training return: -497.82394889871205
episode: 42 training return: -488.00627316275825
episode: 43 training return: -487.382425347747
epoch: 11 test_true_pfm: 589.6078143585615 sim_pfm: -421.0497562713336
episode: 44 training return: -450.92341317860325
episode: 45 training return: -476.2586253288522
episode: 46 training return: -486.98663136082536
episode: 47 training return: -455.23018757993015
epoch: 12 test_true_pfm: 616.5281581523133 sim_pfm: -395.1538541148912
episode: 48 training return: -466.54077904686693
episode: 49 training return: -455.35410848943917
episode: 50 training return: -475.3054199010209
episode: 51 training return: -475.14354184940424
epoch: 13 test_true_pfm: 615.1032092505125 sim_pfm: -399.53162874806145
episode: 52 training return: -453.55915250480996
episode: 53 training return: -439.02187470493965
episode: 54 training return: -455.6853675046569
episode: 55 training return: -453.45174201342365
epoch: 14 test_true_pfm: 669.040114822771 sim_pfm: -400.06946917968935
episode: 56 training return: -466.0012776824054
episode: 57 training return: -443.55560510161774
episode: 58 training return: -453.089560157512
episode: 59 training return: -431.9440518417982
epoch: 15 test_true_pfm: 691.5431864986786 sim_pfm: -374.4529704242589
episode: 60 training return: -434.6143226908551
episode: 61 training return: -452.452025794898
episode: 62 training return: -454.88916496115957
episode: 63 training return: -450.92583391124487
epoch: 16 test_true_pfm: 807.4861797712383 sim_pfm: -323.15573394843074
episode: 64 training return: -424.1944882346723
episode: 65 training return: -438.1483487498672
episode: 66 training return: -430.2045447984837
episode: 67 training return: -442.4236773922149
epoch: 17 test_true_pfm: 705.9564499196136 sim_pfm: -364.09843665993395
episode: 68 training return: -400.99984161448845
episode: 69 training return: -406.0468520657361
episode: 70 training return: -422.7732161768359
episode: 71 training return: -431.4444017240624
epoch: 18 test_true_pfm: 725.9880100832505 sim_pfm: -362.138220387472
episode: 72 training return: -409.56634100190865
episode: 73 training return: -417.6844121560498
episode: 74 training return: -432.4480908492644
episode: 75 training return: -437.23882847772495
epoch: 19 test_true_pfm: 753.5681310375458 sim_pfm: -337.78359377920924
episode: 76 training return: -398.16621207515124
episode: 77 training return: -407.69625023969377
episode: 78 training return: -423.8018846666333
episode: 79 training return: -406.5986478761596
epoch: 20 test_true_pfm: 872.856352653886 sim_pfm: -297.867102106475
episode: 80 training return: -418.77730141381403
episode: 81 training return: -430.10449091518353
episode: 82 training return: -426.6555163871754
episode: 83 training return: -412.50842908540415
epoch: 21 test_true_pfm: 841.4252771405158 sim_pfm: -300.2992002482104
episode: 84 training return: -378.85719407373836
episode: 85 training return: -405.08376633607685
episode: 86 training return: -386.7662601104299
episode: 87 training return: -381.31438612662936
epoch: 22 test_true_pfm: 831.052758093042 sim_pfm: -301.35405707257723
episode: 88 training return: -382.02293724251786
episode: 89 training return: -398.67282412820646
episode: 90 training return: -380.322419119896
episode: 91 training return: -405.711436981163
epoch: 23 test_true_pfm: 773.538111674165 sim_pfm: -345.15079114567806
episode: 92 training return: -415.34202912024614
episode: 93 training return: -404.99387076907703
episode: 94 training return: -423.06231952904017
episode: 95 training return: -389.077033023924
epoch: 24 test_true_pfm: 777.3763528546721 sim_pfm: -325.5601254632136
episode: 96 training return: -400.61247398475837
episode: 97 training return: -431.23279187665804
episode: 98 training return: -411.7858162753183
episode: 99 training return: -389.48214806660604
epoch: 25 test_true_pfm: 775.0430135892219 sim_pfm: -316.93399908295123
episode: 100 training return: -396.8056669039173
episode: 101 training return: -389.91817593206264
episode: 102 training return: -417.7708032193214
episode: 103 training return: -406.67623157737074
epoch: 26 test_true_pfm: 868.5791329877889 sim_pfm: -283.3011679489841
episode: 104 training return: -409.94611376733644
episode: 105 training return: -397.84793132655494
episode: 106 training return: -374.10966426186775
episode: 107 training return: -420.4028016036594
epoch: 27 test_true_pfm: 839.7777641322783 sim_pfm: -272.0701665961162
episode: 108 training return: -378.856358433864
episode: 109 training return: -401.67713731548304
episode: 110 training return: -338.01540345784326
episode: 111 training return: -384.2760918673651
epoch: 28 test_true_pfm: 949.6060915334156 sim_pfm: -264.289807510857
episode: 112 training return: -382.45472959646486
episode: 113 training return: -357.0905512681428
episode: 114 training return: -347.7271792649913
episode: 115 training return: -375.4095595264021
epoch: 29 test_true_pfm: 886.9173320207419 sim_pfm: -280.0755321150441
episode: 116 training return: -356.99474637297703
episode: 117 training return: -354.1201718525338
episode: 118 training return: -347.81496446531315
episode: 119 training return: -381.4416283933209
epoch: 30 test_true_pfm: 975.4478082712 sim_pfm: -229.06056362471793
episode: 120 training return: -335.21590179386254
episode: 121 training return: -338.22005714258205
episode: 122 training return: -374.54900701570244
episode: 123 training return: -360.73048546143264
epoch: 31 test_true_pfm: 923.9569206927764 sim_pfm: -272.94913214055305
episode: 124 training return: -358.1210401880414
episode: 125 training return: -326.27424288416506
episode: 126 training return: -385.37516299586486
episode: 127 training return: -379.03779197426985
epoch: 32 test_true_pfm: 858.5480253427362 sim_pfm: -287.4697825926016
episode: 128 training return: -344.2501601853556
episode: 129 training return: -372.79166393978846
episode: 130 training return: -349.2135145956866
episode: 131 training return: -380.34038412358814
epoch: 33 test_true_pfm: 897.7114862466491 sim_pfm: -268.72636530613437
episode: 132 training return: -331.17740936321417
episode: 133 training return: -345.74068331609317
episode: 134 training return: -349.30687744634264
episode: 135 training return: -345.6771898582972
epoch: 34 test_true_pfm: 933.4889501554752 sim_pfm: -246.54006911331348
episode: 136 training return: -371.9327669959146
episode: 137 training return: -319.1817181779021
episode: 138 training return: -342.6552164550329
episode: 139 training return: -332.8104632721934
epoch: 35 test_true_pfm: 885.8090441783252 sim_pfm: -274.8428142654291
episode: 140 training return: -386.8835971084705
episode: 141 training return: -311.65480792350866
episode: 142 training return: -328.85019652057014
episode: 143 training return: -343.4477771826074
epoch: 36 test_true_pfm: 904.064844005425 sim_pfm: -260.2708369736975
episode: 144 training return: -350.17180332817645
episode: 145 training return: -341.80910061877785
episode: 146 training return: -294.91248665455856
episode: 147 training return: -317.2580954904756
epoch: 37 test_true_pfm: 797.3746194648488 sim_pfm: -315.052151976661
episode: 148 training return: -359.2603199402418
episode: 149 training return: -334.76471108979376
episode: 150 training return: -294.0430291423842
episode: 151 training return: -334.5346733118879
epoch: 38 test_true_pfm: 860.540700212044 sim_pfm: -270.60460026424215
episode: 152 training return: -305.46086474530756
episode: 153 training return: -311.1126493056035
episode: 154 training return: -319.45925178554717
episode: 155 training return: -322.707735281847
epoch: 39 test_true_pfm: 871.0584795216164 sim_pfm: -286.4559891775637
episode: 156 training return: -299.6879581254809
episode: 157 training return: -305.35220015619205
episode: 158 training return: -340.17495408673216
episode: 159 training return: -336.1340920179519
epoch: 40 test_true_pfm: 986.0178775168355 sim_pfm: -203.1270603226628
episode: 160 training return: -287.86992392483364
episode: 161 training return: -302.4407242630282
episode: 162 training return: -329.06554031889783
episode: 163 training return: -333.02881689591936
epoch: 41 test_true_pfm: 935.5443884674397 sim_pfm: -245.7759513121754
episode: 164 training return: -317.12321661639004
episode: 165 training return: -267.34366567161493
episode: 166 training return: -215.62752680742264
episode: 167 training return: -263.068500686836
epoch: 42 test_true_pfm: 962.6503966922218 sim_pfm: -252.88534654944874
episode: 168 training return: -310.9927503147577
episode: 169 training return: -306.9123094852782
episode: 170 training return: -289.7298560532558
episode: 171 training return: -250.766334076081
epoch: 43 test_true_pfm: 853.9149650941812 sim_pfm: -265.81803471521306
episode: 172 training return: -307.7329463999617
episode: 173 training return: -265.63805742530513
episode: 174 training return: -251.35968672197396
episode: 175 training return: -180.192995227884
epoch: 44 test_true_pfm: 1160.673286086191 sim_pfm: -116.91369068936677
episode: 176 training return: -277.118558180755
episode: 177 training return: -256.8616403023933
episode: 178 training return: -275.4954452546848
episode: 179 training return: -254.88755395361355
epoch: 45 test_true_pfm: 969.8582651868437 sim_pfm: -227.70397720050735
episode: 180 training return: -222.9793979369256
episode: 181 training return: -216.27013034961215
episode: 182 training return: -217.2377428709943
episode: 183 training return: -227.08668114372895
epoch: 46 test_true_pfm: 990.1230725700461 sim_pfm: -180.28650428508078
episode: 184 training return: -209.96853692021466
episode: 185 training return: -222.67349883108503
episode: 186 training return: -211.5297853091127
episode: 187 training return: -262.33481189261227
epoch: 47 test_true_pfm: 1140.725707610549 sim_pfm: -120.93347789695495
episode: 188 training return: -198.06725203608028
episode: 189 training return: -240.3532015749882
episode: 190 training return: -226.44240072590975
episode: 191 training return: -278.2017589914865
epoch: 48 test_true_pfm: 1282.2361148566142 sim_pfm: -67.32554474274342
episode: 192 training return: -224.5807639949154
episode: 193 training return: -203.1204081943082
episode: 194 training return: -196.8686497330489
episode: 195 training return: -182.56191623617894
epoch: 49 test_true_pfm: 1265.4142034167169 sim_pfm: -103.88165357590572
episode: 196 training return: -167.58536054604897
episode: 197 training return: -139.46548861695032
episode: 198 training return: -173.965152590036
episode: 199 training return: -157.18150409655053
epoch: 50 test_true_pfm: 1130.1085422071822 sim_pfm: -125.51055959019196
episode: 200 training return: -181.4914883487476
episode: 201 training return: -157.89319909989467
episode: 202 training return: -199.74436981651232
episode: 203 training return: -214.52026587582466
epoch: 51 test_true_pfm: 1040.9393526683316 sim_pfm: -162.90463912651762
episode: 204 training return: -174.66253323888338
episode: 205 training return: -163.7174503734895
episode: 206 training return: -191.6105893218992
episode: 207 training return: -191.41070532573775
epoch: 52 test_true_pfm: 1221.814075763885 sim_pfm: -79.34010677976012
episode: 208 training return: -246.0523004340963
episode: 209 training return: -135.86895829632567
episode: 210 training return: -200.77386640001737
episode: 211 training return: -204.91183063759087
epoch: 53 test_true_pfm: 1279.597469746229 sim_pfm: -43.67607378056428
episode: 212 training return: -171.6114256653827
episode: 213 training return: -172.10503066084527
episode: 214 training return: -179.94435180671806
episode: 215 training return: -180.30919322615972
epoch: 54 test_true_pfm: 1213.9532811849688 sim_pfm: -72.55576303131433
episode: 216 training return: -220.4446833165558
episode: 217 training return: -167.842094155438
episode: 218 training return: -158.81545093377702
episode: 219 training return: -144.39230622927866
epoch: 55 test_true_pfm: 1060.253121711097 sim_pfm: -150.76309223057777
episode: 220 training return: -123.42011984934472
episode: 221 training return: -211.29882458441315
episode: 222 training return: -118.07959919071382
episode: 223 training return: -172.7523310235047
epoch: 56 test_true_pfm: 1148.160673658519 sim_pfm: -109.98890701774467
episode: 224 training return: -176.74980830574037
episode: 225 training return: -129.52758789986285
episode: 226 training return: -237.38481264585846
episode: 227 training return: -157.78682981579087
epoch: 57 test_true_pfm: 1357.93216524952 sim_pfm: -63.07180879928183
episode: 228 training return: -169.29365351294973
episode: 229 training return: -121.5769565489488
episode: 230 training return: -157.34486142400712
episode: 231 training return: -134.96360987637664
epoch: 58 test_true_pfm: 1233.9314716974484 sim_pfm: -66.0048740292653
episode: 232 training return: -142.24588714556612
episode: 233 training return: -185.16372888357105
episode: 234 training return: -150.958510485588
episode: 235 training return: -150.6542197958542
epoch: 59 test_true_pfm: 1317.5902638327577 sim_pfm: -72.49660639888336
episode: 236 training return: -162.22047292867617
episode: 237 training return: -166.96108253371813
episode: 238 training return: -166.19220324404213
episode: 239 training return: -121.8300944690162
epoch: 60 test_true_pfm: 1151.210681515992 sim_pfm: -75.70239128482253
episode: 240 training return: -168.75829964458705
episode: 241 training return: -158.0408815469077
episode: 242 training return: -117.38105357730029
episode: 243 training return: -199.65023562268414
epoch: 61 test_true_pfm: 1232.7140452787028 sim_pfm: -66.35859888551674
episode: 244 training return: -131.2089511865378
episode: 245 training return: -169.22983564412914
episode: 246 training return: -146.5519974118366
episode: 247 training return: -191.31827232573917
epoch: 62 test_true_pfm: 1191.1852004994655 sim_pfm: -74.71150017550913
episode: 248 training return: -213.48734715624235
episode: 249 training return: -131.3881592233622
episode: 250 training return: -128.28006031398414
episode: 251 training return: -148.8302315353456
epoch: 63 test_true_pfm: 1359.8069329515706 sim_pfm: -18.221523292448307
episode: 252 training return: -137.46908090025678
episode: 253 training return: -99.63359239174187
episode: 254 training return: -149.957695906709
episode: 255 training return: -119.55390057692671
epoch: 64 test_true_pfm: 1223.674435263961 sim_pfm: -52.18286979352998
episode: 256 training return: -135.74346396894785
episode: 257 training return: -144.59843465529926
episode: 258 training return: -133.87534838843604
episode: 259 training return: -136.5650021563013
epoch: 65 test_true_pfm: 1439.8934626185692 sim_pfm: -12.342295261503315
episode: 260 training return: -115.77459151010909
episode: 261 training return: -172.41853707048992
episode: 262 training return: -117.63823462979019
episode: 263 training return: -150.08426953609282
epoch: 66 test_true_pfm: 1320.1713901130242 sim_pfm: -56.252614180955085
episode: 264 training return: -128.751307481441
episode: 265 training return: -120.18458029589716
episode: 266 training return: -207.204223178271
episode: 267 training return: -143.89607436099467
epoch: 67 test_true_pfm: 1232.7087821637222 sim_pfm: -51.49259544529557
episode: 268 training return: -153.54730881013484
episode: 269 training return: -137.79165598493657
episode: 270 training return: -143.40291159115526
episode: 271 training return: -148.75211129653843
epoch: 68 test_true_pfm: 1257.2769331659993 sim_pfm: -39.50389021805347
episode: 272 training return: -113.16266392646753
episode: 273 training return: -109.25240391676861
episode: 274 training return: -119.64637247555409
episode: 275 training return: -137.1035989911978
epoch: 69 test_true_pfm: 1216.9783795204964 sim_pfm: -60.42566911988384
episode: 276 training return: -120.57771288143633
episode: 277 training return: -108.55442865836574
episode: 278 training return: -136.54143487748453
episode: 279 training return: -162.40748665387025
epoch: 70 test_true_pfm: 1251.3596403312033 sim_pfm: -48.231721048698454
episode: 280 training return: -148.61210599502354
episode: 281 training return: -118.28477780788478
episode: 282 training return: -164.63627447950196
episode: 283 training return: -131.3808042745821
epoch: 71 test_true_pfm: 1447.9319881825684 sim_pfm: -19.14769488072443
episode: 284 training return: -119.60912642165749
episode: 285 training return: -169.26906921233572
episode: 286 training return: -125.91350949475213
episode: 287 training return: -141.24418087106756
epoch: 72 test_true_pfm: 1449.1899978509944 sim_pfm: -2.26400556999161
episode: 288 training return: -200.23752928386662
episode: 289 training return: -96.76369434752885
episode: 290 training return: -129.88429590539906
episode: 291 training return: -166.5888204532487
epoch: 73 test_true_pfm: 1285.4246317380787 sim_pfm: -3.4233171842090226
episode: 292 training return: -157.12894987766302
episode: 293 training return: -173.28010592046684
episode: 294 training return: -104.47853376531653
episode: 295 training return: -120.99728513403522
epoch: 74 test_true_pfm: 1356.2708709783826 sim_pfm: -10.977623235069323
episode: 296 training return: -119.30356777118675
episode: 297 training return: -178.18642558742656
episode: 298 training return: -128.53345389732485
episode: 299 training return: -110.91080611020358
epoch: 75 test_true_pfm: 1139.0025733014002 sim_pfm: -103.36069478808389
episode: 300 training return: -133.1004021981547
episode: 301 training return: -148.2620448901243
episode: 302 training return: -121.04069209016073
episode: 303 training return: -101.43223765261808
epoch: 76 test_true_pfm: 1116.8392426989894 sim_pfm: -119.82667414024799
episode: 304 training return: -117.34369304783323
episode: 305 training return: -99.83549058357796
episode: 306 training return: -159.15545053370795
episode: 307 training return: -125.31606556990525
epoch: 77 test_true_pfm: 1370.354696783219 sim_pfm: -30.571013558123408
episode: 308 training return: -116.61457009217324
episode: 309 training return: -134.35808403033977
episode: 310 training return: -96.0521040009084
episode: 311 training return: -126.01759198084636
epoch: 78 test_true_pfm: 1210.5793218014398 sim_pfm: -66.26353938577748
episode: 312 training return: -119.00087486566761
episode: 313 training return: -114.36545924004521
episode: 314 training return: -124.56798463018922
episode: 315 training return: -177.16133346623914
epoch: 79 test_true_pfm: 1435.4880237169707 sim_pfm: -27.374458978022336
episode: 316 training return: -131.68021603470388
episode: 317 training return: -117.6372529367594
episode: 318 training return: -96.24838697595249
episode: 319 training return: -108.89099772144758
epoch: 80 test_true_pfm: 1352.6235577427133 sim_pfm: 6.2369556721529475
episode: 320 training return: -148.91779483465925
episode: 321 training return: -118.79308773869008
episode: 322 training return: -97.80310152463105
episode: 323 training return: -127.6819263124828
epoch: 81 test_true_pfm: 1229.5213646318546 sim_pfm: -58.61622168699821
episode: 324 training return: -88.71643932844358
episode: 325 training return: -91.63620905084947
episode: 326 training return: -107.46663377348415
episode: 327 training return: -134.59879428918853
epoch: 82 test_true_pfm: 1382.8982201875779 sim_pfm: -34.26139494369726
episode: 328 training return: -108.02432066671228
episode: 329 training return: -100.54703167210069
episode: 330 training return: -125.51509603707835
episode: 331 training return: -157.84843804135636
epoch: 83 test_true_pfm: 1488.3152519521918 sim_pfm: 4.370355888464327
episode: 332 training return: -128.576682880862
episode: 333 training return: -121.30962453660887
episode: 334 training return: -100.82726679066941
episode: 335 training return: -115.28307843712655
epoch: 84 test_true_pfm: 1549.4811833140946 sim_pfm: -0.7601267028705854
episode: 336 training return: -125.28621456411415
episode: 337 training return: -159.29488004325088
episode: 338 training return: -115.42011992827028
episode: 339 training return: -112.97020379685668
epoch: 85 test_true_pfm: 1500.244088606368 sim_pfm: 3.7520122758154066
episode: 340 training return: -103.8024215861435
episode: 341 training return: -133.7167220282587
episode: 342 training return: -83.73138343134202
episode: 343 training return: -115.89519666280773
epoch: 86 test_true_pfm: 1241.2728924785322 sim_pfm: -60.487931750667684
episode: 344 training return: -98.06416734571305
episode: 345 training return: -104.7729141684331
episode: 346 training return: -88.9533416329314
episode: 347 training return: -105.51581528971415
epoch: 87 test_true_pfm: 1320.8179385004632 sim_pfm: -25.00027202122111
episode: 348 training return: -141.34973921550235
episode: 349 training return: -114.21023765972785
episode: 350 training return: -85.06198043105856
episode: 351 training return: -114.5744818503977
epoch: 88 test_true_pfm: 1269.7863300623603 sim_pfm: -12.821333362370131
episode: 352 training return: -83.0051957202306
episode: 353 training return: -105.46002591436175
episode: 354 training return: -91.6404337476557
episode: 355 training return: -101.19768695474215
epoch: 89 test_true_pfm: 1318.0860977288337 sim_pfm: -16.181053704144507
episode: 356 training return: -115.65084562915848
episode: 357 training return: -116.68139029639548
episode: 358 training return: -71.34666202572674
episode: 359 training return: -97.60708297148726
epoch: 90 test_true_pfm: 1447.8900635478706 sim_pfm: 12.44942488818838
episode: 360 training return: -140.2014800689374
episode: 361 training return: -112.39556650840277
episode: 362 training return: -100.26502649740044
episode: 363 training return: -113.68224684699757
epoch: 91 test_true_pfm: 1254.797529716677 sim_pfm: -33.036416385628094
episode: 364 training return: -93.85081609330476
episode: 365 training return: -121.9720112608744
episode: 366 training return: -94.7533857444927
episode: 367 training return: -125.09852587834024
epoch: 92 test_true_pfm: 1474.4232168646213 sim_pfm: 7.061492042483635
episode: 368 training return: -91.46560495389043
episode: 369 training return: -111.73543976106002
episode: 370 training return: -103.13248565712789
episode: 371 training return: -109.57491791920867
epoch: 93 test_true_pfm: 1452.9745250699063 sim_pfm: -14.061511105018463
episode: 372 training return: -152.57934619215288
episode: 373 training return: -79.2204405242356
episode: 374 training return: -94.92912916429529
episode: 375 training return: -111.41393440623095
epoch: 94 test_true_pfm: 1425.3949766382702 sim_pfm: -17.04487782648882
episode: 376 training return: -106.4014109222152
episode: 377 training return: -102.31098693676508
episode: 378 training return: -93.08811406284485
episode: 379 training return: -77.20343913464578
epoch: 95 test_true_pfm: 1412.6759418858321 sim_pfm: -23.513349139770217
episode: 380 training return: -110.31108882899036
episode: 381 training return: -112.5044956837961
episode: 382 training return: -69.66171334265431
episode: 383 training return: -65.3862173644679
epoch: 96 test_true_pfm: 1490.9355227954863 sim_pfm: 25.617730504393464
episode: 384 training return: -134.12159110290452
episode: 385 training return: -124.18290843666108
episode: 386 training return: -91.95293677275671
episode: 387 training return: -110.082163351369
epoch: 97 test_true_pfm: 1325.3963078803567 sim_pfm: -6.863056384570265
episode: 388 training return: -85.86004706688962
episode: 389 training return: -75.96679395424293
episode: 390 training return: -110.00828801057186
episode: 391 training return: -81.76650055189533
epoch: 98 test_true_pfm: 1384.8758603109302 sim_pfm: -3.4477488683839472
episode: 392 training return: -107.63517224165197
episode: 393 training return: -75.99337793999476
episode: 394 training return: -117.50846227075154
episode: 395 training return: -89.5095807697017
epoch: 99 test_true_pfm: 1444.792514949613 sim_pfm: -2.259601797448769
episode: 396 training return: -100.7865477154103
episode: 397 training return: -100.50490762818345
episode: 398 training return: -82.26579087060259
episode: 399 training return: -100.57511884051645
epoch: 100 test_true_pfm: 1291.4005750236447 sim_pfm: -29.62826533315607
episode: 400 training return: -106.61838721422231
episode: 401 training return: -134.0711341065575
episode: 402 training return: -85.37873477115184
episode: 403 training return: -129.37816189543454
epoch: 101 test_true_pfm: 1276.901508192819 sim_pfm: -22.768955960985043
episode: 404 training return: -121.45544570651839
episode: 405 training return: -76.06065942299718
episode: 406 training return: -70.47082341015346
episode: 407 training return: -95.6167585877832
epoch: 102 test_true_pfm: 1455.092933611856 sim_pfm: 13.884850033388282
episode: 408 training return: -97.22068255814118
episode: 409 training return: -92.41313539340366
episode: 410 training return: -64.81589333344836
episode: 411 training return: -80.5036027990927
epoch: 103 test_true_pfm: 1385.7247966582215 sim_pfm: 0.09736429236414283
episode: 412 training return: -109.83929759729061
episode: 413 training return: -118.91506564050776
episode: 414 training return: -122.88399183263955
episode: 415 training return: -93.14564367815471
epoch: 104 test_true_pfm: 1355.7751914094115 sim_pfm: 0.7920052795691038
episode: 416 training return: -70.43256786488212
episode: 417 training return: -103.58317634557562
episode: 418 training return: -94.24852431163987
episode: 419 training return: -103.55310813952718
epoch: 105 test_true_pfm: 1364.6121635087209 sim_pfm: -3.336571973295332
episode: 420 training return: -131.64269864859216
episode: 421 training return: -73.60461166708973
episode: 422 training return: -117.77033694385277
episode: 423 training return: -89.37323930674357
epoch: 106 test_true_pfm: 1400.5576202219684 sim_pfm: 3.4155090370573427
episode: 424 training return: -106.65562717974612
episode: 425 training return: -108.67057835853196
episode: 426 training return: -109.75760953902615
episode: 427 training return: -82.12406344471331
epoch: 107 test_true_pfm: 1481.5967547986183 sim_pfm: 18.844595418063125
episode: 428 training return: -101.52246192632207
episode: 429 training return: -107.11649762013168
episode: 430 training return: -70.03665609922638
episode: 431 training return: -89.53330256319477
epoch: 108 test_true_pfm: 1357.1289587364665 sim_pfm: -12.2237391181282
episode: 432 training return: -117.01831626018176
episode: 433 training return: -90.57473280398882
episode: 434 training return: -142.54941130066754
episode: 435 training return: -125.83322900228212
epoch: 109 test_true_pfm: 1329.574920505901 sim_pfm: -14.886617044143307
episode: 436 training return: -94.3228553843488
episode: 437 training return: -104.43501186573062
episode: 438 training return: -86.81831731735848
episode: 439 training return: -77.95662628928818
epoch: 110 test_true_pfm: 1409.3399056126448 sim_pfm: 0.31559858593269574
episode: 440 training return: -127.27277981315704
episode: 441 training return: -107.90283534145415
episode: 442 training return: -127.19678532197159
episode: 443 training return: -90.03258962341444
epoch: 111 test_true_pfm: 1334.57407749024 sim_pfm: 7.850525341368427
episode: 444 training return: -100.57221361947596
episode: 445 training return: -114.72958212440459
episode: 446 training return: -116.46044733282066
episode: 447 training return: -91.80622373250435
epoch: 112 test_true_pfm: 1470.2619771327197 sim_pfm: 16.907950893766515
episode: 448 training return: -92.88832781493139
episode: 449 training return: -62.540669586317264
episode: 450 training return: -90.25159749932571
episode: 451 training return: -66.76056946214413
epoch: 113 test_true_pfm: 1405.2384860891407 sim_pfm: 6.24964980737976
episode: 452 training return: -112.59078048220417
episode: 453 training return: -93.77298923962584
episode: 454 training return: -122.84499772179156
episode: 455 training return: -80.57170319413832
epoch: 114 test_true_pfm: 1411.9552251736932 sim_pfm: 21.566880684273883
episode: 456 training return: -97.64934270497935
episode: 457 training return: -103.29960261367533
episode: 458 training return: -92.94828483001756
episode: 459 training return: -84.0887385551547
epoch: 115 test_true_pfm: 1265.8746094404385 sim_pfm: -22.60384236913316
episode: 460 training return: -90.51765530352799
episode: 461 training return: -105.5112863046168
episode: 462 training return: -75.06048558020686
episode: 463 training return: -97.67345173492666
epoch: 116 test_true_pfm: 1429.4621455434724 sim_pfm: -1.1244753671135663
episode: 464 training return: -106.42585853419267
episode: 465 training return: -128.93005835459658
episode: 466 training return: -92.0308705622596
episode: 467 training return: -64.52007415250844
epoch: 117 test_true_pfm: 1415.1533053675769 sim_pfm: 3.485681695544333
episode: 468 training return: -97.35803789191253
episode: 469 training return: -108.1161871715211
episode: 470 training return: -94.03972389631166
episode: 471 training return: -104.82805607186536
epoch: 118 test_true_pfm: 1456.2507807063746 sim_pfm: 12.360560008234144
episode: 472 training return: -86.46849767565298
episode: 473 training return: -122.27153045921344
episode: 474 training return: -76.71391832178035
episode: 475 training return: -115.2462255076137
epoch: 119 test_true_pfm: 1482.8964176134612 sim_pfm: 31.69259896127353
episode: 476 training return: -120.49868630454624
episode: 477 training return: -75.60259323428218
episode: 478 training return: -96.30136146390625
episode: 479 training return: -81.40968874931679
epoch: 120 test_true_pfm: 1311.3944019992061 sim_pfm: 13.99527818742817
episode: 480 training return: -65.02020659938354
episode: 481 training return: -124.38492100659488
episode: 482 training return: -72.43071588473735
episode: 483 training return: -62.45668596794795
epoch: 121 test_true_pfm: 1471.6060979861804 sim_pfm: 18.42923297454662
episode: 484 training return: -91.7871086800715
episode: 485 training return: -67.9738211253865
episode: 486 training return: -118.65923894865875
episode: 487 training return: -110.93836029747271
epoch: 122 test_true_pfm: 1500.3495074683624 sim_pfm: 28.17301780663007
episode: 488 training return: -90.72546555450839
episode: 489 training return: -100.02861060382568
episode: 490 training return: -97.40661114454184
episode: 491 training return: -67.00225637819993
epoch: 123 test_true_pfm: 1363.9314528965433 sim_pfm: 5.021111326055531
episode: 492 training return: -138.07958368898474
episode: 493 training return: -119.88837958953602
episode: 494 training return: -79.86896068406203
episode: 495 training return: -89.52592864973326
epoch: 124 test_true_pfm: 1477.3615823052498 sim_pfm: 1.9425241564880267
episode: 496 training return: -89.34759961948302
episode: 497 training return: -104.84394042486021
episode: 498 training return: -134.10161996375462
episode: 499 training return: -106.2405242335333
epoch: 125 test_true_pfm: 1448.2042026794131 sim_pfm: 16.113958453068527
episode: 500 training return: -126.23553545848259
episode: 501 training return: -106.12300903863124
episode: 502 training return: -86.06039673083608
episode: 503 training return: -121.87790636345656
epoch: 126 test_true_pfm: 1404.4206126795646 sim_pfm: 14.562603908205835
episode: 504 training return: -95.55085234504526
episode: 505 training return: -79.40186640304903
episode: 506 training return: -88.48776757292352
episode: 507 training return: -98.04924523385634
epoch: 127 test_true_pfm: 1471.8376402402525 sim_pfm: 22.497016121046205
episode: 508 training return: -103.27846590818912
episode: 509 training return: -63.533701254519606
episode: 510 training return: -95.01560463405185
episode: 511 training return: -71.98285590446258
epoch: 128 test_true_pfm: 1306.8769709186975 sim_pfm: -9.35394614757658
episode: 512 training return: -80.10523176821381
episode: 513 training return: -90.94968017391503
episode: 514 training return: -97.4121124455889
episode: 515 training return: -66.21561454437288
epoch: 129 test_true_pfm: 1478.9529247907576 sim_pfm: 10.502352483538868
episode: 516 training return: -122.03037172970086
episode: 517 training return: -77.53846231368435
episode: 518 training return: -107.69249704663974
episode: 519 training return: -70.26389036341591
epoch: 130 test_true_pfm: 1426.3795654619382 sim_pfm: 21.757141484459307
episode: 520 training return: -84.84848137681792
episode: 521 training return: -81.8044791077383
episode: 522 training return: -92.48129225475937
episode: 523 training return: -115.57219970648336
epoch: 131 test_true_pfm: 1460.330563794433 sim_pfm: 19.21525986942103
episode: 524 training return: -153.19768289330526
episode: 525 training return: -60.74230933769259
episode: 526 training return: -69.68441556982332
episode: 527 training return: -78.88974853271922
epoch: 132 test_true_pfm: 1429.774519027297 sim_pfm: -21.114504051730574
episode: 528 training return: -65.58521058433034
episode: 529 training return: -69.77008696536706
episode: 530 training return: -73.26420343002187
episode: 531 training return: -66.5896532719106
epoch: 133 test_true_pfm: 1453.637274339204 sim_pfm: 12.964400291452826
episode: 532 training return: -78.05290920035377
episode: 533 training return: -111.48332508866133
episode: 534 training return: -102.12496300024743
episode: 535 training return: -85.08026340943368
epoch: 134 test_true_pfm: 1430.2087780426375 sim_pfm: 17.297774275462107
episode: 536 training return: -95.34807033088602
episode: 537 training return: -72.68266990295983
episode: 538 training return: -69.7534391642824
episode: 539 training return: -89.1534112157475
epoch: 135 test_true_pfm: 1408.4841809116003 sim_pfm: 9.206424901138098
episode: 540 training return: -75.35216233055745
episode: 541 training return: -96.67070293330804
episode: 542 training return: -62.04359639957552
episode: 543 training return: -89.46462553757759
epoch: 136 test_true_pfm: 1478.4046474528388 sim_pfm: 18.98974577113465
episode: 544 training return: -55.91998633249681
episode: 545 training return: -72.25013620661012
episode: 546 training return: -89.93501646721967
episode: 547 training return: -83.21173662708038
epoch: 137 test_true_pfm: 1404.93778637357 sim_pfm: 23.74615000296052
episode: 548 training return: -62.455373220428996
episode: 549 training return: -82.98722781448767
episode: 550 training return: -73.20394944132134
episode: 551 training return: -100.26613394916195
epoch: 138 test_true_pfm: 1430.7523872577076 sim_pfm: 9.684962521034167
episode: 552 training return: -73.38601373354066
episode: 553 training return: -82.20850678236111
episode: 554 training return: -74.05961281187258
episode: 555 training return: -87.60148230091944
epoch: 139 test_true_pfm: 1536.9848604295282 sim_pfm: 31.35649179359885
episode: 556 training return: -70.90001350762611
episode: 557 training return: -87.01878217542115
episode: 558 training return: -64.95200086669941
episode: 559 training return: -63.50529386189317
epoch: 140 test_true_pfm: 1419.0961850346114 sim_pfm: 22.15112525881811
episode: 560 training return: -90.26090518881882
episode: 561 training return: -86.15495960811816
episode: 562 training return: -87.37026237348415
episode: 563 training return: -64.18506630204277
epoch: 141 test_true_pfm: 1503.9040545477667 sim_pfm: 25.2437241480235
episode: 564 training return: -114.64379134437216
episode: 565 training return: -60.573583980099556
episode: 566 training return: -74.7468660859995
episode: 567 training return: -70.43346438198851
epoch: 142 test_true_pfm: 1510.568893260487 sim_pfm: -5.073508534110686
episode: 568 training return: -81.73726507980244
episode: 569 training return: -57.359351476611685
episode: 570 training return: -55.771379151601806
episode: 571 training return: -76.87045212278151
epoch: 143 test_true_pfm: 1441.5205031577577 sim_pfm: 24.27852826388232
episode: 572 training return: -76.49924806488099
episode: 573 training return: -71.90450673024506
episode: 574 training return: -73.01103984546636
episode: 575 training return: -89.97920137968066
epoch: 144 test_true_pfm: 1468.4561179970344 sim_pfm: 23.1156303938849
episode: 576 training return: -86.88305166413842
episode: 577 training return: -121.48593807065544
episode: 578 training return: -83.63960728075776
episode: 579 training return: -80.67643004102675
epoch: 145 test_true_pfm: 1486.6234366245183 sim_pfm: 25.71130035716477
episode: 580 training return: -83.93162042191507
episode: 581 training return: -75.63274059264776
episode: 582 training return: -60.21337144689034
episode: 583 training return: -82.80029169260543
epoch: 146 test_true_pfm: 1344.8026182462927 sim_pfm: 19.83701439659995
episode: 584 training return: -90.65826446673009
episode: 585 training return: -64.32914728897862
episode: 586 training return: -123.04179802604031
episode: 587 training return: -92.74471233376745
epoch: 147 test_true_pfm: 1488.6361606598682 sim_pfm: 24.997059957634775
episode: 588 training return: -99.3704875267068
episode: 589 training return: -69.89814510422072
episode: 590 training return: -77.85818799903859
episode: 591 training return: -60.472353559189614
epoch: 148 test_true_pfm: 1436.7140199329353 sim_pfm: 26.65455639550952
episode: 592 training return: -62.9250953278631
episode: 593 training return: -97.51802811935076
episode: 594 training return: -71.07706515680235
episode: 595 training return: -111.30966415415955
epoch: 149 test_true_pfm: 1436.6968085989558 sim_pfm: 27.442168051305163
episode: 596 training return: -68.47080855697145
episode: 597 training return: -71.42525383157546
episode: 598 training return: -103.10908451976417
episode: 599 training return: -105.54089130294695
epoch: 150 test_true_pfm: 1380.7971189886118 sim_pfm: 4.710207877753534
