['--alg', 'sac', '--env', 'Swimmer-v3', '--learn', 'uncertainty', '--traj', 'medium', '--seed', '2', '--data', '100000']
epoch: 0 training_loss 0.4096829853951931 test_loss: 0.2931302547454834
epoch: 1 training_loss 0.2768349869549274 test_loss: 0.2520322799682617
epoch: 2 training_loss 0.2524591714143753 test_loss: 0.22825298309326172
epoch: 3 training_loss 0.23243245467543602 test_loss: 0.2159757137298584
epoch: 4 training_loss 0.24248661585152148 test_loss: 0.2340322971343994
epoch: 5 training_loss 0.23625081904232503 test_loss: 0.24068210124969483
epoch: 6 training_loss 0.23507203072309493 test_loss: 0.2024024486541748
epoch: 7 training_loss 0.22183257795870304 test_loss: 0.21299617290496825
epoch: 8 training_loss 0.21949680648744108 test_loss: 0.19450241327285767
epoch: 9 training_loss 0.20799968600273133 test_loss: 0.20828053951263428
epoch: 10 training_loss 0.21586464591324328 test_loss: 0.2225471019744873
epoch: 11 training_loss 0.2152476666122675 test_loss: 0.1987514853477478
epoch: 12 training_loss 0.2125870877504349 test_loss: 0.20996599197387694
epoch: 13 training_loss 0.21756784729659556 test_loss: 0.2124418020248413
epoch: 14 training_loss 0.21427987664937972 test_loss: 0.1960055112838745
epoch: 15 training_loss 0.2083527046442032 test_loss: 0.19689133167266845
epoch: 16 training_loss 0.21141987636685372 test_loss: 0.2077573299407959
epoch: 17 training_loss 0.21850794538855553 test_loss: 0.2012338876724243
epoch: 18 training_loss 0.20409392222762107 test_loss: 0.20673606395721436
epoch: 19 training_loss 0.2059274286776781 test_loss: 0.1917891502380371
epoch: 20 training_loss 0.20602450504899025 test_loss: 0.20048911571502687
epoch: 21 training_loss 0.19515147000551225 test_loss: 0.19868491888046264
epoch: 22 training_loss 0.20433997482061386 test_loss: 0.2501129150390625
epoch: 23 training_loss 0.20518840059638024 test_loss: 0.18227312564849854
epoch: 24 training_loss 0.20388312973082065 test_loss: 0.1965527653694153
epoch: 25 training_loss 0.20742589205503464 test_loss: 0.1984890341758728
epoch: 26 training_loss 0.19749479234218598 test_loss: 0.18765963315963746
epoch: 27 training_loss 0.2009711616486311 test_loss: 0.20934052467346193
epoch: 28 training_loss 0.20388547398149967 test_loss: 0.1975808024406433
epoch: 29 training_loss 0.1982191298902035 test_loss: 0.21495311260223388
epoch: 30 training_loss 0.1964279954880476 test_loss: 0.19395096302032472
epoch: 31 training_loss 0.19928466193377972 test_loss: 0.19543224573135376
epoch: 32 training_loss 0.19870489716529846 test_loss: 0.20206000804901122
epoch: 33 training_loss 0.19868887454271317 test_loss: 0.19620543718338013
epoch: 34 training_loss 0.19602971471846103 test_loss: 0.1899056315422058
epoch: 35 training_loss 0.18817564480006696 test_loss: 0.21346030235290528
epoch: 36 training_loss 0.19318773299455644 test_loss: 0.20639662742614745
epoch: 37 training_loss 0.19505015604197978 test_loss: 0.1862368702888489
epoch: 38 training_loss 0.19581705071032046 test_loss: 0.2064537763595581
epoch: 39 training_loss 0.19987296305596827 test_loss: 0.18994928598403932
epoch: 40 training_loss 0.19860775120556354 test_loss: 0.18276952505111693
epoch: 41 training_loss 0.19933753453195094 test_loss: 0.19367010593414308
epoch: 42 training_loss 0.19749236784875393 test_loss: 0.2178422212600708
epoch: 43 training_loss 0.1979282732307911 test_loss: 0.19647887945175171
epoch: 44 training_loss 0.19151038974523543 test_loss: 0.17396684885025024
epoch: 45 training_loss 0.19703072093427182 test_loss: 0.20945591926574708
epoch: 46 training_loss 0.20098524160683154 test_loss: 0.18450803756713868
epoch: 47 training_loss 0.1926873929053545 test_loss: 0.19265903234481813
epoch: 48 training_loss 0.19300313919782638 test_loss: 0.1909668803215027
epoch: 49 training_loss 0.19213795080780982 test_loss: 0.18186427354812623
epoch: 50 training_loss 0.19768355205655097 test_loss: 0.1910883903503418
epoch: 51 training_loss 0.18885091595351697 test_loss: 0.18255363702774047
epoch: 52 training_loss 0.18534156262874604 test_loss: 0.20346240997314452
epoch: 53 training_loss 0.19720594219863416 test_loss: 0.19271211624145507
epoch: 54 training_loss 0.1849080066382885 test_loss: 0.19689300060272216
epoch: 55 training_loss 0.19487288057804109 test_loss: 0.18493077754974366
epoch: 56 training_loss 0.20521641738712787 test_loss: 0.19868367910385132
epoch: 57 training_loss 0.18498972281813622 test_loss: 0.1672138214111328
epoch: 58 training_loss 0.19192329175770284 test_loss: 0.17526669502258302
epoch: 59 training_loss 0.18331027925014495 test_loss: 0.18089371919631958
epoch: 60 training_loss 0.19754369102418423 test_loss: 0.1818918228149414
epoch: 61 training_loss 0.18857975840568542 test_loss: 0.18851059675216675
epoch: 62 training_loss 0.18555955410003663 test_loss: 0.1875310182571411
epoch: 63 training_loss 0.18315430283546447 test_loss: 0.17978253364562988
epoch: 64 training_loss 0.18536419831216336 test_loss: 0.19317597150802612
epoch: 65 training_loss 0.18609363801777362 test_loss: 0.1892496943473816
epoch: 66 training_loss 0.18765578277409076 test_loss: 0.192991304397583
epoch: 67 training_loss 0.19746525667607784 test_loss: 0.1589958429336548
epoch: 68 training_loss 0.1844404724240303 test_loss: 0.18733246326446534
epoch: 69 training_loss 0.1867660938203335 test_loss: 0.2062443971633911
epoch: 70 training_loss 0.1864653666317463 test_loss: 0.18053104877471923
epoch: 71 training_loss 0.1876094576716423 test_loss: 0.18523063659667968
epoch: 72 training_loss 0.18815329231321812 test_loss: 0.18011345863342285
epoch: 73 training_loss 0.19228108458220958 test_loss: 0.18558510541915893
epoch: 74 training_loss 0.18041769564151763 test_loss: 0.18575698137283325
epoch: 75 training_loss 0.19134667463600635 test_loss: 0.1930713176727295
epoch: 76 training_loss 0.1866082400083542 test_loss: 0.21474480628967285
epoch: 77 training_loss 0.19793471850454808 test_loss: 0.18233852386474608
epoch: 78 training_loss 0.18542654767632485 test_loss: 0.1780592679977417
epoch: 79 training_loss 0.1892104911804199 test_loss: 0.17075841426849364
epoch: 80 training_loss 0.18207299418747425 test_loss: 0.18840270042419432
epoch: 81 training_loss 0.1852586879581213 test_loss: 0.1903039813041687
epoch: 82 training_loss 0.17951015651226043 test_loss: 0.19599968194961548
epoch: 83 training_loss 0.18832091487944125 test_loss: 0.16421021223068238
epoch: 84 training_loss 0.19307730071246623 test_loss: 0.19731563329696655
epoch: 85 training_loss 0.18582402050495148 test_loss: 0.1806265354156494
epoch: 86 training_loss 0.185466465651989 test_loss: 0.180497145652771
epoch: 87 training_loss 0.17782928831875325 test_loss: 0.1793551564216614
epoch: 88 training_loss 0.17982636049389839 test_loss: 0.16449688673019408
epoch: 89 training_loss 0.19066816590726376 test_loss: 0.18953996896743774
epoch: 90 training_loss 0.18558674074709416 test_loss: 0.1759138822555542
epoch: 91 training_loss 0.17953990168869496 test_loss: 0.18172067403793335
epoch: 92 training_loss 0.17880775965750217 test_loss: 0.20098800659179689
epoch: 93 training_loss 0.18592522718012333 test_loss: 0.16296263933181762
epoch: 94 training_loss 0.17393253594636918 test_loss: 0.19875478744506836
epoch: 95 training_loss 0.18079384438693524 test_loss: 0.17956010103225709
epoch: 96 training_loss 0.18381382182240485 test_loss: 0.1851440668106079
epoch: 97 training_loss 0.18813895113766194 test_loss: 0.17791520357131957
epoch: 98 training_loss 0.1877226961404085 test_loss: 0.2083970546722412
epoch: 99 training_loss 0.18335693717002868 test_loss: 0.18703263998031616
epoch: 100 training_loss 0.18429037220776082 test_loss: 0.18587316274642945
epoch: 101 training_loss 0.18606664806604387 test_loss: 0.18641374111175538
epoch: 102 training_loss 0.18225545398890972 test_loss: 0.19446077346801757
epoch: 103 training_loss 0.18438938781619071 test_loss: 0.17743645906448363
epoch: 104 training_loss 0.1788417887687683 test_loss: 0.1795064926147461
epoch: 105 training_loss 0.18498296193778516 test_loss: 0.17004249095916749
epoch: 106 training_loss 0.18084318466484547 test_loss: 0.17728145122528077
epoch: 107 training_loss 0.17896707125008107 test_loss: 0.1936502695083618
epoch: 108 training_loss 0.1894558548182249 test_loss: 0.1777038335800171
epoch: 109 training_loss 0.17299484200775622 test_loss: 0.18086172342300416
epoch: 110 training_loss 0.17772998347878455 test_loss: 0.19117130041122438
epoch: 111 training_loss 0.18294374734163285 test_loss: 0.17201985120773317
epoch: 112 training_loss 0.1771155960857868 test_loss: 0.1841262936592102
epoch: 113 training_loss 0.1813629225641489 test_loss: 0.19454948902130126
epoch: 114 training_loss 0.17905495882034303 test_loss: 0.16746978759765624
epoch: 115 training_loss 0.1758293705433607 test_loss: 0.1873365044593811
epoch: 116 training_loss 0.18203997291624546 test_loss: 0.16904507875442504
epoch: 117 training_loss 0.17668903045356274 test_loss: 0.17048839330673218
epoch: 118 training_loss 0.180863341614604 test_loss: 0.18634359836578368
epoch: 119 training_loss 0.17539482101798057 test_loss: 0.19343169927597045
epoch: 120 training_loss 0.17433168448507785 test_loss: 0.17436167001724243
epoch: 121 training_loss 0.17770059801638127 test_loss: 0.18069010972976685
epoch: 122 training_loss 0.18075881190598012 test_loss: 0.18195772171020508
epoch: 123 training_loss 0.1778926519304514 test_loss: 0.19191738367080688
epoch: 124 training_loss 0.1750988797098398 test_loss: 0.18715981245040894
epoch: 125 training_loss 0.1776961813122034 test_loss: 0.1820766806602478
epoch: 126 training_loss 0.17962493501603605 test_loss: 0.17251389026641845
epoch: 127 training_loss 0.18264720313251018 test_loss: 0.18028273582458496
epoch: 128 training_loss 0.1772174796462059 test_loss: 0.1704477071762085
epoch: 129 training_loss 0.17753420442342757 test_loss: 0.17477585077285768
epoch: 130 training_loss 0.17946275278925897 test_loss: 0.18798922300338744
epoch: 131 training_loss 0.1782348357886076 test_loss: 0.18032116889953614
epoch: 132 training_loss 0.17587517470121383 test_loss: 0.17518198490142822
epoch: 133 training_loss 0.1707402102649212 test_loss: 0.17597399950027465
epoch: 134 training_loss 0.1765043181926012 test_loss: 0.20629405975341797
epoch: 135 training_loss 0.17858325026929378 test_loss: 0.1800044894218445
epoch: 136 training_loss 0.1712179606407881 test_loss: 0.1676243305206299
epoch: 137 training_loss 0.18118987143039703 test_loss: 0.17407909631729127
epoch: 138 training_loss 0.1736236297339201 test_loss: 0.18559805154800416
epoch: 139 training_loss 0.17329978115856648 test_loss: 0.18879776000976561
epoch: 140 training_loss 0.18063159562647343 test_loss: 0.1825704336166382
epoch: 141 training_loss 0.1745987991988659 test_loss: 0.16113789081573487
epoch: 142 training_loss 0.18138955548405647 test_loss: 0.1766974687576294
epoch: 143 training_loss 0.17371278040111066 test_loss: 0.19439730644226075
epoch: 144 training_loss 0.1754017572104931 test_loss: 0.1624419569969177
epoch: 145 training_loss 0.18366447694599627 test_loss: 0.17596536874771118
epoch: 146 training_loss 0.17766137935221196 test_loss: 0.17184966802597046
epoch: 147 training_loss 0.17054656252264977 test_loss: 0.18283225297927858
epoch: 148 training_loss 0.17393477499485016 test_loss: 0.16840381622314454
epoch: 149 training_loss 0.1752072187513113 test_loss: 0.17307696342468262
epoch: 0 training_loss 0.4780705261230469 test_loss: 0.3270107746124268
epoch: 1 training_loss 0.29409394845366477 test_loss: 0.24953186511993408
epoch: 2 training_loss 0.2624599325656891 test_loss: 0.23907277584075928
epoch: 3 training_loss 0.2444373306632042 test_loss: 0.2658243179321289
epoch: 4 training_loss 0.2680900423228741 test_loss: 0.2213587522506714
epoch: 5 training_loss 0.2343825675547123 test_loss: 0.23206908702850343
epoch: 6 training_loss 0.23743681110441683 test_loss: 0.281325364112854
epoch: 7 training_loss 0.22999323934316634 test_loss: 0.225463342666626
epoch: 8 training_loss 0.21728731215000152 test_loss: 0.20639505386352539
epoch: 9 training_loss 0.22990124583244323 test_loss: 0.21889982223510743
epoch: 10 training_loss 0.22806698597967626 test_loss: 0.21997036933898925
epoch: 11 training_loss 0.2156423881649971 test_loss: 0.22011377811431884
epoch: 12 training_loss 0.22231263235211374 test_loss: 0.22080719470977783
epoch: 13 training_loss 0.21332590490579606 test_loss: 0.20421271324157714
epoch: 14 training_loss 0.22418748088181017 test_loss: 0.23311872482299806
epoch: 15 training_loss 0.2186113129556179 test_loss: 0.23098347187042237
epoch: 16 training_loss 0.2204609189927578 test_loss: 0.23159506320953369
epoch: 17 training_loss 0.2140707417577505 test_loss: 0.21977708339691163
epoch: 18 training_loss 0.2107577172666788 test_loss: 0.20649001598358155
epoch: 19 training_loss 0.2173527707159519 test_loss: 0.2047621488571167
epoch: 20 training_loss 0.21360878668725491 test_loss: 0.208453369140625
epoch: 21 training_loss 0.2180088458955288 test_loss: 0.205698823928833
epoch: 22 training_loss 0.21003319926559924 test_loss: 0.20702824592590333
epoch: 23 training_loss 0.2083535546809435 test_loss: 0.19689462184906006
epoch: 24 training_loss 0.2097070027142763 test_loss: 0.2045496940612793
epoch: 25 training_loss 0.20893715992569922 test_loss: 0.2118236780166626
epoch: 26 training_loss 0.20084790736436844 test_loss: 0.20627470016479493
epoch: 27 training_loss 0.20420338831841944 test_loss: 0.20196959972381592
epoch: 28 training_loss 0.20902156680822373 test_loss: 0.19025532007217408
epoch: 29 training_loss 0.20020710356533528 test_loss: 0.20618300437927245
epoch: 30 training_loss 0.19813126891851426 test_loss: 0.1916091561317444
epoch: 31 training_loss 0.1968344531953335 test_loss: 0.2053469181060791
epoch: 32 training_loss 0.20062727816402912 test_loss: 0.20860929489135743
epoch: 33 training_loss 0.19773440033197404 test_loss: 0.19213542938232422
epoch: 34 training_loss 0.2147702868282795 test_loss: 0.1857979416847229
epoch: 35 training_loss 0.195853925421834 test_loss: 0.19700119495391846
epoch: 36 training_loss 0.1992515691369772 test_loss: 0.201629638671875
epoch: 37 training_loss 0.19727580778300763 test_loss: 0.1828068494796753
epoch: 38 training_loss 0.194549997150898 test_loss: 0.18797435760498046
epoch: 39 training_loss 0.19438264682888984 test_loss: 0.19965522289276122
epoch: 40 training_loss 0.19860591888427734 test_loss: 0.18859217166900635
epoch: 41 training_loss 0.20260890319943428 test_loss: 0.19776877164840698
epoch: 42 training_loss 0.19882347844541073 test_loss: 0.1919777512550354
epoch: 43 training_loss 0.19539990179240704 test_loss: 0.20399978160858154
epoch: 44 training_loss 0.20466094464063644 test_loss: 0.19515299797058105
epoch: 45 training_loss 0.19212204292416574 test_loss: 0.20589349269866944
epoch: 46 training_loss 0.19530724428594112 test_loss: 0.19313971996307372
epoch: 47 training_loss 0.18499271668493747 test_loss: 0.18676633834838868
epoch: 48 training_loss 0.19568818151950837 test_loss: 0.20499093532562257
epoch: 49 training_loss 0.1944183750450611 test_loss: 0.18213388919830323
epoch: 50 training_loss 0.194234913662076 test_loss: 0.1902293086051941
epoch: 51 training_loss 0.19568044923245906 test_loss: 0.2017125368118286
epoch: 52 training_loss 0.1950338788330555 test_loss: 0.1872124195098877
epoch: 53 training_loss 0.1921217868477106 test_loss: 0.1699442982673645
epoch: 54 training_loss 0.1869174077361822 test_loss: 0.19275062084197997
epoch: 55 training_loss 0.19028575330972672 test_loss: 0.20223042964935303
epoch: 56 training_loss 0.2028793815523386 test_loss: 0.17409085035324096
epoch: 57 training_loss 0.19365066051483154 test_loss: 0.20691313743591308
epoch: 58 training_loss 0.19526520170271397 test_loss: 0.1858859181404114
epoch: 59 training_loss 0.19350321918725968 test_loss: 0.17660678625106813
epoch: 60 training_loss 0.19562189169228078 test_loss: 0.1932673931121826
epoch: 61 training_loss 0.19101311914622784 test_loss: 0.19844200611114501
epoch: 62 training_loss 0.1926883278787136 test_loss: 0.18561896085739135
epoch: 63 training_loss 0.18612712740898132 test_loss: 0.18100155591964723
epoch: 64 training_loss 0.19179766550660132 test_loss: 0.1974117636680603
epoch: 65 training_loss 0.18268239863216876 test_loss: 0.18203120231628417
epoch: 66 training_loss 0.1810761971026659 test_loss: 0.19612616300582886
epoch: 67 training_loss 0.19126922063529492 test_loss: 0.17995524406433105
epoch: 68 training_loss 0.18079931117594242 test_loss: 0.17908272743225098
epoch: 69 training_loss 0.18958875060081481 test_loss: 0.17724967002868652
epoch: 70 training_loss 0.18447029396891593 test_loss: 0.18444522619247436
epoch: 71 training_loss 0.1818150806427002 test_loss: 0.2004857063293457
epoch: 72 training_loss 0.1852650571614504 test_loss: 0.18373849391937255
epoch: 73 training_loss 0.18316714078187943 test_loss: 0.1793934464454651
epoch: 74 training_loss 0.18952620893716812 test_loss: 0.2015453338623047
epoch: 75 training_loss 0.19128805354237557 test_loss: 0.18229700326919557
epoch: 76 training_loss 0.18094107143580915 test_loss: 0.1750818133354187
epoch: 77 training_loss 0.19006431631743909 test_loss: 0.16840451955795288
epoch: 78 training_loss 0.18375241935253142 test_loss: 0.16851308345794677
epoch: 79 training_loss 0.1915469942986965 test_loss: 0.18134127855300902
epoch: 80 training_loss 0.18109574727714062 test_loss: 0.19256134033203126
epoch: 81 training_loss 0.18143185436725617 test_loss: 0.19875930547714232
epoch: 82 training_loss 0.18186555802822113 test_loss: 0.17510896921157837
epoch: 83 training_loss 0.18296725407242775 test_loss: 0.19800906181335448
epoch: 84 training_loss 0.1890507247298956 test_loss: 0.18454604148864745
epoch: 85 training_loss 0.18280362091958524 test_loss: 0.19512201547622682
epoch: 86 training_loss 0.18867172688245773 test_loss: 0.19936400651931763
epoch: 87 training_loss 0.18405636288225652 test_loss: 0.1821322798728943
epoch: 88 training_loss 0.1828220471739769 test_loss: 0.19959919452667235
epoch: 89 training_loss 0.1793244442343712 test_loss: 0.19016354084014891
epoch: 90 training_loss 0.18702897794544696 test_loss: 0.1815774917602539
epoch: 91 training_loss 0.17803688503801823 test_loss: 0.1856682300567627
epoch: 92 training_loss 0.1904263213276863 test_loss: 0.1877342700958252
epoch: 93 training_loss 0.18260111406445503 test_loss: 0.20435009002685547
epoch: 94 training_loss 0.18501911655068398 test_loss: 0.17772796154022216
epoch: 95 training_loss 0.1800464354455471 test_loss: 0.18134373426437378
epoch: 96 training_loss 0.18153221257030963 test_loss: 0.188783860206604
epoch: 97 training_loss 0.18045362539589405 test_loss: 0.18024125099182128
epoch: 98 training_loss 0.18675801016390323 test_loss: 0.17739567756652833
epoch: 99 training_loss 0.17941089294850826 test_loss: 0.17674670219421387
epoch: 100 training_loss 0.17757846996188165 test_loss: 0.1816083550453186
epoch: 101 training_loss 0.18660363420844078 test_loss: 0.1816196084022522
epoch: 102 training_loss 0.18847126372158526 test_loss: 0.17685199975967408
epoch: 103 training_loss 0.18253246024250985 test_loss: 0.18179174661636352
epoch: 104 training_loss 0.17591378703713417 test_loss: 0.1894874691963196
epoch: 105 training_loss 0.18115252085030079 test_loss: 0.18012908697128296
epoch: 106 training_loss 0.1827492508292198 test_loss: 0.1981458067893982
epoch: 107 training_loss 0.18218159958720206 test_loss: 0.17906405925750732
epoch: 108 training_loss 0.18388402216136457 test_loss: 0.17628732919692994
epoch: 109 training_loss 0.17967046141624451 test_loss: 0.1711200952529907
epoch: 110 training_loss 0.18452426128089428 test_loss: 0.16495800018310547
epoch: 111 training_loss 0.17532038308680056 test_loss: 0.1806923508644104
epoch: 112 training_loss 0.18267839409410955 test_loss: 0.1680024266242981
epoch: 113 training_loss 0.18002589605748653 test_loss: 0.17691611051559447
epoch: 114 training_loss 0.1805144640058279 test_loss: 0.1754999876022339
epoch: 115 training_loss 0.1781276160478592 test_loss: 0.1863509774208069
epoch: 116 training_loss 0.17612432949244977 test_loss: 0.18417346477508545
epoch: 117 training_loss 0.18240613736212252 test_loss: 0.17350831031799316
epoch: 118 training_loss 0.17953947216272353 test_loss: 0.1778431534767151
epoch: 119 training_loss 0.17966384537518024 test_loss: 0.16796789169311524
epoch: 120 training_loss 0.18458415292203426 test_loss: 0.17866462469100952
epoch: 121 training_loss 0.17876220986247063 test_loss: 0.18995072841644287
epoch: 122 training_loss 0.17325962357223035 test_loss: 0.18586946725845338
epoch: 123 training_loss 0.18440187513828277 test_loss: 0.17353518009185792
epoch: 124 training_loss 0.17864002265036105 test_loss: 0.1789357304573059
epoch: 125 training_loss 0.17553089790046214 test_loss: 0.17571717500686646
epoch: 126 training_loss 0.18062265291810037 test_loss: 0.17651703357696533
epoch: 127 training_loss 0.18246465921401978 test_loss: 0.17354739904403688
epoch: 128 training_loss 0.17729348838329315 test_loss: 0.1790234088897705
epoch: 129 training_loss 0.17966613747179508 test_loss: 0.1915870189666748
epoch: 130 training_loss 0.17672395206987856 test_loss: 0.16773489713668824
epoch: 131 training_loss 0.18335060425102712 test_loss: 0.18115955591201782
epoch: 132 training_loss 0.17277528338134288 test_loss: 0.17687432765960692
epoch: 133 training_loss 0.17375849284231662 test_loss: 0.1954498291015625
epoch: 134 training_loss 0.17421404212713243 test_loss: 0.1835910439491272
epoch: 135 training_loss 0.17485683143138886 test_loss: 0.16588629484176637
epoch: 136 training_loss 0.17727345898747443 test_loss: 0.1717921495437622
epoch: 137 training_loss 0.17082403250038625 test_loss: 0.17589218616485597
epoch: 138 training_loss 0.17257796227931976 test_loss: 0.18439722061157227
epoch: 139 training_loss 0.1699542957544327 test_loss: 0.17026690244674683
epoch: 140 training_loss 0.17503688536584378 test_loss: 0.19286195039749146
epoch: 141 training_loss 0.1780727244913578 test_loss: 0.17235169410705567
epoch: 142 training_loss 0.1808855041861534 test_loss: 0.175726318359375
epoch: 143 training_loss 0.17797597065567972 test_loss: 0.17520394325256347
epoch: 144 training_loss 0.16740771025419235 test_loss: 0.19437397718429567
epoch: 145 training_loss 0.17309316605329514 test_loss: 0.18294364213943481
epoch: 146 training_loss 0.17499277427792548 test_loss: 0.19324465990066528
epoch: 147 training_loss 0.17199461750686168 test_loss: 0.17815189361572265
epoch: 148 training_loss 0.17891511045396327 test_loss: 0.17447015047073364
epoch: 149 training_loss 0.1785313843935728 test_loss: 0.17957932949066163
epoch: 0 training_loss 0.4229182402789593 test_loss: 0.3062404155731201
epoch: 1 training_loss 0.2978254093229771 test_loss: 0.2781558036804199
epoch: 2 training_loss 0.25678204745054245 test_loss: 0.2540819406509399
epoch: 3 training_loss 0.24546738639473914 test_loss: 0.2224064826965332
epoch: 4 training_loss 0.2282332943379879 test_loss: 0.23794953823089598
epoch: 5 training_loss 0.243016577064991 test_loss: 0.23581607341766359
epoch: 6 training_loss 0.22882862582802774 test_loss: 0.20956759452819823
epoch: 7 training_loss 0.22559832230210305 test_loss: 0.22979257106781006
epoch: 8 training_loss 0.22110068440437317 test_loss: 0.21895344257354737
epoch: 9 training_loss 0.22147393062710763 test_loss: 0.222961688041687
epoch: 10 training_loss 0.20558493033051492 test_loss: 0.1967650294303894
epoch: 11 training_loss 0.21372020184993745 test_loss: 0.20929937362670897
epoch: 12 training_loss 0.2309281721711159 test_loss: 0.2145233154296875
epoch: 13 training_loss 0.21208108104765416 test_loss: 0.21212482452392578
epoch: 14 training_loss 0.20913666397333144 test_loss: 0.22276926040649414
epoch: 15 training_loss 0.20710872180759907 test_loss: 0.2037287712097168
epoch: 16 training_loss 0.20944499492645263 test_loss: 0.19944263696670533
epoch: 17 training_loss 0.20418765999376773 test_loss: 0.21041831970214844
epoch: 18 training_loss 0.21386355340480803 test_loss: 0.22491836547851562
epoch: 19 training_loss 0.20672180630266668 test_loss: 0.19699262380599974
epoch: 20 training_loss 0.20364086732268333 test_loss: 0.1968727707862854
epoch: 21 training_loss 0.20175706401467322 test_loss: 0.20942394733428954
epoch: 22 training_loss 0.2069151235371828 test_loss: 0.20152437686920166
epoch: 23 training_loss 0.2136642526090145 test_loss: 0.20084807872772217
epoch: 24 training_loss 0.21017465189099313 test_loss: 0.18776692152023317
epoch: 25 training_loss 0.19888052813708781 test_loss: 0.20485033988952636
epoch: 26 training_loss 0.20556294590234755 test_loss: 0.20140063762664795
epoch: 27 training_loss 0.19884666197001935 test_loss: 0.2396070957183838
epoch: 28 training_loss 0.20412115596234798 test_loss: 0.24952363967895508
epoch: 29 training_loss 0.20986304722726346 test_loss: 0.19380640983581543
epoch: 30 training_loss 0.19694998390972615 test_loss: 0.20467534065246581
epoch: 31 training_loss 0.20237703815102578 test_loss: 0.21013805866241456
epoch: 32 training_loss 0.20241407424211502 test_loss: 0.21559042930603028
epoch: 33 training_loss 0.19681551046669482 test_loss: 0.20633468627929688
epoch: 34 training_loss 0.19211050666868687 test_loss: 0.17931164503097535
epoch: 35 training_loss 0.20232012137770652 test_loss: 0.1965290427207947
epoch: 36 training_loss 0.21231941901147366 test_loss: 0.19921200275421141
epoch: 37 training_loss 0.19920275658369063 test_loss: 0.19923733472824096
epoch: 38 training_loss 0.20177848368883133 test_loss: 0.19097437858581542
epoch: 39 training_loss 0.19819886855781077 test_loss: 0.19518624544143676
epoch: 40 training_loss 0.20449776105582715 test_loss: 0.18343945741653442
epoch: 41 training_loss 0.20107508644461633 test_loss: 0.20026171207427979
epoch: 42 training_loss 0.20064684577286243 test_loss: 0.1900196671485901
epoch: 43 training_loss 0.19072421543300153 test_loss: 0.1938950777053833
epoch: 44 training_loss 0.1989727494865656 test_loss: 0.20035035610198976
epoch: 45 training_loss 0.19885025650262833 test_loss: 0.1822768807411194
epoch: 46 training_loss 0.19115739300847054 test_loss: 0.1805768132209778
epoch: 47 training_loss 0.19151407025754452 test_loss: 0.18374286890029906
epoch: 48 training_loss 0.18618748977780342 test_loss: 0.19957782030105592
epoch: 49 training_loss 0.1882300551980734 test_loss: 0.23112936019897462
epoch: 50 training_loss 0.19260595239698886 test_loss: 0.18952449560165405
epoch: 51 training_loss 0.18343421667814255 test_loss: 0.181416654586792
epoch: 52 training_loss 0.1986306457966566 test_loss: 0.17574537992477418
epoch: 53 training_loss 0.19490085080265998 test_loss: 0.22784831523895263
epoch: 54 training_loss 0.18678954638540746 test_loss: 0.18407455682754517
epoch: 55 training_loss 0.19159324675798417 test_loss: 0.22717499732971191
epoch: 56 training_loss 0.192075619623065 test_loss: 0.19833476543426515
epoch: 57 training_loss 0.19709523133933543 test_loss: 0.1820279359817505
epoch: 58 training_loss 0.19152462542057036 test_loss: 0.17896648645401
epoch: 59 training_loss 0.1852866107970476 test_loss: 0.19544264078140258
epoch: 60 training_loss 0.19489616625010966 test_loss: 0.1874017596244812
epoch: 61 training_loss 0.19755402140319347 test_loss: 0.20065610408782958
epoch: 62 training_loss 0.19356839902698994 test_loss: 0.18480136394500732
epoch: 63 training_loss 0.19051045879721643 test_loss: 0.18236852884292604
epoch: 64 training_loss 0.19177750788629055 test_loss: 0.1916520118713379
epoch: 65 training_loss 0.1849610147625208 test_loss: 0.18596690893173218
epoch: 66 training_loss 0.18840548284351827 test_loss: 0.19492654800415038
epoch: 67 training_loss 0.18376837663352488 test_loss: 0.18224039077758789
epoch: 68 training_loss 0.19289202988147736 test_loss: 0.1867320418357849
epoch: 69 training_loss 0.1866340970993042 test_loss: 0.1788962483406067
epoch: 70 training_loss 0.18530993953347205 test_loss: 0.18939238786697388
epoch: 71 training_loss 0.1905307575315237 test_loss: 0.18515458106994628
epoch: 72 training_loss 0.18736906968057154 test_loss: 0.16825528144836427
epoch: 73 training_loss 0.18672553956508636 test_loss: 0.18400062322616578
epoch: 74 training_loss 0.19014622807502746 test_loss: 0.1769034147262573
epoch: 75 training_loss 0.19115363240242003 test_loss: 0.175248122215271
epoch: 76 training_loss 0.1922001016885042 test_loss: 0.1955039381980896
epoch: 77 training_loss 0.18540726110339165 test_loss: 0.18002818822860717
epoch: 78 training_loss 0.18987092152237892 test_loss: 0.19299379587173462
epoch: 79 training_loss 0.19757987007498742 test_loss: 0.18442529439926147
epoch: 80 training_loss 0.18642606921494007 test_loss: 0.1886184573173523
epoch: 81 training_loss 0.1962803727388382 test_loss: 0.19097065925598145
epoch: 82 training_loss 0.18265758484601974 test_loss: 0.18206459283828735
epoch: 83 training_loss 0.19118772484362126 test_loss: 0.20871334075927733
epoch: 84 training_loss 0.18500880621373653 test_loss: 0.1883000373840332
epoch: 85 training_loss 0.18143951281905174 test_loss: 0.18954670429229736
epoch: 86 training_loss 0.19358169861137867 test_loss: 0.19290506839752197
epoch: 87 training_loss 0.18989514313638212 test_loss: 0.18008922338485717
epoch: 88 training_loss 0.1949067547917366 test_loss: 0.18395100831985473
epoch: 89 training_loss 0.19164556361734866 test_loss: 0.18894342184066773
epoch: 90 training_loss 0.18981539264321326 test_loss: 0.19243572950363158
epoch: 91 training_loss 0.1770873022824526 test_loss: 0.21048102378845215
epoch: 92 training_loss 0.18820112727582455 test_loss: 0.19642027616500854
epoch: 93 training_loss 0.18278661467134952 test_loss: 0.18705177307128906
epoch: 94 training_loss 0.19195241421461107 test_loss: 0.17807869911193847
epoch: 95 training_loss 0.19059852719306947 test_loss: 0.16824318170547486
epoch: 96 training_loss 0.1790944317728281 test_loss: 0.189193058013916
epoch: 97 training_loss 0.1925004743039608 test_loss: 0.18827496767044066
epoch: 98 training_loss 0.19192969970405102 test_loss: 0.2013707160949707
epoch: 99 training_loss 0.17712503671646118 test_loss: 0.17447450160980224
epoch: 100 training_loss 0.19460126966238023 test_loss: 0.17277278900146484
epoch: 101 training_loss 0.17166123114526272 test_loss: 0.20208826065063476
epoch: 102 training_loss 0.18247540012001992 test_loss: 0.17768285274505616
epoch: 103 training_loss 0.18613155260682107 test_loss: 0.1756069302558899
epoch: 104 training_loss 0.18271971851587296 test_loss: 0.16689893007278442
epoch: 105 training_loss 0.18822451055049896 test_loss: 0.17895470857620238
epoch: 106 training_loss 0.19600819237530231 test_loss: 0.18306734561920165
epoch: 107 training_loss 0.17923918426036833 test_loss: 0.18676193952560424
epoch: 108 training_loss 0.1948056773096323 test_loss: 0.20555131435394286
epoch: 109 training_loss 0.185223248898983 test_loss: 0.21941444873809815
epoch: 110 training_loss 0.18113813735544682 test_loss: 0.1787453293800354
epoch: 111 training_loss 0.18757638461887838 test_loss: 0.1815212845802307
epoch: 112 training_loss 0.1925601376593113 test_loss: 0.17179514169692994
epoch: 113 training_loss 0.18618226014077663 test_loss: 0.18473448753356933
epoch: 114 training_loss 0.1778916498273611 test_loss: 0.18684579133987428
epoch: 115 training_loss 0.18000555232167245 test_loss: 0.19350990056991577
epoch: 116 training_loss 0.18636315487325192 test_loss: 0.17341549396514894
epoch: 117 training_loss 0.18079944930970668 test_loss: 0.19164531230926513
epoch: 118 training_loss 0.17815826274454594 test_loss: 0.1893246054649353
epoch: 119 training_loss 0.18541857853531837 test_loss: 0.16427520513534546
epoch: 120 training_loss 0.1886679358035326 test_loss: 0.1804528594017029
epoch: 121 training_loss 0.1839356655627489 test_loss: 0.20209250450134278
epoch: 122 training_loss 0.17269732385873796 test_loss: 0.17457573413848876
epoch: 123 training_loss 0.1871660753339529 test_loss: 0.16613115072250367
epoch: 124 training_loss 0.1883535522222519 test_loss: 0.20496108531951904
epoch: 125 training_loss 0.17811525344848633 test_loss: 0.1759116530418396
epoch: 126 training_loss 0.1784481519833207 test_loss: 0.17485610246658326
epoch: 127 training_loss 0.17461792521178723 test_loss: 0.17625811100006103
epoch: 128 training_loss 0.18624414756894112 test_loss: 0.18168296813964843
epoch: 129 training_loss 0.18508938625454902 test_loss: 0.1864571452140808
epoch: 130 training_loss 0.18154215842485427 test_loss: 0.1790861129760742
epoch: 131 training_loss 0.18771976433694362 test_loss: 0.18449448347091674
epoch: 132 training_loss 0.1758680532127619 test_loss: 0.17355421781539918
epoch: 133 training_loss 0.18597888626158238 test_loss: 0.18337972164154054
epoch: 134 training_loss 0.1798307154327631 test_loss: 0.19743020534515382
epoch: 135 training_loss 0.18084172762930392 test_loss: 0.1872844696044922
epoch: 136 training_loss 0.18486208133399487 test_loss: 0.18367918729782104
epoch: 137 training_loss 0.1813541153073311 test_loss: 0.17491911649703978
epoch: 138 training_loss 0.17163752913475036 test_loss: 0.1756647229194641
epoch: 139 training_loss 0.18153378628194333 test_loss: 0.16836991310119628
epoch: 140 training_loss 0.18112735629081725 test_loss: 0.18323370218276977
epoch: 141 training_loss 0.1762613654881716 test_loss: 0.18632562160491944
epoch: 142 training_loss 0.17835013225674629 test_loss: 0.1757673978805542
epoch: 143 training_loss 0.17429395385086535 test_loss: 0.19131071567535402
epoch: 144 training_loss 0.1825014267861843 test_loss: 0.17900577783584595
epoch: 145 training_loss 0.17220723681151867 test_loss: 0.16974321603775025
epoch: 146 training_loss 0.19246523015201092 test_loss: 0.18588621616363527
epoch: 147 training_loss 0.1772333013266325 test_loss: 0.17919036149978637
epoch: 148 training_loss 0.18558846600353718 test_loss: 0.18640052080154418
epoch: 149 training_loss 0.1815184286981821 test_loss: 0.176313579082489
epoch: 0 training_loss 0.4669747453927994 test_loss: 0.34867751598358154
epoch: 1 training_loss 0.32272195965051653 test_loss: 0.28702006340026853
epoch: 2 training_loss 0.28040591403841975 test_loss: 0.25867581367492676
epoch: 3 training_loss 0.2496543385088444 test_loss: 0.23781373500823974
epoch: 4 training_loss 0.24471347838640212 test_loss: 0.2139892578125
epoch: 5 training_loss 0.23932588011026382 test_loss: 0.21519842147827148
epoch: 6 training_loss 0.23878947213292123 test_loss: 0.20069594383239747
epoch: 7 training_loss 0.22758347496390344 test_loss: 0.20546293258666992
epoch: 8 training_loss 0.23426577277481556 test_loss: 0.23250489234924315
epoch: 9 training_loss 0.21401401728391647 test_loss: 0.23165662288665773
epoch: 10 training_loss 0.22251750513911248 test_loss: 0.24037585258483887
epoch: 11 training_loss 0.22127759516239165 test_loss: 0.21236011981964112
epoch: 12 training_loss 0.21895087711513042 test_loss: 0.2159043788909912
epoch: 13 training_loss 0.21167174458503724 test_loss: 0.19810911417007446
epoch: 14 training_loss 0.21611936792731284 test_loss: 0.20831995010375975
epoch: 15 training_loss 0.21843665286898614 test_loss: 0.23407905101776122
epoch: 16 training_loss 0.211841399371624 test_loss: 0.20401864051818847
epoch: 17 training_loss 0.22989648804068566 test_loss: 0.22048017978668213
epoch: 18 training_loss 0.21710147634148597 test_loss: 0.21870310306549073
epoch: 19 training_loss 0.21732185855507852 test_loss: 0.19158430099487306
epoch: 20 training_loss 0.22631349846720694 test_loss: 0.19002149105072022
epoch: 21 training_loss 0.20879699617624284 test_loss: 0.20830748081207276
epoch: 22 training_loss 0.20944783493876457 test_loss: 0.20374596118927002
epoch: 23 training_loss 0.2141774572432041 test_loss: 0.20423340797424316
epoch: 24 training_loss 0.21149032674729823 test_loss: 0.20599148273468018
epoch: 25 training_loss 0.2063632507622242 test_loss: 0.1938629627227783
epoch: 26 training_loss 0.20687957279384137 test_loss: 0.21367306709289552
epoch: 27 training_loss 0.2110421435534954 test_loss: 0.20058104991912842
epoch: 28 training_loss 0.20631030656397342 test_loss: 0.20117626190185547
epoch: 29 training_loss 0.1980291423201561 test_loss: 0.20262060165405274
epoch: 30 training_loss 0.20620230183005334 test_loss: 0.20366694927215576
epoch: 31 training_loss 0.20078466102480888 test_loss: 0.1853706955909729
epoch: 32 training_loss 0.20409045644104482 test_loss: 0.21319928169250488
epoch: 33 training_loss 0.20708056956529616 test_loss: 0.1923281192779541
epoch: 34 training_loss 0.19910732127726077 test_loss: 0.19561617374420165
epoch: 35 training_loss 0.20977231711149216 test_loss: 0.2009044408798218
epoch: 36 training_loss 0.2056846383213997 test_loss: 0.1919795274734497
epoch: 37 training_loss 0.20062557257711888 test_loss: 0.1865723967552185
epoch: 38 training_loss 0.20047930881381035 test_loss: 0.19502632617950438
epoch: 39 training_loss 0.2002496425062418 test_loss: 0.21094632148742676
epoch: 40 training_loss 0.20527567245066167 test_loss: 0.19205904006958008
epoch: 41 training_loss 0.1944132874906063 test_loss: 0.20581300258636476
epoch: 42 training_loss 0.19333497799932955 test_loss: 0.18661032915115355
epoch: 43 training_loss 0.19953596346080305 test_loss: 0.19304736852645873
epoch: 44 training_loss 0.20030690155923367 test_loss: 0.18744453191757202
epoch: 45 training_loss 0.20462755233049393 test_loss: 0.1881725311279297
epoch: 46 training_loss 0.20507063150405883 test_loss: 0.21340589523315429
epoch: 47 training_loss 0.1975727842748165 test_loss: 0.1944817543029785
epoch: 48 training_loss 0.19826597966253756 test_loss: 0.19000141620635985
epoch: 49 training_loss 0.1993557208776474 test_loss: 0.19717713594436645
epoch: 50 training_loss 0.2098077879846096 test_loss: 0.19574215412139892
epoch: 51 training_loss 0.19034848794341086 test_loss: 0.18784900903701782
epoch: 52 training_loss 0.20247696220874786 test_loss: 0.18996095657348633
epoch: 53 training_loss 0.20332659274339676 test_loss: 0.18327586650848388
epoch: 54 training_loss 0.19969053633511066 test_loss: 0.1997480273246765
epoch: 55 training_loss 0.19379007004201412 test_loss: 0.20708730220794677
epoch: 56 training_loss 0.19697221830487252 test_loss: 0.20865848064422607
epoch: 57 training_loss 0.1978117622435093 test_loss: 0.2203174352645874
epoch: 58 training_loss 0.1953787713497877 test_loss: 0.18954869508743286
epoch: 59 training_loss 0.18817865870893002 test_loss: 0.19358088970184326
epoch: 60 training_loss 0.19509441934525967 test_loss: 0.19657710790634156
epoch: 61 training_loss 0.19780750147998333 test_loss: 0.17541253566741943
epoch: 62 training_loss 0.19996473908424378 test_loss: 0.19852335453033448
epoch: 63 training_loss 0.1935223823785782 test_loss: 0.2072298765182495
epoch: 64 training_loss 0.1937147866934538 test_loss: 0.18688093423843383
epoch: 65 training_loss 0.2053827640414238 test_loss: 0.19275728464126587
epoch: 66 training_loss 0.19331485830247402 test_loss: 0.19112606048583985
epoch: 67 training_loss 0.19561250247061251 test_loss: 0.18405152559280397
epoch: 68 training_loss 0.18525369629263877 test_loss: 0.18324730396270753
epoch: 69 training_loss 0.19373235926032067 test_loss: 0.1973461389541626
epoch: 70 training_loss 0.1881185334175825 test_loss: 0.1799065351486206
epoch: 71 training_loss 0.18231141194701195 test_loss: 0.1969622015953064
epoch: 72 training_loss 0.1904289208352566 test_loss: 0.20172572135925293
epoch: 73 training_loss 0.19774579249322413 test_loss: 0.21704838275909424
epoch: 74 training_loss 0.19457917258143426 test_loss: 0.1945698618888855
epoch: 75 training_loss 0.19291891612112522 test_loss: 0.18585296869277954
epoch: 76 training_loss 0.19443263962864876 test_loss: 0.20259544849395753
epoch: 77 training_loss 0.19115559116005898 test_loss: 0.1938709497451782
epoch: 78 training_loss 0.19436501309275628 test_loss: 0.1969146490097046
epoch: 79 training_loss 0.19738558523356914 test_loss: 0.2028341293334961
epoch: 80 training_loss 0.18892203971743585 test_loss: 0.1880779504776001
epoch: 81 training_loss 0.1827536478638649 test_loss: 0.2023223876953125
epoch: 82 training_loss 0.1845965863019228 test_loss: 0.1971118688583374
epoch: 83 training_loss 0.19127693608403207 test_loss: 0.19501116275787353
epoch: 84 training_loss 0.18576956376433373 test_loss: 0.19424750804901122
epoch: 85 training_loss 0.18297927260398864 test_loss: 0.18264056444168092
epoch: 86 training_loss 0.1904534189403057 test_loss: 0.2019122838973999
epoch: 87 training_loss 0.19035337992012502 test_loss: 0.18776830434799194
epoch: 88 training_loss 0.19216894946992397 test_loss: 0.19222161769866944
epoch: 89 training_loss 0.1934459214657545 test_loss: 0.18235703706741332
epoch: 90 training_loss 0.18637246415019035 test_loss: 0.17992262840270995
epoch: 91 training_loss 0.18410516425967216 test_loss: 0.1987872004508972
epoch: 92 training_loss 0.18641326889395715 test_loss: 0.20366888046264647
epoch: 93 training_loss 0.18936129085719586 test_loss: 0.18001129627227783
epoch: 94 training_loss 0.1804127488285303 test_loss: 0.1848623275756836
epoch: 95 training_loss 0.1831913573294878 test_loss: 0.1944868803024292
epoch: 96 training_loss 0.19056279368698598 test_loss: 0.17931015491485597
epoch: 97 training_loss 0.19080855391919613 test_loss: 0.22777485847473145
epoch: 98 training_loss 0.19062829062342643 test_loss: 0.18346883058547975
epoch: 99 training_loss 0.18259492129087448 test_loss: 0.1964452624320984
epoch: 100 training_loss 0.1878886666893959 test_loss: 0.17572300434112548
epoch: 101 training_loss 0.18509711012244223 test_loss: 0.1825866937637329
epoch: 102 training_loss 0.185761196911335 test_loss: 0.18955376148223876
epoch: 103 training_loss 0.18512760765850544 test_loss: 0.1718655824661255
epoch: 104 training_loss 0.18194216802716257 test_loss: 0.17933320999145508
epoch: 105 training_loss 0.180412133410573 test_loss: 0.21144683361053468
epoch: 106 training_loss 0.1883256606757641 test_loss: 0.17810884714126587
epoch: 107 training_loss 0.1848565586656332 test_loss: 0.19796620607376098
epoch: 108 training_loss 0.1899533277750015 test_loss: 0.18219058513641356
epoch: 109 training_loss 0.17827796563506126 test_loss: 0.1841619372367859
epoch: 110 training_loss 0.191802666708827 test_loss: 0.18063572645187378
epoch: 111 training_loss 0.19205017670989036 test_loss: 0.17804020643234253
epoch: 112 training_loss 0.1926836496591568 test_loss: 0.1772858738899231
epoch: 113 training_loss 0.18692696698009967 test_loss: 0.18562216758728028
epoch: 114 training_loss 0.1861544732004404 test_loss: 0.19853155612945556
epoch: 115 training_loss 0.18416005320847034 test_loss: 0.18639640808105468
epoch: 116 training_loss 0.17791821964085103 test_loss: 0.19030066728591918
epoch: 117 training_loss 0.18404267385601997 test_loss: 0.17591108083724977
epoch: 118 training_loss 0.18382769882678984 test_loss: 0.21533212661743165
epoch: 119 training_loss 0.1855793261528015 test_loss: 0.19700369834899903
epoch: 120 training_loss 0.17934153139591216 test_loss: 0.18445379734039308
epoch: 121 training_loss 0.1839465559273958 test_loss: 0.17947492599487305
epoch: 122 training_loss 0.17906954497098923 test_loss: 0.1932915449142456
epoch: 123 training_loss 0.18137003600597382 test_loss: 0.18269163370132446
epoch: 124 training_loss 0.18258936174213886 test_loss: 0.1872177839279175
epoch: 125 training_loss 0.1864445260167122 test_loss: 0.1758466839790344
epoch: 126 training_loss 0.1873169209063053 test_loss: 0.1820254445075989
epoch: 127 training_loss 0.18090551413595676 test_loss: 0.17989182472229004
epoch: 128 training_loss 0.18628021501004696 test_loss: 0.18693794012069703
epoch: 129 training_loss 0.17565140455961228 test_loss: 0.2071082353591919
epoch: 130 training_loss 0.18806687384843826 test_loss: 0.1613754153251648
epoch: 131 training_loss 0.17957117915153503 test_loss: 0.17900956869125367
epoch: 132 training_loss 0.1863082102686167 test_loss: 0.18735140562057495
epoch: 133 training_loss 0.18437182009220124 test_loss: 0.18776289224624634
epoch: 134 training_loss 0.19080579094588757 test_loss: 0.1835133671760559
epoch: 135 training_loss 0.18705484583973886 test_loss: 0.2138915777206421
epoch: 136 training_loss 0.18433576472103597 test_loss: 0.18394763469696046
epoch: 137 training_loss 0.17196225427091122 test_loss: 0.18720026016235353
epoch: 138 training_loss 0.1812185261398554 test_loss: 0.1848662257194519
epoch: 139 training_loss 0.18210454247891902 test_loss: 0.189308762550354
epoch: 140 training_loss 0.18433131262660027 test_loss: 0.1823175311088562
epoch: 141 training_loss 0.17785428695380687 test_loss: 0.1730266332626343
epoch: 142 training_loss 0.17734869711101056 test_loss: 0.1962710738182068
epoch: 143 training_loss 0.17707573421299458 test_loss: 0.17827763557434081
epoch: 144 training_loss 0.17941449426114559 test_loss: 0.18804683685302734
epoch: 145 training_loss 0.18329421371221544 test_loss: 0.17410601377487184
epoch: 146 training_loss 0.17891740255057811 test_loss: 0.19229636192321778
epoch: 147 training_loss 0.1746063407510519 test_loss: 0.18127110004425048
epoch: 148 training_loss 0.19528289139270782 test_loss: 0.20363955497741698
epoch: 149 training_loss 0.1882260274887085 test_loss: 0.18645091056823732
episode: 0 training return: -363.5290519533687
episode: 1 training return: -455.41991478927883
episode: 2 training return: -556.1382928730295
episode: 3 training return: -216.25842631651992
epoch: 1 test_true_pfm: 23.163668872267703 sim_pfm: -237.03846552500235
episode: 4 training return: -444.7478812092747
episode: 5 training return: -453.8878830213298
episode: 6 training return: -468.10330010367215
episode: 7 training return: -474.63485232755994
epoch: 2 test_true_pfm: 30.00475368144581 sim_pfm: -166.3953759275717
episode: 8 training return: -621.8555316729231
episode: 9 training return: -613.0914572842279
episode: 10 training return: -142.88277379360363
episode: 11 training return: -178.4337257084086
epoch: 3 test_true_pfm: 40.318401085735665 sim_pfm: -108.92705847295908
episode: 12 training return: -218.10132587455246
episode: 13 training return: -143.2966713745632
episode: 14 training return: -86.75928476167933
episode: 15 training return: -18.89196859021746
epoch: 4 test_true_pfm: 31.852171583208484 sim_pfm: -20.394449528470496
episode: 16 training return: -34.634052796979226
episode: 17 training return: 258.7461705762709
episode: 18 training return: 262.62533773948525
episode: 19 training return: 304.76516498757945
epoch: 5 test_true_pfm: 23.44499475029079 sim_pfm: 368.3673344443593
episode: 20 training return: 290.1354658645171
episode: 21 training return: 324.4682262160621
episode: 22 training return: -80.64050558854498
episode: 23 training return: 345.81753956420266
epoch: 6 test_true_pfm: 39.72171282671004 sim_pfm: -88.31082159660689
episode: 24 training return: -212.9075124407105
episode: 25 training return: -26.045780457392645
episode: 26 training return: 17.32126066936067
episode: 27 training return: 132.66889891696258
epoch: 7 test_true_pfm: 34.085274424202 sim_pfm: 252.09614073592425
episode: 28 training return: 66.65589217384938
episode: 29 training return: 126.05775019754284
episode: 30 training return: 338.4190221970072
episode: 31 training return: 342.87738927631057
epoch: 8 test_true_pfm: 40.88655994126596 sim_pfm: 359.3565924316534
episode: 32 training return: 334.7490951413701
episode: 33 training return: 380.3617565394622
episode: 34 training return: 368.5160716606359
episode: 35 training return: 340.4121254551415
epoch: 9 test_true_pfm: 52.44422303408009 sim_pfm: 472.47283961000176
episode: 36 training return: 340.88191788895625
episode: 37 training return: 337.40766721061783
episode: 38 training return: 335.27958585663015
episode: 39 training return: 320.15157012338483
epoch: 10 test_true_pfm: 52.39601831565659 sim_pfm: 475.7101050610184
episode: 40 training return: 338.34626018842454
episode: 41 training return: 358.20328846848815
episode: 42 training return: 413.52068376933863
episode: 43 training return: 317.87420073466717
epoch: 11 test_true_pfm: 38.17270576906062 sim_pfm: 480.74350629721357
episode: 44 training return: 439.17009191308523
episode: 45 training return: 452.406007702149
episode: 46 training return: 458.0597844002606
episode: 47 training return: 342.036122914982
epoch: 12 test_true_pfm: 48.83974882787667 sim_pfm: 470.0148429715611
episode: 48 training return: 357.39766578971677
episode: 49 training return: 351.05075745589016
episode: 50 training return: 444.54413564694255
episode: 51 training return: 363.63966483261254
epoch: 13 test_true_pfm: 50.135773793908236 sim_pfm: 477.21225908403545
episode: 52 training return: 419.1305391203748
episode: 53 training return: 413.05881398024934
episode: 54 training return: 324.11092439542523
episode: 55 training return: 353.01491475425
epoch: 14 test_true_pfm: 53.508259323470206 sim_pfm: 484.19382092472387
episode: 56 training return: 438.2009099326729
episode: 57 training return: 345.6213472373629
episode: 58 training return: 332.9991260687601
episode: 59 training return: 348.3022674611807
epoch: 15 test_true_pfm: 47.90900623861006 sim_pfm: 544.2412382267404
episode: 60 training return: 433.34359843760797
episode: 61 training return: 425.49189908465644
episode: 62 training return: 349.85186761632065
episode: 63 training return: 359.1074504106858
epoch: 16 test_true_pfm: 50.49199609606362 sim_pfm: 472.54650318896455
episode: 64 training return: 447.9072967618223
episode: 65 training return: 433.203209078628
episode: 66 training return: 353.39033333229634
episode: 67 training return: 351.438210965067
epoch: 17 test_true_pfm: 48.64167559423146 sim_pfm: 501.80762793982666
episode: 68 training return: 331.31547577989875
episode: 69 training return: 357.0003915072847
episode: 70 training return: 433.169759222834
episode: 71 training return: 359.7668776665856
epoch: 18 test_true_pfm: 46.14009775730876 sim_pfm: 519.220339593456
episode: 72 training return: 354.47573976468664
episode: 73 training return: 351.25232366645014
episode: 74 training return: 335.27059872033743
episode: 75 training return: 348.74837686884854
epoch: 19 test_true_pfm: 43.52918359597887 sim_pfm: 491.9853009703907
episode: 76 training return: 348.3598772576592
episode: 77 training return: 354.33386666706446
episode: 78 training return: 349.5685607239796
episode: 79 training return: 352.8824593882699
epoch: 20 test_true_pfm: 50.30772083352295 sim_pfm: 475.97374887981323
episode: 80 training return: 341.30531199111414
episode: 81 training return: 347.83965804840926
episode: 82 training return: 444.1894726711169
episode: 83 training return: 355.3982977997327
epoch: 21 test_true_pfm: 49.42143824801504 sim_pfm: 487.21191913478316
episode: 84 training return: 353.00143327465673
episode: 85 training return: 348.66450629521006
episode: 86 training return: 326.6253342304819
episode: 87 training return: 443.9066060999142
epoch: 22 test_true_pfm: 48.59419106115062 sim_pfm: 499.9219822686878
episode: 88 training return: 435.3534402340076
episode: 89 training return: 453.37800739613476
episode: 90 training return: 348.43056315918176
episode: 91 training return: 347.7338242568413
epoch: 23 test_true_pfm: 50.306070338208734 sim_pfm: 571.241372025134
episode: 92 training return: 368.32681534749156
episode: 93 training return: 341.9552630380279
episode: 94 training return: 366.513306077167
episode: 95 training return: 437.68750841972
epoch: 24 test_true_pfm: 51.67342275768314 sim_pfm: 504.05302214850354
episode: 96 training return: 342.5864847437152
episode: 97 training return: 354.3222820426019
episode: 98 training return: 358.4178013251901
episode: 99 training return: 338.4692418041102
epoch: 25 test_true_pfm: 45.9877295104007 sim_pfm: 517.9534547681333
episode: 100 training return: 349.18061151935353
episode: 101 training return: 348.99412217202314
episode: 102 training return: 335.49909674236585
episode: 103 training return: 444.2444712031929
epoch: 26 test_true_pfm: 44.052748562789844 sim_pfm: 484.79229498631065
episode: 104 training return: 351.7321589898304
episode: 105 training return: 349.7083775462582
episode: 106 training return: 360.31774352339977
episode: 107 training return: 351.9496979340577
epoch: 27 test_true_pfm: 46.38249620412302 sim_pfm: 495.46232562633077
episode: 108 training return: 351.42468879246314
episode: 109 training return: 345.9637853512868
episode: 110 training return: 437.5086569549132
episode: 111 training return: 349.29683940791705
epoch: 28 test_true_pfm: 42.02508312755781 sim_pfm: 487.49063330467965
episode: 112 training return: 454.45757367620024
episode: 113 training return: 445.26506812525184
episode: 114 training return: 349.38228066408504
episode: 115 training return: 351.0472611878703
epoch: 29 test_true_pfm: 47.93042481411578 sim_pfm: 501.7830256663653
episode: 116 training return: 355.9132565581807
episode: 117 training return: 350.6473819874436
episode: 118 training return: 341.48495917759936
episode: 119 training return: 347.48144216553897
epoch: 30 test_true_pfm: 46.373011809244645 sim_pfm: 466.49697561336694
episode: 120 training return: 348.6252484632012
episode: 121 training return: 351.3515282054235
episode: 122 training return: 365.9949130455895
episode: 123 training return: 367.9469022117542
epoch: 31 test_true_pfm: 49.06493927492162 sim_pfm: 528.8036964345157
episode: 124 training return: 354.3158963722689
episode: 125 training return: 438.8291456319353
episode: 126 training return: 355.0002131108793
episode: 127 training return: 348.0555729940053
epoch: 32 test_true_pfm: 43.98589347230643 sim_pfm: 521.2365001515396
episode: 128 training return: 362.15223998659206
episode: 129 training return: 346.8478464740898
episode: 130 training return: 454.4113410819893
episode: 131 training return: 453.81353236419164
epoch: 33 test_true_pfm: 52.605632290553125 sim_pfm: 518.5627967459214
episode: 132 training return: 352.0261094047277
episode: 133 training return: 462.00655304431695
episode: 134 training return: 346.55039791929073
episode: 135 training return: 366.67431927181553
epoch: 34 test_true_pfm: 48.28476900416259 sim_pfm: 509.90310729097797
episode: 136 training return: 336.55284572720944
episode: 137 training return: 349.1225031530742
episode: 138 training return: 330.61221179507845
episode: 139 training return: 346.5765287389582
epoch: 35 test_true_pfm: 49.65395762003357 sim_pfm: 482.1616667760088
episode: 140 training return: 459.22312336508037
episode: 141 training return: 342.82876764351846
episode: 142 training return: 467.06398496244844
episode: 143 training return: 351.6544751459683
epoch: 36 test_true_pfm: 42.57573864517579 sim_pfm: 471.29195452860563
episode: 144 training return: 348.0332663544095
episode: 145 training return: 356.75266824494287
episode: 146 training return: 452.5535953872296
episode: 147 training return: 353.5406861119258
epoch: 37 test_true_pfm: 46.14614234137546 sim_pfm: 455.06324408439303
episode: 148 training return: 358.0283786767554
episode: 149 training return: 346.2180162126564
episode: 150 training return: 347.08774394304635
episode: 151 training return: 353.2973761243662
epoch: 38 test_true_pfm: 47.76931697128666 sim_pfm: 556.110678685477
episode: 152 training return: 358.99672572307344
episode: 153 training return: 462.87332905715243
episode: 154 training return: 336.54850378517733
episode: 155 training return: 440.07788942681054
epoch: 39 test_true_pfm: 39.42789349623391 sim_pfm: 488.89629407717183
episode: 156 training return: 347.22544617877014
episode: 157 training return: 346.3011451236568
episode: 158 training return: 367.22952042670664
episode: 159 training return: 464.2358496395511
epoch: 40 test_true_pfm: 47.6567546021501 sim_pfm: 575.6478763187837
episode: 160 training return: 347.3909395542063
episode: 161 training return: 438.13027191765417
episode: 162 training return: 352.88727338629525
episode: 163 training return: 441.4539893984505
epoch: 41 test_true_pfm: 53.20620584187932 sim_pfm: 493.953040067142
episode: 164 training return: 444.79041565292687
episode: 165 training return: 423.61745302077014
episode: 166 training return: 467.3706366667312
episode: 167 training return: 340.10664726709786
epoch: 42 test_true_pfm: 48.514753025311165 sim_pfm: 479.8947268707141
episode: 168 training return: 436.6414280468154
episode: 169 training return: 457.7843627389733
episode: 170 training return: 343.97056751670397
episode: 171 training return: 325.3624466026692
epoch: 43 test_true_pfm: 46.231072739831625 sim_pfm: 483.02788908603924
episode: 172 training return: 351.1628904079835
episode: 173 training return: 454.9708414231494
episode: 174 training return: 359.85791403852886
episode: 175 training return: 349.50651138827897
epoch: 44 test_true_pfm: 40.41257089667735 sim_pfm: 493.9631218473807
episode: 176 training return: 355.0926412334365
episode: 177 training return: 470.19568193382554
episode: 178 training return: 335.718718637732
episode: 179 training return: 350.3537700356569
epoch: 45 test_true_pfm: 44.850776581325924 sim_pfm: 478.78297367514415
episode: 180 training return: 427.10869151864085
episode: 181 training return: 352.4713113742094
episode: 182 training return: 453.76917536887277
episode: 183 training return: 438.39139611238284
epoch: 46 test_true_pfm: 47.70127935412413 sim_pfm: 482.6799633107601
episode: 184 training return: 464.48831962322424
episode: 185 training return: 349.96922585897426
episode: 186 training return: 445.3802416201936
episode: 187 training return: 449.1571808754637
epoch: 47 test_true_pfm: 49.541795874147084 sim_pfm: 491.6362816732651
episode: 188 training return: 453.48704864662403
episode: 189 training return: 446.4151372473675
episode: 190 training return: 346.7956916144169
episode: 191 training return: 445.0653864168961
epoch: 48 test_true_pfm: 50.502234825103955 sim_pfm: 458.60499655130707
episode: 192 training return: 354.4218544059209
episode: 193 training return: 461.6380850375193
episode: 194 training return: 463.6257886085451
episode: 195 training return: 350.682556115651
epoch: 49 test_true_pfm: 45.65884421575084 sim_pfm: 539.8341371013249
episode: 196 training return: 353.88107534442935
episode: 197 training return: 355.9048754253732
episode: 198 training return: 428.57660084671227
episode: 199 training return: 459.6680386667326
epoch: 50 test_true_pfm: 41.303131268243966 sim_pfm: 481.4338695913723
episode: 200 training return: 341.5476806208364
episode: 201 training return: 357.06096390553256
episode: 202 training return: 461.0057762599148
episode: 203 training return: 464.3227926468568
epoch: 51 test_true_pfm: 39.7771601582011 sim_pfm: 493.7985712242427
episode: 204 training return: 449.8281056354367
episode: 205 training return: 350.6135051967051
episode: 206 training return: 345.6386858493156
episode: 207 training return: 348.6191540453123
epoch: 52 test_true_pfm: 46.67916361233265 sim_pfm: 535.8161576878155
episode: 208 training return: 356.99262004719543
episode: 209 training return: 352.37324316425185
episode: 210 training return: 460.03149925007017
episode: 211 training return: 460.3248856296056
epoch: 53 test_true_pfm: 47.895700269356325 sim_pfm: 507.86265360408174
episode: 212 training return: 350.6152532284622
episode: 213 training return: 348.75796332167164
episode: 214 training return: 354.05937444205773
episode: 215 training return: 426.51741280529365
epoch: 54 test_true_pfm: 50.293959632516284 sim_pfm: 548.5104632112087
episode: 216 training return: 367.8127172185376
episode: 217 training return: 345.25716506273034
episode: 218 training return: 355.904162119055
episode: 219 training return: 343.61449292087815
epoch: 55 test_true_pfm: 46.87829307201823 sim_pfm: 461.55230984431415
episode: 220 training return: 444.6146854270606
episode: 221 training return: 350.46258120079665
episode: 222 training return: 354.72092084210385
episode: 223 training return: 356.0767618230797
epoch: 56 test_true_pfm: 43.20840265521962 sim_pfm: 526.4540354399902
episode: 224 training return: 345.75048073170814
episode: 225 training return: 363.64196506099387
episode: 226 training return: 341.1643109143739
episode: 227 training return: 337.7279234678653
epoch: 57 test_true_pfm: 50.87286853379761 sim_pfm: 508.1246638699451
episode: 228 training return: 355.5405967134313
episode: 229 training return: 469.40876768528454
episode: 230 training return: 359.9935407700221
episode: 231 training return: 477.6317965375714
epoch: 58 test_true_pfm: 45.09783026348463 sim_pfm: 499.9669334038099
episode: 232 training return: 362.4001312969963
episode: 233 training return: 336.6934124725084
episode: 234 training return: 354.19701674913676
episode: 235 training return: 352.71407427328336
epoch: 59 test_true_pfm: 45.273863914198294 sim_pfm: 561.2207555738984
episode: 236 training return: 455.93224657819684
episode: 237 training return: 339.15741549184816
episode: 238 training return: 449.32343513391
episode: 239 training return: 456.64109741615283
epoch: 60 test_true_pfm: 49.86833913036994 sim_pfm: 491.3234940271818
episode: 240 training return: 351.6908751936797
episode: 241 training return: 456.8335912975106
episode: 242 training return: 355.8886607540114
episode: 243 training return: 483.97481219294315
epoch: 61 test_true_pfm: 46.55387817671152 sim_pfm: 544.9610346094286
episode: 244 training return: 477.34986280338325
episode: 245 training return: 447.11341478929893
episode: 246 training return: 359.81882646676456
episode: 247 training return: 470.2410434722917
epoch: 62 test_true_pfm: 50.8019427736108 sim_pfm: 500.9972033390707
episode: 248 training return: 456.10165004246676
episode: 249 training return: 464.53818788954413
episode: 250 training return: 459.8353459631105
episode: 251 training return: 342.07443287924445
epoch: 63 test_true_pfm: 43.33856706641242 sim_pfm: 523.3388026511628
episode: 252 training return: 359.58397559092924
episode: 253 training return: 353.15117204978833
episode: 254 training return: 461.2245971969088
episode: 255 training return: 367.68078362473676
epoch: 64 test_true_pfm: 35.609010304157714 sim_pfm: 529.0484344280658
episode: 256 training return: 468.67135420153465
episode: 257 training return: 352.7123896212322
episode: 258 training return: 456.8054456772928
episode: 259 training return: 355.79077061736285
epoch: 65 test_true_pfm: 44.09292012415085 sim_pfm: 519.3478776912353
episode: 260 training return: 468.50287495222574
episode: 261 training return: 347.5512206624823
episode: 262 training return: 347.65372959452617
episode: 263 training return: 462.92694620743197
epoch: 66 test_true_pfm: 48.729889368437455 sim_pfm: 500.2838613370939
episode: 264 training return: 450.2693227618255
episode: 265 training return: 343.3570567338764
episode: 266 training return: 372.234537873601
episode: 267 training return: 361.0038088878249
epoch: 67 test_true_pfm: 44.49934486779666 sim_pfm: 532.1063260229572
episode: 268 training return: 347.44422707178023
episode: 269 training return: 346.0582187686322
episode: 270 training return: 428.4189313804506
episode: 271 training return: 370.8934642904847
epoch: 68 test_true_pfm: 43.94465176467558 sim_pfm: 529.6434717903767
episode: 272 training return: 356.8818852694487
episode: 273 training return: 354.55686454698304
episode: 274 training return: 447.7456587778457
episode: 275 training return: 452.3699378933601
epoch: 69 test_true_pfm: 46.28469838665031 sim_pfm: 494.6267535639619
episode: 276 training return: 335.0308410795039
episode: 277 training return: 338.9266130424219
episode: 278 training return: 353.0706246952793
episode: 279 training return: 360.07270497980704
epoch: 70 test_true_pfm: 45.87595883000927 sim_pfm: 494.6923554202144
episode: 280 training return: 448.08824271847925
episode: 281 training return: 349.5673181375274
episode: 282 training return: 468.49279802913327
episode: 283 training return: 475.73799327390725
epoch: 71 test_true_pfm: 50.524951459078636 sim_pfm: 518.3665727325184
episode: 284 training return: 449.72963160986916
episode: 285 training return: 346.9217049665162
episode: 286 training return: 340.8177962616969
episode: 287 training return: 351.0103148082064
epoch: 72 test_true_pfm: 45.19239719018742 sim_pfm: 505.36517063805377
episode: 288 training return: 337.21945768512256
episode: 289 training return: 348.7306540224786
episode: 290 training return: 451.6196060496713
episode: 291 training return: 353.37548326908984
epoch: 73 test_true_pfm: 45.86604644388714 sim_pfm: 476.9760973455638
episode: 292 training return: 458.5757191735064
episode: 293 training return: 462.1177283416406
episode: 294 training return: 469.9765144517263
episode: 295 training return: 449.8053040838679
epoch: 74 test_true_pfm: 48.29487573198865 sim_pfm: 537.3462162850491
episode: 296 training return: 478.12569106802374
episode: 297 training return: 473.1937129133254
episode: 298 training return: 444.36192164007934
episode: 299 training return: 464.26901691421745
epoch: 75 test_true_pfm: 48.09883308324637 sim_pfm: 496.59489365143145
episode: 300 training return: 346.8789937917519
episode: 301 training return: 348.05301462531884
episode: 302 training return: 340.2661721245922
episode: 303 training return: 452.6039053270298
epoch: 76 test_true_pfm: 50.337067084764556 sim_pfm: 578.1130655863781
episode: 304 training return: 352.8905153766504
episode: 305 training return: 343.88700127793487
episode: 306 training return: 463.2983290761763
episode: 307 training return: 438.40022760550255
epoch: 77 test_true_pfm: 50.68961460783425 sim_pfm: 469.375557068563
episode: 308 training return: 466.1057786297591
episode: 309 training return: 347.2647246970519
episode: 310 training return: 470.44966850003175
episode: 311 training return: 454.8906092334874
epoch: 78 test_true_pfm: 48.006825157996936 sim_pfm: 465.92684013563786
episode: 312 training return: 481.00268310544885
episode: 313 training return: 346.8382421107143
episode: 314 training return: 360.5948064619862
episode: 315 training return: 356.97792400977016
epoch: 79 test_true_pfm: 44.04672840483016 sim_pfm: 511.88068398624296
episode: 316 training return: 343.6904448370626
episode: 317 training return: 454.2193885084941
episode: 318 training return: 360.02305383626066
episode: 319 training return: 470.6631584060002
epoch: 80 test_true_pfm: 45.67362510770804 sim_pfm: 464.8292209631408
episode: 320 training return: 368.35633834512686
episode: 321 training return: 364.41895238013115
episode: 322 training return: 462.96946225205807
episode: 323 training return: 356.66201429199975
epoch: 81 test_true_pfm: 43.977188576151285 sim_pfm: 586.92372866828
episode: 324 training return: 367.3603262217763
episode: 325 training return: 466.7204592744428
episode: 326 training return: 443.6450767342038
episode: 327 training return: 347.55875098359246
epoch: 82 test_true_pfm: 49.55855282089576 sim_pfm: 576.8664667306956
episode: 328 training return: 336.4289284547148
episode: 329 training return: 347.5091449040034
episode: 330 training return: 366.3601659991682
episode: 331 training return: 462.93662920040197
epoch: 83 test_true_pfm: 43.5855735415196 sim_pfm: 514.2563477207045
episode: 332 training return: 359.82719867051657
episode: 333 training return: 356.75407320383994
episode: 334 training return: 464.0543427931339
episode: 335 training return: 369.890649920363
epoch: 84 test_true_pfm: 39.928467246425065 sim_pfm: 498.3597117158195
episode: 336 training return: 363.2339052815
episode: 337 training return: 355.9873445722822
episode: 338 training return: 444.26589628826974
episode: 339 training return: 353.6822016914913
epoch: 85 test_true_pfm: 49.451300514887116 sim_pfm: 529.0290697070301
episode: 340 training return: 464.22555638086834
episode: 341 training return: 465.6920418790996
episode: 342 training return: 451.31387081569505
episode: 343 training return: 358.4968254110724
epoch: 86 test_true_pfm: 45.078362855771395 sim_pfm: 500.6751206731271
episode: 344 training return: 355.25128080219065
episode: 345 training return: 350.41966793901423
episode: 346 training return: 469.47817728536506
episode: 347 training return: 444.7563247090639
epoch: 87 test_true_pfm: 39.81108707042381 sim_pfm: 572.3094385125092
episode: 348 training return: 344.9277839322156
episode: 349 training return: 379.1648521973176
episode: 350 training return: 359.0551277893258
episode: 351 training return: 468.65953656833074
epoch: 88 test_true_pfm: 49.627298162944534 sim_pfm: 542.0293511854862
episode: 352 training return: 351.034364240133
episode: 353 training return: 352.5254008413249
episode: 354 training return: 350.2686446572836
episode: 355 training return: 356.9277226743298
epoch: 89 test_true_pfm: 49.868063377107795 sim_pfm: 535.4957596331186
episode: 356 training return: 351.5579069794789
episode: 357 training return: 452.3753425639401
episode: 358 training return: 460.43021313929154
episode: 359 training return: 356.81355016053277
epoch: 90 test_true_pfm: 46.05256921648638 sim_pfm: 548.889860336329
episode: 360 training return: 454.1535637806156
episode: 361 training return: 456.63018631092905
episode: 362 training return: 451.93291373871773
episode: 363 training return: 356.96011419579185
epoch: 91 test_true_pfm: 48.32749807555981 sim_pfm: 535.1871199088013
episode: 364 training return: 345.0320749172268
episode: 365 training return: 463.8949113600356
episode: 366 training return: 437.5434473860362
episode: 367 training return: 477.1223148512612
epoch: 92 test_true_pfm: 47.38835707356782 sim_pfm: 497.4770246713247
episode: 368 training return: 354.0074475578936
episode: 369 training return: 464.4701227589394
episode: 370 training return: 473.68444816556746
episode: 371 training return: 358.77254120458707
epoch: 93 test_true_pfm: 47.910060087871095 sim_pfm: 477.2471851435418
episode: 372 training return: 464.2051346474884
episode: 373 training return: 352.6243055365523
episode: 374 training return: 326.0100198329436
episode: 375 training return: 366.4950106136993
epoch: 94 test_true_pfm: 50.84276054945325 sim_pfm: 541.4686689815982
episode: 376 training return: 472.3913844542838
episode: 377 training return: 355.13656329001367
episode: 378 training return: 366.47153813232137
episode: 379 training return: 449.0196844962063
epoch: 95 test_true_pfm: 43.43163657526397 sim_pfm: 496.0161577904834
episode: 380 training return: 357.9365127786101
episode: 381 training return: 442.2261335195929
episode: 382 training return: 468.5875930658306
episode: 383 training return: 366.4004491741739
epoch: 96 test_true_pfm: 41.98413447705381 sim_pfm: 464.0034685695578
episode: 384 training return: 450.1308810975597
episode: 385 training return: 480.01988453726204
episode: 386 training return: 374.5476473239134
episode: 387 training return: 361.2253142759505
epoch: 97 test_true_pfm: 47.81392876400458 sim_pfm: 503.8170853305673
episode: 388 training return: 350.9736221420087
episode: 389 training return: 344.0317850765935
episode: 390 training return: 455.49342958977945
episode: 391 training return: 471.2187443546746
epoch: 98 test_true_pfm: 38.70272795358596 sim_pfm: 471.17343900828337
episode: 392 training return: 351.9169522972891
episode: 393 training return: 359.2404067053001
episode: 394 training return: 463.00336567243215
episode: 395 training return: 341.4603412879812
epoch: 99 test_true_pfm: 45.578227395924806 sim_pfm: 497.44668518331304
episode: 396 training return: 366.11486660856
episode: 397 training return: 358.9546781437935
episode: 398 training return: 466.9718734059972
episode: 399 training return: 337.8951850595333
epoch: 100 test_true_pfm: 50.5973643398041 sim_pfm: 493.9641017188488
episode: 400 training return: 335.3397819948052
episode: 401 training return: 367.0010549423113
episode: 402 training return: 354.77679441294583
episode: 403 training return: 345.9235741433529
epoch: 101 test_true_pfm: 48.06336525071697 sim_pfm: 511.91124351517175
episode: 404 training return: 356.1358551394509
episode: 405 training return: 463.41828292000366
episode: 406 training return: 440.48977808383216
episode: 407 training return: 345.22080323000677
epoch: 102 test_true_pfm: 43.53743263325208 sim_pfm: 525.2881129772308
episode: 408 training return: 347.5948673848182
episode: 409 training return: 352.6751084923995
episode: 410 training return: 341.5551111867445
episode: 411 training return: 464.6836768545649
epoch: 103 test_true_pfm: 48.89655757117719 sim_pfm: 545.2670553810134
episode: 412 training return: 456.8113652325727
episode: 413 training return: 371.162265416944
episode: 414 training return: 371.18515416535854
episode: 415 training return: 453.62789018769064
epoch: 104 test_true_pfm: 51.092545289605425 sim_pfm: 477.86629182907
episode: 416 training return: 468.598032740942
episode: 417 training return: 375.2248925582711
episode: 418 training return: 370.1421864653655
episode: 419 training return: 454.30686873526594
epoch: 105 test_true_pfm: 45.23263888231714 sim_pfm: 448.0497623053561
episode: 420 training return: 456.95303975114246
episode: 421 training return: 465.8071780953327
episode: 422 training return: 470.01270165458027
episode: 423 training return: 350.27123268764626
epoch: 106 test_true_pfm: 44.03635106892373 sim_pfm: 546.7912841504691
episode: 424 training return: 351.6761584109255
episode: 425 training return: 458.186593589666
episode: 426 training return: 346.16560790920926
episode: 427 training return: 352.8408808685934
epoch: 107 test_true_pfm: 48.56031766334471 sim_pfm: 548.7346118841759
episode: 428 training return: 457.5878582044831
episode: 429 training return: 450.21943785163955
episode: 430 training return: 463.3525844579412
episode: 431 training return: 357.7918515256442
epoch: 108 test_true_pfm: 50.070888276076715 sim_pfm: 537.1182940088561
episode: 432 training return: 473.4678158590158
episode: 433 training return: 349.9798951977123
episode: 434 training return: 462.24059598845986
episode: 435 training return: 355.8698166918766
epoch: 109 test_true_pfm: 41.99690514109512 sim_pfm: 433.4580695591353
episode: 436 training return: 342.4059468658619
episode: 437 training return: 461.8601704273092
episode: 438 training return: 470.75316546756886
episode: 439 training return: 360.93398650325656
epoch: 110 test_true_pfm: 47.72691268761415 sim_pfm: 545.4636678712202
episode: 440 training return: 371.33126271398027
episode: 441 training return: 361.0415417494741
episode: 442 training return: 462.760545731193
episode: 443 training return: 351.73000156531515
epoch: 111 test_true_pfm: 43.349316089792374 sim_pfm: 539.8023802412454
episode: 444 training return: 477.6587994432551
episode: 445 training return: 354.36271456525805
episode: 446 training return: 345.57558846976474
episode: 447 training return: 360.5301154427633
epoch: 112 test_true_pfm: 44.266154432955624 sim_pfm: 550.3895616449252
episode: 448 training return: 357.5020027860694
episode: 449 training return: 362.500765138262
episode: 450 training return: 357.1313148236322
episode: 451 training return: 474.5727286870467
epoch: 113 test_true_pfm: 47.54583347320777 sim_pfm: 502.56475606820794
episode: 452 training return: 354.9027116340034
episode: 453 training return: 452.32141809934075
episode: 454 training return: 474.08276104286443
episode: 455 training return: 363.64352181062844
epoch: 114 test_true_pfm: 45.336434434635294 sim_pfm: 504.8303591084814
episode: 456 training return: 333.22712104941365
episode: 457 training return: 470.64397747141254
episode: 458 training return: 455.2966404754324
episode: 459 training return: 487.59459173245256
epoch: 115 test_true_pfm: 46.9210584380185 sim_pfm: 482.80543529779743
episode: 460 training return: 447.3016255602019
episode: 461 training return: 365.510117320864
episode: 462 training return: 462.08344362994995
episode: 463 training return: 355.6220561395741
epoch: 116 test_true_pfm: 51.723471289999644 sim_pfm: 476.1074414837372
episode: 464 training return: 367.4045058452378
episode: 465 training return: 370.90457592611153
episode: 466 training return: 351.4668227066853
episode: 467 training return: 343.6250816499393
epoch: 117 test_true_pfm: 43.380444538936125 sim_pfm: 495.21487686086704
episode: 468 training return: 342.6380136336305
episode: 469 training return: 350.3842560860993
episode: 470 training return: 359.3916999465885
episode: 471 training return: 346.92871670367833
epoch: 118 test_true_pfm: 42.42995629964765 sim_pfm: 544.1389152922148
episode: 472 training return: 342.055812530763
episode: 473 training return: 352.8874869073546
episode: 474 training return: 465.65313733148594
episode: 475 training return: 337.3947254800606
epoch: 119 test_true_pfm: 47.61625541560201 sim_pfm: 494.1041518681448
episode: 476 training return: 448.95662196647334
episode: 477 training return: 476.46319821573445
episode: 478 training return: 342.0911863291277
episode: 479 training return: 362.21083263867615
epoch: 120 test_true_pfm: 45.118077643272784 sim_pfm: 508.7780456280777
episode: 480 training return: 351.6625883264451
episode: 481 training return: 353.6014297923661
episode: 482 training return: 355.7806737297644
episode: 483 training return: 467.845928704422
epoch: 121 test_true_pfm: 40.0864837641525 sim_pfm: 537.3189061498214
episode: 484 training return: 355.51586145377263
episode: 485 training return: 341.54360330697375
episode: 486 training return: 453.1187847555885
episode: 487 training return: 360.92833949734234
epoch: 122 test_true_pfm: 47.62550416259421 sim_pfm: 545.0860338095492
episode: 488 training return: 348.48085001857066
episode: 489 training return: 483.37560448574004
episode: 490 training return: 474.0645769666083
episode: 491 training return: 347.2495192854939
epoch: 123 test_true_pfm: 43.420205799937854 sim_pfm: 546.1883522787125
episode: 492 training return: 448.02667139704556
episode: 493 training return: 347.21771185873916
episode: 494 training return: 464.2170926272422
episode: 495 training return: 456.70448776831967
epoch: 124 test_true_pfm: 41.57882500626275 sim_pfm: 440.1670510435021
episode: 496 training return: 365.4351963425831
episode: 497 training return: 362.2743985829313
episode: 498 training return: 458.2359095680227
episode: 499 training return: 354.8035749280625
epoch: 125 test_true_pfm: 47.339056225014815 sim_pfm: 549.1861184327589
episode: 500 training return: 466.39241684219047
episode: 501 training return: 347.44963595914726
episode: 502 training return: 358.7488145847449
episode: 503 training return: 348.3967641568305
epoch: 126 test_true_pfm: 35.626813294206976 sim_pfm: 467.58105376439335
episode: 504 training return: 449.4692145541468
episode: 505 training return: 364.2004682372786
episode: 506 training return: 360.4636320081282
episode: 507 training return: 363.95796032392826
epoch: 127 test_true_pfm: 42.049726597170064 sim_pfm: 547.895507851939
episode: 508 training return: 358.2513643962559
episode: 509 training return: 467.23286181463646
episode: 510 training return: 355.77796074289785
episode: 511 training return: 351.52170108449474
epoch: 128 test_true_pfm: 42.85840543882495 sim_pfm: 488.7687963949684
episode: 512 training return: 449.7226825046051
episode: 513 training return: 439.7034657001527
episode: 514 training return: 357.8639807887123
episode: 515 training return: 447.2942855751838
epoch: 129 test_true_pfm: 47.11156478072777 sim_pfm: 579.9243876772399
episode: 516 training return: 331.1764914605596
episode: 517 training return: 358.5357938681745
episode: 518 training return: 352.1717809172494
episode: 519 training return: 348.6022572344097
epoch: 130 test_true_pfm: 45.37657352198269 sim_pfm: 569.6137187981301
episode: 520 training return: 455.794904160362
episode: 521 training return: 341.5510217401046
episode: 522 training return: 350.8577340134929
episode: 523 training return: 361.90958353769616
epoch: 131 test_true_pfm: 47.55075529280355 sim_pfm: 538.4176775244969
episode: 524 training return: 446.39524357107103
episode: 525 training return: 464.7982833688609
episode: 526 training return: 455.6939605632166
episode: 527 training return: 342.25910407233886
epoch: 132 test_true_pfm: 45.95032548137715 sim_pfm: 550.7609936452706
episode: 528 training return: 457.2101812920792
episode: 529 training return: 467.2553823941123
episode: 530 training return: 340.1142559246578
episode: 531 training return: 349.2814075717484
epoch: 133 test_true_pfm: 43.206001241812544 sim_pfm: 545.7799789271526
episode: 532 training return: 350.3232387408413
episode: 533 training return: 467.2554635138321
episode: 534 training return: 449.2231608884313
episode: 535 training return: 469.9407809945692
epoch: 134 test_true_pfm: 48.30005816250243 sim_pfm: 499.37541175676824
episode: 536 training return: 467.73731382510067
episode: 537 training return: 471.75583319793753
episode: 538 training return: 460.4202621825737
episode: 539 training return: 342.7950157248937
epoch: 135 test_true_pfm: 49.15230214186197 sim_pfm: 488.18040033421266
episode: 540 training return: 359.8258923792432
episode: 541 training return: 468.2301351752864
episode: 542 training return: 444.6373599755776
episode: 543 training return: 366.1512953248803
epoch: 136 test_true_pfm: 39.31785371436552 sim_pfm: 520.8311233788853
episode: 544 training return: 348.118129098454
episode: 545 training return: 453.4715234185251
episode: 546 training return: 361.97018296699895
episode: 547 training return: 469.92475328398325
epoch: 137 test_true_pfm: 41.19809230852815 sim_pfm: 537.9674995158354
episode: 548 training return: 478.25265559459785
episode: 549 training return: 358.1932112605035
episode: 550 training return: 460.8747760813403
episode: 551 training return: 463.0575669594785
epoch: 138 test_true_pfm: 40.602113662686484 sim_pfm: 544.3322523754863
episode: 552 training return: 460.6094811770771
episode: 553 training return: 483.6247666717031
episode: 554 training return: 356.2395454396037
episode: 555 training return: 449.8548125090184
epoch: 139 test_true_pfm: 44.379713811577496 sim_pfm: 542.017614711309
episode: 556 training return: 346.85770823803074
episode: 557 training return: 447.5242670303337
episode: 558 training return: 448.26643742302576
episode: 559 training return: 478.7133750216045
epoch: 140 test_true_pfm: 49.5194849239974 sim_pfm: 535.4516090007887
episode: 560 training return: 477.90560407213366
episode: 561 training return: 480.13668654573104
episode: 562 training return: 454.595227572417
episode: 563 training return: 359.37010696305884
epoch: 141 test_true_pfm: 48.362608636040726 sim_pfm: 471.2115094901628
episode: 564 training return: 339.00300711305346
episode: 565 training return: 448.4913527006807
episode: 566 training return: 357.8159403902204
episode: 567 training return: 362.8305945871928
epoch: 142 test_true_pfm: 40.001246156086424 sim_pfm: 545.016764909963
episode: 568 training return: 445.72929476289994
episode: 569 training return: 454.3180382480587
episode: 570 training return: 369.49902232302986
episode: 571 training return: 363.38370451631863
epoch: 143 test_true_pfm: 38.70767074174848 sim_pfm: 496.96767063161286
episode: 572 training return: 464.6064455539263
episode: 573 training return: 343.5076619698934
episode: 574 training return: 475.6938774062303
episode: 575 training return: 362.6593515490639
epoch: 144 test_true_pfm: 44.21433808970125 sim_pfm: 542.6148539746774
episode: 576 training return: 486.6474164550669
episode: 577 training return: 476.21696925108165
episode: 578 training return: 345.4725524334104
episode: 579 training return: 341.71306748241767
epoch: 145 test_true_pfm: 48.039458008562995 sim_pfm: 507.1459130347204
episode: 580 training return: 354.3217350977239
episode: 581 training return: 366.82323760517426
episode: 582 training return: 439.4077620055025
episode: 583 training return: 473.2632830265198
epoch: 146 test_true_pfm: 47.1691561670142 sim_pfm: 540.63131609815
episode: 584 training return: 359.0340386873755
episode: 585 training return: 353.1926159356717
episode: 586 training return: 471.8459942431335
episode: 587 training return: 343.4569011659594
epoch: 147 test_true_pfm: 45.85639061326577 sim_pfm: 540.866737769888
episode: 588 training return: 360.91376051508826
episode: 589 training return: 345.19173903231075
episode: 590 training return: 362.41403630252853
episode: 591 training return: 353.1674198130051
epoch: 148 test_true_pfm: 37.45006037859129 sim_pfm: 540.227081969948
episode: 592 training return: 358.35139263606743
episode: 593 training return: 473.8917891219842
episode: 594 training return: 353.7685647860791
episode: 595 training return: 345.44111917832583
epoch: 149 test_true_pfm: 38.74306351438892 sim_pfm: 500.7328238583826
episode: 596 training return: 350.9656617913601
episode: 597 training return: 474.1396419098194
episode: 598 training return: 363.5992446351629
episode: 599 training return: 372.52245052390333
epoch: 150 test_true_pfm: 44.66065625364245 sim_pfm: 526.5400800982737
