['--alg', 'sac', '--env', 'Hopper-v3', '--learn', 'uncertainty', '--traj', 'expert', '--seed', '3', '--data', '100000']
epoch: 0 training_loss 0.29434069216251374 test_loss: 0.1731380820274353
epoch: 1 training_loss 0.18083473637700082 test_loss: 0.17912203073501587
epoch: 2 training_loss 0.17456637173891068 test_loss: 0.17180730104446412
epoch: 3 training_loss 0.16229323744773866 test_loss: 0.14441522359848022
epoch: 4 training_loss 0.1617325107753277 test_loss: 0.22030835151672362
epoch: 5 training_loss 0.15437096443027257 test_loss: 0.14157278537750245
epoch: 6 training_loss 0.16808549232780934 test_loss: 0.16915522813796996
epoch: 7 training_loss 0.16307189680635928 test_loss: 0.15712130069732666
epoch: 8 training_loss 0.1546664395928383 test_loss: 0.13799196481704712
epoch: 9 training_loss 0.1596285116299987 test_loss: 0.15981390476226806
epoch: 10 training_loss 0.13778125137090683 test_loss: 0.13797577619552612
epoch: 11 training_loss 0.15767155952751635 test_loss: 0.14216911792755127
epoch: 12 training_loss 0.14621593091636897 test_loss: 0.14663314819335938
epoch: 13 training_loss 0.1547517565637827 test_loss: 0.14282382726669313
epoch: 14 training_loss 0.15539580799639224 test_loss: 0.1382318139076233
epoch: 15 training_loss 0.14633077081292867 test_loss: 0.1356234669685364
epoch: 16 training_loss 0.14460607912391424 test_loss: 0.14280164241790771
epoch: 17 training_loss 0.1454583617299795 test_loss: 0.13708845376968384
epoch: 18 training_loss 0.14577936969697475 test_loss: 0.14314228296279907
epoch: 19 training_loss 0.14825775802135469 test_loss: 0.164167058467865
epoch: 20 training_loss 0.14632500443607568 test_loss: 0.1500999689102173
epoch: 21 training_loss 0.14518836803734303 test_loss: 0.13818023204803467
epoch: 22 training_loss 0.15108270686119796 test_loss: 0.15222630500793458
epoch: 23 training_loss 0.1486002055183053 test_loss: 0.16163743734359742
epoch: 24 training_loss 0.1446873200684786 test_loss: 0.15637751817703247
epoch: 25 training_loss 0.14138763096183538 test_loss: 0.14130406379699706
epoch: 26 training_loss 0.14494207382202148 test_loss: 0.14443542957305908
epoch: 27 training_loss 0.14464409105479717 test_loss: 0.13049557209014892
epoch: 28 training_loss 0.13285336028784514 test_loss: 0.13689228296279907
epoch: 29 training_loss 0.14173394277691842 test_loss: 0.14487255811691285
epoch: 30 training_loss 0.14753530457615852 test_loss: 0.13603615760803223
epoch: 31 training_loss 0.13537786558270454 test_loss: 0.13653072118759155
epoch: 32 training_loss 0.14702936455607415 test_loss: 0.137283992767334
epoch: 33 training_loss 0.14114848285913467 test_loss: 0.12088435888290405
epoch: 34 training_loss 0.13973643016070128 test_loss: 0.12622721195220948
epoch: 35 training_loss 0.14110120322555303 test_loss: 0.14501086473464966
epoch: 36 training_loss 0.14002723168581724 test_loss: 0.1335363507270813
epoch: 37 training_loss 0.147040860876441 test_loss: 0.11339807510375977
epoch: 38 training_loss 0.13988600265234707 test_loss: 0.12293542623519897
epoch: 39 training_loss 0.14186662036925554 test_loss: 0.13206924200057985
epoch: 40 training_loss 0.14001528061926366 test_loss: 0.12823913097381592
epoch: 41 training_loss 0.1452302610129118 test_loss: 0.1366695523262024
epoch: 42 training_loss 0.14631187204271556 test_loss: 0.13058325052261352
epoch: 43 training_loss 0.14583315148949624 test_loss: 0.11733424663543701
epoch: 44 training_loss 0.1518021225184202 test_loss: 0.13368721008300782
epoch: 45 training_loss 0.13513075295835733 test_loss: 0.14128620624542237
epoch: 46 training_loss 0.1337737973406911 test_loss: 0.1360316514968872
epoch: 47 training_loss 0.13995277762413025 test_loss: 0.11487691402435303
epoch: 48 training_loss 0.13794371619820595 test_loss: 0.12931004762649537
epoch: 49 training_loss 0.1360317125916481 test_loss: 0.13188337087631224
epoch: 50 training_loss 0.1362008273229003 test_loss: 0.13451189994812013
epoch: 51 training_loss 0.1463330800458789 test_loss: 0.13389545679092407
epoch: 52 training_loss 0.1294118621200323 test_loss: 0.14103351831436156
epoch: 53 training_loss 0.13811514757573604 test_loss: 0.14861377477645873
epoch: 54 training_loss 0.13483891546726226 test_loss: 0.15572354793548585
epoch: 55 training_loss 0.14372879225760699 test_loss: 0.1354222536087036
epoch: 56 training_loss 0.12490043003112078 test_loss: 0.13900314569473265
epoch: 57 training_loss 0.13880758317187428 test_loss: 0.12386820316314698
epoch: 58 training_loss 0.13852369461208583 test_loss: 0.12776197195053102
epoch: 59 training_loss 0.13708704698830843 test_loss: 0.1301828145980835
epoch: 60 training_loss 0.1377417340874672 test_loss: 0.11462174654006958
epoch: 61 training_loss 0.1401530595868826 test_loss: 0.13383142948150634
epoch: 62 training_loss 0.14178825225681066 test_loss: 0.14260913133621217
epoch: 63 training_loss 0.1350870470702648 test_loss: 0.13689494132995605
epoch: 64 training_loss 0.14302619379013776 test_loss: 0.1429067373275757
epoch: 65 training_loss 0.13663511287420987 test_loss: 0.12789336442947388
epoch: 66 training_loss 0.14009402882307767 test_loss: 0.1278367042541504
epoch: 67 training_loss 0.14222487822175026 test_loss: 0.15147489309310913
epoch: 68 training_loss 0.14346196446567774 test_loss: 0.1336209297180176
epoch: 69 training_loss 0.14578845288604497 test_loss: 0.1230204939842224
epoch: 70 training_loss 0.13893012322485446 test_loss: 0.1331505537033081
epoch: 71 training_loss 0.14163658849895 test_loss: 0.13283650875091552
epoch: 72 training_loss 0.15107210621237754 test_loss: 0.13373095989227296
epoch: 73 training_loss 0.14082128200680016 test_loss: 0.12356332540512086
epoch: 74 training_loss 0.13356108024716376 test_loss: 0.12064242362976074
epoch: 75 training_loss 0.12836589694023132 test_loss: 0.11309151649475098
epoch: 76 training_loss 0.1315214579924941 test_loss: 0.13131430149078369
epoch: 77 training_loss 0.13757614504545926 test_loss: 0.1280112862586975
epoch: 78 training_loss 0.13553333543241025 test_loss: 0.1290665626525879
epoch: 79 training_loss 0.14059422921389342 test_loss: 0.1340186357498169
epoch: 80 training_loss 0.14100357796996832 test_loss: 0.10849719047546387
epoch: 81 training_loss 0.14423220958560706 test_loss: 0.13094828128814698
epoch: 82 training_loss 0.1314857131242752 test_loss: 0.10760115385055542
epoch: 83 training_loss 0.1413381365314126 test_loss: 0.14398471117019654
epoch: 84 training_loss 0.12975414680317043 test_loss: 0.1273255705833435
epoch: 85 training_loss 0.13819049932062627 test_loss: 0.1324695587158203
epoch: 86 training_loss 0.141689256131649 test_loss: 0.13083183765411377
epoch: 87 training_loss 0.1380022817105055 test_loss: 0.12598544359207153
epoch: 88 training_loss 0.1420290206372738 test_loss: 0.1312442421913147
epoch: 89 training_loss 0.13636751720681786 test_loss: 0.11620018482208253
epoch: 90 training_loss 0.1424147230386734 test_loss: 0.14338001012802123
epoch: 91 training_loss 0.14194328762590885 test_loss: 0.13934577703475953
epoch: 92 training_loss 0.1401665772125125 test_loss: 0.1381327271461487
epoch: 93 training_loss 0.13836341846734285 test_loss: 0.12954912185668946
epoch: 94 training_loss 0.14203726220875978 test_loss: 0.1393439769744873
epoch: 95 training_loss 0.13946788221597672 test_loss: 0.13517389297485352
epoch: 96 training_loss 0.14268915839493274 test_loss: 0.11460245847702026
epoch: 97 training_loss 0.1337990367412567 test_loss: 0.12753888368606567
epoch: 98 training_loss 0.14527667615562678 test_loss: 0.13272998332977295
epoch: 99 training_loss 0.14081737194210292 test_loss: 0.13256882429122924
epoch: 100 training_loss 0.13972103394567967 test_loss: 0.11397074460983277
epoch: 101 training_loss 0.13865069400519134 test_loss: 0.14027856588363646
epoch: 102 training_loss 0.1385691263526678 test_loss: 0.11408661603927613
epoch: 103 training_loss 0.14199726168066262 test_loss: 0.13961900472640992
epoch: 104 training_loss 0.13417378652840853 test_loss: 0.13069084882736207
epoch: 105 training_loss 0.14066285524517297 test_loss: 0.12858251333236695
epoch: 106 training_loss 0.15170281555503606 test_loss: 0.12111607789993287
epoch: 107 training_loss 0.13645158674567937 test_loss: 0.1477767586708069
epoch: 108 training_loss 0.13261509098112584 test_loss: 0.12720801830291747
epoch: 109 training_loss 0.1317761798761785 test_loss: 0.13827645778656006
epoch: 110 training_loss 0.13570873895660043 test_loss: 0.12985581159591675
epoch: 111 training_loss 0.13586405519396066 test_loss: 0.14666007757186889
epoch: 112 training_loss 0.14485235713422298 test_loss: 0.13637017011642455
epoch: 113 training_loss 0.13581746492534877 test_loss: 0.13338388204574586
epoch: 114 training_loss 0.1404770762845874 test_loss: 0.1406167507171631
epoch: 115 training_loss 0.13669645976275205 test_loss: 0.11563706398010254
epoch: 116 training_loss 0.13972274549305438 test_loss: 0.13947032690048217
epoch: 117 training_loss 0.13289675526320935 test_loss: 0.12348928451538085
epoch: 118 training_loss 0.13584580138325691 test_loss: 0.12163668870925903
epoch: 119 training_loss 0.14008937161415816 test_loss: 0.1477574348449707
epoch: 120 training_loss 0.12779405143111944 test_loss: 0.13042104244232178
epoch: 121 training_loss 0.13123072408139705 test_loss: 0.13901762962341307
epoch: 122 training_loss 0.13955880828201772 test_loss: 0.1258498787879944
epoch: 123 training_loss 0.13870412040501834 test_loss: 0.11775020360946656
epoch: 124 training_loss 0.1363207022473216 test_loss: 0.12341500520706176
epoch: 125 training_loss 0.14253886818885803 test_loss: 0.13081111907958984
epoch: 126 training_loss 0.13700397301465272 test_loss: 0.1346798062324524
epoch: 127 training_loss 0.1328926821053028 test_loss: 0.14197154045104982
epoch: 128 training_loss 0.13070901717990638 test_loss: 0.14037015438079833
epoch: 129 training_loss 0.1339505449682474 test_loss: 0.12389607429504394
epoch: 130 training_loss 0.14025321811437608 test_loss: 0.13573877811431884
epoch: 131 training_loss 0.1318959816172719 test_loss: 0.13424277305603027
epoch: 132 training_loss 0.13575913064181805 test_loss: 0.14914554357528687
epoch: 133 training_loss 0.13940226949751378 test_loss: 0.13562060594558717
epoch: 134 training_loss 0.1321256000176072 test_loss: 0.12072654962539672
epoch: 135 training_loss 0.13356930596753955 test_loss: 0.12921046018600463
epoch: 136 training_loss 0.13759723525494338 test_loss: 0.1234848141670227
epoch: 137 training_loss 0.13203219432383775 test_loss: 0.12497510910034179
epoch: 138 training_loss 0.13336820665746926 test_loss: 0.1287549376487732
epoch: 139 training_loss 0.13476525973528625 test_loss: 0.12477495670318603
epoch: 140 training_loss 0.1411084583774209 test_loss: 0.1179301381111145
epoch: 141 training_loss 0.13737326711416245 test_loss: 0.12970203161239624
epoch: 142 training_loss 0.12579544585198163 test_loss: 0.12704567909240722
epoch: 143 training_loss 0.13403151277452707 test_loss: 0.12475011348724366
epoch: 144 training_loss 0.13945975415408612 test_loss: 0.1185314416885376
epoch: 145 training_loss 0.13702327318489552 test_loss: 0.11266037225723266
epoch: 146 training_loss 0.14280670419335365 test_loss: 0.13999851942062377
epoch: 147 training_loss 0.13767102424055339 test_loss: 0.11190743446350097
epoch: 148 training_loss 0.12857535552233457 test_loss: 0.132376766204834
epoch: 149 training_loss 0.13612391632050275 test_loss: 0.14054560661315918
epoch: 0 training_loss 0.2973182489722967 test_loss: 0.17163293361663817
epoch: 1 training_loss 0.17500691242516042 test_loss: 0.15223597288131713
epoch: 2 training_loss 0.16575787864625455 test_loss: 0.15626367330551147
epoch: 3 training_loss 0.1510615010559559 test_loss: 0.14212266206741334
epoch: 4 training_loss 0.15266299568116665 test_loss: 0.12957078218460083
epoch: 5 training_loss 0.15688106805086136 test_loss: 0.1434767246246338
epoch: 6 training_loss 0.15104507274925708 test_loss: 0.14130723476409912
epoch: 7 training_loss 0.1550814963132143 test_loss: 0.15503861904144287
epoch: 8 training_loss 0.14773144762963056 test_loss: 0.1423124313354492
epoch: 9 training_loss 0.13711278915405273 test_loss: 0.136859667301178
epoch: 10 training_loss 0.1491192850098014 test_loss: 0.1401798963546753
epoch: 11 training_loss 0.1444006297364831 test_loss: 0.13368384838104247
epoch: 12 training_loss 0.1525098218768835 test_loss: 0.13543455600738524
epoch: 13 training_loss 0.14381406854838133 test_loss: 0.12532209157943724
epoch: 14 training_loss 0.15298847012221814 test_loss: 0.1373528003692627
epoch: 15 training_loss 0.15001036278903485 test_loss: 0.13163400888442994
epoch: 16 training_loss 0.1383909733593464 test_loss: 0.14761073589324952
epoch: 17 training_loss 0.1456870831362903 test_loss: 0.16522420644760133
epoch: 18 training_loss 0.1369109543040395 test_loss: 0.14229578971862794
epoch: 19 training_loss 0.14075450342148543 test_loss: 0.1407867431640625
epoch: 20 training_loss 0.14498624682426453 test_loss: 0.16563020944595336
epoch: 21 training_loss 0.14269398108124734 test_loss: 0.12497888803482056
epoch: 22 training_loss 0.14066323645412923 test_loss: 0.13836501836776732
epoch: 23 training_loss 0.13631867792457342 test_loss: 0.14199970960617064
epoch: 24 training_loss 0.1492110674083233 test_loss: 0.16505426168441772
epoch: 25 training_loss 0.13947358924895525 test_loss: 0.1488175630569458
epoch: 26 training_loss 0.13676355365663767 test_loss: 0.1646917700767517
epoch: 27 training_loss 0.1333305773884058 test_loss: 0.14243907928466798
epoch: 28 training_loss 0.13511804446578027 test_loss: 0.14275031089782714
epoch: 29 training_loss 0.14121489271521567 test_loss: 0.13204805850982665
epoch: 30 training_loss 0.13431935019791127 test_loss: 0.127819287776947
epoch: 31 training_loss 0.1394375592470169 test_loss: 0.14045659303665162
epoch: 32 training_loss 0.14338344652205706 test_loss: 0.12306263446807861
epoch: 33 training_loss 0.14103281639516355 test_loss: 0.11961989402770996
epoch: 34 training_loss 0.13175397992134094 test_loss: 0.1331825852394104
epoch: 35 training_loss 0.1356589955277741 test_loss: 0.1356736898422241
epoch: 36 training_loss 0.1443027725815773 test_loss: 0.12501654624938965
epoch: 37 training_loss 0.14368065409362316 test_loss: 0.13249881267547609
epoch: 38 training_loss 0.13793188638985157 test_loss: 0.15347046852111818
epoch: 39 training_loss 0.14174199249595404 test_loss: 0.15507543087005615
epoch: 40 training_loss 0.13406022422015668 test_loss: 0.13684964179992676
epoch: 41 training_loss 0.1432733504474163 test_loss: 0.13024322986602782
epoch: 42 training_loss 0.13381903674453496 test_loss: 0.12503385543823242
epoch: 43 training_loss 0.13903056398034097 test_loss: 0.1405459761619568
epoch: 44 training_loss 0.141256380751729 test_loss: 0.13975262641906738
epoch: 45 training_loss 0.13970970146358014 test_loss: 0.12748018503189087
epoch: 46 training_loss 0.13277517389506102 test_loss: 0.1473066210746765
epoch: 47 training_loss 0.1346104445308447 test_loss: 0.16569851636886596
epoch: 48 training_loss 0.12915511295199394 test_loss: 0.135404634475708
epoch: 49 training_loss 0.13944698978215456 test_loss: 0.1413909077644348
epoch: 50 training_loss 0.1323355860821903 test_loss: 0.14232510328292847
epoch: 51 training_loss 0.14198484957218171 test_loss: 0.13607707023620605
epoch: 52 training_loss 0.1410406953841448 test_loss: 0.12570205926895142
epoch: 53 training_loss 0.14397741470485925 test_loss: 0.13126903772354126
epoch: 54 training_loss 0.1353771397471428 test_loss: 0.13472694158554077
epoch: 55 training_loss 0.13970842581242324 test_loss: 0.1386338949203491
epoch: 56 training_loss 0.13778194542974234 test_loss: 0.13776901960372925
epoch: 57 training_loss 0.13452273454517127 test_loss: 0.14002320766448975
epoch: 58 training_loss 0.1380333525687456 test_loss: 0.12465605735778809
epoch: 59 training_loss 0.13237914569675924 test_loss: 0.12890225648880005
epoch: 60 training_loss 0.12946148172020913 test_loss: 0.1366148591041565
epoch: 61 training_loss 0.14035484284162522 test_loss: 0.13942177295684816
epoch: 62 training_loss 0.12956411611288787 test_loss: 0.12413595914840699
epoch: 63 training_loss 0.13826418958604336 test_loss: 0.1426878571510315
epoch: 64 training_loss 0.1381338321790099 test_loss: 0.13807852268218995
epoch: 65 training_loss 0.1359379057213664 test_loss: 0.13621068000793457
epoch: 66 training_loss 0.1353155404701829 test_loss: 0.15106674432754516
epoch: 67 training_loss 0.13237798631191253 test_loss: 0.1413923382759094
epoch: 68 training_loss 0.12956226874142884 test_loss: 0.1310811758041382
epoch: 69 training_loss 0.13077412793412804 test_loss: 0.12897579669952391
epoch: 70 training_loss 0.13480889689177275 test_loss: 0.1315630316734314
epoch: 71 training_loss 0.1281518369540572 test_loss: 0.1288654088973999
epoch: 72 training_loss 0.12735461454838515 test_loss: 0.12891650199890137
epoch: 73 training_loss 0.12689452290534972 test_loss: 0.12139185667037963
epoch: 74 training_loss 0.13342647461220622 test_loss: 0.15261714458465575
epoch: 75 training_loss 0.12826362021267415 test_loss: 0.11875683069229126
epoch: 76 training_loss 0.13645181745290758 test_loss: 0.11845870018005371
epoch: 77 training_loss 0.13635651521384717 test_loss: 0.13739122152328492
epoch: 78 training_loss 0.13187853652983905 test_loss: 0.14326720237731932
epoch: 79 training_loss 0.1382168613001704 test_loss: 0.11446518898010254
epoch: 80 training_loss 0.12801806550472974 test_loss: 0.12512060403823852
epoch: 81 training_loss 0.1319429448619485 test_loss: 0.15322850942611693
epoch: 82 training_loss 0.13248091958463193 test_loss: 0.13286653757095337
epoch: 83 training_loss 0.13185199566185474 test_loss: 0.12820342779159546
epoch: 84 training_loss 0.1253969332575798 test_loss: 0.12464369535446167
epoch: 85 training_loss 0.14143563240766524 test_loss: 0.14457578659057618
epoch: 86 training_loss 0.13939013950526713 test_loss: 0.14801287651062012
epoch: 87 training_loss 0.13076499056071042 test_loss: 0.1294844627380371
epoch: 88 training_loss 0.13684620819985865 test_loss: 0.12779415845870973
epoch: 89 training_loss 0.1343611343577504 test_loss: 0.12105342149734497
epoch: 90 training_loss 0.13853264600038528 test_loss: 0.12364274263381958
epoch: 91 training_loss 0.13681584268808364 test_loss: 0.12532243728637696
epoch: 92 training_loss 0.1345783320814371 test_loss: 0.1303110122680664
epoch: 93 training_loss 0.12642383407801389 test_loss: 0.1091149091720581
epoch: 94 training_loss 0.12912065403535963 test_loss: 0.13914709091186522
epoch: 95 training_loss 0.13167848471552135 test_loss: 0.11407392024993897
epoch: 96 training_loss 0.12904252026230098 test_loss: 0.14254199266433715
epoch: 97 training_loss 0.137907397672534 test_loss: 0.13327637910842896
epoch: 98 training_loss 0.13171035051345825 test_loss: 0.11317402124404907
epoch: 99 training_loss 0.13569454979151488 test_loss: 0.12592333555221558
epoch: 100 training_loss 0.12687290001660587 test_loss: 0.13995375633239746
epoch: 101 training_loss 0.12256983228027821 test_loss: 0.14814149141311644
epoch: 102 training_loss 0.1333232880756259 test_loss: 0.13155478239059448
epoch: 103 training_loss 0.13748225051909685 test_loss: 0.13307279348373413
epoch: 104 training_loss 0.13272335812449454 test_loss: 0.1630374789237976
epoch: 105 training_loss 0.13147648382931948 test_loss: 0.11550660133361816
epoch: 106 training_loss 0.1306272103637457 test_loss: 0.12567209005355834
epoch: 107 training_loss 0.14478541508316994 test_loss: 0.1326040267944336
epoch: 108 training_loss 0.1273181877657771 test_loss: 0.1358763337135315
epoch: 109 training_loss 0.13453929513692856 test_loss: 0.14732950925827026
epoch: 110 training_loss 0.14315634164959193 test_loss: 0.12481814622879028
epoch: 111 training_loss 0.13566162230446935 test_loss: 0.14036306142807006
epoch: 112 training_loss 0.13176301054656506 test_loss: 0.12429263591766357
epoch: 113 training_loss 0.1300450887158513 test_loss: 0.13518868684768676
epoch: 114 training_loss 0.1347166596725583 test_loss: 0.1378929615020752
epoch: 115 training_loss 0.13100622788071634 test_loss: 0.1357478380203247
epoch: 116 training_loss 0.12575703483074904 test_loss: 0.13410589694976807
epoch: 117 training_loss 0.12376333091408015 test_loss: 0.12780232429504396
epoch: 118 training_loss 0.13697666808962822 test_loss: 0.1411870837211609
epoch: 119 training_loss 0.1356234696879983 test_loss: 0.1400246262550354
epoch: 120 training_loss 0.13756804227828978 test_loss: 0.13344138860702515
epoch: 121 training_loss 0.13182048670947552 test_loss: 0.13707277774810792
epoch: 122 training_loss 0.1304777901247144 test_loss: 0.13943095207214357
epoch: 123 training_loss 0.13520250093191863 test_loss: 0.12581931352615355
epoch: 124 training_loss 0.1312373209372163 test_loss: 0.11912009716033936
epoch: 125 training_loss 0.13094213638454677 test_loss: 0.12388712167739868
epoch: 126 training_loss 0.12888198032975196 test_loss: 0.1483636498451233
epoch: 127 training_loss 0.133592755459249 test_loss: 0.13564208745956421
epoch: 128 training_loss 0.14108371041715145 test_loss: 0.12314245700836182
epoch: 129 training_loss 0.12276395682245493 test_loss: 0.1305790662765503
epoch: 130 training_loss 0.13670375809073448 test_loss: 0.14939850568771362
epoch: 131 training_loss 0.1365671144798398 test_loss: 0.12203705310821533
epoch: 132 training_loss 0.13742601539939642 test_loss: 0.1288226842880249
epoch: 133 training_loss 0.1251155222579837 test_loss: 0.13280463218688965
epoch: 134 training_loss 0.13529664520174264 test_loss: 0.1317162871360779
epoch: 135 training_loss 0.13464090999215841 test_loss: 0.12961363792419434
epoch: 136 training_loss 0.13480384835042059 test_loss: 0.13711382150650026
epoch: 137 training_loss 0.13629467085003852 test_loss: 0.13262443542480468
epoch: 138 training_loss 0.13881743434816599 test_loss: 0.13923423290252684
epoch: 139 training_loss 0.12774623347446323 test_loss: 0.12580740451812744
epoch: 140 training_loss 0.14540312878787517 test_loss: 0.13177275657653809
epoch: 141 training_loss 0.12360588677227496 test_loss: 0.12384015321731567
epoch: 142 training_loss 0.13236936140805483 test_loss: 0.13534419536590575
epoch: 143 training_loss 0.13045949190855027 test_loss: 0.1347014784812927
epoch: 144 training_loss 0.1269460241869092 test_loss: 0.13134021759033204
epoch: 145 training_loss 0.13172885993495584 test_loss: 0.1370617151260376
epoch: 146 training_loss 0.13505472470074892 test_loss: 0.12200008630752564
epoch: 147 training_loss 0.13516816310584545 test_loss: 0.14508063793182374
epoch: 148 training_loss 0.12936103915795683 test_loss: 0.1385549783706665
epoch: 149 training_loss 0.13305308017879725 test_loss: 0.13284990787506104
epoch: 0 training_loss 0.29117510326206686 test_loss: 0.1914377450942993
epoch: 1 training_loss 0.19226816065609456 test_loss: 0.17340482473373414
epoch: 2 training_loss 0.1666617066413164 test_loss: 0.18307700157165527
epoch: 3 training_loss 0.1594323243945837 test_loss: 0.14519937038421632
epoch: 4 training_loss 0.15165247973054646 test_loss: 0.162575626373291
epoch: 5 training_loss 0.15796866569668055 test_loss: 0.15572686195373536
epoch: 6 training_loss 0.15302691288292408 test_loss: 0.1458429217338562
epoch: 7 training_loss 0.14126683153212072 test_loss: 0.15117344856262208
epoch: 8 training_loss 0.15002014886587858 test_loss: 0.16350408792495727
epoch: 9 training_loss 0.14572248198091983 test_loss: 0.15491001605987548
epoch: 10 training_loss 0.1517045897245407 test_loss: 0.16962889432907105
epoch: 11 training_loss 0.14127155996859073 test_loss: 0.14544260501861572
epoch: 12 training_loss 0.14484454955905676 test_loss: 0.1491212487220764
epoch: 13 training_loss 0.1460816468298435 test_loss: 0.14941619634628295
epoch: 14 training_loss 0.128340892419219 test_loss: 0.1453757643699646
epoch: 15 training_loss 0.1476840664073825 test_loss: 0.14672132730484008
epoch: 16 training_loss 0.13715331129729746 test_loss: 0.15837756395339966
epoch: 17 training_loss 0.13812358405441047 test_loss: 0.14344005584716796
epoch: 18 training_loss 0.14541601486504077 test_loss: 0.15679134130477906
epoch: 19 training_loss 0.1407339283823967 test_loss: 0.1365698456764221
epoch: 20 training_loss 0.13580938037484885 test_loss: 0.16739275455474853
epoch: 21 training_loss 0.13772628225386144 test_loss: 0.13318815231323242
epoch: 22 training_loss 0.13955143004655837 test_loss: 0.15469439029693605
epoch: 23 training_loss 0.14463498085737228 test_loss: 0.16854865550994874
epoch: 24 training_loss 0.13806675381958486 test_loss: 0.1738258719444275
epoch: 25 training_loss 0.143215343169868 test_loss: 0.14134702682495118
epoch: 26 training_loss 0.14819772258400918 test_loss: 0.13534140586853027
epoch: 27 training_loss 0.1343203555420041 test_loss: 0.16859382390975952
epoch: 28 training_loss 0.13530961699783803 test_loss: 0.15835955142974853
epoch: 29 training_loss 0.14219606518745423 test_loss: 0.13158578872680665
epoch: 30 training_loss 0.14209618248045444 test_loss: 0.13861294984817504
epoch: 31 training_loss 0.1332338422909379 test_loss: 0.159615159034729
epoch: 32 training_loss 0.14145732410252093 test_loss: 0.15541377067565917
epoch: 33 training_loss 0.1402513872459531 test_loss: 0.14569236040115358
epoch: 34 training_loss 0.1352115597575903 test_loss: 0.14617177248001098
epoch: 35 training_loss 0.13737368989735843 test_loss: 0.1703154444694519
epoch: 36 training_loss 0.13506720151752233 test_loss: 0.12479798793792725
epoch: 37 training_loss 0.13369343418627977 test_loss: 0.13377312421798707
epoch: 38 training_loss 0.14391185991466046 test_loss: 0.1310306668281555
epoch: 39 training_loss 0.13213338069617747 test_loss: 0.11946431398391724
epoch: 40 training_loss 0.1371519461646676 test_loss: 0.15387113094329835
epoch: 41 training_loss 0.13713308531790971 test_loss: 0.17236592769622802
epoch: 42 training_loss 0.138530038818717 test_loss: 0.13981356620788574
epoch: 43 training_loss 0.1357188643887639 test_loss: 0.17006375789642333
epoch: 44 training_loss 0.13499314472079277 test_loss: 0.14458491802215576
epoch: 45 training_loss 0.13689997870475054 test_loss: 0.14063488245010375
epoch: 46 training_loss 0.13253996271640062 test_loss: 0.13950482606887818
epoch: 47 training_loss 0.13465873327106237 test_loss: 0.14862849712371826
epoch: 48 training_loss 0.1377638405561447 test_loss: 0.1732654094696045
epoch: 49 training_loss 0.12771269597113133 test_loss: 0.1480312466621399
epoch: 50 training_loss 0.132950666770339 test_loss: 0.1409069776535034
epoch: 51 training_loss 0.13021374970674515 test_loss: 0.1360745072364807
epoch: 52 training_loss 0.13239107564091682 test_loss: 0.1328318953514099
epoch: 53 training_loss 0.1406023046374321 test_loss: 0.14194144010543824
epoch: 54 training_loss 0.13991195295006037 test_loss: 0.13797188997268678
epoch: 55 training_loss 0.13242337565869092 test_loss: 0.12458925247192383
epoch: 56 training_loss 0.12412165440618991 test_loss: 0.13490110635757446
epoch: 57 training_loss 0.13710665449500084 test_loss: 0.14316608905792236
epoch: 58 training_loss 0.13020407382398844 test_loss: 0.14481563568115235
epoch: 59 training_loss 0.12865047506988048 test_loss: 0.17334920167922974
epoch: 60 training_loss 0.13467994421720506 test_loss: 0.16664777994155883
epoch: 61 training_loss 0.13835800088942052 test_loss: 0.1464380741119385
epoch: 62 training_loss 0.12318571537733078 test_loss: 0.14651743173599244
epoch: 63 training_loss 0.1324677625671029 test_loss: 0.13191667795181275
epoch: 64 training_loss 0.13311153154820204 test_loss: 0.126614511013031
epoch: 65 training_loss 0.13769268818199634 test_loss: 0.13334197998046876
epoch: 66 training_loss 0.12683100793510677 test_loss: 0.13514713048934937
epoch: 67 training_loss 0.127916927523911 test_loss: 0.12489895820617676
epoch: 68 training_loss 0.13357870165258645 test_loss: 0.15418490171432495
epoch: 69 training_loss 0.1279754539206624 test_loss: 0.14025652408599854
epoch: 70 training_loss 0.1288153950870037 test_loss: 0.1316160202026367
epoch: 71 training_loss 0.14024086661636828 test_loss: 0.1405097484588623
epoch: 72 training_loss 0.12385671868920327 test_loss: 0.16052578687667846
epoch: 73 training_loss 0.13440584551542997 test_loss: 0.14952490329742432
epoch: 74 training_loss 0.12589916691184044 test_loss: 0.1386565923690796
epoch: 75 training_loss 0.12448164235800505 test_loss: 0.13167322874069215
epoch: 76 training_loss 0.12873089782893657 test_loss: 0.13818824291229248
epoch: 77 training_loss 0.13069994926452635 test_loss: 0.14944982528686523
epoch: 78 training_loss 0.124688954167068 test_loss: 0.13847782611846923
epoch: 79 training_loss 0.13293255042284727 test_loss: 0.14728293418884278
epoch: 80 training_loss 0.13648530844599008 test_loss: 0.1572675347328186
epoch: 81 training_loss 0.12749236999079586 test_loss: 0.14006935358047484
epoch: 82 training_loss 0.12678032118827104 test_loss: 0.12806133031845093
epoch: 83 training_loss 0.12877472691237926 test_loss: 0.13622339963912963
epoch: 84 training_loss 0.13642445586621763 test_loss: 0.14108256101608277
epoch: 85 training_loss 0.13148885685950518 test_loss: 0.14717111587524415
epoch: 86 training_loss 0.13137294713407754 test_loss: 0.14693259000778197
epoch: 87 training_loss 0.1309769244492054 test_loss: 0.13234107494354247
epoch: 88 training_loss 0.136950144469738 test_loss: 0.1467486262321472
epoch: 89 training_loss 0.12767399221658707 test_loss: 0.12813156843185425
epoch: 90 training_loss 0.12574112705886364 test_loss: 0.12879135608673095
epoch: 91 training_loss 0.1323682862520218 test_loss: 0.15613822937011718
epoch: 92 training_loss 0.12705643232911826 test_loss: 0.1438878893852234
epoch: 93 training_loss 0.1287871526926756 test_loss: 0.1441630482673645
epoch: 94 training_loss 0.1269131857343018 test_loss: 0.1378856658935547
epoch: 95 training_loss 0.1352055148780346 test_loss: 0.12334833145141602
epoch: 96 training_loss 0.13273424569517375 test_loss: 0.13997570276260377
epoch: 97 training_loss 0.1264681166037917 test_loss: 0.1519227147102356
epoch: 98 training_loss 0.12833064284175635 test_loss: 0.13708584308624266
epoch: 99 training_loss 0.1311465186998248 test_loss: 0.1550590991973877
epoch: 100 training_loss 0.1314037460461259 test_loss: 0.14575542211532594
epoch: 101 training_loss 0.12833642970770598 test_loss: 0.1436035752296448
epoch: 102 training_loss 0.1310574809089303 test_loss: 0.14702290296554565
epoch: 103 training_loss 0.136731900498271 test_loss: 0.12865492105484008
epoch: 104 training_loss 0.12583734054118395 test_loss: 0.13576257228851318
epoch: 105 training_loss 0.12268011868000031 test_loss: 0.13997089862823486
epoch: 106 training_loss 0.13597914937883615 test_loss: 0.12869163751602172
epoch: 107 training_loss 0.1278788384795189 test_loss: 0.1353098750114441
epoch: 108 training_loss 0.12811395492404698 test_loss: 0.14229780435562134
epoch: 109 training_loss 0.12897275816649198 test_loss: 0.14062217473983765
epoch: 110 training_loss 0.12332946358248592 test_loss: 0.14609540700912477
epoch: 111 training_loss 0.13109445527195931 test_loss: 0.15687509775161743
epoch: 112 training_loss 0.13651082251220942 test_loss: 0.13623371124267578
epoch: 113 training_loss 0.12715406710281968 test_loss: 0.13354642391204835
epoch: 114 training_loss 0.12708012435585261 test_loss: 0.15676329135894776
epoch: 115 training_loss 0.12961862429976465 test_loss: 0.1384446144104004
epoch: 116 training_loss 0.12863761339336632 test_loss: 0.1421162009239197
epoch: 117 training_loss 0.1197680420614779 test_loss: 0.14017783403396605
epoch: 118 training_loss 0.12255069022998213 test_loss: 0.14518100023269653
epoch: 119 training_loss 0.12752411149442197 test_loss: 0.154075288772583
epoch: 120 training_loss 0.12114907272160053 test_loss: 0.12556979656219483
epoch: 121 training_loss 0.12109677467495203 test_loss: 0.16364387273788453
epoch: 122 training_loss 0.12385574795305729 test_loss: 0.14423930644989014
epoch: 123 training_loss 0.12434937227517366 test_loss: 0.1432383418083191
epoch: 124 training_loss 0.13617980480194092 test_loss: 0.12986966371536254
epoch: 125 training_loss 0.12491268951445818 test_loss: 0.147965407371521
epoch: 126 training_loss 0.13233684498816728 test_loss: 0.12569128274917601
epoch: 127 training_loss 0.12868543138727545 test_loss: 0.15034875869750977
epoch: 128 training_loss 0.12911046346649527 test_loss: 0.1531454563140869
epoch: 129 training_loss 0.12571319863200187 test_loss: 0.14250649213790895
epoch: 130 training_loss 0.13508767357096077 test_loss: 0.1611134886741638
epoch: 131 training_loss 0.13314289297908544 test_loss: 0.14831130504608153
epoch: 132 training_loss 0.12538696341216565 test_loss: 0.13921905755996705
epoch: 133 training_loss 0.13219182141125202 test_loss: 0.1343984842300415
epoch: 134 training_loss 0.12231715954840183 test_loss: 0.138822340965271
epoch: 135 training_loss 0.12119543608278036 test_loss: 0.13468420505523682
epoch: 136 training_loss 0.12595394022762776 test_loss: 0.13916513919830323
epoch: 137 training_loss 0.1185536577180028 test_loss: 0.14009629487991332
epoch: 138 training_loss 0.11917736064642667 test_loss: 0.1466349959373474
epoch: 139 training_loss 0.1188667196035385 test_loss: 0.1380390405654907
epoch: 140 training_loss 0.12191697377711534 test_loss: 0.12799408435821533
epoch: 141 training_loss 0.1227996401116252 test_loss: 0.14041488170623778
epoch: 142 training_loss 0.12807138793170453 test_loss: 0.15155603885650634
epoch: 143 training_loss 0.12907317586243153 test_loss: 0.13180209398269654
epoch: 144 training_loss 0.12984826404601335 test_loss: 0.14122915267944336
epoch: 145 training_loss 0.13239823296666145 test_loss: 0.15864485502243042
epoch: 146 training_loss 0.12423976777121425 test_loss: 0.13927632570266724
epoch: 147 training_loss 0.12678828526288272 test_loss: 0.13961966037750245
epoch: 148 training_loss 0.1261914712190628 test_loss: 0.13584623336791993
epoch: 149 training_loss 0.13091374572366477 test_loss: 0.13109030723571777
epoch: 0 training_loss 0.315658845603466 test_loss: 0.19330306053161622
epoch: 1 training_loss 0.19076117284595967 test_loss: 0.1773645281791687
epoch: 2 training_loss 0.18125147081911563 test_loss: 0.17369598150253296
epoch: 3 training_loss 0.1809402033686638 test_loss: 0.15563687086105346
epoch: 4 training_loss 0.16481366753578186 test_loss: 0.1390844464302063
epoch: 5 training_loss 0.1597448617592454 test_loss: 0.15474300384521483
epoch: 6 training_loss 0.15456950765103103 test_loss: 0.1462884545326233
epoch: 7 training_loss 0.15645403783768416 test_loss: 0.15911340713500977
epoch: 8 training_loss 0.15496298495680094 test_loss: 0.14621635675430297
epoch: 9 training_loss 0.15177504248917104 test_loss: 0.15557515621185303
epoch: 10 training_loss 0.14584249328821897 test_loss: 0.15064815282821656
epoch: 11 training_loss 0.151437354311347 test_loss: 0.15448497533798217
epoch: 12 training_loss 0.150172573402524 test_loss: 0.13473092317581176
epoch: 13 training_loss 0.15130122877657415 test_loss: 0.14328862428665162
epoch: 14 training_loss 0.14344646863639354 test_loss: 0.14007205963134767
epoch: 15 training_loss 0.1529952970892191 test_loss: 0.15055376291275024
epoch: 16 training_loss 0.14686906047165393 test_loss: 0.14073275327682494
epoch: 17 training_loss 0.15907668814063072 test_loss: 0.14352726936340332
epoch: 18 training_loss 0.13162496782839297 test_loss: 0.15443058013916017
epoch: 19 training_loss 0.15097491454333067 test_loss: 0.1319652795791626
epoch: 20 training_loss 0.152383284047246 test_loss: 0.12851935625076294
epoch: 21 training_loss 0.13440716195851565 test_loss: 0.15619590282440185
epoch: 22 training_loss 0.13541318705305458 test_loss: 0.15947434902191163
epoch: 23 training_loss 0.1429988132044673 test_loss: 0.14306589365005493
epoch: 24 training_loss 0.13448332972824573 test_loss: 0.14860867261886596
epoch: 25 training_loss 0.1327630078047514 test_loss: 0.1590803623199463
epoch: 26 training_loss 0.14422528985887767 test_loss: 0.1398797631263733
epoch: 27 training_loss 0.13294571762904525 test_loss: 0.15671488046646118
epoch: 28 training_loss 0.14272308818995952 test_loss: 0.14002519845962524
epoch: 29 training_loss 0.13430831030011178 test_loss: 0.13198078870773317
epoch: 30 training_loss 0.14784764803946018 test_loss: 0.1329595685005188
epoch: 31 training_loss 0.13854070875793695 test_loss: 0.15726104974746705
epoch: 32 training_loss 0.1447592993453145 test_loss: 0.14101566076278688
epoch: 33 training_loss 0.1385122850164771 test_loss: 0.1470853090286255
epoch: 34 training_loss 0.14276289638131856 test_loss: 0.17013111114501953
epoch: 35 training_loss 0.1348801479116082 test_loss: 0.13950650691986083
epoch: 36 training_loss 0.13818563532084227 test_loss: 0.13458718061447145
epoch: 37 training_loss 0.148046096265316 test_loss: 0.14379594326019288
epoch: 38 training_loss 0.13989959647879005 test_loss: 0.1406851053237915
epoch: 39 training_loss 0.1345513328164816 test_loss: 0.1432479977607727
epoch: 40 training_loss 0.14333287604153155 test_loss: 0.13822067975997926
epoch: 41 training_loss 0.13416429203003644 test_loss: 0.16756426095962523
epoch: 42 training_loss 0.13653135657310486 test_loss: 0.13754878044128419
epoch: 43 training_loss 0.13644420295953752 test_loss: 0.1382738709449768
epoch: 44 training_loss 0.1417342200130224 test_loss: 0.14614793062210082
epoch: 45 training_loss 0.14645841423422098 test_loss: 0.14574319124221802
epoch: 46 training_loss 0.13670301798731088 test_loss: 0.14750595092773439
epoch: 47 training_loss 0.13338397845625877 test_loss: 0.134490442276001
epoch: 48 training_loss 0.13692948376759886 test_loss: 0.14545466899871826
epoch: 49 training_loss 0.13489397622644902 test_loss: 0.13864512443542482
epoch: 50 training_loss 0.1400883474573493 test_loss: 0.13535399436950685
epoch: 51 training_loss 0.13931077051907778 test_loss: 0.11947543621063232
epoch: 52 training_loss 0.13413637874647974 test_loss: 0.12343637943267823
epoch: 53 training_loss 0.140343631580472 test_loss: 0.1430964231491089
epoch: 54 training_loss 0.13625400636345147 test_loss: 0.1270545244216919
epoch: 55 training_loss 0.13274821892380714 test_loss: 0.13854290246963502
epoch: 56 training_loss 0.1336194685846567 test_loss: 0.13882501125335694
epoch: 57 training_loss 0.1448433194309473 test_loss: 0.13103336095809937
epoch: 58 training_loss 0.13793038837611676 test_loss: 0.1421345591545105
epoch: 59 training_loss 0.13380526062101125 test_loss: 0.13135501146316528
epoch: 60 training_loss 0.1478405235335231 test_loss: 0.12525508403778077
epoch: 61 training_loss 0.14524420402944088 test_loss: 0.15285630226135255
epoch: 62 training_loss 0.14213596530258654 test_loss: 0.1589670181274414
epoch: 63 training_loss 0.14053311828523873 test_loss: 0.14967492818832398
epoch: 64 training_loss 0.1380326481163502 test_loss: 0.12258739471435547
epoch: 65 training_loss 0.14491959318518638 test_loss: 0.12748985290527343
epoch: 66 training_loss 0.1308254612609744 test_loss: 0.13108261823654174
epoch: 67 training_loss 0.14653018593788147 test_loss: 0.13040223121643066
epoch: 68 training_loss 0.13117731481790543 test_loss: 0.13501360416412353
epoch: 69 training_loss 0.1330823465064168 test_loss: 0.14092400074005126
epoch: 70 training_loss 0.13938610222190617 test_loss: 0.12819108963012696
epoch: 71 training_loss 0.13525803864002228 test_loss: 0.13971740007400513
epoch: 72 training_loss 0.13944622319191693 test_loss: 0.1413611054420471
epoch: 73 training_loss 0.13547624655067922 test_loss: 0.14758388996124266
epoch: 74 training_loss 0.13737462602555753 test_loss: 0.14651141166687012
epoch: 75 training_loss 0.13235240638256074 test_loss: 0.1408477544784546
epoch: 76 training_loss 0.14007593207061292 test_loss: 0.1295798420906067
epoch: 77 training_loss 0.12646301094442605 test_loss: 0.1348292350769043
epoch: 78 training_loss 0.13366864573210477 test_loss: 0.1399932622909546
epoch: 79 training_loss 0.13232840549200772 test_loss: 0.1267598032951355
epoch: 80 training_loss 0.12902183331549166 test_loss: 0.13164814710617065
epoch: 81 training_loss 0.1381095840781927 test_loss: 0.13993587493896484
epoch: 82 training_loss 0.13629497058689594 test_loss: 0.14033827781677247
epoch: 83 training_loss 0.14033346518874168 test_loss: 0.1267081618309021
epoch: 84 training_loss 0.14337321143597365 test_loss: 0.14396852254867554
epoch: 85 training_loss 0.12755899315699934 test_loss: 0.1433561086654663
epoch: 86 training_loss 0.12549989027902483 test_loss: 0.13924999237060548
epoch: 87 training_loss 0.13962706439197065 test_loss: 0.13768864870071412
epoch: 88 training_loss 0.12763701107352973 test_loss: 0.1384680151939392
epoch: 89 training_loss 0.13315674167126418 test_loss: 0.11744612455368042
epoch: 90 training_loss 0.13384429253637792 test_loss: 0.13357851505279542
epoch: 91 training_loss 0.1319898283854127 test_loss: 0.13835467100143434
epoch: 92 training_loss 0.12975916028022766 test_loss: 0.13800599575042724
epoch: 93 training_loss 0.13546083705499767 test_loss: 0.14275732040405273
epoch: 94 training_loss 0.13096632588654755 test_loss: 0.13182811737060546
epoch: 95 training_loss 0.1364546425268054 test_loss: 0.15412338972091674
epoch: 96 training_loss 0.13356085427105427 test_loss: 0.1308308482170105
epoch: 97 training_loss 0.1324117571860552 test_loss: 0.13198659420013428
epoch: 98 training_loss 0.13164896100759507 test_loss: 0.13531774282455444
epoch: 99 training_loss 0.13450202245265244 test_loss: 0.14033645391464233
epoch: 100 training_loss 0.131698744520545 test_loss: 0.1479532837867737
epoch: 101 training_loss 0.1353519258275628 test_loss: 0.1412922263145447
epoch: 102 training_loss 0.13381488468497993 test_loss: 0.1517447829246521
epoch: 103 training_loss 0.13774409130215645 test_loss: 0.14619479179382325
epoch: 104 training_loss 0.12733001563698054 test_loss: 0.13716230392456055
epoch: 105 training_loss 0.13406353037804364 test_loss: 0.14203548431396484
epoch: 106 training_loss 0.13695725832134487 test_loss: 0.14408243894577027
epoch: 107 training_loss 0.1405979525670409 test_loss: 0.13432879447937013
epoch: 108 training_loss 0.12962804835289718 test_loss: 0.1343280553817749
epoch: 109 training_loss 0.13790381882339717 test_loss: 0.13946527242660522
epoch: 110 training_loss 0.13177349403500557 test_loss: 0.15148043632507324
epoch: 111 training_loss 0.13631589852273465 test_loss: 0.12684144973754882
epoch: 112 training_loss 0.13996302127838134 test_loss: 0.1283828854560852
epoch: 113 training_loss 0.12764462869614362 test_loss: 0.12753751277923583
epoch: 114 training_loss 0.1284306001663208 test_loss: 0.13508933782577515
epoch: 115 training_loss 0.13479874845594167 test_loss: 0.12561026811599732
epoch: 116 training_loss 0.12932957913726567 test_loss: 0.1377963662147522
epoch: 117 training_loss 0.1370782557502389 test_loss: 0.13131357431411744
epoch: 118 training_loss 0.13357205834239722 test_loss: 0.15287766456604004
epoch: 119 training_loss 0.141890871450305 test_loss: 0.14038490056991576
epoch: 120 training_loss 0.12961184356361627 test_loss: 0.11228985786437988
epoch: 121 training_loss 0.13436875350773334 test_loss: 0.13196767568588258
epoch: 122 training_loss 0.12427238695323467 test_loss: 0.14474451541900635
epoch: 123 training_loss 0.1336151248589158 test_loss: 0.15212997198104858
epoch: 124 training_loss 0.13416552964597941 test_loss: 0.14119772911071776
epoch: 125 training_loss 0.12953814379870893 test_loss: 0.12317566871643067
epoch: 126 training_loss 0.13939824026077985 test_loss: 0.13901396989822387
epoch: 127 training_loss 0.13598295986652376 test_loss: 0.1504077434539795
epoch: 128 training_loss 0.13376985071226954 test_loss: 0.1300053834915161
epoch: 129 training_loss 0.1367408339679241 test_loss: 0.13138726949691773
epoch: 130 training_loss 0.12951775263994933 test_loss: 0.1455216884613037
epoch: 131 training_loss 0.12794317673891784 test_loss: 0.1408333659172058
epoch: 132 training_loss 0.1298721681907773 test_loss: 0.13985580205917358
epoch: 133 training_loss 0.1371978698670864 test_loss: 0.1530148983001709
epoch: 134 training_loss 0.13224725849926472 test_loss: 0.15449520349502563
epoch: 135 training_loss 0.14151361778378488 test_loss: 0.14487957954406738
epoch: 136 training_loss 0.1318994275853038 test_loss: 0.12829772233963013
epoch: 137 training_loss 0.14055563613772393 test_loss: 0.13600839376449586
epoch: 138 training_loss 0.12744058113545179 test_loss: 0.1414355754852295
epoch: 139 training_loss 0.13349142536520958 test_loss: 0.11777758598327637
epoch: 140 training_loss 0.1282318554073572 test_loss: 0.12565983533859254
epoch: 141 training_loss 0.13436353705823423 test_loss: 0.15566362142562867
epoch: 142 training_loss 0.13156390450894834 test_loss: 0.1327838659286499
epoch: 143 training_loss 0.13193002296611667 test_loss: 0.13021557331085204
epoch: 144 training_loss 0.1388452684506774 test_loss: 0.13154659271240235
epoch: 145 training_loss 0.13871887512505054 test_loss: 0.13803015947341918
epoch: 146 training_loss 0.1259159592166543 test_loss: 0.1374588966369629
epoch: 147 training_loss 0.1260895638912916 test_loss: 0.1255443811416626
epoch: 148 training_loss 0.13480328898876906 test_loss: 0.13300786018371583
epoch: 149 training_loss 0.13556561538949607 test_loss: 0.12353314161300659
episode: 0 training return: -1040.3642554593457
episode: 1 training return: -999.2619286864111
episode: 2 training return: -999.6113533928158
episode: 3 training return: -999.0916480366475
epoch: 1 test_true_pfm: -86.62635541624121 sim_pfm: -974.8434621513508
episode: 4 training return: -1001.4792019500508
episode: 5 training return: -998.676593455038
episode: 6 training return: -1004.6031579546958
episode: 7 training return: -1006.3486778600864
epoch: 2 test_true_pfm: -99.92010085211096 sim_pfm: -975.4260911901241
episode: 8 training return: -1092.39193038244
episode: 9 training return: -1084.544100141524
episode: 10 training return: -995.9044534797457
episode: 11 training return: -979.8091179523115
epoch: 3 test_true_pfm: -4.01458939918118 sim_pfm: -999.1579511039945
episode: 12 training return: -995.5547950351258
episode: 13 training return: -993.3946199005271
episode: 14 training return: -994.802458545399
episode: 15 training return: -994.5895380359864
epoch: 4 test_true_pfm: 44.193437167826374 sim_pfm: -998.2639961086821
episode: 16 training return: -979.5968007056849
episode: 17 training return: -992.635178746776
episode: 18 training return: -997.7682438089561
episode: 19 training return: -997.533907976981
epoch: 5 test_true_pfm: 49.42195181838937 sim_pfm: -995.9401485841735
episode: 20 training return: -982.6720720428178
episode: 21 training return: -978.0295747894546
episode: 22 training return: -999.0751800804236
episode: 23 training return: -977.556227699377
epoch: 6 test_true_pfm: 42.14982119369958 sim_pfm: -996.6509557763669
episode: 24 training return: -977.7770459915237
episode: 25 training return: -993.1296782657026
episode: 26 training return: -978.2013295033754
episode: 27 training return: -994.6085377003819
epoch: 7 test_true_pfm: 114.4610766550051 sim_pfm: -995.6244639879836
episode: 28 training return: -978.8957259787755
episode: 29 training return: -986.0098637876505
episode: 30 training return: -987.5765783044601
episode: 31 training return: -984.3485918288745
epoch: 8 test_true_pfm: 80.4756038992188 sim_pfm: -993.4307678697454
episode: 32 training return: -983.3209439686253
episode: 33 training return: -997.3735226357078
episode: 34 training return: -996.0823167735464
episode: 35 training return: -993.6569646423981
epoch: 9 test_true_pfm: 113.8793818092554 sim_pfm: -992.3831404743017
episode: 36 training return: -997.5472679375969
episode: 37 training return: -994.7764156304335
episode: 38 training return: -996.2063082329361
episode: 39 training return: -976.3866978482845
epoch: 10 test_true_pfm: 300.52273689319776 sim_pfm: -989.379972385275
episode: 40 training return: -995.2944754108893
episode: 41 training return: -997.2132432119624
episode: 42 training return: -993.6310559422766
episode: 43 training return: -974.1605821715254
epoch: 11 test_true_pfm: 288.24039621630317 sim_pfm: -988.8009378057163
episode: 44 training return: -989.464761490653
episode: 45 training return: -975.5606171321472
episode: 46 training return: -987.5521045531124
episode: 47 training return: -988.9357618882091
epoch: 12 test_true_pfm: 182.55335314813124 sim_pfm: -964.7299499928645
episode: 48 training return: -978.2248537243995
episode: 49 training return: -986.2643571535472
episode: 50 training return: -968.3716618271466
episode: 51 training return: -978.4273552253384
epoch: 13 test_true_pfm: 158.62300543215198 sim_pfm: -945.9733699107222
episode: 52 training return: -951.7959733188022
episode: 53 training return: -993.0124556307713
episode: 54 training return: -934.522210785326
episode: 55 training return: -941.1165282902864
epoch: 14 test_true_pfm: 167.8584119519985 sim_pfm: -953.4009841817923
episode: 56 training return: -923.5845504117866
episode: 57 training return: -947.3715590660314
episode: 58 training return: -942.3222076832474
episode: 59 training return: -943.7299456441855
epoch: 15 test_true_pfm: 210.01081246405613 sim_pfm: -891.8214305697411
episode: 60 training return: -936.575372615255
episode: 61 training return: -898.5519400429387
episode: 62 training return: -915.2521571440774
episode: 63 training return: -937.0980721149773
epoch: 16 test_true_pfm: 161.2775524884299 sim_pfm: -907.7600244349543
episode: 64 training return: -920.287764830684
episode: 65 training return: -918.454481697317
episode: 66 training return: -907.4970132764304
episode: 67 training return: -916.9617900673335
epoch: 17 test_true_pfm: 155.0650207933733 sim_pfm: -891.2138988217245
episode: 68 training return: -967.1495181975398
episode: 69 training return: -900.588019795107
episode: 70 training return: -922.0000217453063
episode: 71 training return: -809.0302866907645
epoch: 18 test_true_pfm: 180.15525282746796 sim_pfm: -905.7506553403769
episode: 72 training return: -904.0434139270983
episode: 73 training return: -917.7525158548505
episode: 74 training return: -907.4848308793697
episode: 75 training return: -880.0759882194052
epoch: 19 test_true_pfm: 105.16962572464149 sim_pfm: -904.8443583911079
episode: 76 training return: -850.7558468743597
episode: 77 training return: -907.792997764852
episode: 78 training return: -823.5045583040419
episode: 79 training return: -837.7328661193957
epoch: 20 test_true_pfm: 102.32131826814843 sim_pfm: -903.2242074399416
episode: 80 training return: -902.8733952200415
episode: 81 training return: -901.2382001732116
episode: 82 training return: -903.0380943692829
episode: 83 training return: -890.088276171421
epoch: 21 test_true_pfm: 221.9249799562599 sim_pfm: -880.071286133797
episode: 84 training return: -871.8206475328631
episode: 85 training return: -894.3213083830875
episode: 86 training return: -841.8530044066533
episode: 87 training return: -715.2732033843201
epoch: 22 test_true_pfm: 168.08504639233834 sim_pfm: -888.4615321664179
episode: 88 training return: -744.2811115399743
episode: 89 training return: -721.5165016025492
episode: 90 training return: -715.4710779705317
episode: 91 training return: -696.2019324380025
epoch: 23 test_true_pfm: 357.8754227897887 sim_pfm: -583.7062278499312
episode: 92 training return: -740.8548776871991
episode: 93 training return: -626.3163581793367
episode: 94 training return: -663.0302186892943
episode: 95 training return: -651.3681826676404
epoch: 24 test_true_pfm: 413.42426336639363 sim_pfm: -539.0186104020733
episode: 96 training return: -620.2421855482291
episode: 97 training return: -591.9950070826028
episode: 98 training return: -602.3870478739982
episode: 99 training return: -773.2860348369113
epoch: 25 test_true_pfm: 366.08118799326434 sim_pfm: -680.8989421318602
episode: 100 training return: -693.189055641085
episode: 101 training return: -598.2417690957003
episode: 102 training return: -682.9597990674287
episode: 103 training return: -603.3663491005301
epoch: 26 test_true_pfm: 331.042410067921 sim_pfm: -486.4753844785203
episode: 104 training return: -581.8066304904784
episode: 105 training return: -541.5099133234892
episode: 106 training return: -558.3604055133856
episode: 107 training return: -555.2287120157563
epoch: 27 test_true_pfm: 383.7931518440878 sim_pfm: -504.8803158927777
episode: 108 training return: -561.0889949946018
episode: 109 training return: -522.2857553505108
episode: 110 training return: -562.7994093046735
episode: 111 training return: -572.6545564029258
epoch: 28 test_true_pfm: 407.9011522739411 sim_pfm: -520.4280751611108
episode: 112 training return: -573.6240742310919
episode: 113 training return: -541.3401884084997
episode: 114 training return: -516.2899905009775
episode: 115 training return: -504.96829256214426
epoch: 29 test_true_pfm: 386.41031458326916 sim_pfm: -505.49964106609144
episode: 116 training return: -573.4513207822098
episode: 117 training return: -479.8945562232508
episode: 118 training return: -630.3951765740046
episode: 119 training return: -539.3053443488157
epoch: 30 test_true_pfm: 463.842781591296 sim_pfm: -459.0287172917262
episode: 120 training return: -534.8419520979935
episode: 121 training return: -576.9645193942484
episode: 122 training return: -517.7484777347452
episode: 123 training return: -537.4113954343075
epoch: 31 test_true_pfm: 405.3214700348538 sim_pfm: -418.5475613881675
episode: 124 training return: -607.3658037019228
episode: 125 training return: -533.345608373669
episode: 126 training return: -726.2783959085311
episode: 127 training return: -560.3837234152782
epoch: 32 test_true_pfm: 239.36035302328685 sim_pfm: -777.2669307748747
episode: 128 training return: -564.459257675507
episode: 129 training return: -535.8563751693162
episode: 130 training return: -794.8648863748343
episode: 131 training return: -789.9022171752576
epoch: 33 test_true_pfm: 433.46558888046985 sim_pfm: -414.3042430173495
episode: 132 training return: -506.70236300266106
episode: 133 training return: -625.3219426634694
episode: 134 training return: -647.05788347492
episode: 135 training return: -488.18685739568133
epoch: 34 test_true_pfm: 356.16444967613484 sim_pfm: -408.02961747355425
episode: 136 training return: -586.8020777140374
episode: 137 training return: -494.73922673692505
episode: 138 training return: -441.2757939511763
episode: 139 training return: -469.6237346244042
epoch: 35 test_true_pfm: 417.5854308249461 sim_pfm: -416.41355298164814
episode: 140 training return: -460.41729617257374
episode: 141 training return: -509.28499598970853
episode: 142 training return: -466.5171370109238
episode: 143 training return: -460.5765280588574
epoch: 36 test_true_pfm: 493.31419982249867 sim_pfm: -424.266239693688
episode: 144 training return: -423.4704365400193
episode: 145 training return: -446.762985873164
episode: 146 training return: -465.646505144418
episode: 147 training return: -423.64964680124706
epoch: 37 test_true_pfm: 444.91236161770706 sim_pfm: -466.6848668414643
episode: 148 training return: -412.33638276257767
episode: 149 training return: -430.75930242230316
episode: 150 training return: -427.44536169583694
episode: 151 training return: -417.87915529279786
epoch: 38 test_true_pfm: 474.92627572389364 sim_pfm: -391.37344168009537
episode: 152 training return: -451.8985761074309
episode: 153 training return: -459.99723141967905
episode: 154 training return: -449.1304834763617
episode: 155 training return: -439.1868112178606
epoch: 39 test_true_pfm: 532.46673410826 sim_pfm: -408.7898520735093
episode: 156 training return: -464.53022120870327
episode: 157 training return: -399.7615011870635
episode: 158 training return: -401.5316047161588
episode: 159 training return: -404.30019991916004
epoch: 40 test_true_pfm: 450.4307708076587 sim_pfm: -405.0450105927802
episode: 160 training return: -417.4200926299427
episode: 161 training return: -441.50466786170256
episode: 162 training return: -430.15330649075173
episode: 163 training return: -421.3699816729386
epoch: 41 test_true_pfm: 524.9542750574127 sim_pfm: -402.01650453785464
episode: 164 training return: -424.0781992236648
episode: 165 training return: -417.77788780204065
episode: 166 training return: -417.4946556422118
episode: 167 training return: -416.8077891711956
epoch: 42 test_true_pfm: 523.4367581914689 sim_pfm: -399.28899573032305
episode: 168 training return: -409.3803269592796
episode: 169 training return: -432.7668257433901
episode: 170 training return: -431.01654641938865
episode: 171 training return: -423.54747964535704
epoch: 43 test_true_pfm: 407.19691956740985 sim_pfm: -413.2789990469444
episode: 172 training return: -407.64661915200395
episode: 173 training return: -437.01913954036434
episode: 174 training return: -416.37717809908435
episode: 175 training return: -426.4294451712791
epoch: 44 test_true_pfm: 486.6559920060958 sim_pfm: -408.84625494645894
episode: 176 training return: -420.5774432974678
episode: 177 training return: -411.3314289343575
episode: 178 training return: -444.7486800348651
episode: 179 training return: -417.39608002037005
epoch: 45 test_true_pfm: 530.4225176945339 sim_pfm: -360.28649477120683
episode: 180 training return: -376.24956649379675
episode: 181 training return: -395.65003790940966
episode: 182 training return: -470.8931887857172
episode: 183 training return: -399.703026667312
epoch: 46 test_true_pfm: 538.273653487497 sim_pfm: -378.8169149783218
episode: 184 training return: -421.42571112809424
episode: 185 training return: -422.2422778360346
episode: 186 training return: -415.6243141476489
episode: 187 training return: -433.0712377551741
epoch: 47 test_true_pfm: 477.34090969222467 sim_pfm: -381.6636509991851
episode: 188 training return: -417.2818145043845
episode: 189 training return: -395.4940080308273
episode: 190 training return: -387.12164427012857
episode: 191 training return: -387.08202829028636
epoch: 48 test_true_pfm: 439.8282041778645 sim_pfm: -375.36317963952206
episode: 192 training return: -373.8008988773732
episode: 193 training return: -401.07627099464065
episode: 194 training return: -365.4619234600913
episode: 195 training return: -393.42487813552947
epoch: 49 test_true_pfm: 578.6293118421174 sim_pfm: -345.4231156324059
episode: 196 training return: -385.9484200819432
episode: 197 training return: -386.5289067860959
episode: 198 training return: -411.37881705023136
episode: 199 training return: -408.6899181720702
epoch: 50 test_true_pfm: 561.2741541911067 sim_pfm: -356.2132187919235
episode: 200 training return: -411.1811453830051
episode: 201 training return: -436.29559939700465
episode: 202 training return: -387.44743211681873
episode: 203 training return: -410.9919333215867
epoch: 51 test_true_pfm: 471.2213879330546 sim_pfm: -371.12278318317203
episode: 204 training return: -393.4721458016148
episode: 205 training return: -387.87199174041837
episode: 206 training return: -391.8724416411966
episode: 207 training return: -373.63910691775317
epoch: 52 test_true_pfm: 407.771709125364 sim_pfm: -365.89394367828737
episode: 208 training return: -395.37420724284397
episode: 209 training return: -411.6559774293424
episode: 210 training return: -387.1596323777568
episode: 211 training return: -382.80280989998414
epoch: 53 test_true_pfm: 457.55083894650346 sim_pfm: -348.86571703112253
episode: 212 training return: -386.7468336012148
episode: 213 training return: -396.9500566295179
episode: 214 training return: -400.8660506506411
episode: 215 training return: -414.80428639426856
epoch: 54 test_true_pfm: 444.9873218807713 sim_pfm: -340.52930112518425
episode: 216 training return: -390.05318227541994
episode: 217 training return: -381.13309608720385
episode: 218 training return: -417.57026393029804
episode: 219 training return: -403.5280167100663
epoch: 55 test_true_pfm: 457.5168999931281 sim_pfm: -352.8224916090853
episode: 220 training return: -385.95336742006816
episode: 221 training return: -399.0366221177995
episode: 222 training return: -394.48563532449106
episode: 223 training return: -396.3837276886285
epoch: 56 test_true_pfm: 453.60120531548927 sim_pfm: -336.0592175625489
episode: 224 training return: -396.7278730834928
episode: 225 training return: -407.28551717832806
episode: 226 training return: -390.2062898231215
episode: 227 training return: -392.4366282418209
epoch: 57 test_true_pfm: 399.63951972432136 sim_pfm: -349.27095581937346
episode: 228 training return: -376.84298816081946
episode: 229 training return: -385.45527090823066
episode: 230 training return: -388.3256663353156
episode: 231 training return: -399.139218543384
epoch: 58 test_true_pfm: 433.58986990372847 sim_pfm: -351.55550323949086
episode: 232 training return: -379.21361877165936
episode: 233 training return: -367.2578196288669
episode: 234 training return: -389.6572386923298
episode: 235 training return: -383.85567966460115
epoch: 59 test_true_pfm: 475.3160034437642 sim_pfm: -344.68892233195646
episode: 236 training return: -390.20685441137715
episode: 237 training return: -376.6785434316754
episode: 238 training return: -407.2723650830643
episode: 239 training return: -384.1491478337657
epoch: 60 test_true_pfm: 461.8693700954065 sim_pfm: -348.09018006492425
episode: 240 training return: -363.62924895227326
episode: 241 training return: -399.4781852437006
episode: 242 training return: -397.04375717450995
episode: 243 training return: -378.3481289034382
epoch: 61 test_true_pfm: 418.9340673969694 sim_pfm: -366.6472813378729
episode: 244 training return: -396.9325126957644
episode: 245 training return: -426.7785965146848
episode: 246 training return: -397.6622069507331
episode: 247 training return: -379.4576651719809
epoch: 62 test_true_pfm: 432.60845230783997 sim_pfm: -346.9407524663341
episode: 248 training return: -394.11587109902695
episode: 249 training return: -400.41307125693874
episode: 250 training return: -360.308356384814
episode: 251 training return: -405.4030773419422
epoch: 63 test_true_pfm: 424.7463598038949 sim_pfm: -354.5225056521862
episode: 252 training return: -409.3675424735819
episode: 253 training return: -373.12839916067514
episode: 254 training return: -392.3771404743687
episode: 255 training return: -390.0210718056182
epoch: 64 test_true_pfm: 561.6874051878417 sim_pfm: -314.82728179039
episode: 256 training return: -392.138377975789
episode: 257 training return: -376.73926506950494
episode: 258 training return: -370.2475502140883
episode: 259 training return: -361.02513874321454
epoch: 65 test_true_pfm: 548.2144198269046 sim_pfm: -302.8512136072814
episode: 260 training return: -394.93127807042646
episode: 261 training return: -381.33904340972236
episode: 262 training return: -376.28505055787394
episode: 263 training return: -380.27968720617935
epoch: 66 test_true_pfm: 510.26174660253963 sim_pfm: -341.7439089963621
episode: 264 training return: -388.52930423672586
episode: 265 training return: -377.263387006574
episode: 266 training return: -377.91485152164915
episode: 267 training return: -384.65594704411035
epoch: 67 test_true_pfm: 468.02355028385153 sim_pfm: -345.5238264845716
episode: 268 training return: -361.61648657156803
episode: 269 training return: -380.22325666712663
episode: 270 training return: -385.45144107857635
episode: 271 training return: -394.3339442418599
epoch: 68 test_true_pfm: 524.33753469438 sim_pfm: -326.73217598455085
episode: 272 training return: -363.86490770235326
episode: 273 training return: -389.2594537683624
episode: 274 training return: -362.7817746912925
episode: 275 training return: -350.81023846189294
epoch: 69 test_true_pfm: 515.0754022272762 sim_pfm: -335.47785585886976
episode: 276 training return: -395.43176098070535
episode: 277 training return: -368.89183899948677
episode: 278 training return: -389.79945863188004
episode: 279 training return: -363.7798979712542
epoch: 70 test_true_pfm: 439.0773325207445 sim_pfm: -329.6130940850853
episode: 280 training return: -375.52479141804366
episode: 281 training return: -374.9596243504791
episode: 282 training return: -382.3329942975109
episode: 283 training return: -360.1312323164838
epoch: 71 test_true_pfm: 428.08602811876705 sim_pfm: -329.22963184602366
episode: 284 training return: -375.085197065657
episode: 285 training return: -384.08208633455325
episode: 286 training return: -382.4223379519001
episode: 287 training return: -392.1298351448512
epoch: 72 test_true_pfm: 388.8663179178056 sim_pfm: -372.13728972461564
episode: 288 training return: -384.0053399225804
episode: 289 training return: -398.23906783260315
episode: 290 training return: -394.6845340381512
episode: 291 training return: -364.653665592731
epoch: 73 test_true_pfm: 431.0460346895623 sim_pfm: -341.3576263519596
episode: 292 training return: -371.21560396568844
episode: 293 training return: -365.7529926502243
episode: 294 training return: -400.6587004343662
episode: 295 training return: -371.07188115227507
epoch: 74 test_true_pfm: 418.60630867156107 sim_pfm: -331.83141643847307
episode: 296 training return: -370.4813217595658
episode: 297 training return: -366.7818089003828
episode: 298 training return: -379.5291332350446
episode: 299 training return: -386.31337308765956
epoch: 75 test_true_pfm: 549.7136618994331 sim_pfm: -309.0639801365813
episode: 300 training return: -376.12595893886106
episode: 301 training return: -372.9250303786361
episode: 302 training return: -372.28507823016
episode: 303 training return: -373.86951740866806
epoch: 76 test_true_pfm: 575.8071355948238 sim_pfm: -292.17343445610686
episode: 304 training return: -375.20114831793876
episode: 305 training return: -356.458639161597
episode: 306 training return: -357.2939247848285
episode: 307 training return: -360.04089122109457
epoch: 77 test_true_pfm: 480.50400508011506 sim_pfm: -304.3445681235734
episode: 308 training return: -348.3798659385668
episode: 309 training return: -361.1131611715861
episode: 310 training return: -349.73806103919225
episode: 311 training return: -380.5900732948214
epoch: 78 test_true_pfm: 523.4304389648813 sim_pfm: -299.33272017024024
episode: 312 training return: -372.51391479488524
episode: 313 training return: -376.69975438191557
episode: 314 training return: -385.0537245902263
episode: 315 training return: -354.89976694784394
epoch: 79 test_true_pfm: 543.1733422590493 sim_pfm: -293.1399181161584
episode: 316 training return: -415.7113583982228
episode: 317 training return: -386.38700169251285
episode: 318 training return: -381.9078061908057
episode: 319 training return: -372.3734890767231
epoch: 80 test_true_pfm: 502.2663877671269 sim_pfm: -305.3591403592486
episode: 320 training return: -367.86264862103934
episode: 321 training return: -354.0263770784744
episode: 322 training return: -363.00041653507753
episode: 323 training return: -391.92003121198513
epoch: 81 test_true_pfm: 470.54872931631047 sim_pfm: -320.482647831286
episode: 324 training return: -382.16600818925025
episode: 325 training return: -386.22770670613
episode: 326 training return: -363.92326064622773
episode: 327 training return: -362.57043745206056
epoch: 82 test_true_pfm: 560.2603726343249 sim_pfm: -305.169613850943
episode: 328 training return: -383.3277184873244
episode: 329 training return: -345.9406840190896
episode: 330 training return: -356.4281677863771
episode: 331 training return: -353.17856063153926
epoch: 83 test_true_pfm: 506.0954323774606 sim_pfm: -302.0617121924062
episode: 332 training return: -376.60801116961534
episode: 333 training return: -358.7346439624528
episode: 334 training return: -390.319736417013
episode: 335 training return: -396.20891922348216
epoch: 84 test_true_pfm: 569.0031695586584 sim_pfm: -288.81616498578313
episode: 336 training return: -381.4246011286518
episode: 337 training return: -365.8482883485379
episode: 338 training return: -370.52447856623724
episode: 339 training return: -357.60219944891
epoch: 85 test_true_pfm: 520.0361155637789 sim_pfm: -282.0881853295064
episode: 340 training return: -371.45374024873576
episode: 341 training return: -381.9044098504749
episode: 342 training return: -353.9182917049523
episode: 343 training return: -381.7845501402442
epoch: 86 test_true_pfm: 519.5370590607844 sim_pfm: -287.2203887992161
episode: 344 training return: -362.40372809708157
episode: 345 training return: -376.46072931351364
episode: 346 training return: -381.4014942355373
episode: 347 training return: -364.9282807282538
epoch: 87 test_true_pfm: 434.98273664119534 sim_pfm: -326.89662705302385
episode: 348 training return: -396.11654971460257
episode: 349 training return: -359.7246740679093
episode: 350 training return: -354.5010642791498
episode: 351 training return: -373.3421899763661
epoch: 88 test_true_pfm: 466.3597492913388 sim_pfm: -306.77390916388555
episode: 352 training return: -391.57046451353096
episode: 353 training return: -353.72805269095556
episode: 354 training return: -356.12012550788864
episode: 355 training return: -377.09772631069114
epoch: 89 test_true_pfm: 465.46673327667804 sim_pfm: -341.3134760023291
episode: 356 training return: -351.837044359194
episode: 357 training return: -370.0337518259632
episode: 358 training return: -367.9053794809478
episode: 359 training return: -368.0562116154729
epoch: 90 test_true_pfm: 425.11243098585146 sim_pfm: -323.4653937529867
episode: 360 training return: -357.19685012591
episode: 361 training return: -373.63229570986493
episode: 362 training return: -361.620098635313
episode: 363 training return: -378.7493871392578
epoch: 91 test_true_pfm: 445.3002190831505 sim_pfm: -313.2990944249434
episode: 364 training return: -356.3894327095112
episode: 365 training return: -377.49858289966323
episode: 366 training return: -348.80441582528056
episode: 367 training return: -365.32294565424667
epoch: 92 test_true_pfm: 550.5848313195778 sim_pfm: -298.8172524202841
episode: 368 training return: -346.5276005165978
episode: 369 training return: -367.04866778565804
episode: 370 training return: -366.72034553035854
episode: 371 training return: -375.24039345112504
epoch: 93 test_true_pfm: 425.69862810170366 sim_pfm: -317.3299060201137
episode: 372 training return: -412.1079550078921
episode: 373 training return: -371.26714762890924
episode: 374 training return: -355.0728728088192
episode: 375 training return: -346.4331403569393
epoch: 94 test_true_pfm: 531.8295097172897 sim_pfm: -288.8879938853235
episode: 376 training return: -351.16664040861735
episode: 377 training return: -394.52599459171637
episode: 378 training return: -343.316407022717
episode: 379 training return: -360.5184481634493
epoch: 95 test_true_pfm: 450.4441452288295 sim_pfm: -311.32885285946776
episode: 380 training return: -373.61269701093994
episode: 381 training return: -370.9510617628081
episode: 382 training return: -364.49973655270077
episode: 383 training return: -343.1777487380913
epoch: 96 test_true_pfm: 486.86971368791666 sim_pfm: -295.8612429432581
episode: 384 training return: -349.3366582890301
episode: 385 training return: -382.1086316387696
episode: 386 training return: -363.0417367862594
episode: 387 training return: -364.32654001018244
epoch: 97 test_true_pfm: 421.5357696653296 sim_pfm: -300.5930504932657
episode: 388 training return: -363.3690791977666
episode: 389 training return: -335.2357549489496
episode: 390 training return: -358.16308441415316
episode: 391 training return: -341.7248722567143
epoch: 98 test_true_pfm: 434.39457766099855 sim_pfm: -316.89601095936854
episode: 392 training return: -367.41943988381706
episode: 393 training return: -361.93541422450903
episode: 394 training return: -369.3288040499605
episode: 395 training return: -366.64814326306464
epoch: 99 test_true_pfm: 442.14704861838845 sim_pfm: -332.8481038811783
episode: 396 training return: -379.696012916211
episode: 397 training return: -353.21353878595863
episode: 398 training return: -364.91832912767927
episode: 399 training return: -387.1023114567125
epoch: 100 test_true_pfm: 511.0297835283424 sim_pfm: -292.71111095670136
episode: 400 training return: -374.55818156677424
episode: 401 training return: -360.6388237569702
episode: 402 training return: -344.2734667989702
episode: 403 training return: -355.8312946925137
epoch: 101 test_true_pfm: 425.4217213790548 sim_pfm: -327.3155458151768
episode: 404 training return: -361.47445630823563
episode: 405 training return: -364.44953241005913
episode: 406 training return: -350.3929244550868
episode: 407 training return: -376.8907838674278
epoch: 102 test_true_pfm: 501.2168066112088 sim_pfm: -308.3051641846831
episode: 408 training return: -365.55095414939063
episode: 409 training return: -359.8869204190752
episode: 410 training return: -368.00158266716846
episode: 411 training return: -369.1342213741365
epoch: 103 test_true_pfm: 449.5211034154472 sim_pfm: -303.985899843033
episode: 412 training return: -371.40603524954594
episode: 413 training return: -368.2323035596291
episode: 414 training return: -366.0909239056716
episode: 415 training return: -355.7050948103365
epoch: 104 test_true_pfm: 473.15271368168874 sim_pfm: -313.8635085142799
episode: 416 training return: -359.9408269695211
episode: 417 training return: -362.85688128219675
episode: 418 training return: -357.0419103656919
episode: 419 training return: -350.21126908540134
epoch: 105 test_true_pfm: 476.70869905225294 sim_pfm: -309.0395969771336
episode: 420 training return: -362.95986767329106
episode: 421 training return: -348.13197901328505
episode: 422 training return: -360.19409628287445
episode: 423 training return: -350.43117436939673
epoch: 106 test_true_pfm: 539.7609709249413 sim_pfm: -298.9960998034973
episode: 424 training return: -357.91135369403656
episode: 425 training return: -358.1188261095298
episode: 426 training return: -363.6793321670311
episode: 427 training return: -369.6516710380257
epoch: 107 test_true_pfm: 507.2525924898543 sim_pfm: -295.21196158918013
episode: 428 training return: -360.27038033643794
episode: 429 training return: -348.8130511593354
episode: 430 training return: -345.78749730014306
episode: 431 training return: -383.1400154552965
epoch: 108 test_true_pfm: 425.4670677011756 sim_pfm: -319.9698870979966
episode: 432 training return: -347.93100154234634
episode: 433 training return: -349.0958911752093
episode: 434 training return: -350.96639185091936
episode: 435 training return: -343.21868993697115
epoch: 109 test_true_pfm: 484.175863383683 sim_pfm: -303.5988736777202
episode: 436 training return: -347.4924786989255
episode: 437 training return: -329.72680530836493
episode: 438 training return: -349.2633826028272
episode: 439 training return: -376.43964955450014
epoch: 110 test_true_pfm: 409.31389828038687 sim_pfm: -326.0394978659959
episode: 440 training return: -349.2677022334814
episode: 441 training return: -368.74854173033555
episode: 442 training return: -331.32037845173943
episode: 443 training return: -364.03312328908845
epoch: 111 test_true_pfm: 451.69015588433973 sim_pfm: -297.2569329567634
episode: 444 training return: -361.6157823613614
episode: 445 training return: -365.33388348514734
episode: 446 training return: -384.43003384440726
episode: 447 training return: -368.14112582280313
epoch: 112 test_true_pfm: 447.05393838745704 sim_pfm: -315.9738256611544
episode: 448 training return: -366.92796652531996
episode: 449 training return: -349.86313822192665
episode: 450 training return: -362.82057769597355
episode: 451 training return: -340.0301638388245
epoch: 113 test_true_pfm: 514.4766393408817 sim_pfm: -315.9857613447096
episode: 452 training return: -377.4002682972458
episode: 453 training return: -357.47795221094316
episode: 454 training return: -355.78539138294866
episode: 455 training return: -335.25815991736386
epoch: 114 test_true_pfm: 501.84890746891756 sim_pfm: -309.11255536198996
episode: 456 training return: -333.5948311591525
episode: 457 training return: -357.4078796073888
episode: 458 training return: -356.09485768463634
episode: 459 training return: -356.4611098314006
epoch: 115 test_true_pfm: 533.041364497542 sim_pfm: -305.64858758611496
episode: 460 training return: -356.6042189547399
episode: 461 training return: -365.16992122189464
episode: 462 training return: -352.1475503171492
episode: 463 training return: -353.5924980204736
epoch: 116 test_true_pfm: 450.2753518701909 sim_pfm: -315.1899439059299
episode: 464 training return: -365.27497574852333
episode: 465 training return: -352.8647495184705
episode: 466 training return: -367.8792095665629
episode: 467 training return: -355.1361284851229
epoch: 117 test_true_pfm: 452.9072458510963 sim_pfm: -315.9706203223196
episode: 468 training return: -368.4175602633386
episode: 469 training return: -346.99245037357036
episode: 470 training return: -347.65205760300773
episode: 471 training return: -367.96273309754184
epoch: 118 test_true_pfm: 498.90638847867393 sim_pfm: -295.6014619700954
episode: 472 training return: -346.29709230288734
episode: 473 training return: -363.0091412807933
episode: 474 training return: -353.1530494726849
episode: 475 training return: -360.98179636319645
epoch: 119 test_true_pfm: 486.43029432930143 sim_pfm: -326.4938029655891
episode: 476 training return: -357.13399030189447
episode: 477 training return: -353.84993531436993
episode: 478 training return: -339.77398319857576
episode: 479 training return: -355.97611318723506
epoch: 120 test_true_pfm: 468.5976988751993 sim_pfm: -291.7070938833071
episode: 480 training return: -348.9340887794616
episode: 481 training return: -345.3088584772297
episode: 482 training return: -346.05129328519064
episode: 483 training return: -359.9229574253034
epoch: 121 test_true_pfm: 526.1707125670641 sim_pfm: -318.5117721227434
episode: 484 training return: -334.08376672858014
episode: 485 training return: -351.30954098954766
episode: 486 training return: -358.76370414765626
episode: 487 training return: -346.8075498609425
epoch: 122 test_true_pfm: 445.38856640464957 sim_pfm: -307.27986430780294
episode: 488 training return: -329.03248328795473
episode: 489 training return: -346.4981597338703
episode: 490 training return: -358.75300121405553
episode: 491 training return: -361.26893067335976
epoch: 123 test_true_pfm: 486.27557233012993 sim_pfm: -305.35906534548593
episode: 492 training return: -337.9263435085867
episode: 493 training return: -346.28169025087294
episode: 494 training return: -358.284267093296
episode: 495 training return: -349.4117866394464
epoch: 124 test_true_pfm: 458.97501804265954 sim_pfm: -300.9744694361261
episode: 496 training return: -382.7316792906528
episode: 497 training return: -341.8352668990928
episode: 498 training return: -348.1372241432537
episode: 499 training return: -358.04403140104245
epoch: 125 test_true_pfm: 464.84097237560474 sim_pfm: -299.55753842816125
episode: 500 training return: -357.8424826180784
episode: 501 training return: -369.1374178176467
episode: 502 training return: -366.4550939115874
episode: 503 training return: -345.97449330434665
epoch: 126 test_true_pfm: 449.35234336350214 sim_pfm: -294.94581189754996
episode: 504 training return: -341.63638398678586
episode: 505 training return: -337.5231817352471
episode: 506 training return: -378.7677522974851
episode: 507 training return: -323.57570066190925
epoch: 127 test_true_pfm: 476.1217099293503 sim_pfm: -294.0704606250107
episode: 508 training return: -370.8372405028565
episode: 509 training return: -342.9878511566966
episode: 510 training return: -341.4187324135064
episode: 511 training return: -377.10001524332694
epoch: 128 test_true_pfm: 463.82978334346166 sim_pfm: -302.2999468437572
episode: 512 training return: -370.5035637603935
episode: 513 training return: -355.7359282639215
episode: 514 training return: -373.2223568968669
episode: 515 training return: -375.6219921519301
epoch: 129 test_true_pfm: 451.9694365704745 sim_pfm: -303.78404336972136
episode: 516 training return: -367.06891281577435
episode: 517 training return: -340.26149490681564
episode: 518 training return: -329.0545537802589
episode: 519 training return: -331.89175239042953
epoch: 130 test_true_pfm: 579.2654659711937 sim_pfm: -267.9496851079179
episode: 520 training return: -353.3761365506964
episode: 521 training return: -333.9851518934782
episode: 522 training return: -342.5242686296107
episode: 523 training return: -352.2980563209552
epoch: 131 test_true_pfm: 498.5746910378816 sim_pfm: -285.6400775089814
episode: 524 training return: -355.3555374345976
episode: 525 training return: -334.2809771240558
episode: 526 training return: -335.05850896734233
episode: 527 training return: -327.68165216143143
epoch: 132 test_true_pfm: 467.4277595811015 sim_pfm: -309.05719375096055
episode: 528 training return: -355.5981993645022
episode: 529 training return: -353.19696237428434
episode: 530 training return: -350.9786842162385
episode: 531 training return: -354.643880409869
epoch: 133 test_true_pfm: 524.4203325246422 sim_pfm: -282.0490701674989
episode: 532 training return: -371.0536902124678
episode: 533 training return: -359.59334347674405
episode: 534 training return: -335.2964649065742
episode: 535 training return: -352.04471847867677
epoch: 134 test_true_pfm: 430.9211448385895 sim_pfm: -320.2401124221296
episode: 536 training return: -363.3133343839008
episode: 537 training return: -372.8373567464961
episode: 538 training return: -333.3909560630166
episode: 539 training return: -335.4350008564867
epoch: 135 test_true_pfm: 506.6483597708656 sim_pfm: -306.4758503532046
episode: 540 training return: -332.0121863026769
episode: 541 training return: -373.23824938664086
episode: 542 training return: -338.939936918763
episode: 543 training return: -347.34738710346227
epoch: 136 test_true_pfm: 481.18519519833745 sim_pfm: -312.7017102235574
episode: 544 training return: -347.9664984965812
episode: 545 training return: -346.3824309139454
episode: 546 training return: -352.9800640839954
episode: 547 training return: -332.49943386488405
epoch: 137 test_true_pfm: 480.80478941468226 sim_pfm: -303.5437640322533
episode: 548 training return: -339.07691399153515
episode: 549 training return: -335.26261627695055
episode: 550 training return: -345.92091241141515
episode: 551 training return: -322.5189309718542
epoch: 138 test_true_pfm: 461.8792230019381 sim_pfm: -313.31370542443256
episode: 552 training return: -328.9076026913919
episode: 553 training return: -345.41704446280204
episode: 554 training return: -330.00794196210677
episode: 555 training return: -335.5933593430206
epoch: 139 test_true_pfm: 392.93112464192245 sim_pfm: -331.96871567714726
episode: 556 training return: -332.1905223222248
episode: 557 training return: -325.11453693758807
episode: 558 training return: -329.81710226945046
episode: 559 training return: -326.5015104448769
epoch: 140 test_true_pfm: 457.96600457312206 sim_pfm: -304.17850243257755
episode: 560 training return: -337.13861918991614
episode: 561 training return: -336.28606577546213
episode: 562 training return: -353.67497742225174
episode: 563 training return: -363.8661616134917
epoch: 141 test_true_pfm: 467.1417466050309 sim_pfm: -304.2698379719581
episode: 564 training return: -329.8335308075114
episode: 565 training return: -320.6845159684887
episode: 566 training return: -358.5871614184793
episode: 567 training return: -333.6834307108985
epoch: 142 test_true_pfm: 465.73106716596027 sim_pfm: -304.4969748624052
episode: 568 training return: -322.4391203474132
episode: 569 training return: -341.7218350272455
episode: 570 training return: -324.50439140871043
episode: 571 training return: -357.17178441458833
epoch: 143 test_true_pfm: 475.50437362672574 sim_pfm: -300.47515135151906
episode: 572 training return: -330.9607317980321
episode: 573 training return: -339.3884183458335
episode: 574 training return: -354.72772806058623
episode: 575 training return: -346.9825532681659
epoch: 144 test_true_pfm: 507.0687239775093 sim_pfm: -290.598299362669
episode: 576 training return: -323.97319977573534
episode: 577 training return: -361.61075942320053
episode: 578 training return: -326.04103563889396
episode: 579 training return: -349.06659933153776
epoch: 145 test_true_pfm: 530.0740418937479 sim_pfm: -289.7130401285162
episode: 580 training return: -346.5219358332038
episode: 581 training return: -347.3810104308184
episode: 582 training return: -339.6479931723991
episode: 583 training return: -329.9633929074826
epoch: 146 test_true_pfm: 464.86643724880673 sim_pfm: -317.21081251636434
episode: 584 training return: -344.8613798320458
episode: 585 training return: -353.3212897640235
episode: 586 training return: -327.4904780542604
episode: 587 training return: -328.1649731075162
epoch: 147 test_true_pfm: 476.63861747603477 sim_pfm: -301.71232770776754
episode: 588 training return: -328.57658868007695
episode: 589 training return: -376.22079614219047
episode: 590 training return: -345.01890167580257
episode: 591 training return: -326.374735699156
epoch: 148 test_true_pfm: 488.0991355714502 sim_pfm: -293.6639335768786
episode: 592 training return: -339.16490387117034
episode: 593 training return: -331.7815709675314
episode: 594 training return: -316.4982061350227
episode: 595 training return: -344.1709657268542
epoch: 149 test_true_pfm: 470.81762786814556 sim_pfm: -297.87314511070946
episode: 596 training return: -342.4082123261744
episode: 597 training return: -340.01876349560473
episode: 598 training return: -342.41430827100925
episode: 599 training return: -355.97109132085706
epoch: 150 test_true_pfm: 501.6356148475174 sim_pfm: -300.70291620323104
