['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'uncertainty', '--traj', 'expert', '--seed', '1', '--data', '100000']
epoch: 0 training_loss 0.2854555209726095 test_loss: 0.19755051136016846
epoch: 1 training_loss 0.19451433904469012 test_loss: 0.16370121240615845
epoch: 2 training_loss 0.16377456706017257 test_loss: 0.15624666213989258
epoch: 3 training_loss 0.14933222688734532 test_loss: 0.16184183359146118
epoch: 4 training_loss 0.14573600359261035 test_loss: 0.17553458213806153
epoch: 5 training_loss 0.13879128519445658 test_loss: 0.15130513906478882
epoch: 6 training_loss 0.13965376120060682 test_loss: 0.15290381908416747
epoch: 7 training_loss 0.14102797400206327 test_loss: 0.14206923246383668
epoch: 8 training_loss 0.13643905855715274 test_loss: 0.15093040466308594
epoch: 9 training_loss 0.13398179963231086 test_loss: 0.15420079231262207
epoch: 10 training_loss 0.13715078365057706 test_loss: 0.1497629165649414
epoch: 11 training_loss 0.12586685556918384 test_loss: 0.11849544048309327
epoch: 12 training_loss 0.12576684832572937 test_loss: 0.13921362161636353
epoch: 13 training_loss 0.1321801082417369 test_loss: 0.14215049743652344
epoch: 14 training_loss 0.13020590126514434 test_loss: 0.14618322849273682
epoch: 15 training_loss 0.13360451973974705 test_loss: 0.1363008975982666
epoch: 16 training_loss 0.13209906805306673 test_loss: 0.13125623464584352
epoch: 17 training_loss 0.12818254891782999 test_loss: 0.12886859178543092
epoch: 18 training_loss 0.11825743332505226 test_loss: 0.12276464700698853
epoch: 19 training_loss 0.1233059785142541 test_loss: 0.1279388189315796
epoch: 20 training_loss 0.1229337109066546 test_loss: 0.12523194551467895
epoch: 21 training_loss 0.12310803193598986 test_loss: 0.145793879032135
epoch: 22 training_loss 0.12617801446467639 test_loss: 0.11459513902664184
epoch: 23 training_loss 0.1224401219189167 test_loss: 0.12405239343643189
epoch: 24 training_loss 0.12755055356770753 test_loss: 0.1467461347579956
epoch: 25 training_loss 0.12344414085149764 test_loss: 0.11562751531600952
epoch: 26 training_loss 0.1187625957466662 test_loss: 0.16404856443405152
epoch: 27 training_loss 0.11306012066081167 test_loss: 0.1505728244781494
epoch: 28 training_loss 0.1305150505155325 test_loss: 0.12219455242156982
epoch: 29 training_loss 0.12195814181119204 test_loss: 0.13003276586532592
epoch: 30 training_loss 0.11764032926410437 test_loss: 0.12922134399414062
epoch: 31 training_loss 0.12559641230851412 test_loss: 0.14401506185531615
epoch: 32 training_loss 0.11649039223790168 test_loss: 0.11348750591278076
epoch: 33 training_loss 0.11439869118854404 test_loss: 0.12821179628372192
epoch: 34 training_loss 0.12367052797228098 test_loss: 0.11152769327163696
epoch: 35 training_loss 0.11528675850480795 test_loss: 0.12074642181396485
epoch: 36 training_loss 0.1196005268022418 test_loss: 0.12178996801376343
epoch: 37 training_loss 0.12123458847403526 test_loss: 0.13068959712982178
epoch: 38 training_loss 0.12139047209173441 test_loss: 0.14534763097763062
epoch: 39 training_loss 0.12314342673867941 test_loss: 0.11936099529266357
epoch: 40 training_loss 0.12952662952244282 test_loss: 0.14215734004974365
epoch: 41 training_loss 0.1108104533702135 test_loss: 0.11518454551696777
epoch: 42 training_loss 0.1158665420860052 test_loss: 0.1267812132835388
epoch: 43 training_loss 0.12428571920841933 test_loss: 0.12119499444961548
epoch: 44 training_loss 0.12167667601257563 test_loss: 0.11945244073867797
epoch: 45 training_loss 0.12455009158700704 test_loss: 0.1277486801147461
epoch: 46 training_loss 0.12426355578005314 test_loss: 0.11231306791305543
epoch: 47 training_loss 0.1146936785429716 test_loss: 0.12962905168533326
epoch: 48 training_loss 0.11266179637983441 test_loss: 0.12711414098739623
epoch: 49 training_loss 0.11737754106521607 test_loss: 0.11970720291137696
epoch: 50 training_loss 0.1150991127640009 test_loss: 0.12256970405578613
epoch: 51 training_loss 0.1177207163721323 test_loss: 0.1182745337486267
epoch: 52 training_loss 0.12030574139207602 test_loss: 0.11801522970199585
epoch: 53 training_loss 0.12399656016379595 test_loss: 0.12154123783111573
epoch: 54 training_loss 0.11826586853712798 test_loss: 0.11869144439697266
epoch: 55 training_loss 0.12256136555224657 test_loss: 0.1282501697540283
epoch: 56 training_loss 0.11488015521317721 test_loss: 0.11816606521606446
epoch: 57 training_loss 0.11976578501984476 test_loss: 0.14002357721328734
epoch: 58 training_loss 0.12510006878525018 test_loss: 0.12506033182144166
epoch: 59 training_loss 0.12297872249037027 test_loss: 0.11431028842926025
epoch: 60 training_loss 0.12633042223751545 test_loss: 0.1279868245124817
epoch: 61 training_loss 0.10957025479525327 test_loss: 0.11703704595565796
epoch: 62 training_loss 0.12124823808670043 test_loss: 0.10603764057159423
epoch: 63 training_loss 0.12053765937685966 test_loss: 0.13314813375473022
epoch: 64 training_loss 0.11592994479462504 test_loss: 0.11574753522872924
epoch: 65 training_loss 0.11160679891705513 test_loss: 0.1267492651939392
epoch: 66 training_loss 0.1184074966982007 test_loss: 0.1259079933166504
epoch: 67 training_loss 0.11864489153027534 test_loss: 0.12324506044387817
epoch: 68 training_loss 0.12347258880734444 test_loss: 0.12476330995559692
epoch: 69 training_loss 0.12612696427851916 test_loss: 0.11948691606521607
epoch: 70 training_loss 0.12016046756878496 test_loss: 0.1190641164779663
epoch: 71 training_loss 0.11966052174568176 test_loss: 0.11953767538070678
epoch: 72 training_loss 0.11870550744235515 test_loss: 0.1277863621711731
epoch: 73 training_loss 0.116643974930048 test_loss: 0.12417263984680176
epoch: 74 training_loss 0.12420376662164927 test_loss: 0.13027018308639526
epoch: 75 training_loss 0.11541688829660415 test_loss: 0.11262037754058837
epoch: 76 training_loss 0.11209492556750775 test_loss: 0.1131890058517456
epoch: 77 training_loss 0.11875277269631625 test_loss: 0.11030796766281128
epoch: 78 training_loss 0.12371688596904278 test_loss: 0.1255314826965332
epoch: 79 training_loss 0.11975626962259411 test_loss: 0.1323355555534363
epoch: 80 training_loss 0.12050244620069861 test_loss: 0.10444319248199463
epoch: 81 training_loss 0.1155198971182108 test_loss: 0.1332538366317749
epoch: 82 training_loss 0.11537481110543013 test_loss: 0.12706153392791747
epoch: 83 training_loss 0.11501458760350942 test_loss: 0.12032485008239746
epoch: 84 training_loss 0.11968522161245346 test_loss: 0.11680612564086915
epoch: 85 training_loss 0.11875781450420618 test_loss: 0.11529766321182251
epoch: 86 training_loss 0.11836048640310765 test_loss: 0.1288978338241577
epoch: 87 training_loss 0.10867749305441976 test_loss: 0.10745489597320557
epoch: 88 training_loss 0.1113753011263907 test_loss: 0.13544055223464965
epoch: 89 training_loss 0.1127870132215321 test_loss: 0.12401318550109863
epoch: 90 training_loss 0.11575876846909523 test_loss: 0.12761294841766357
epoch: 91 training_loss 0.11973742332309484 test_loss: 0.11331843137741089
epoch: 92 training_loss 0.1147354881465435 test_loss: 0.1263887643814087
epoch: 93 training_loss 0.11322962839156389 test_loss: 0.12904733419418335
epoch: 94 training_loss 0.11354452943429351 test_loss: 0.11795190572738648
epoch: 95 training_loss 0.11144631233066321 test_loss: 0.11782549619674683
epoch: 96 training_loss 0.12145652256906032 test_loss: 0.10692504644393921
epoch: 97 training_loss 0.12094581566751003 test_loss: 0.11992044448852539
epoch: 98 training_loss 0.11096611611545086 test_loss: 0.10976775884628295
epoch: 99 training_loss 0.11544731920585036 test_loss: 0.1279021382331848
epoch: 100 training_loss 0.11951128805056214 test_loss: 0.12815941572189332
epoch: 101 training_loss 0.11407392993569374 test_loss: 0.12226163148880005
epoch: 102 training_loss 0.11678670167922973 test_loss: 0.12655264139175415
epoch: 103 training_loss 0.11859964560717344 test_loss: 0.11836080551147461
epoch: 104 training_loss 0.10856214910745621 test_loss: 0.1359003782272339
epoch: 105 training_loss 0.12026322748512029 test_loss: 0.13493034839630128
epoch: 106 training_loss 0.11601558901369571 test_loss: 0.14894392490386962
epoch: 107 training_loss 0.11146514685824513 test_loss: 0.11045085191726685
epoch: 108 training_loss 0.12490347895771264 test_loss: 0.11652706861495972
epoch: 109 training_loss 0.11727349016815423 test_loss: 0.12105017900466919
epoch: 110 training_loss 0.11717703871428967 test_loss: 0.11501718759536743
epoch: 111 training_loss 0.11569953344762325 test_loss: 0.1147731065750122
epoch: 112 training_loss 0.1213043799251318 test_loss: 0.14087749719619752
epoch: 113 training_loss 0.11583703331649303 test_loss: 0.11459261178970337
epoch: 114 training_loss 0.11986056700348854 test_loss: 0.10668166875839233
epoch: 115 training_loss 0.11359328147023916 test_loss: 0.12299294471740722
epoch: 116 training_loss 0.11247576519846916 test_loss: 0.119231116771698
epoch: 117 training_loss 0.11459758201614022 test_loss: 0.12473570108413697
epoch: 118 training_loss 0.11440788574516773 test_loss: 0.11147842407226563
epoch: 119 training_loss 0.11690326001495123 test_loss: 0.11484733819961548
epoch: 120 training_loss 0.11556516747921705 test_loss: 0.11643110513687134
epoch: 121 training_loss 0.1162798823416233 test_loss: 0.12374391555786132
epoch: 122 training_loss 0.11574905551970005 test_loss: 0.12043060064315796
epoch: 123 training_loss 0.11497418120503426 test_loss: 0.10971295833587646
epoch: 124 training_loss 0.11056633803993464 test_loss: 0.12081214189529418
epoch: 125 training_loss 0.11056372065097093 test_loss: 0.11798661947250366
epoch: 126 training_loss 0.1105443012714386 test_loss: 0.12163169384002685
epoch: 127 training_loss 0.11625975269824267 test_loss: 0.11680500507354737
epoch: 128 training_loss 0.10925556786358356 test_loss: 0.11714798212051392
epoch: 129 training_loss 0.11518018502742051 test_loss: 0.12083927392959595
epoch: 130 training_loss 0.11924600675702095 test_loss: 0.1240798830986023
epoch: 131 training_loss 0.11309308761730791 test_loss: 0.1411502480506897
epoch: 132 training_loss 0.114004209600389 test_loss: 0.11824922561645508
epoch: 133 training_loss 0.11823025396093727 test_loss: 0.1394995331764221
epoch: 134 training_loss 0.10759011123329401 test_loss: 0.12458347082138062
epoch: 135 training_loss 0.10374104145914316 test_loss: 0.12335200309753418
epoch: 136 training_loss 0.10523616645485162 test_loss: 0.11303086280822754
epoch: 137 training_loss 0.1084536400064826 test_loss: 0.12232506275177002
epoch: 138 training_loss 0.11258510168641805 test_loss: 0.11425145864486694
epoch: 139 training_loss 0.10886547520756722 test_loss: 0.11712133884429932
epoch: 140 training_loss 0.12080357536673546 test_loss: 0.12579407691955566
epoch: 141 training_loss 0.11583957204595208 test_loss: 0.1310474991798401
epoch: 142 training_loss 0.1077483076415956 test_loss: 0.12660681009292601
epoch: 143 training_loss 0.11670079577714204 test_loss: 0.10574052333831788
epoch: 144 training_loss 0.11353852726519108 test_loss: 0.12369298934936523
epoch: 145 training_loss 0.10848547147586941 test_loss: 0.13693314790725708
epoch: 146 training_loss 0.11113525830209255 test_loss: 0.12517858743667604
epoch: 147 training_loss 0.11908185413107276 test_loss: 0.1270797848701477
epoch: 148 training_loss 0.12205192245543003 test_loss: 0.14308526515960693
epoch: 149 training_loss 0.12045214645564556 test_loss: 0.12936127185821533
epoch: 0 training_loss 0.27865878127515314 test_loss: 0.21005620956420898
epoch: 1 training_loss 0.1886620232462883 test_loss: 0.1789108157157898
epoch: 2 training_loss 0.17488172493875026 test_loss: 0.15987275838851928
epoch: 3 training_loss 0.15150494601577522 test_loss: 0.15697406530380248
epoch: 4 training_loss 0.1551494375988841 test_loss: 0.15572597980499267
epoch: 5 training_loss 0.14649682324379681 test_loss: 0.1599169611930847
epoch: 6 training_loss 0.1402651111409068 test_loss: 0.1525449275970459
epoch: 7 training_loss 0.14310796238482 test_loss: 0.1347676157951355
epoch: 8 training_loss 0.15645052317529917 test_loss: 0.16616834402084352
epoch: 9 training_loss 0.13600247494876386 test_loss: 0.13540836572647094
epoch: 10 training_loss 0.1453969058766961 test_loss: 0.13835585117340088
epoch: 11 training_loss 0.12695968989282846 test_loss: 0.13176367282867432
epoch: 12 training_loss 0.12172823399305344 test_loss: 0.16450668573379518
epoch: 13 training_loss 0.13890350989997388 test_loss: 0.15252377986907958
epoch: 14 training_loss 0.13286420725286008 test_loss: 0.1508077025413513
epoch: 15 training_loss 0.12519018091261386 test_loss: 0.13411093950271608
epoch: 16 training_loss 0.12378796517848968 test_loss: 0.14025207757949829
epoch: 17 training_loss 0.12159951671957969 test_loss: 0.16244741678237914
epoch: 18 training_loss 0.1250134650245309 test_loss: 0.12738555669784546
epoch: 19 training_loss 0.1290642327070236 test_loss: 0.14243583679199218
epoch: 20 training_loss 0.12879470463842155 test_loss: 0.14406266212463378
epoch: 21 training_loss 0.114905253238976 test_loss: 0.1293838143348694
epoch: 22 training_loss 0.13406866531819106 test_loss: 0.1483738899230957
epoch: 23 training_loss 0.12686496935784816 test_loss: 0.14518800973892212
epoch: 24 training_loss 0.11860835570842028 test_loss: 0.14417024850845336
epoch: 25 training_loss 0.12800862703472377 test_loss: 0.1472417950630188
epoch: 26 training_loss 0.11912129640579223 test_loss: 0.1505696654319763
epoch: 27 training_loss 0.12174097556620836 test_loss: 0.14507192373275757
epoch: 28 training_loss 0.12825136926025152 test_loss: 0.12278021574020385
epoch: 29 training_loss 0.12002551287412644 test_loss: 0.13763095140457154
epoch: 30 training_loss 0.12145568722859025 test_loss: 0.1170575737953186
epoch: 31 training_loss 0.12583283107727766 test_loss: 0.15802561044692992
epoch: 32 training_loss 0.11929606953635812 test_loss: 0.11586753129959107
epoch: 33 training_loss 0.12180349368602038 test_loss: 0.15176918506622314
epoch: 34 training_loss 0.12433607760816813 test_loss: 0.14379881620407103
epoch: 35 training_loss 0.11794141624122859 test_loss: 0.12038112878799438
epoch: 36 training_loss 0.11879937332123518 test_loss: 0.1450006127357483
epoch: 37 training_loss 0.12783516447991133 test_loss: 0.1427815794944763
epoch: 38 training_loss 0.11958429846912623 test_loss: 0.12955136299133302
epoch: 39 training_loss 0.1209792684391141 test_loss: 0.13199172019958497
epoch: 40 training_loss 0.12258752871304751 test_loss: 0.1595823049545288
epoch: 41 training_loss 0.1237035846337676 test_loss: 0.1244653582572937
epoch: 42 training_loss 0.12039392728358507 test_loss: 0.13205068111419677
epoch: 43 training_loss 0.12456636555492878 test_loss: 0.13419082164764404
epoch: 44 training_loss 0.12573449186980723 test_loss: 0.12022351026535034
epoch: 45 training_loss 0.10780223328620195 test_loss: 0.15739634037017822
epoch: 46 training_loss 0.11846943825483322 test_loss: 0.11978647708892823
epoch: 47 training_loss 0.1234372055530548 test_loss: 0.13280811309814453
epoch: 48 training_loss 0.11737075582146644 test_loss: 0.1143532395362854
epoch: 49 training_loss 0.12088039796799421 test_loss: 0.12869058847427367
epoch: 50 training_loss 0.1279401697218418 test_loss: 0.12652541399002076
epoch: 51 training_loss 0.10647252805531025 test_loss: 0.12011913061141968
epoch: 52 training_loss 0.12055974490940571 test_loss: 0.14316834211349488
epoch: 53 training_loss 0.11784391902387142 test_loss: 0.1405272126197815
epoch: 54 training_loss 0.11294943563640118 test_loss: 0.13686439990997315
epoch: 55 training_loss 0.115224234983325 test_loss: 0.12363358736038207
epoch: 56 training_loss 0.11522786749526859 test_loss: 0.15397636890411376
epoch: 57 training_loss 0.12689010305330156 test_loss: 0.15417336225509642
epoch: 58 training_loss 0.11570227524265647 test_loss: 0.14154744148254395
epoch: 59 training_loss 0.12203572332859039 test_loss: 0.1316521406173706
epoch: 60 training_loss 0.1175751792266965 test_loss: 0.13028438091278077
epoch: 61 training_loss 0.119427734836936 test_loss: 0.13433482646942138
epoch: 62 training_loss 0.11521183313801885 test_loss: 0.12859200239181517
epoch: 63 training_loss 0.12109658788889646 test_loss: 0.13996694087982178
epoch: 64 training_loss 0.11888756729662418 test_loss: 0.12539552450180053
epoch: 65 training_loss 0.1187433072924614 test_loss: 0.1316828727722168
epoch: 66 training_loss 0.11247777953743934 test_loss: 0.12635295391082763
epoch: 67 training_loss 0.1111412657238543 test_loss: 0.13515760898590087
epoch: 68 training_loss 0.1176548987068236 test_loss: 0.14146311283111573
epoch: 69 training_loss 0.11926376357674599 test_loss: 0.12707282304763795
epoch: 70 training_loss 0.11681134529411792 test_loss: 0.12505027055740356
epoch: 71 training_loss 0.11463181030005216 test_loss: 0.12829890251159667
epoch: 72 training_loss 0.11885744541883468 test_loss: 0.11652514934539795
epoch: 73 training_loss 0.11912517644464969 test_loss: 0.1214408278465271
epoch: 74 training_loss 0.1202037413045764 test_loss: 0.13262939453125
epoch: 75 training_loss 0.11171335112303496 test_loss: 0.14022353887557984
epoch: 76 training_loss 0.11342448707669973 test_loss: 0.11829829216003418
epoch: 77 training_loss 0.10839600738137961 test_loss: 0.13114783763885499
epoch: 78 training_loss 0.11022476308047771 test_loss: 0.1276279091835022
epoch: 79 training_loss 0.11471619267016649 test_loss: 0.1455865740776062
epoch: 80 training_loss 0.11707466632127762 test_loss: 0.12949246168136597
epoch: 81 training_loss 0.11918605368584395 test_loss: 0.1289723753929138
epoch: 82 training_loss 0.10924860086292028 test_loss: 0.14019448757171632
epoch: 83 training_loss 0.10941649558022619 test_loss: 0.13832725286483766
epoch: 84 training_loss 0.11813206125050783 test_loss: 0.13393607139587402
epoch: 85 training_loss 0.10950756072998047 test_loss: 0.13678885698318483
epoch: 86 training_loss 0.11930281328037382 test_loss: 0.12480341196060181
epoch: 87 training_loss 0.11380469180643558 test_loss: 0.12618212699890136
epoch: 88 training_loss 0.11279133613221347 test_loss: 0.1276415467262268
epoch: 89 training_loss 0.12417815836146474 test_loss: 0.12090202569961547
epoch: 90 training_loss 0.12001485921442509 test_loss: 0.13042812347412108
epoch: 91 training_loss 0.10817652251571416 test_loss: 0.1498147130012512
epoch: 92 training_loss 0.11507446881383658 test_loss: 0.12494443655014038
epoch: 93 training_loss 0.11659861128777266 test_loss: 0.12356045246124267
epoch: 94 training_loss 0.1169467282667756 test_loss: 0.12314120531082154
epoch: 95 training_loss 0.10777250733226537 test_loss: 0.11696208715438842
epoch: 96 training_loss 0.11889622002840042 test_loss: 0.1485637903213501
epoch: 97 training_loss 0.12010165009647608 test_loss: 0.11778290271759033
epoch: 98 training_loss 0.12404682099819184 test_loss: 0.11284302473068238
epoch: 99 training_loss 0.11078630533069372 test_loss: 0.12405070066452026
epoch: 100 training_loss 0.11730932909995317 test_loss: 0.13867155313491822
epoch: 101 training_loss 0.11076378107070922 test_loss: 0.1312998652458191
epoch: 102 training_loss 0.1216044520586729 test_loss: 0.13244287967681884
epoch: 103 training_loss 0.11737362224608659 test_loss: 0.125488805770874
epoch: 104 training_loss 0.11122313549742102 test_loss: 0.12776408195495606
epoch: 105 training_loss 0.11744872763752938 test_loss: 0.11108248233795166
epoch: 106 training_loss 0.11100332215428352 test_loss: 0.13004052639007568
epoch: 107 training_loss 0.11494838271290064 test_loss: 0.12615761756896973
epoch: 108 training_loss 0.11482950653880834 test_loss: 0.1354006052017212
epoch: 109 training_loss 0.10994996812194585 test_loss: 0.12262599468231201
epoch: 110 training_loss 0.11781136807054281 test_loss: 0.15575467348098754
epoch: 111 training_loss 0.10716236038133502 test_loss: 0.12717512845993043
epoch: 112 training_loss 0.11772756338119507 test_loss: 0.12443212270736695
epoch: 113 training_loss 0.12137244835495949 test_loss: 0.12348442077636719
epoch: 114 training_loss 0.12031929355114698 test_loss: 0.1287570118904114
epoch: 115 training_loss 0.11132916763424873 test_loss: 0.12673606872558593
epoch: 116 training_loss 0.11054163238033653 test_loss: 0.13228812217712402
epoch: 117 training_loss 0.11805527944117784 test_loss: 0.14271442890167235
epoch: 118 training_loss 0.11643636494874954 test_loss: 0.128957200050354
epoch: 119 training_loss 0.12080898376181722 test_loss: 0.12149982452392578
epoch: 120 training_loss 0.11533021681010723 test_loss: 0.13253133296966552
epoch: 121 training_loss 0.11080606836825609 test_loss: 0.12245084047317505
epoch: 122 training_loss 0.11269035376608372 test_loss: 0.12003660202026367
epoch: 123 training_loss 0.11001835664734244 test_loss: 0.13011314868927001
epoch: 124 training_loss 0.11762308552861214 test_loss: 0.11631731986999512
epoch: 125 training_loss 0.11377369165420533 test_loss: 0.1289319396018982
epoch: 126 training_loss 0.11281685212627053 test_loss: 0.1281578540802002
epoch: 127 training_loss 0.11463958568871022 test_loss: 0.1332496166229248
epoch: 128 training_loss 0.10881998337805271 test_loss: 0.12534079551696778
epoch: 129 training_loss 0.1016469268873334 test_loss: 0.10599230527877808
epoch: 130 training_loss 0.11217773035168647 test_loss: 0.12217376232147217
epoch: 131 training_loss 0.10715037096291781 test_loss: 0.13098492622375488
epoch: 132 training_loss 0.11025937478989363 test_loss: 0.12251768112182618
epoch: 133 training_loss 0.12218721143901348 test_loss: 0.11384074687957764
epoch: 134 training_loss 0.11214897613972426 test_loss: 0.1303221344947815
epoch: 135 training_loss 0.10861089713871479 test_loss: 0.12743014097213745
epoch: 136 training_loss 0.10926568750292062 test_loss: 0.1379006624221802
epoch: 137 training_loss 0.11642873723059893 test_loss: 0.12865148782730101
epoch: 138 training_loss 0.11622125297784805 test_loss: 0.1486952304840088
epoch: 139 training_loss 0.11297364015132189 test_loss: 0.10641803741455078
epoch: 140 training_loss 0.11162873603403568 test_loss: 0.12452507019042969
epoch: 141 training_loss 0.1226263939216733 test_loss: 0.12463278770446777
epoch: 142 training_loss 0.12074867192655801 test_loss: 0.13614914417266846
epoch: 143 training_loss 0.11219303630292415 test_loss: 0.13775651454925536
epoch: 144 training_loss 0.11728809084743261 test_loss: 0.12239985466003418
epoch: 145 training_loss 0.11742767184972763 test_loss: 0.13042640686035156
epoch: 146 training_loss 0.11453691070899367 test_loss: 0.11516411304473877
epoch: 147 training_loss 0.10257184004411102 test_loss: 0.1440395712852478
epoch: 148 training_loss 0.12270748812705279 test_loss: 0.13503124713897705
epoch: 149 training_loss 0.11197686519473792 test_loss: 0.1318222999572754
epoch: 0 training_loss 0.27152070455253124 test_loss: 0.19242852926254272
epoch: 1 training_loss 0.1852305217087269 test_loss: 0.1755989670753479
epoch: 2 training_loss 0.16596357472240925 test_loss: 0.15238138437271118
epoch: 3 training_loss 0.1596117678284645 test_loss: 0.15510804653167726
epoch: 4 training_loss 0.1508043084666133 test_loss: 0.1318729043006897
epoch: 5 training_loss 0.14650200944393874 test_loss: 0.14739663600921632
epoch: 6 training_loss 0.15212831154465675 test_loss: 0.16419850587844848
epoch: 7 training_loss 0.13100167755037545 test_loss: 0.16125385761260985
epoch: 8 training_loss 0.14928512185811996 test_loss: 0.16243900060653688
epoch: 9 training_loss 0.13852447502315043 test_loss: 0.16248204708099365
epoch: 10 training_loss 0.13371217798441648 test_loss: 0.14076852798461914
epoch: 11 training_loss 0.13918035961687564 test_loss: 0.12463030815124512
epoch: 12 training_loss 0.1475683695822954 test_loss: 0.1760542869567871
epoch: 13 training_loss 0.1320663446933031 test_loss: 0.1341044068336487
epoch: 14 training_loss 0.13112605966627597 test_loss: 0.12679098844528197
epoch: 15 training_loss 0.12384041007608175 test_loss: 0.14287592172622682
epoch: 16 training_loss 0.13063096009194852 test_loss: 0.13062832355499268
epoch: 17 training_loss 0.1289286450855434 test_loss: 0.14070696830749513
epoch: 18 training_loss 0.12144652873277664 test_loss: 0.12182432413101196
epoch: 19 training_loss 0.1321493586152792 test_loss: 0.12787379026412965
epoch: 20 training_loss 0.1239466291666031 test_loss: 0.12223271131515503
epoch: 21 training_loss 0.12560146804898978 test_loss: 0.10984613895416259
epoch: 22 training_loss 0.13133347705006598 test_loss: 0.1237485408782959
epoch: 23 training_loss 0.12349657746031881 test_loss: 0.13153221607208251
epoch: 24 training_loss 0.12829469338059427 test_loss: 0.12772367000579835
epoch: 25 training_loss 0.12249346792697907 test_loss: 0.1319169282913208
epoch: 26 training_loss 0.13069370958954096 test_loss: 0.11929949522018432
epoch: 27 training_loss 0.11392244361341 test_loss: 0.1235429048538208
epoch: 28 training_loss 0.12465544721111656 test_loss: 0.15806810855865477
epoch: 29 training_loss 0.1255535303801298 test_loss: 0.11365941762924195
epoch: 30 training_loss 0.1168707412853837 test_loss: 0.14277162551879882
epoch: 31 training_loss 0.11658847980201244 test_loss: 0.10685454607009888
epoch: 32 training_loss 0.12140214771032333 test_loss: 0.1073913812637329
epoch: 33 training_loss 0.12767511416226626 test_loss: 0.12491286993026733
epoch: 34 training_loss 0.11702988237142563 test_loss: 0.11817176342010498
epoch: 35 training_loss 0.11921850889921189 test_loss: 0.122971510887146
epoch: 36 training_loss 0.11616014670580625 test_loss: 0.17588455677032472
epoch: 37 training_loss 0.11737101837992668 test_loss: 0.12384167909622193
epoch: 38 training_loss 0.1264687520265579 test_loss: 0.1228177547454834
epoch: 39 training_loss 0.11659481067210437 test_loss: 0.10949978828430176
epoch: 40 training_loss 0.12706176102161407 test_loss: 0.1396775245666504
epoch: 41 training_loss 0.1234652940928936 test_loss: 0.1324314832687378
epoch: 42 training_loss 0.12439759079366923 test_loss: 0.12740345001220704
epoch: 43 training_loss 0.1310653876233846 test_loss: 0.12200932502746582
epoch: 44 training_loss 0.12373353846371174 test_loss: 0.1246779203414917
epoch: 45 training_loss 0.12358680255711078 test_loss: 0.12517842054367065
epoch: 46 training_loss 0.11657934933900833 test_loss: 0.1349927544593811
epoch: 47 training_loss 0.11757289730012417 test_loss: 0.13008768558502198
epoch: 48 training_loss 0.11803497646003962 test_loss: 0.112486732006073
epoch: 49 training_loss 0.10753202160820365 test_loss: 0.1149362325668335
epoch: 50 training_loss 0.12007643859833479 test_loss: 0.1251070261001587
epoch: 51 training_loss 0.10953508023172617 test_loss: 0.11754850149154664
epoch: 52 training_loss 0.12199349626898766 test_loss: 0.12958790063858033
epoch: 53 training_loss 0.12059459026902913 test_loss: 0.11349767446517944
epoch: 54 training_loss 0.11637627832591534 test_loss: 0.12389705181121827
epoch: 55 training_loss 0.11542278992012144 test_loss: 0.12670170068740844
epoch: 56 training_loss 0.12157175358384847 test_loss: 0.11059683561325073
epoch: 57 training_loss 0.12374814100563526 test_loss: 0.12490078210830688
epoch: 58 training_loss 0.11834413453936576 test_loss: 0.1318243145942688
epoch: 59 training_loss 0.11550072126090527 test_loss: 0.11524002552032471
epoch: 60 training_loss 0.10921371042728424 test_loss: 0.12754499912261963
epoch: 61 training_loss 0.11425073839724063 test_loss: 0.12118710279464721
epoch: 62 training_loss 0.11285857087001204 test_loss: 0.12594534158706666
epoch: 63 training_loss 0.12876444552093744 test_loss: 0.10763773918151856
epoch: 64 training_loss 0.11985038205981255 test_loss: 0.1310137391090393
epoch: 65 training_loss 0.12403715539723635 test_loss: 0.1171783447265625
epoch: 66 training_loss 0.11934361204504967 test_loss: 0.11958630084991455
epoch: 67 training_loss 0.1246173532679677 test_loss: 0.1424519181251526
epoch: 68 training_loss 0.12163305727764964 test_loss: 0.12715524435043335
epoch: 69 training_loss 0.11913536556065082 test_loss: 0.12939072847366334
epoch: 70 training_loss 0.11741070747375489 test_loss: 0.12188888788223266
epoch: 71 training_loss 0.12485490964725614 test_loss: 0.15131338834762573
epoch: 72 training_loss 0.11537858936935663 test_loss: 0.11730962991714478
epoch: 73 training_loss 0.11114705454558134 test_loss: 0.11839809417724609
epoch: 74 training_loss 0.11927711989730597 test_loss: 0.12013814449310303
epoch: 75 training_loss 0.11840928602963686 test_loss: 0.11473599672317505
epoch: 76 training_loss 0.12011469902470708 test_loss: 0.12085937261581421
epoch: 77 training_loss 0.11463494583964348 test_loss: 0.12424871921539307
epoch: 78 training_loss 0.11690250020474195 test_loss: 0.13831838369369506
epoch: 79 training_loss 0.11562691509723663 test_loss: 0.11379811763763428
epoch: 80 training_loss 0.10980600018054247 test_loss: 0.11336216926574708
epoch: 81 training_loss 0.11229762084782123 test_loss: 0.1301702857017517
epoch: 82 training_loss 0.11213900912553072 test_loss: 0.12422090768814087
epoch: 83 training_loss 0.12178455155342817 test_loss: 0.13383060693740845
epoch: 84 training_loss 0.11186577238142491 test_loss: 0.1219962477684021
epoch: 85 training_loss 0.11311473367735743 test_loss: 0.12906355857849122
epoch: 86 training_loss 0.10800471035763622 test_loss: 0.10868982076644898
epoch: 87 training_loss 0.12046945415437221 test_loss: 0.12682597637176513
epoch: 88 training_loss 0.11670688286423683 test_loss: 0.10902535915374756
epoch: 89 training_loss 0.11057744663208723 test_loss: 0.12243739366531373
epoch: 90 training_loss 0.11194653002545238 test_loss: 0.14535051584243774
epoch: 91 training_loss 0.11662111345678568 test_loss: 0.14068735837936402
epoch: 92 training_loss 0.12206734269857407 test_loss: 0.12507468461990356
epoch: 93 training_loss 0.11952634040266276 test_loss: 0.12587035894393922
epoch: 94 training_loss 0.13367187578231096 test_loss: 0.10845445394515991
epoch: 95 training_loss 0.11418841050937772 test_loss: 0.15064694881439208
epoch: 96 training_loss 0.12028700385242701 test_loss: 0.10261026620864869
epoch: 97 training_loss 0.10617807613685727 test_loss: 0.1362399935722351
epoch: 98 training_loss 0.11129751710221171 test_loss: 0.12939521074295043
epoch: 99 training_loss 0.1156930885463953 test_loss: 0.11037062406539917
epoch: 100 training_loss 0.11968132857233287 test_loss: 0.10617038011550903
epoch: 101 training_loss 0.11864516992121935 test_loss: 0.11140550374984741
epoch: 102 training_loss 0.12264448940753937 test_loss: 0.11984243392944335
epoch: 103 training_loss 0.11352929547429085 test_loss: 0.10374294519424439
epoch: 104 training_loss 0.1139846932515502 test_loss: 0.13742843866348267
epoch: 105 training_loss 0.10897629262879491 test_loss: 0.12269318103790283
epoch: 106 training_loss 0.11716253463178873 test_loss: 0.10011163949966431
epoch: 107 training_loss 0.12102660177275539 test_loss: 0.12745320796966553
epoch: 108 training_loss 0.120099719427526 test_loss: 0.14147984981536865
epoch: 109 training_loss 0.12010120756924153 test_loss: 0.10700372457504273
epoch: 110 training_loss 0.11228022262454034 test_loss: 0.08342365026474
epoch: 111 training_loss 0.10706560796126724 test_loss: 0.13916414976119995
epoch: 112 training_loss 0.11574708770960569 test_loss: 0.112828528881073
epoch: 113 training_loss 0.11379927296191454 test_loss: 0.11705306768417359
epoch: 114 training_loss 0.10785716440528631 test_loss: 0.13649924993515014
epoch: 115 training_loss 0.1129695956595242 test_loss: 0.10865732431411743
epoch: 116 training_loss 0.11206110093742609 test_loss: 0.13000460863113403
epoch: 117 training_loss 0.11044626815244556 test_loss: 0.12941514253616332
epoch: 118 training_loss 0.11594154004007579 test_loss: 0.12350170612335205
epoch: 119 training_loss 0.11431291643530131 test_loss: 0.12121546268463135
epoch: 120 training_loss 0.10968391643837094 test_loss: 0.12218918800354003
epoch: 121 training_loss 0.11471746619790793 test_loss: 0.1213448166847229
epoch: 122 training_loss 0.10815110091120005 test_loss: 0.1038507103919983
epoch: 123 training_loss 0.10797421948984265 test_loss: 0.12341368198394775
epoch: 124 training_loss 0.11559539519250393 test_loss: 0.13789721727371215
epoch: 125 training_loss 0.10410460149869323 test_loss: 0.10721114873886109
epoch: 126 training_loss 0.11428563063964248 test_loss: 0.10535567998886108
epoch: 127 training_loss 0.11090237341821194 test_loss: 0.12723506689071656
epoch: 128 training_loss 0.10739964682608844 test_loss: 0.11724841594696045
epoch: 129 training_loss 0.12131045885384083 test_loss: 0.12434699535369872
epoch: 130 training_loss 0.11514701567590237 test_loss: 0.11161359548568725
epoch: 131 training_loss 0.10764924809336662 test_loss: 0.12600102424621581
epoch: 132 training_loss 0.11429138261824846 test_loss: 0.11591342687606812
epoch: 133 training_loss 0.10766425494104624 test_loss: 0.11451488733291626
epoch: 134 training_loss 0.10606029856950044 test_loss: 0.10857964754104614
epoch: 135 training_loss 0.11545976845547556 test_loss: 0.11930731534957886
epoch: 136 training_loss 0.10638962576165795 test_loss: 0.13431620597839355
epoch: 137 training_loss 0.11407475940883159 test_loss: 0.12027180194854736
epoch: 138 training_loss 0.11193219266831875 test_loss: 0.135598087310791
epoch: 139 training_loss 0.1166742230206728 test_loss: 0.1225056529045105
epoch: 140 training_loss 0.1117995847389102 test_loss: 0.12953290939331055
epoch: 141 training_loss 0.1109218580648303 test_loss: 0.10510846376419067
epoch: 142 training_loss 0.11379117770120502 test_loss: 0.1331445097923279
epoch: 143 training_loss 0.12004103058949113 test_loss: 0.13216592073440553
epoch: 144 training_loss 0.11653313059359789 test_loss: 0.13675732612609864
epoch: 145 training_loss 0.11128031264990568 test_loss: 0.10916351079940796
epoch: 146 training_loss 0.11546641064807772 test_loss: 0.10822309255599975
epoch: 147 training_loss 0.10916583547368645 test_loss: 0.09766881465911866
epoch: 148 training_loss 0.11202072195708751 test_loss: 0.10827349424362183
epoch: 149 training_loss 0.1141220324859023 test_loss: 0.1335626721382141
epoch: 0 training_loss 0.28961591407656667 test_loss: 0.20084631443023682
epoch: 1 training_loss 0.18844454638659955 test_loss: 0.17644312381744384
epoch: 2 training_loss 0.1757381419837475 test_loss: 0.1844666123390198
epoch: 3 training_loss 0.1636599575355649 test_loss: 0.1894747018814087
epoch: 4 training_loss 0.1620495254918933 test_loss: 0.17832324504852295
epoch: 5 training_loss 0.1508781375363469 test_loss: 0.14436417818069458
epoch: 6 training_loss 0.14736652195453645 test_loss: 0.14624347686767578
epoch: 7 training_loss 0.1473784054815769 test_loss: 0.15411972999572754
epoch: 8 training_loss 0.1333950721472502 test_loss: 0.15072360038757324
epoch: 9 training_loss 0.12707087241113185 test_loss: 0.12635364532470703
epoch: 10 training_loss 0.13972692511975765 test_loss: 0.14257913827896118
epoch: 11 training_loss 0.1372876986861229 test_loss: 0.1335233449935913
epoch: 12 training_loss 0.13379879549145698 test_loss: 0.1363855004310608
epoch: 13 training_loss 0.13538918159902097 test_loss: 0.12854576110839844
epoch: 14 training_loss 0.13520847339183092 test_loss: 0.14472289085388185
epoch: 15 training_loss 0.13714789424091578 test_loss: 0.1269700288772583
epoch: 16 training_loss 0.12824043337255717 test_loss: 0.142233943939209
epoch: 17 training_loss 0.13442980036139487 test_loss: 0.1521517276763916
epoch: 18 training_loss 0.12797570787370205 test_loss: 0.13729331493377686
epoch: 19 training_loss 0.13314923364669085 test_loss: 0.14675246477127074
epoch: 20 training_loss 0.12684749074280263 test_loss: 0.15562499761581422
epoch: 21 training_loss 0.13265414115041493 test_loss: 0.12199277877807617
epoch: 22 training_loss 0.1240025707706809 test_loss: 0.15678436756134034
epoch: 23 training_loss 0.11967013988643885 test_loss: 0.14468760490417482
epoch: 24 training_loss 0.11906513977795839 test_loss: 0.13483585119247438
epoch: 25 training_loss 0.1302381530404091 test_loss: 0.12678490877151488
epoch: 26 training_loss 0.12621908079832792 test_loss: 0.12672362327575684
epoch: 27 training_loss 0.12376195173710584 test_loss: 0.1878973960876465
epoch: 28 training_loss 0.12832627605646849 test_loss: 0.13187316656112671
epoch: 29 training_loss 0.12166115280240775 test_loss: 0.13216426372528076
epoch: 30 training_loss 0.13139648210257293 test_loss: 0.14288197755813598
epoch: 31 training_loss 0.1231696879118681 test_loss: 0.11929817199707031
epoch: 32 training_loss 0.12677205175161363 test_loss: 0.12816925048828126
epoch: 33 training_loss 0.12743549592792988 test_loss: 0.12704479694366455
epoch: 34 training_loss 0.12212456651031971 test_loss: 0.11007579565048217
epoch: 35 training_loss 0.12139963980764151 test_loss: 0.13102601766586303
epoch: 36 training_loss 0.12131103824824095 test_loss: 0.11688140630722046
epoch: 37 training_loss 0.1313368086144328 test_loss: 0.1375044345855713
epoch: 38 training_loss 0.12402890551835298 test_loss: 0.11796211004257202
epoch: 39 training_loss 0.12025204293429852 test_loss: 0.10655124187469482
epoch: 40 training_loss 0.12155826617032289 test_loss: 0.13607059717178344
epoch: 41 training_loss 0.11958848606795072 test_loss: 0.12475916147232055
epoch: 42 training_loss 0.11843774531036616 test_loss: 0.11058595180511474
epoch: 43 training_loss 0.11855394702404737 test_loss: 0.14272791147232056
epoch: 44 training_loss 0.12222280494868755 test_loss: 0.13058687448501588
epoch: 45 training_loss 0.12748696509748697 test_loss: 0.12387398481369019
epoch: 46 training_loss 0.10938847683370113 test_loss: 0.1441725969314575
epoch: 47 training_loss 0.12944402649998665 test_loss: 0.12487729787826538
epoch: 48 training_loss 0.11304930744692683 test_loss: 0.12042856216430664
epoch: 49 training_loss 0.12249170988798141 test_loss: 0.1224021315574646
epoch: 50 training_loss 0.11930016860365868 test_loss: 0.12013729810714721
epoch: 51 training_loss 0.11956678222864867 test_loss: 0.12651091814041138
epoch: 52 training_loss 0.1190733066946268 test_loss: 0.13837937116622925
epoch: 53 training_loss 0.1140787436440587 test_loss: 0.10379778146743775
epoch: 54 training_loss 0.11734170116484165 test_loss: 0.10959526300430297
epoch: 55 training_loss 0.11582023121416568 test_loss: 0.134671151638031
epoch: 56 training_loss 0.11712646946310996 test_loss: 0.11393176317214966
epoch: 57 training_loss 0.11757383283227682 test_loss: 0.12216187715530395
epoch: 58 training_loss 0.10900608483701944 test_loss: 0.12598111629486083
epoch: 59 training_loss 0.1211010256409645 test_loss: 0.12386770248413086
epoch: 60 training_loss 0.12700055250898004 test_loss: 0.12023301124572754
epoch: 61 training_loss 0.11660214100033045 test_loss: 0.11554886102676391
epoch: 62 training_loss 0.124215945340693 test_loss: 0.11072206497192383
epoch: 63 training_loss 0.10955592803657055 test_loss: 0.12246417999267578
epoch: 64 training_loss 0.11764346538111567 test_loss: 0.12296153306961059
epoch: 65 training_loss 0.1044155465811491 test_loss: 0.1089217185974121
epoch: 66 training_loss 0.11528773494064808 test_loss: 0.11591130495071411
epoch: 67 training_loss 0.12415176149457693 test_loss: 0.11987129449844361
epoch: 68 training_loss 0.12111732769757509 test_loss: 0.10282779932022094
epoch: 69 training_loss 0.11644770696759224 test_loss: 0.12053228616714477
epoch: 70 training_loss 0.11535729814320803 test_loss: 0.12383365631103516
epoch: 71 training_loss 0.11850288562476635 test_loss: 0.13438106775283815
epoch: 72 training_loss 0.11406249705702066 test_loss: 0.12308841943740845
epoch: 73 training_loss 0.11323446173220873 test_loss: 0.12092456817626954
epoch: 74 training_loss 0.11793507155030966 test_loss: 0.1174052596092224
epoch: 75 training_loss 0.1168435220234096 test_loss: 0.12939913272857667
epoch: 76 training_loss 0.12057199932634831 test_loss: 0.12087687253952026
epoch: 77 training_loss 0.12014896795153618 test_loss: 0.13738549947738649
epoch: 78 training_loss 0.11822439476847649 test_loss: 0.1283334493637085
epoch: 79 training_loss 0.12375705268234015 test_loss: 0.13795464038848876
epoch: 80 training_loss 0.11449004337191582 test_loss: 0.11097936630249024
epoch: 81 training_loss 0.12121472857892514 test_loss: 0.11730767488479614
epoch: 82 training_loss 0.11732226986438037 test_loss: 0.11647969484329224
epoch: 83 training_loss 0.11211320124566555 test_loss: 0.13165178298950195
epoch: 84 training_loss 0.1251848154515028 test_loss: 0.13721895217895508
epoch: 85 training_loss 0.11932540640234947 test_loss: 0.12018039226531982
epoch: 86 training_loss 0.11721393585205078 test_loss: 0.1246234655380249
epoch: 87 training_loss 0.11193378206342458 test_loss: 0.11942156553268432
epoch: 88 training_loss 0.1134264837577939 test_loss: 0.13122012615203857
epoch: 89 training_loss 0.11713725252076984 test_loss: 0.13367069959640504
epoch: 90 training_loss 0.11826397934928537 test_loss: 0.11560155153274536
epoch: 91 training_loss 0.11613240100443363 test_loss: 0.12452158927917481
epoch: 92 training_loss 0.10962332122027874 test_loss: 0.11631046533584595
epoch: 93 training_loss 0.11725563954561949 test_loss: 0.13587424755096436
epoch: 94 training_loss 0.11537945944815874 test_loss: 0.13250732421875
epoch: 95 training_loss 0.11146576460450888 test_loss: 0.11720882654190064
epoch: 96 training_loss 0.11972975654527546 test_loss: 0.1398900866508484
epoch: 97 training_loss 0.11475718881934881 test_loss: 0.1279921293258667
epoch: 98 training_loss 0.12182110290974378 test_loss: 0.10441956520080567
epoch: 99 training_loss 0.11631684623658657 test_loss: 0.14161609411239623
epoch: 100 training_loss 0.11546104550361633 test_loss: 0.11106604337692261
epoch: 101 training_loss 0.11391454130411148 test_loss: 0.11126042604446411
epoch: 102 training_loss 0.11376681361347436 test_loss: 0.11426796913146972
epoch: 103 training_loss 0.11396146330982447 test_loss: 0.11882821321487427
epoch: 104 training_loss 0.10951911391690373 test_loss: 0.13188430070877075
epoch: 105 training_loss 0.12006533298641443 test_loss: 0.11265825033187866
epoch: 106 training_loss 0.11751067508012056 test_loss: 0.1177642822265625
epoch: 107 training_loss 0.11453970529139042 test_loss: 0.13281009197235108
epoch: 108 training_loss 0.11906012516468763 test_loss: 0.11557284593582154
epoch: 109 training_loss 0.11026964291930198 test_loss: 0.11913201808929444
epoch: 110 training_loss 0.11262401349842549 test_loss: 0.11088627576828003
epoch: 111 training_loss 0.1083598898909986 test_loss: 0.14769160747528076
epoch: 112 training_loss 0.10955619622021913 test_loss: 0.11022976636886597
epoch: 113 training_loss 0.10970855209976435 test_loss: 0.13256913423538208
epoch: 114 training_loss 0.11426767259836197 test_loss: 0.10842232704162598
epoch: 115 training_loss 0.11235371058806777 test_loss: 0.12356414794921874
epoch: 116 training_loss 0.11795657901093363 test_loss: 0.11714290380477906
epoch: 117 training_loss 0.11549743194133043 test_loss: 0.1277581810951233
epoch: 118 training_loss 0.11386230655014515 test_loss: 0.10869704484939575
epoch: 119 training_loss 0.11537944789975882 test_loss: 0.11422295570373535
epoch: 120 training_loss 0.11944490101188421 test_loss: 0.1301407814025879
epoch: 121 training_loss 0.11260290324687958 test_loss: 0.11781985759735107
epoch: 122 training_loss 0.11092184975743294 test_loss: 0.1203505516052246
epoch: 123 training_loss 0.11231508892029524 test_loss: 0.13287060260772704
epoch: 124 training_loss 0.11427516464143991 test_loss: 0.11008305549621582
epoch: 125 training_loss 0.10507046446204185 test_loss: 0.11483947038650513
epoch: 126 training_loss 0.11148849230259657 test_loss: 0.11142349243164062
epoch: 127 training_loss 0.12325319983065128 test_loss: 0.1073644757270813
epoch: 128 training_loss 0.12020038850605488 test_loss: 0.13945391178131103
epoch: 129 training_loss 0.11524644296616315 test_loss: 0.11869980096817016
epoch: 130 training_loss 0.11601154124364257 test_loss: 0.11230571269989013
epoch: 131 training_loss 0.10728276588022709 test_loss: 0.13292001485824584
epoch: 132 training_loss 0.1176191370561719 test_loss: 0.13035048246383668
epoch: 133 training_loss 0.109776302985847 test_loss: 0.14016755819320678
epoch: 134 training_loss 0.1098233911767602 test_loss: 0.12483094930648804
epoch: 135 training_loss 0.11541192645207048 test_loss: 0.13189585208892823
epoch: 136 training_loss 0.11024505808949471 test_loss: 0.12206138372421264
epoch: 137 training_loss 0.11848021950572729 test_loss: 0.1287319302558899
epoch: 138 training_loss 0.11843928737565874 test_loss: 0.13717367649078369
epoch: 139 training_loss 0.11081308636814356 test_loss: 0.13267406225204467
epoch: 140 training_loss 0.11656574223190547 test_loss: 0.1273702025413513
epoch: 141 training_loss 0.11460803220048547 test_loss: 0.12703869342803956
epoch: 142 training_loss 0.12121899925172329 test_loss: 0.1269481897354126
epoch: 143 training_loss 0.11638095673173666 test_loss: 0.11963825225830078
epoch: 144 training_loss 0.1117830754444003 test_loss: 0.13714094161987306
epoch: 145 training_loss 0.11186342971399427 test_loss: 0.1168249249458313
epoch: 146 training_loss 0.11468450076878071 test_loss: 0.1146268367767334
epoch: 147 training_loss 0.11717550467699767 test_loss: 0.11768505573272706
epoch: 148 training_loss 0.11050714520737528 test_loss: 0.09914059042930604
epoch: 149 training_loss 0.10748872438445688 test_loss: 0.12985848188400267
episode: 0 training return: -1418.94678827024
episode: 1 training return: -1208.0831012714975
episode: 2 training return: -1280.0677994647529
episode: 3 training return: -1115.353334931417
epoch: 1 test_true_pfm: 0.15587134868740882 sim_pfm: -973.3547103502427
episode: 4 training return: -1260.6591047171808
episode: 5 training return: -1453.2324369821222
episode: 6 training return: -1213.4062101138538
episode: 7 training return: -1033.9164948923876
epoch: 2 test_true_pfm: 98.8154547153277 sim_pfm: -957.4565062184357
episode: 8 training return: -1412.0545687104477
episode: 9 training return: -1455.6153824883402
episode: 10 training return: -1006.6281506752737
episode: 11 training return: -1021.2746106069475
epoch: 3 test_true_pfm: 25.196117644111542 sim_pfm: -994.5872006355006
episode: 12 training return: -1011.8978688983782
episode: 13 training return: -981.3652644323558
episode: 14 training return: -1006.5921757459889
episode: 15 training return: -1001.8407874845046
epoch: 4 test_true_pfm: 989.5421579714615 sim_pfm: -961.7368277490718
episode: 16 training return: -968.7903933386549
episode: 17 training return: -975.5597431186869
episode: 18 training return: -981.377966874531
episode: 19 training return: -980.2930705103016
epoch: 5 test_true_pfm: 345.414838383391 sim_pfm: -1012.9821855986032
episode: 20 training return: -983.9032678316746
episode: 21 training return: -978.4978126836472
episode: 22 training return: -980.5936635984924
episode: 23 training return: -990.8542201625245
epoch: 6 test_true_pfm: 377.0019700250882 sim_pfm: -964.451390240226
episode: 24 training return: -994.8461010421549
episode: 25 training return: -990.5296674898224
episode: 26 training return: -999.7274714604553
episode: 27 training return: -995.0856627015528
epoch: 7 test_true_pfm: 375.3025943474344 sim_pfm: -905.4823180813313
episode: 28 training return: -987.1980635932447
episode: 29 training return: -985.8933550270602
episode: 30 training return: -990.730297147222
episode: 31 training return: -993.0954894640862
epoch: 8 test_true_pfm: 397.03068128650466 sim_pfm: -885.6040429370361
episode: 32 training return: -994.9338416055351
episode: 33 training return: -986.5140721171392
episode: 34 training return: -987.8076267477322
episode: 35 training return: -1029.8383291288494
epoch: 9 test_true_pfm: 379.05964705163615 sim_pfm: -920.2145967068177
episode: 36 training return: -985.5279447617015
episode: 37 training return: -994.3642224282313
episode: 38 training return: -995.690956001946
episode: 39 training return: -993.378252773402
epoch: 10 test_true_pfm: 380.02722619669834 sim_pfm: -888.2466301603305
episode: 40 training return: -991.4169001178053
episode: 41 training return: -989.5807432296849
episode: 42 training return: -991.836027796684
episode: 43 training return: -992.77190420623
epoch: 11 test_true_pfm: 326.40155299202826 sim_pfm: -883.8976351803407
episode: 44 training return: -989.7327640298365
episode: 45 training return: -995.7635051893791
episode: 46 training return: -994.1558466516884
episode: 47 training return: -992.0336302714938
epoch: 12 test_true_pfm: 316.46425504805705 sim_pfm: -884.215456827614
episode: 48 training return: -992.6626819710514
episode: 49 training return: -991.819007043857
episode: 50 training return: -996.9685632578884
episode: 51 training return: -986.5900562015705
epoch: 13 test_true_pfm: 326.42220847873386 sim_pfm: -906.928494713865
episode: 52 training return: -989.063912334457
episode: 53 training return: -990.0660910771402
episode: 54 training return: -979.2218839091043
episode: 55 training return: -988.7748044956857
epoch: 14 test_true_pfm: 336.71063440334177 sim_pfm: -875.805898683792
episode: 56 training return: -987.8843894894086
episode: 57 training return: -984.3369158442653
episode: 58 training return: -987.8829390318106
episode: 59 training return: -984.7869254095749
epoch: 15 test_true_pfm: 326.9220534372233 sim_pfm: -871.466199625122
episode: 60 training return: -988.7122289277477
episode: 61 training return: -986.9990301995085
episode: 62 training return: -989.5730998777128
episode: 63 training return: -989.8755476699248
epoch: 16 test_true_pfm: 353.9973729429578 sim_pfm: -868.2475764309132
episode: 64 training return: -991.3991682136774
episode: 65 training return: -976.8126687466062
episode: 66 training return: -995.6423189453091
episode: 67 training return: -989.0774798852513
epoch: 17 test_true_pfm: 334.0337802828919 sim_pfm: -881.5826305037796
episode: 68 training return: -984.0313473037453
episode: 69 training return: -982.3035472772157
episode: 70 training return: -998.10904520526
episode: 71 training return: -988.6916882016762
epoch: 18 test_true_pfm: 314.8244155099103 sim_pfm: -877.6080604303085
episode: 72 training return: -985.1950354292919
episode: 73 training return: -986.4757850091313
episode: 74 training return: -990.0901912341068
episode: 75 training return: -986.9408611197107
epoch: 19 test_true_pfm: 324.50660086424347 sim_pfm: -878.2275395719516
episode: 76 training return: -990.1035622795256
episode: 77 training return: -983.3071381908294
episode: 78 training return: -988.4281933517725
episode: 79 training return: -985.5463079107303
epoch: 20 test_true_pfm: 316.55324498644137 sim_pfm: -902.0052628870354
episode: 80 training return: -990.8940735184119
episode: 81 training return: -984.0407197583606
episode: 82 training return: -987.5434707154594
episode: 83 training return: -988.4620631983319
epoch: 21 test_true_pfm: 325.2571674488633 sim_pfm: -871.2557314301274
episode: 84 training return: -991.900372952572
episode: 85 training return: -982.7678194207881
episode: 86 training return: -993.3603449874905
episode: 87 training return: -985.9283547198568
epoch: 22 test_true_pfm: 321.2972210724535 sim_pfm: -875.3342192802796
episode: 88 training return: -981.1662040642597
episode: 89 training return: -992.5640948983358
episode: 90 training return: -989.224624288525
episode: 91 training return: -991.9382982354791
epoch: 23 test_true_pfm: 334.30257754666974 sim_pfm: -866.9626841814189
episode: 92 training return: -987.5054205528154
episode: 93 training return: -987.6350597586916
episode: 94 training return: -987.7649502115154
episode: 95 training return: -985.9840353413763
epoch: 24 test_true_pfm: 340.5684963718376 sim_pfm: -875.9837216360165
episode: 96 training return: -991.3901209134286
episode: 97 training return: -992.9695174923627
episode: 98 training return: -999.7974395573849
episode: 99 training return: -989.9407346923882
epoch: 25 test_true_pfm: 343.99250585242015 sim_pfm: -868.8978428177924
episode: 100 training return: -984.9595869263408
episode: 101 training return: -982.6573785073393
episode: 102 training return: -987.5093940783219
episode: 103 training return: -982.3834001572906
epoch: 26 test_true_pfm: 329.2591287564342 sim_pfm: -869.169656106526
episode: 104 training return: -985.8372820758321
episode: 105 training return: -989.9039349775943
episode: 106 training return: -987.7954865799372
episode: 107 training return: -987.2436287773794
epoch: 27 test_true_pfm: 334.6070776111186 sim_pfm: -863.559146348306
episode: 108 training return: -991.1969144730753
episode: 109 training return: -985.6191624642901
episode: 110 training return: -991.3713414827911
episode: 111 training return: -986.0533022645915
epoch: 28 test_true_pfm: 322.0786490556538 sim_pfm: -860.0054765926728
episode: 112 training return: -989.0217690316603
episode: 113 training return: -984.9924486547385
episode: 114 training return: -985.2161436506885
episode: 115 training return: -989.4353666140548
epoch: 29 test_true_pfm: 327.8493116125998 sim_pfm: -872.3290817223557
episode: 116 training return: -993.5436717706848
episode: 117 training return: -986.3492846930604
episode: 118 training return: -985.9280899156203
episode: 119 training return: -988.442473778745
epoch: 30 test_true_pfm: 332.1398258480697 sim_pfm: -868.9036789132819
episode: 120 training return: -985.5877402476814
episode: 121 training return: -983.9513419778133
episode: 122 training return: -981.0795486557248
episode: 123 training return: -982.0558758930194
epoch: 31 test_true_pfm: 322.1124916324045 sim_pfm: -869.6162729092079
episode: 124 training return: -997.1090126010645
episode: 125 training return: -992.317076293635
episode: 126 training return: -1000.2148440194143
episode: 127 training return: -986.431498430098
epoch: 32 test_true_pfm: 326.63284960718664 sim_pfm: -893.8099940463504
episode: 128 training return: -985.8153853746256
episode: 129 training return: -984.0381152758122
episode: 130 training return: -992.6831774911637
episode: 131 training return: -987.5877227247222
epoch: 33 test_true_pfm: 324.31250653794996 sim_pfm: -878.655861219218
episode: 132 training return: -995.3852163640942
episode: 133 training return: -992.8194096334952
episode: 134 training return: -990.4104798914809
episode: 135 training return: -992.1832921793244
epoch: 34 test_true_pfm: 319.4002213829547 sim_pfm: -875.009231080761
episode: 136 training return: -989.2609090865184
episode: 137 training return: -992.3942259976938
episode: 138 training return: -989.1221869714293
episode: 139 training return: -988.8983615501215
epoch: 35 test_true_pfm: 321.85941472855245 sim_pfm: -897.8927012471573
episode: 140 training return: -987.7046082491523
episode: 141 training return: -988.6364105728999
episode: 142 training return: -986.6656937774032
episode: 143 training return: -991.1739272969188
epoch: 36 test_true_pfm: 323.8878422901821 sim_pfm: -879.905808299461
episode: 144 training return: -982.571438033306
episode: 145 training return: -980.6259492257043
episode: 146 training return: -990.8653139354361
episode: 147 training return: -985.4117352095012
epoch: 37 test_true_pfm: 322.9228563750138 sim_pfm: -892.0045199621492
episode: 148 training return: -988.7794671656235
episode: 149 training return: -993.2230150654841
episode: 150 training return: -983.4303349886596
episode: 151 training return: -986.7511866948565
epoch: 38 test_true_pfm: 383.09922064298485 sim_pfm: -870.4316083752624
episode: 152 training return: -988.2772349864445
episode: 153 training return: -987.0225536970435
episode: 154 training return: -984.9177332590953
episode: 155 training return: -995.1782307395862
epoch: 39 test_true_pfm: 348.5740818660618 sim_pfm: -872.0787088881508
episode: 156 training return: -987.3509220247241
episode: 157 training return: -988.0462738326158
episode: 158 training return: -991.300694095488
episode: 159 training return: -993.2766557820917
epoch: 40 test_true_pfm: 320.42564317636396 sim_pfm: -877.2616026746828
episode: 160 training return: -994.9591674477939
episode: 161 training return: -995.8836505037099
episode: 162 training return: -992.016349718825
episode: 163 training return: -992.3529989211647
epoch: 41 test_true_pfm: 319.5126053287531 sim_pfm: -881.2778527296663
episode: 164 training return: -990.0924826248707
episode: 165 training return: -982.5528305513678
episode: 166 training return: -988.0292250350284
episode: 167 training return: -992.1478093674576
epoch: 42 test_true_pfm: 327.65934606168133 sim_pfm: -875.5049602092287
episode: 168 training return: -989.7181949152225
episode: 169 training return: -984.328257409737
episode: 170 training return: -983.3485507294789
episode: 171 training return: -994.3454094005672
epoch: 43 test_true_pfm: 339.9495333649079 sim_pfm: -870.7005141498702
episode: 172 training return: -992.0539018229343
episode: 173 training return: -981.2208985704839
episode: 174 training return: -993.461320984536
episode: 175 training return: -986.794403228234
epoch: 44 test_true_pfm: 334.86272946716446 sim_pfm: -869.1350982249628
episode: 176 training return: -982.9610049209009
episode: 177 training return: -993.3030400862889
episode: 178 training return: -995.8386809898255
episode: 179 training return: -993.8870856963923
epoch: 45 test_true_pfm: 320.4557428826245 sim_pfm: -871.1940965948735
episode: 180 training return: -989.0875193359575
episode: 181 training return: -991.6577758598548
episode: 182 training return: -992.4055535782827
episode: 183 training return: -986.6654660345606
epoch: 46 test_true_pfm: 319.24397477618123 sim_pfm: -873.1465892263814
episode: 184 training return: -985.9305592131424
episode: 185 training return: -991.320016176028
episode: 186 training return: -990.3655444480714
episode: 187 training return: -993.121937968198
epoch: 47 test_true_pfm: 327.01001166574366 sim_pfm: -873.3738626338177
episode: 188 training return: -983.9626407750515
episode: 189 training return: -986.1307067757896
episode: 190 training return: -989.6360021691319
episode: 191 training return: -991.0659744990797
epoch: 48 test_true_pfm: 319.8211557643106 sim_pfm: -871.0436040911469
episode: 192 training return: -986.6312485997116
episode: 193 training return: -987.6363451062701
episode: 194 training return: -987.9781173288576
episode: 195 training return: -981.3527445569483
epoch: 49 test_true_pfm: 320.1278368572893 sim_pfm: -865.6177228865412
episode: 196 training return: -983.8841334298864
episode: 197 training return: -997.804619263389
episode: 198 training return: -985.8704767331832
episode: 199 training return: -993.4007738146071
epoch: 50 test_true_pfm: 329.1788069419997 sim_pfm: -871.5655040096066
episode: 200 training return: -992.7873332803827
episode: 201 training return: -983.8360270184612
episode: 202 training return: -987.44750672921
episode: 203 training return: -986.0703348845726
epoch: 51 test_true_pfm: 322.7748349311592 sim_pfm: -874.4069445738099
episode: 204 training return: -991.5164147378379
episode: 205 training return: -989.0858244590191
episode: 206 training return: -992.956818065135
episode: 207 training return: -989.0664694812264
epoch: 52 test_true_pfm: 321.48196463682285 sim_pfm: -874.388448917061
episode: 208 training return: -988.5196805250898
episode: 209 training return: -983.7525632186391
episode: 210 training return: -992.5578936165001
episode: 211 training return: -985.3297964844834
epoch: 53 test_true_pfm: 316.21560653544043 sim_pfm: -873.0547135101584
episode: 212 training return: -992.8747975234777
episode: 213 training return: -989.1561831350723
episode: 214 training return: -993.0160333737236
episode: 215 training return: -983.9612427218585
epoch: 54 test_true_pfm: 326.4420199378881 sim_pfm: -873.6426654080057
episode: 216 training return: -997.0278298082388
episode: 217 training return: -985.6456452860684
episode: 218 training return: -991.7820781677357
episode: 219 training return: -995.3098870180104
epoch: 55 test_true_pfm: 320.5755446395581 sim_pfm: -874.3056704124219
episode: 220 training return: -985.7042123679436
episode: 221 training return: -985.2153828379933
episode: 222 training return: -983.3065360983408
episode: 223 training return: -990.3558144483898
epoch: 56 test_true_pfm: 340.49300989865753 sim_pfm: -869.0084797692843
episode: 224 training return: -992.6661833343703
episode: 225 training return: -980.5445340749621
episode: 226 training return: -990.632151738723
episode: 227 training return: -983.658821997401
epoch: 57 test_true_pfm: 325.95846985794134 sim_pfm: -868.3711032271804
episode: 228 training return: -986.6929277889978
episode: 229 training return: -984.1630332856581
episode: 230 training return: -991.6393202604007
episode: 231 training return: -991.4534223630392
epoch: 58 test_true_pfm: 318.9906225721673 sim_pfm: -872.5614569724647
episode: 232 training return: -997.2684519804658
episode: 233 training return: -984.9375737976276
episode: 234 training return: -990.4222445225024
episode: 235 training return: -986.5884482443272
epoch: 59 test_true_pfm: 321.95834753216405 sim_pfm: -866.5277601897595
episode: 236 training return: -984.7085460841255
episode: 237 training return: -992.8178603623628
episode: 238 training return: -993.3258389796916
episode: 239 training return: -987.7529083361765
epoch: 60 test_true_pfm: 323.03249775282035 sim_pfm: -868.8230340224836
episode: 240 training return: -994.4475401764464
episode: 241 training return: -996.8651019692928
episode: 242 training return: -992.4004953459535
episode: 243 training return: -993.7832566579654
epoch: 61 test_true_pfm: 321.44164218488515 sim_pfm: -896.5098029014736
episode: 244 training return: -983.0914071168781
episode: 245 training return: -995.6224042128466
episode: 246 training return: -994.582378522286
episode: 247 training return: -995.3654196396345
epoch: 62 test_true_pfm: 319.00118303538574 sim_pfm: -897.00904994616
episode: 248 training return: -992.0658092559923
episode: 249 training return: -989.0368663382986
episode: 250 training return: -989.9910216776557
episode: 251 training return: -995.2367716313224
epoch: 63 test_true_pfm: 328.48709444980597 sim_pfm: -868.1011450516111
episode: 252 training return: -991.1049018754435
episode: 253 training return: -990.0537422340088
episode: 254 training return: -991.4792422982504
episode: 255 training return: -989.3202236117658
epoch: 64 test_true_pfm: 322.55605202754583 sim_pfm: -865.4116326651291
episode: 256 training return: -985.9432475784605
episode: 257 training return: -994.9118547428617
episode: 258 training return: -988.9453672858904
episode: 259 training return: -991.6819541960909
epoch: 65 test_true_pfm: 329.4043094295531 sim_pfm: -894.9857991152735
episode: 260 training return: -989.7186245984519
episode: 261 training return: -993.9083423707185
episode: 262 training return: -994.946322678993
episode: 263 training return: -988.3373338817453
epoch: 66 test_true_pfm: 324.14353019247113 sim_pfm: -866.2523711797785
episode: 264 training return: -991.6951655614045
episode: 265 training return: -990.7719292554224
episode: 266 training return: -993.982050059762
episode: 267 training return: -985.0095954590105
epoch: 67 test_true_pfm: 329.88159433061213 sim_pfm: -877.5704091839206
episode: 268 training return: -987.4195075020009
episode: 269 training return: -997.9477955359545
episode: 270 training return: -993.2297309191264
episode: 271 training return: -986.9759577920798
epoch: 68 test_true_pfm: 321.2359898512222 sim_pfm: -872.8064444494145
episode: 272 training return: -990.1707217202726
episode: 273 training return: -995.6351378463526
episode: 274 training return: -993.954044828195
episode: 275 training return: -994.6160948847279
epoch: 69 test_true_pfm: 317.03463643939784 sim_pfm: -895.6161338922093
episode: 276 training return: -990.2745239184346
episode: 277 training return: -988.9107261190363
episode: 278 training return: -993.0595875490914
episode: 279 training return: -986.7904125466378
epoch: 70 test_true_pfm: 349.3141397376135 sim_pfm: -870.6750941656377
episode: 280 training return: -989.0940180997069
episode: 281 training return: -989.2187848644686
episode: 282 training return: -993.4245398073331
episode: 283 training return: -985.9693709687415
epoch: 71 test_true_pfm: 321.3346168361386 sim_pfm: -872.0492740134006
episode: 284 training return: -997.5011327935596
episode: 285 training return: -987.954140031233
episode: 286 training return: -986.380865538826
episode: 287 training return: -992.1977179459401
epoch: 72 test_true_pfm: 317.9709009773018 sim_pfm: -874.4028735350794
episode: 288 training return: -991.2643961797448
episode: 289 training return: -983.3366021126766
episode: 290 training return: -989.7647134513917
episode: 291 training return: -992.599165654658
epoch: 73 test_true_pfm: 326.1716057491267 sim_pfm: -868.7268621875473
episode: 292 training return: -988.114493319753
episode: 293 training return: -999.6394331440988
episode: 294 training return: -991.728427047043
episode: 295 training return: -995.0653890117695
epoch: 74 test_true_pfm: 322.69702696241575 sim_pfm: -862.6566269495647
episode: 296 training return: -993.1855154247245
episode: 297 training return: -990.0776036836521
episode: 298 training return: -988.3276718945858
episode: 299 training return: -995.3225655445634
epoch: 75 test_true_pfm: 321.96579996024485 sim_pfm: -868.2382856542914
episode: 300 training return: -989.2823722521442
episode: 301 training return: -989.0514959216604
episode: 302 training return: -988.2634067401102
episode: 303 training return: -987.8417028535482
epoch: 76 test_true_pfm: 322.4630564674737 sim_pfm: -874.0741980292488
episode: 304 training return: -993.5144698384689
episode: 305 training return: -991.9630803193636
episode: 306 training return: -996.7134907504985
episode: 307 training return: -989.4888043572265
epoch: 77 test_true_pfm: 329.31657329148896 sim_pfm: -873.9707050376791
episode: 308 training return: -986.3414060977966
episode: 309 training return: -988.9176497180875
episode: 310 training return: -988.2284795231706
episode: 311 training return: -983.3606357600979
epoch: 78 test_true_pfm: 319.30107150312125 sim_pfm: -868.5178347278752
episode: 312 training return: -982.5085688703797
episode: 313 training return: -986.672579259936
episode: 314 training return: -986.4876003372464
episode: 315 training return: -983.5117229606809
epoch: 79 test_true_pfm: 327.1356879987223 sim_pfm: -868.4489981508704
episode: 316 training return: -989.7487812346557
episode: 317 training return: -988.4418572162576
episode: 318 training return: -993.4399258765183
episode: 319 training return: -983.8776913288697
epoch: 80 test_true_pfm: 344.3915190054192 sim_pfm: -869.216603867521
episode: 320 training return: -989.8759767038094
episode: 321 training return: -990.5516518750029
episode: 322 training return: -983.7576411769587
episode: 323 training return: -991.6261282714592
epoch: 81 test_true_pfm: 321.3424091399775 sim_pfm: -869.284536520091
episode: 324 training return: -988.858966480779
episode: 325 training return: -987.524665182715
episode: 326 training return: -996.3222597966901
episode: 327 training return: -990.7810974107606
epoch: 82 test_true_pfm: 341.6339501312993 sim_pfm: -865.2168467306033
episode: 328 training return: -986.3113559783053
episode: 329 training return: -985.9133813071595
episode: 330 training return: -985.3890002827034
episode: 331 training return: -990.2892671332461
epoch: 83 test_true_pfm: 348.62890649582727 sim_pfm: -864.6251974259453
episode: 332 training return: -989.0421779105401
episode: 333 training return: -988.3159444710069
episode: 334 training return: -988.6657325951512
episode: 335 training return: -992.5844622147716
epoch: 84 test_true_pfm: 334.80810400963964 sim_pfm: -868.1785731228953
episode: 336 training return: -992.0128235024106
episode: 337 training return: -988.0673425179365
episode: 338 training return: -992.1450495948968
episode: 339 training return: -985.9662996493739
epoch: 85 test_true_pfm: 329.459715728355 sim_pfm: -873.4334526856275
episode: 340 training return: -998.9839674627148
episode: 341 training return: -985.9997423116113
episode: 342 training return: -991.031375824885
episode: 343 training return: -985.0122057781243
epoch: 86 test_true_pfm: 333.5222697670422 sim_pfm: -869.449963646341
episode: 344 training return: -990.4628096335838
episode: 345 training return: -990.9634470327343
episode: 346 training return: -992.210764783286
episode: 347 training return: -988.4639065406963
epoch: 87 test_true_pfm: 339.33678451088554 sim_pfm: -868.8468959493271
episode: 348 training return: -984.6011028067422
episode: 349 training return: -989.4365486992135
episode: 350 training return: -988.7590644426604
episode: 351 training return: -993.2520548110014
epoch: 88 test_true_pfm: 325.4035675683942 sim_pfm: -870.5021877789353
episode: 352 training return: -985.874501083237
episode: 353 training return: -989.251585180686
episode: 354 training return: -993.2616682956613
episode: 355 training return: -995.4617850489976
epoch: 89 test_true_pfm: 340.6790202139209 sim_pfm: -869.3551989954285
episode: 356 training return: -988.5219345945128
episode: 357 training return: -993.8562909433188
episode: 358 training return: -989.2910908324652
episode: 359 training return: -985.5587665733228
epoch: 90 test_true_pfm: 324.7918424671325 sim_pfm: -870.652874877601
episode: 360 training return: -989.5015825391855
episode: 361 training return: -988.1973791573754
episode: 362 training return: -988.2213393413615
episode: 363 training return: -989.645141005292
epoch: 91 test_true_pfm: 321.6567375058182 sim_pfm: -872.7146390185925
episode: 364 training return: -993.5191289792747
episode: 365 training return: -985.6395736013285
episode: 366 training return: -985.7884374617454
episode: 367 training return: -992.8581115279138
epoch: 92 test_true_pfm: 322.2500571849942 sim_pfm: -869.3091599560872
episode: 368 training return: -990.0142816670552
episode: 369 training return: -988.335748869692
episode: 370 training return: -987.9515739176362
episode: 371 training return: -996.4101522963034
epoch: 93 test_true_pfm: 336.8735060938697 sim_pfm: -870.5823131400839
episode: 372 training return: -990.7431890674779
episode: 373 training return: -982.552700147673
episode: 374 training return: -989.9618145968539
episode: 375 training return: -989.541297916101
epoch: 94 test_true_pfm: 330.12518234561907 sim_pfm: -873.4986960463435
episode: 376 training return: -992.5597868374283
episode: 377 training return: -985.1876899760063
episode: 378 training return: -994.1121247459473
episode: 379 training return: -992.6083705953654
epoch: 95 test_true_pfm: 332.2347247527648 sim_pfm: -873.2420326602102
episode: 380 training return: -988.6730448480137
episode: 381 training return: -988.6738484305456
episode: 382 training return: -987.5832274989311
episode: 383 training return: -991.8103292223469
epoch: 96 test_true_pfm: 321.94562842716124 sim_pfm: -864.2913422568043
episode: 384 training return: -987.7466849291148
episode: 385 training return: -982.3027397501277
episode: 386 training return: -990.484217718808
episode: 387 training return: -992.5413821265729
epoch: 97 test_true_pfm: 332.2190629767434 sim_pfm: -874.1415442835637
episode: 388 training return: -988.2066932077048
episode: 389 training return: -991.3546606805279
episode: 390 training return: -986.898522857122
episode: 391 training return: -985.4025373819252
epoch: 98 test_true_pfm: 332.8965377942241 sim_pfm: -863.1519941972891
episode: 392 training return: -986.4141144926178
episode: 393 training return: -987.179143172026
episode: 394 training return: -993.0422755168001
episode: 395 training return: -990.534063236993
epoch: 99 test_true_pfm: 323.20655331791164 sim_pfm: -868.990125966788
episode: 396 training return: -990.2662020872953
episode: 397 training return: -986.5646151442526
episode: 398 training return: -986.9362996164379
episode: 399 training return: -986.310304954617
epoch: 100 test_true_pfm: 358.55510511222474 sim_pfm: -875.7143146634376
episode: 400 training return: -987.111145806325
episode: 401 training return: -988.3096142111607
episode: 402 training return: -983.7173963074322
episode: 403 training return: -981.4050987478922
epoch: 101 test_true_pfm: 331.9667662585774 sim_pfm: -872.1356591213471
episode: 404 training return: -993.3766836415364
episode: 405 training return: -998.331459096468
episode: 406 training return: -994.9447353443676
episode: 407 training return: -981.5477009311824
epoch: 102 test_true_pfm: 342.7033840890849 sim_pfm: -891.372455270585
episode: 408 training return: -986.8844962008643
episode: 409 training return: -979.9563794109183
episode: 410 training return: -985.7016968567465
episode: 411 training return: -980.0138686240518
epoch: 103 test_true_pfm: 327.46049806536536 sim_pfm: -869.6453794192279
episode: 412 training return: -986.2504696505731
episode: 413 training return: -998.4221682084745
episode: 414 training return: -987.6877382358863
episode: 415 training return: -984.2065040729797
epoch: 104 test_true_pfm: 353.795216168786 sim_pfm: -865.1511082452661
episode: 416 training return: -983.2625487957168
episode: 417 training return: -989.8339477839569
episode: 418 training return: -985.2057470609884
episode: 419 training return: -991.0701849799376
epoch: 105 test_true_pfm: 330.72866785704576 sim_pfm: -874.7313476042697
episode: 420 training return: -982.9252794524415
episode: 421 training return: -979.020907105726
episode: 422 training return: -990.6177759042491
episode: 423 training return: -996.2278181250373
epoch: 106 test_true_pfm: 346.42261945713557 sim_pfm: -869.5666873816157
episode: 424 training return: -990.835669822281
episode: 425 training return: -994.198444472935
episode: 426 training return: -995.43740647923
episode: 427 training return: -991.0832965895016
epoch: 107 test_true_pfm: 339.74901244689454 sim_pfm: -868.3275553978765
episode: 428 training return: -991.0065789226175
episode: 429 training return: -988.8036718754986
episode: 430 training return: -989.2789011053848
episode: 431 training return: -988.893346693068
epoch: 108 test_true_pfm: 346.6357887321562 sim_pfm: -872.6996963871567
episode: 432 training return: -992.7270024032333
episode: 433 training return: -986.531955418995
episode: 434 training return: -992.1245994105617
episode: 435 training return: -992.2293632989622
epoch: 109 test_true_pfm: 340.963021395095 sim_pfm: -866.9190975365578
episode: 436 training return: -987.414902712074
episode: 437 training return: -987.818317433814
episode: 438 training return: -989.5946066941623
episode: 439 training return: -988.8588505135651
epoch: 110 test_true_pfm: 346.233085468394 sim_pfm: -872.5181347323163
episode: 440 training return: -990.6302644157549
episode: 441 training return: -988.0541720604881
episode: 442 training return: -1000.1713027460955
episode: 443 training return: -987.8016804520673
epoch: 111 test_true_pfm: 344.6472922788143 sim_pfm: -886.9796049802794
episode: 444 training return: -991.3199282903356
episode: 445 training return: -994.5871527248827
episode: 446 training return: -987.9457466283193
episode: 447 training return: -989.327596849747
epoch: 112 test_true_pfm: 330.660498034393 sim_pfm: -864.4525333953558
episode: 448 training return: -992.8402309720158
episode: 449 training return: -995.4297402040396
episode: 450 training return: -991.5734919749365
episode: 451 training return: -994.7694386364323
epoch: 113 test_true_pfm: 332.3491156116627 sim_pfm: -866.8378120561765
episode: 452 training return: -986.7406571765498
episode: 453 training return: -986.6734135778188
episode: 454 training return: -994.9340551202667
episode: 455 training return: -993.1266727525993
epoch: 114 test_true_pfm: 331.5471490522307 sim_pfm: -871.728052776634
episode: 456 training return: -991.8394619867531
episode: 457 training return: -991.8298576172905
episode: 458 training return: -987.8644141961191
episode: 459 training return: -987.4799846401453
epoch: 115 test_true_pfm: 320.2347148886909 sim_pfm: -868.3362694000472
episode: 460 training return: -986.0644292482773
episode: 461 training return: -989.8866467435536
episode: 462 training return: -981.8225333427855
episode: 463 training return: -996.1787683017491
epoch: 116 test_true_pfm: 342.2148086400546 sim_pfm: -881.2958407943612
episode: 464 training return: -991.4853921692068
episode: 465 training return: -993.9537421560814
episode: 466 training return: -989.176255923127
episode: 467 training return: -987.3157118020646
epoch: 117 test_true_pfm: 351.48344131615823 sim_pfm: -883.2412496307447
episode: 468 training return: -989.9064785882941
episode: 469 training return: -984.480910968522
episode: 470 training return: -994.8873935603841
episode: 471 training return: -992.6864271398398
epoch: 118 test_true_pfm: 358.74592531174704 sim_pfm: -898.3102006406053
episode: 472 training return: -990.7674788818458
episode: 473 training return: -984.3649701066478
episode: 474 training return: -988.7627234201212
episode: 475 training return: -987.1492271132138
epoch: 119 test_true_pfm: 379.6479238958088 sim_pfm: -896.7051455123683
episode: 476 training return: -987.8913871978186
episode: 477 training return: -983.2522127445964
episode: 478 training return: -983.5144854301916
episode: 479 training return: -989.5100684675227
epoch: 120 test_true_pfm: 326.1873254270746 sim_pfm: -877.1765054736956
episode: 480 training return: -991.4095874295741
episode: 481 training return: -991.0345206147616
episode: 482 training return: -989.4099130377705
episode: 483 training return: -986.8101256131973
epoch: 121 test_true_pfm: 340.861233435379 sim_pfm: -863.5027688350639
episode: 484 training return: -994.2385794718542
episode: 485 training return: -991.7984213630357
episode: 486 training return: -991.7578634368899
episode: 487 training return: -991.5808465385055
epoch: 122 test_true_pfm: 332.41340201544864 sim_pfm: -869.1782727513973
episode: 488 training return: -994.7670965888558
episode: 489 training return: -985.717967094061
episode: 490 training return: -987.7615143752238
episode: 491 training return: -989.0292714886906
epoch: 123 test_true_pfm: 338.2759578419673 sim_pfm: -877.9395162119441
episode: 492 training return: -994.073431752687
episode: 493 training return: -990.9157352133005
episode: 494 training return: -983.9329956676265
episode: 495 training return: -990.6408114390125
epoch: 124 test_true_pfm: 343.0421091622281 sim_pfm: -860.8899871911264
episode: 496 training return: -990.0901428441692
episode: 497 training return: -996.377912857632
episode: 498 training return: -990.5045137334348
episode: 499 training return: -984.075626701376
epoch: 125 test_true_pfm: 355.6585840387283 sim_pfm: -870.362258482134
episode: 500 training return: -981.6572298975456
episode: 501 training return: -984.8449216777443
episode: 502 training return: -991.5657395334047
episode: 503 training return: -988.2265479308563
epoch: 126 test_true_pfm: 345.5108167486647 sim_pfm: -871.2752029419133
episode: 504 training return: -991.7819584527904
episode: 505 training return: -989.3855018120291
episode: 506 training return: -992.8845306683421
episode: 507 training return: -995.071756932718
epoch: 127 test_true_pfm: 333.79468922470767 sim_pfm: -870.2731517925557
episode: 508 training return: -983.7710166215895
episode: 509 training return: -990.7498635398531
episode: 510 training return: -986.9848944086906
episode: 511 training return: -986.0291255399422
epoch: 128 test_true_pfm: 342.67928786378417 sim_pfm: -869.5680485651811
episode: 512 training return: -987.2414891597198
episode: 513 training return: -995.9531382135716
episode: 514 training return: -988.9371042829404
episode: 515 training return: -980.2883522955328
epoch: 129 test_true_pfm: 355.21181835668176 sim_pfm: -866.9962330047468
episode: 516 training return: -987.4349057765045
episode: 517 training return: -989.8629404394611
episode: 518 training return: -995.1416127400324
episode: 519 training return: -985.2193107840476
epoch: 130 test_true_pfm: 339.1828965050492 sim_pfm: -867.7429580203064
episode: 520 training return: -988.4004772428423
episode: 521 training return: -985.0946079395735
episode: 522 training return: -990.7637538149237
episode: 523 training return: -994.458521285438
epoch: 131 test_true_pfm: 332.6948484318444 sim_pfm: -868.3115200003817
episode: 524 training return: -994.259563637429
episode: 525 training return: -1000.2234648400434
episode: 526 training return: -989.4002187601438
episode: 527 training return: -990.3130388717632
epoch: 132 test_true_pfm: 340.83699544483613 sim_pfm: -871.1093666874322
episode: 528 training return: -986.1425054432902
episode: 529 training return: -990.6446192872995
episode: 530 training return: -984.0212460873238
episode: 531 training return: -983.5984644481387
epoch: 133 test_true_pfm: 346.90971690723785 sim_pfm: -875.921801622515
episode: 532 training return: -989.1342977539895
episode: 533 training return: -984.8149898328086
episode: 534 training return: -993.6426967956016
episode: 535 training return: -990.5756047204023
epoch: 134 test_true_pfm: 361.6359398139355 sim_pfm: -870.1355499675857
episode: 536 training return: -991.5324081068097
episode: 537 training return: -991.8262999838278
episode: 538 training return: -989.6003011589781
episode: 539 training return: -989.6324569193515
epoch: 135 test_true_pfm: 343.9357500631483 sim_pfm: -869.1961806887417
episode: 540 training return: -985.2709508011295
episode: 541 training return: -994.8854733588063
episode: 542 training return: -992.5269950104015
episode: 543 training return: -992.1634270203955
epoch: 136 test_true_pfm: 351.63044389769215 sim_pfm: -867.3415910149532
episode: 544 training return: -989.3647909976461
episode: 545 training return: -992.0470262646604
episode: 546 training return: -986.2521976265923
episode: 547 training return: -992.4652738611599
epoch: 137 test_true_pfm: 333.5992006776185 sim_pfm: -870.3294884260234
episode: 548 training return: -993.4121307887973
episode: 549 training return: -992.7727864947927
episode: 550 training return: -992.2163181073488
episode: 551 training return: -984.4584205458208
epoch: 138 test_true_pfm: 334.06238879520714 sim_pfm: -873.1230449726096
episode: 552 training return: -988.6515447778111
episode: 553 training return: -989.3070218480735
episode: 554 training return: -993.3794129277745
episode: 555 training return: -985.6798180747342
epoch: 139 test_true_pfm: 333.48039602037846 sim_pfm: -870.223415508594
episode: 556 training return: -991.2275172552391
episode: 557 training return: -988.3554571000237
episode: 558 training return: -983.72801085192
episode: 559 training return: -986.4088901167752
epoch: 140 test_true_pfm: 333.26547506065197 sim_pfm: -863.9684072075197
episode: 560 training return: -993.3129337048084
episode: 561 training return: -991.6045717629776
episode: 562 training return: -987.8907823105296
episode: 563 training return: -982.0495144677947
epoch: 141 test_true_pfm: 334.2780267369783 sim_pfm: -869.1486980518024
episode: 564 training return: -993.945550853473
episode: 565 training return: -988.6221162947325
episode: 566 training return: -991.3622522070016
episode: 567 training return: -994.6012035560277
epoch: 142 test_true_pfm: 369.1233939372707 sim_pfm: -870.665466891577
episode: 568 training return: -990.1043710080962
episode: 569 training return: -983.5415201269261
episode: 570 training return: -981.6118191329076
episode: 571 training return: -988.8263690020048
epoch: 143 test_true_pfm: 339.50419409057474 sim_pfm: -869.1641902135107
episode: 572 training return: -986.1519548002149
episode: 573 training return: -997.1782606450395
episode: 574 training return: -996.8537457375414
episode: 575 training return: -989.091824810099
epoch: 144 test_true_pfm: 345.95048039077597 sim_pfm: -869.2795179740043
episode: 576 training return: -984.1837932311745
episode: 577 training return: -992.2773804535743
episode: 578 training return: -987.827628874805
episode: 579 training return: -985.7358671271302
epoch: 145 test_true_pfm: 331.76537122429977 sim_pfm: -868.2757869730199
episode: 580 training return: -990.385930218613
episode: 581 training return: -987.8817220391161
episode: 582 training return: -990.0831724351945
episode: 583 training return: -983.836525595182
epoch: 146 test_true_pfm: 339.4092084833544 sim_pfm: -866.8116503891496
episode: 584 training return: -988.1015989683546
episode: 585 training return: -982.1832110034529
episode: 586 training return: -989.1520086194511
episode: 587 training return: -993.4725274092322
epoch: 147 test_true_pfm: 330.73791967694666 sim_pfm: -883.7951354226221
episode: 588 training return: -985.6925538106306
episode: 589 training return: -992.6521655087346
episode: 590 training return: -990.4247117847893
episode: 591 training return: -985.7124871905546
epoch: 148 test_true_pfm: 359.5647401130132 sim_pfm: -869.5822694012821
episode: 592 training return: -984.9906942656617
episode: 593 training return: -990.3768199995611
episode: 594 training return: -990.5287165006688
episode: 595 training return: -993.2417446048556
epoch: 149 test_true_pfm: 360.6696596534409 sim_pfm: -869.2430447457792
episode: 596 training return: -990.4141307922359
episode: 597 training return: -994.605320763353
episode: 598 training return: -990.5043953486048
episode: 599 training return: -992.4803119652269
epoch: 150 test_true_pfm: 334.934705274432 sim_pfm: -863.8591856931962
