['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'uncertainty', '--traj', 'expert', '--seed', '2', '--data', '100000']
epoch: 0 training_loss 0.2501227271556854 test_loss: 0.1904691457748413
epoch: 1 training_loss 0.16722294926643372 test_loss: 0.1886236071586609
epoch: 2 training_loss 0.158573269918561 test_loss: 0.16106754541397095
epoch: 3 training_loss 0.15844107262790202 test_loss: 0.15194712877273558
epoch: 4 training_loss 0.14486232835799456 test_loss: 0.14353314638137818
epoch: 5 training_loss 0.14082977194339036 test_loss: 0.13336460590362548
epoch: 6 training_loss 0.1375799087435007 test_loss: 0.16963828802108766
epoch: 7 training_loss 0.14943358510732652 test_loss: 0.16343700885772705
epoch: 8 training_loss 0.13755394537001847 test_loss: 0.16968019008636476
epoch: 9 training_loss 0.134870790168643 test_loss: 0.116336989402771
epoch: 10 training_loss 0.13001554813236 test_loss: 0.17166439294815064
epoch: 11 training_loss 0.14527398923411966 test_loss: 0.13449705839157106
epoch: 12 training_loss 0.12923295371234417 test_loss: 0.12207164764404296
epoch: 13 training_loss 0.1340546763315797 test_loss: 0.13604092597961426
epoch: 14 training_loss 0.1370626036822796 test_loss: 0.1293992280960083
epoch: 15 training_loss 0.12881074573844672 test_loss: 0.13659744262695311
epoch: 16 training_loss 0.12464038416743278 test_loss: 0.12806499004364014
epoch: 17 training_loss 0.12277026006951929 test_loss: 0.1178544044494629
epoch: 18 training_loss 0.12364495638757944 test_loss: 0.1386472225189209
epoch: 19 training_loss 0.12440011393278837 test_loss: 0.13555140495300294
epoch: 20 training_loss 0.12892634000629186 test_loss: 0.13393144607543944
epoch: 21 training_loss 0.12425546716898679 test_loss: 0.12134963274002075
epoch: 22 training_loss 0.12342354502528906 test_loss: 0.13840024471282958
epoch: 23 training_loss 0.12158451493829489 test_loss: 0.13884415626525878
epoch: 24 training_loss 0.12301059264689684 test_loss: 0.11744555234909057
epoch: 25 training_loss 0.12288010720163584 test_loss: 0.11413453817367554
epoch: 26 training_loss 0.11613535854965448 test_loss: 0.14923499822616576
epoch: 27 training_loss 0.12161582887172699 test_loss: 0.13001030683517456
epoch: 28 training_loss 0.12263684775680303 test_loss: 0.12040035724639893
epoch: 29 training_loss 0.11207222962751985 test_loss: 0.12796056270599365
epoch: 30 training_loss 0.12062777388840913 test_loss: 0.12375071048736572
epoch: 31 training_loss 0.13305224072188138 test_loss: 0.11479291915893555
epoch: 32 training_loss 0.11604115974158048 test_loss: 0.12200926542282105
epoch: 33 training_loss 0.11586205035448074 test_loss: 0.14468612670898437
epoch: 34 training_loss 0.11087044078856706 test_loss: 0.1344106078147888
epoch: 35 training_loss 0.10931598560884595 test_loss: 0.11756812334060669
epoch: 36 training_loss 0.1320041259378195 test_loss: 0.11938798427581787
epoch: 37 training_loss 0.11541615439578891 test_loss: 0.14880788326263428
epoch: 38 training_loss 0.11828399140387774 test_loss: 0.13432084321975707
epoch: 39 training_loss 0.11926120119169355 test_loss: 0.13179869651794435
epoch: 40 training_loss 0.12112246975302696 test_loss: 0.1336134910583496
epoch: 41 training_loss 0.11316409358754755 test_loss: 0.1268070101737976
epoch: 42 training_loss 0.12339488305151462 test_loss: 0.108473801612854
epoch: 43 training_loss 0.10847423050552607 test_loss: 0.13020397424697877
epoch: 44 training_loss 0.11610177751630545 test_loss: 0.10812788009643555
epoch: 45 training_loss 0.12152788758277894 test_loss: 0.13422249555587767
epoch: 46 training_loss 0.11508976552635432 test_loss: 0.1396934986114502
epoch: 47 training_loss 0.11595056828111411 test_loss: 0.11589900255203248
epoch: 48 training_loss 0.11600790768861771 test_loss: 0.13019075393676757
epoch: 49 training_loss 0.12109667807817459 test_loss: 0.12454332113265991
epoch: 50 training_loss 0.12085440549999475 test_loss: 0.11152861118316651
epoch: 51 training_loss 0.12001220528036356 test_loss: 0.11346209049224854
epoch: 52 training_loss 0.12345239736139774 test_loss: 0.12993180751800537
epoch: 53 training_loss 0.12657532323151827 test_loss: 0.11881788969039916
epoch: 54 training_loss 0.11909986644983292 test_loss: 0.14613542556762696
epoch: 55 training_loss 0.1231149085983634 test_loss: 0.14310163259506226
epoch: 56 training_loss 0.11434875909239053 test_loss: 0.12590105533599855
epoch: 57 training_loss 0.11703786712139845 test_loss: 0.09983800649642945
epoch: 58 training_loss 0.12324290059506893 test_loss: 0.124847674369812
epoch: 59 training_loss 0.1172964488901198 test_loss: 0.13523333072662352
epoch: 60 training_loss 0.11757914494723082 test_loss: 0.13660870790481566
epoch: 61 training_loss 0.11146375734359026 test_loss: 0.13867841958999633
epoch: 62 training_loss 0.11015585944056511 test_loss: 0.13436239957809448
epoch: 63 training_loss 0.11640247393399478 test_loss: 0.13576292991638184
epoch: 64 training_loss 0.11228309974074363 test_loss: 0.13759589195251465
epoch: 65 training_loss 0.11382647728547454 test_loss: 0.12975642681121827
epoch: 66 training_loss 0.12428751230239868 test_loss: 0.1258256196975708
epoch: 67 training_loss 0.11936144307255744 test_loss: 0.12366514205932617
epoch: 68 training_loss 0.11120383270084858 test_loss: 0.11859575510025025
epoch: 69 training_loss 0.11786501474678517 test_loss: 0.12704638242721558
epoch: 70 training_loss 0.1173929676786065 test_loss: 0.1259769082069397
epoch: 71 training_loss 0.12417209714651108 test_loss: 0.105797278881073
epoch: 72 training_loss 0.1164457966759801 test_loss: 0.149869167804718
epoch: 73 training_loss 0.1220911362953484 test_loss: 0.12211705446243286
epoch: 74 training_loss 0.10824557643383742 test_loss: 0.14410995244979857
epoch: 75 training_loss 0.11644306220114231 test_loss: 0.11762046813964844
epoch: 76 training_loss 0.11433676719665527 test_loss: 0.13586099147796632
epoch: 77 training_loss 0.11501065969467163 test_loss: 0.1384531855583191
epoch: 78 training_loss 0.11896443728357553 test_loss: 0.10934622287750244
epoch: 79 training_loss 0.11859259154647589 test_loss: 0.1507176637649536
epoch: 80 training_loss 0.11867995847016573 test_loss: 0.14692918062210084
epoch: 81 training_loss 0.10538863722234965 test_loss: 0.12484246492385864
epoch: 82 training_loss 0.1195702176913619 test_loss: 0.12652652263641356
epoch: 83 training_loss 0.11595672197639942 test_loss: 0.1179467797279358
epoch: 84 training_loss 0.12816690411418677 test_loss: 0.12268664836883544
epoch: 85 training_loss 0.12487163979560137 test_loss: 0.12001055479049683
epoch: 86 training_loss 0.11462790943682194 test_loss: 0.13644824028015137
epoch: 87 training_loss 0.11855693884193898 test_loss: 0.13874534368515015
epoch: 88 training_loss 0.1093222887814045 test_loss: 0.15903538465499878
epoch: 89 training_loss 0.1247109903767705 test_loss: 0.11345378160476685
epoch: 90 training_loss 0.11761045023798943 test_loss: 0.1254805326461792
epoch: 91 training_loss 0.11813568621873856 test_loss: 0.12312440872192383
epoch: 92 training_loss 0.1171008264273405 test_loss: 0.12369856834411622
epoch: 93 training_loss 0.10663980696350336 test_loss: 0.12613301277160643
epoch: 94 training_loss 0.1207723219320178 test_loss: 0.14422125816345216
epoch: 95 training_loss 0.11554042935371399 test_loss: 0.14225844144821168
epoch: 96 training_loss 0.11256316559389233 test_loss: 0.12008804082870483
epoch: 97 training_loss 0.11526384450495243 test_loss: 0.11974210739135742
epoch: 98 training_loss 0.1189620054513216 test_loss: 0.1250140428543091
epoch: 99 training_loss 0.1159219840168953 test_loss: 0.12246699333190918
epoch: 100 training_loss 0.11727565655484795 test_loss: 0.12066963911056519
epoch: 101 training_loss 0.1148119306191802 test_loss: 0.1379556655883789
epoch: 102 training_loss 0.10872305046766996 test_loss: 0.12553027868270875
epoch: 103 training_loss 0.12142166133970023 test_loss: 0.14603569507598876
epoch: 104 training_loss 0.11275206923484803 test_loss: 0.1318478465080261
epoch: 105 training_loss 0.12415415995754302 test_loss: 0.10950990915298461
epoch: 106 training_loss 0.12111672062426805 test_loss: 0.12315511703491211
epoch: 107 training_loss 0.12044083103537559 test_loss: 0.13910694122314454
epoch: 108 training_loss 0.11379397969692945 test_loss: 0.14055991172790527
epoch: 109 training_loss 0.12327802617102862 test_loss: 0.1185571312904358
epoch: 110 training_loss 0.11523833632469177 test_loss: 0.11387784481048584
epoch: 111 training_loss 0.11128720095381141 test_loss: 0.1410976767539978
epoch: 112 training_loss 0.11385133914649487 test_loss: 0.12539021968841552
epoch: 113 training_loss 0.11865652425214648 test_loss: 0.13112168312072753
epoch: 114 training_loss 0.11336748588830232 test_loss: 0.13320292234420777
epoch: 115 training_loss 0.11580118913203478 test_loss: 0.12431432008743286
epoch: 116 training_loss 0.11362345568835736 test_loss: 0.14496562480926514
epoch: 117 training_loss 0.1159338415414095 test_loss: 0.13053529262542723
epoch: 118 training_loss 0.11382090887054801 test_loss: 0.1223386287689209
epoch: 119 training_loss 0.10814060386270284 test_loss: 0.11402016878128052
epoch: 120 training_loss 0.11228886283934117 test_loss: 0.1281317114830017
epoch: 121 training_loss 0.11580027585849166 test_loss: 0.12305021286010742
epoch: 122 training_loss 0.11599860392510891 test_loss: 0.1355934262275696
epoch: 123 training_loss 0.10542765572667122 test_loss: 0.11355026960372924
epoch: 124 training_loss 0.11199165776371955 test_loss: 0.12244398593902588
epoch: 125 training_loss 0.11247297994792461 test_loss: 0.12502962350845337
epoch: 126 training_loss 0.11293556968681515 test_loss: 0.12992231845855712
epoch: 127 training_loss 0.11661692500114441 test_loss: 0.12384263277053834
epoch: 128 training_loss 0.11562690235674382 test_loss: 0.1396723985671997
epoch: 129 training_loss 0.12231070134788752 test_loss: 0.1331721782684326
epoch: 130 training_loss 0.12222024135291576 test_loss: 0.140969181060791
epoch: 131 training_loss 0.106237288787961 test_loss: 0.12067271471023559
epoch: 132 training_loss 0.1125842832773924 test_loss: 0.12495682239532471
epoch: 133 training_loss 0.11267251010984182 test_loss: 0.12693324089050292
epoch: 134 training_loss 0.10357737131416797 test_loss: 0.11149791479110718
epoch: 135 training_loss 0.11816825460642576 test_loss: 0.12256200313568115
epoch: 136 training_loss 0.11539845608174801 test_loss: 0.12374683618545532
epoch: 137 training_loss 0.10618522927165032 test_loss: 0.12611758708953857
epoch: 138 training_loss 0.11029832992702722 test_loss: 0.11630501747131347
epoch: 139 training_loss 0.1155364241078496 test_loss: 0.15585713386535643
epoch: 140 training_loss 0.11638575769960881 test_loss: 0.11780418157577514
epoch: 141 training_loss 0.10991253361105918 test_loss: 0.1295864462852478
epoch: 142 training_loss 0.11189152574166655 test_loss: 0.12425161600112915
epoch: 143 training_loss 0.12150140345096588 test_loss: 0.12098708152770996
epoch: 144 training_loss 0.10755828712135554 test_loss: 0.1155342698097229
epoch: 145 training_loss 0.10466159839183092 test_loss: 0.12760120630264282
epoch: 146 training_loss 0.11601203525438905 test_loss: 0.12632280588150024
epoch: 147 training_loss 0.11332446105778217 test_loss: 0.14321521520614625
epoch: 148 training_loss 0.11516946436837315 test_loss: 0.1101138710975647
epoch: 149 training_loss 0.112126789893955 test_loss: 0.1283918023109436
epoch: 0 training_loss 0.2803595139086246 test_loss: 0.20772006511688232
epoch: 1 training_loss 0.19128626644611357 test_loss: 0.1777014136314392
epoch: 2 training_loss 0.16239405736327173 test_loss: 0.20937955379486084
epoch: 3 training_loss 0.18371530465781688 test_loss: 0.14800117015838624
epoch: 4 training_loss 0.15007641553878784 test_loss: 0.1539464235305786
epoch: 5 training_loss 0.1388881392031908 test_loss: 0.15009547472000123
epoch: 6 training_loss 0.1450027247145772 test_loss: 0.1619080901145935
epoch: 7 training_loss 0.1372940331697464 test_loss: 0.13246691226959229
epoch: 8 training_loss 0.15017423905432226 test_loss: 0.15197463035583497
epoch: 9 training_loss 0.13126982174813748 test_loss: 0.13959652185440063
epoch: 10 training_loss 0.1408970445021987 test_loss: 0.1658525824546814
epoch: 11 training_loss 0.13002072600647807 test_loss: 0.11761807203292847
epoch: 12 training_loss 0.14290470123291016 test_loss: 0.1679708480834961
epoch: 13 training_loss 0.12313186019659042 test_loss: 0.16553802490234376
epoch: 14 training_loss 0.12347868733108043 test_loss: 0.14387843608856202
epoch: 15 training_loss 0.13608894515782594 test_loss: 0.12875871658325194
epoch: 16 training_loss 0.1361147914454341 test_loss: 0.14066851139068604
epoch: 17 training_loss 0.12473690796643495 test_loss: 0.14825164079666137
epoch: 18 training_loss 0.12290695708245039 test_loss: 0.14201301336288452
epoch: 19 training_loss 0.12346259493380785 test_loss: 0.12977346181869506
epoch: 20 training_loss 0.11978598881512881 test_loss: 0.12867821455001832
epoch: 21 training_loss 0.12737033385783433 test_loss: 0.13895221948623657
epoch: 22 training_loss 0.13423772901296616 test_loss: 0.15686386823654175
epoch: 23 training_loss 0.11708365738391877 test_loss: 0.1366463780403137
epoch: 24 training_loss 0.12254638221114873 test_loss: 0.13617841005325318
epoch: 25 training_loss 0.11967765677720309 test_loss: 0.12769789695739747
epoch: 26 training_loss 0.11656434446573258 test_loss: 0.15090653896331788
epoch: 27 training_loss 0.12112290807068347 test_loss: 0.13461883068084718
epoch: 28 training_loss 0.12883529383689166 test_loss: 0.13966789245605468
epoch: 29 training_loss 0.12373949136584997 test_loss: 0.15807281732559203
epoch: 30 training_loss 0.12196669884026051 test_loss: 0.12598196268081666
epoch: 31 training_loss 0.11640798464417458 test_loss: 0.14182919263839722
epoch: 32 training_loss 0.12248501285910607 test_loss: 0.14005123376846312
epoch: 33 training_loss 0.11752998881042004 test_loss: 0.13514678478240966
epoch: 34 training_loss 0.11268031232059002 test_loss: 0.13135812282562256
epoch: 35 training_loss 0.11041594009846449 test_loss: 0.12344647645950317
epoch: 36 training_loss 0.11886293223127722 test_loss: 0.11839009523391723
epoch: 37 training_loss 0.12062573241069913 test_loss: 0.13260118961334227
epoch: 38 training_loss 0.11777771545574069 test_loss: 0.13821948766708375
epoch: 39 training_loss 0.1232831932976842 test_loss: 0.13323291540145873
epoch: 40 training_loss 0.12075102124363184 test_loss: 0.11564682722091675
epoch: 41 training_loss 0.11273833755403757 test_loss: 0.12265406847000122
epoch: 42 training_loss 0.11660714820027351 test_loss: 0.13043259382247924
epoch: 43 training_loss 0.11750328283756971 test_loss: 0.1163330316543579
epoch: 44 training_loss 0.11434563729912042 test_loss: 0.1380828619003296
epoch: 45 training_loss 0.11899267898872495 test_loss: 0.1147836685180664
epoch: 46 training_loss 0.12771624613553287 test_loss: 0.1304178476333618
epoch: 47 training_loss 0.11555075760930776 test_loss: 0.14345812797546387
epoch: 48 training_loss 0.12424642749130727 test_loss: 0.11680816411972046
epoch: 49 training_loss 0.11675515279173851 test_loss: 0.13909035921096802
epoch: 50 training_loss 0.11673720613121986 test_loss: 0.12319231033325195
epoch: 51 training_loss 0.11757234193384647 test_loss: 0.1280733108520508
epoch: 52 training_loss 0.1263305201381445 test_loss: 0.1273068070411682
epoch: 53 training_loss 0.10628657406195999 test_loss: 0.12306462526321411
epoch: 54 training_loss 0.11283053111284971 test_loss: 0.12905097007751465
epoch: 55 training_loss 0.11928316500037908 test_loss: 0.13484485149383546
epoch: 56 training_loss 0.12635176800191403 test_loss: 0.12169531583786011
epoch: 57 training_loss 0.11730364359915256 test_loss: 0.10872811079025269
epoch: 58 training_loss 0.1284567124210298 test_loss: 0.12847261428833007
epoch: 59 training_loss 0.11338060153648258 test_loss: 0.1298962950706482
epoch: 60 training_loss 0.1177103496901691 test_loss: 0.1195152997970581
epoch: 61 training_loss 0.11957950558513403 test_loss: 0.12098047733306885
epoch: 62 training_loss 0.11862352803349495 test_loss: 0.12162700891494752
epoch: 63 training_loss 0.11721208333969116 test_loss: 0.1255293607711792
epoch: 64 training_loss 0.11459431843832135 test_loss: 0.13469756841659547
epoch: 65 training_loss 0.12607980214059353 test_loss: 0.12024692296981812
epoch: 66 training_loss 0.11448070069774985 test_loss: 0.13745061159133912
epoch: 67 training_loss 0.11714334949851037 test_loss: 0.12557307481765748
epoch: 68 training_loss 0.11588992688804865 test_loss: 0.11619076728820801
epoch: 69 training_loss 0.12560829930007458 test_loss: 0.115223228931427
epoch: 70 training_loss 0.11431493064388633 test_loss: 0.1292649507522583
epoch: 71 training_loss 0.10787106409668923 test_loss: 0.12336094379425049
epoch: 72 training_loss 0.12268960982561111 test_loss: 0.13094067573547363
epoch: 73 training_loss 0.11887776115909218 test_loss: 0.12510915994644164
epoch: 74 training_loss 0.11815210990607738 test_loss: 0.12425858974456787
epoch: 75 training_loss 0.11705730713903904 test_loss: 0.1449965000152588
epoch: 76 training_loss 0.13047308243811132 test_loss: 0.12841285467147828
epoch: 77 training_loss 0.11420565575361252 test_loss: 0.11899447441101074
epoch: 78 training_loss 0.11805430117994548 test_loss: 0.12444378137588501
epoch: 79 training_loss 0.11626605059951543 test_loss: 0.12966935634613036
epoch: 80 training_loss 0.11198359664529561 test_loss: 0.11851876974105835
epoch: 81 training_loss 0.11255466509610415 test_loss: 0.14482117891311647
epoch: 82 training_loss 0.11658927083015441 test_loss: 0.12769423723220824
epoch: 83 training_loss 0.11170644074678421 test_loss: 0.12340297698974609
epoch: 84 training_loss 0.11691065110266209 test_loss: 0.1375305771827698
epoch: 85 training_loss 0.11513103749603033 test_loss: 0.1450689196586609
epoch: 86 training_loss 0.11910670856013894 test_loss: 0.13315778970718384
epoch: 87 training_loss 0.1255748837813735 test_loss: 0.1141545295715332
epoch: 88 training_loss 0.11891210164874792 test_loss: 0.12396247386932373
epoch: 89 training_loss 0.11930420983582735 test_loss: 0.13058234453201295
epoch: 90 training_loss 0.11045891202986241 test_loss: 0.13337514400482178
epoch: 91 training_loss 0.11618165383115411 test_loss: 0.14137704372406007
epoch: 92 training_loss 0.11427509658038616 test_loss: 0.13935370445251466
epoch: 93 training_loss 0.11019710324704647 test_loss: 0.12307837009429931
epoch: 94 training_loss 0.1164806272648275 test_loss: 0.12467321157455444
epoch: 95 training_loss 0.11421666949987412 test_loss: 0.13043565750122071
epoch: 96 training_loss 0.11008686058223248 test_loss: 0.10888004302978516
epoch: 97 training_loss 0.11139838196337223 test_loss: 0.11970571279525757
epoch: 98 training_loss 0.1201392800733447 test_loss: 0.1386098623275757
epoch: 99 training_loss 0.1224632678180933 test_loss: 0.13231735229492186
epoch: 100 training_loss 0.12101624194532633 test_loss: 0.1212550163269043
epoch: 101 training_loss 0.12018482923507691 test_loss: 0.11898057460784912
epoch: 102 training_loss 0.11503618463873863 test_loss: 0.1404218316078186
epoch: 103 training_loss 0.11370096670463681 test_loss: 0.12239542007446289
epoch: 104 training_loss 0.11305875558406114 test_loss: 0.10950344800949097
epoch: 105 training_loss 0.12091334279626607 test_loss: 0.12885373830795288
epoch: 106 training_loss 0.11507404465228319 test_loss: 0.12155088186264038
epoch: 107 training_loss 0.11755923196673393 test_loss: 0.12334846258163452
epoch: 108 training_loss 0.11693989172577858 test_loss: 0.10760253667831421
epoch: 109 training_loss 0.1132697219774127 test_loss: 0.11238666772842407
epoch: 110 training_loss 0.10958643309772015 test_loss: 0.10761468410491944
epoch: 111 training_loss 0.11391582835465669 test_loss: 0.1361338257789612
epoch: 112 training_loss 0.10756575863808393 test_loss: 0.11944952011108398
epoch: 113 training_loss 0.10999331895262003 test_loss: 0.1261652946472168
epoch: 114 training_loss 0.11020734485238791 test_loss: 0.12773796319961547
epoch: 115 training_loss 0.11707085525617003 test_loss: 0.14342278242111206
epoch: 116 training_loss 0.10700114075094462 test_loss: 0.12468682527542115
epoch: 117 training_loss 0.1162086957320571 test_loss: 0.12971423864364623
epoch: 118 training_loss 0.10872871376574039 test_loss: 0.11865521669387817
epoch: 119 training_loss 0.10918171953409911 test_loss: 0.15366320610046386
epoch: 120 training_loss 0.11806892625987529 test_loss: 0.1381567597389221
epoch: 121 training_loss 0.10786504112184048 test_loss: 0.11439098119735717
epoch: 122 training_loss 0.11496794037520885 test_loss: 0.12283180952072144
epoch: 123 training_loss 0.11276936378329992 test_loss: 0.12761955261230468
epoch: 124 training_loss 0.10651905646547676 test_loss: 0.11805542707443237
epoch: 125 training_loss 0.12104876892641186 test_loss: 0.12841747999191283
epoch: 126 training_loss 0.1135248177498579 test_loss: 0.1247324824333191
epoch: 127 training_loss 0.10726966658607125 test_loss: 0.11469274759292603
epoch: 128 training_loss 0.11755607921630144 test_loss: 0.12997794151306152
epoch: 129 training_loss 0.11712930027395486 test_loss: 0.14653228521347045
epoch: 130 training_loss 0.11291064573451877 test_loss: 0.12422963380813598
epoch: 131 training_loss 0.11623268965631724 test_loss: 0.12270781993865967
epoch: 132 training_loss 0.11365671966224909 test_loss: 0.12887731790542603
epoch: 133 training_loss 0.11508328614756465 test_loss: 0.1345616340637207
epoch: 134 training_loss 0.10687075238674879 test_loss: 0.12678462266921997
epoch: 135 training_loss 0.1270342745259404 test_loss: 0.13197953701019288
epoch: 136 training_loss 0.11234789274632931 test_loss: 0.13470711708068847
epoch: 137 training_loss 0.11253548957407475 test_loss: 0.1313464641571045
epoch: 138 training_loss 0.11201781179755926 test_loss: 0.11553093194961547
epoch: 139 training_loss 0.11829199088737369 test_loss: 0.1196474552154541
epoch: 140 training_loss 0.11763710591942073 test_loss: 0.12241963148117066
epoch: 141 training_loss 0.11426541674882174 test_loss: 0.1127208113670349
epoch: 142 training_loss 0.11400307476520538 test_loss: 0.12251036167144776
epoch: 143 training_loss 0.1125920493900776 test_loss: 0.1323219656944275
epoch: 144 training_loss 0.11094902932643891 test_loss: 0.12846568822860718
epoch: 145 training_loss 0.10758398748934268 test_loss: 0.12189147472381592
epoch: 146 training_loss 0.1153658153116703 test_loss: 0.12045623064041137
epoch: 147 training_loss 0.11637382544577121 test_loss: 0.127392578125
epoch: 148 training_loss 0.12098437514156103 test_loss: 0.12971488237380982
epoch: 149 training_loss 0.11735044643282891 test_loss: 0.13339260816574097
epoch: 0 training_loss 0.28359052039682864 test_loss: 0.20806403160095216
epoch: 1 training_loss 0.17914845287799835 test_loss: 0.2019030809402466
epoch: 2 training_loss 0.15373416859656572 test_loss: 0.1724726915359497
epoch: 3 training_loss 0.1730747603625059 test_loss: 0.16161279678344725
epoch: 4 training_loss 0.15911727525293828 test_loss: 0.16060408353805541
epoch: 5 training_loss 0.14623588509857655 test_loss: 0.15710408687591554
epoch: 6 training_loss 0.14898884296417236 test_loss: 0.13308341503143312
epoch: 7 training_loss 0.1432228270173073 test_loss: 0.1481688380241394
epoch: 8 training_loss 0.1342810597643256 test_loss: 0.14746673107147218
epoch: 9 training_loss 0.13804810587316751 test_loss: 0.14275939464569093
epoch: 10 training_loss 0.13010567294433714 test_loss: 0.12513505220413207
epoch: 11 training_loss 0.12865977605804801 test_loss: 0.14940553903579712
epoch: 12 training_loss 0.12729827865958213 test_loss: 0.14545413255691528
epoch: 13 training_loss 0.13315662533044814 test_loss: 0.1569862961769104
epoch: 14 training_loss 0.13403527602553367 test_loss: 0.13486034870147706
epoch: 15 training_loss 0.1232285101339221 test_loss: 0.14352200031280518
epoch: 16 training_loss 0.12416358027607202 test_loss: 0.13717552423477172
epoch: 17 training_loss 0.12981476061046124 test_loss: 0.14080783128738403
epoch: 18 training_loss 0.1336719311401248 test_loss: 0.14237867593765258
epoch: 19 training_loss 0.12854230664670468 test_loss: 0.138972270488739
epoch: 20 training_loss 0.130549446195364 test_loss: 0.13053056001663207
epoch: 21 training_loss 0.12313400365412236 test_loss: 0.14011569023132325
epoch: 22 training_loss 0.11630116190761328 test_loss: 0.1329995274543762
epoch: 23 training_loss 0.12229742478579282 test_loss: 0.12420985698699952
epoch: 24 training_loss 0.12044754952192306 test_loss: 0.1370164155960083
epoch: 25 training_loss 0.12103498455137014 test_loss: 0.12111905813217164
epoch: 26 training_loss 0.11953564934432506 test_loss: 0.1210126519203186
epoch: 27 training_loss 0.1230625612847507 test_loss: 0.12199770212173462
epoch: 28 training_loss 0.12988661155104636 test_loss: 0.11979829072952271
epoch: 29 training_loss 0.12007661126554012 test_loss: 0.16286399364471435
epoch: 30 training_loss 0.11908915515989066 test_loss: 0.12450244426727294
epoch: 31 training_loss 0.12433328092098236 test_loss: 0.11349319219589234
epoch: 32 training_loss 0.12557816851884127 test_loss: 0.12595120668411255
epoch: 33 training_loss 0.12681500846520066 test_loss: 0.14440726041793822
epoch: 34 training_loss 0.12712804581969978 test_loss: 0.12509945631027222
epoch: 35 training_loss 0.12432941019535065 test_loss: 0.14207829236984254
epoch: 36 training_loss 0.12127835903316736 test_loss: 0.13509782552719116
epoch: 37 training_loss 0.11794185034930706 test_loss: 0.12676913738250734
epoch: 38 training_loss 0.12506719283759593 test_loss: 0.1299529790878296
epoch: 39 training_loss 0.11702069845050574 test_loss: 0.14591809511184692
epoch: 40 training_loss 0.11778486762195825 test_loss: 0.12987385988235473
epoch: 41 training_loss 0.11301718324422837 test_loss: 0.1135265588760376
epoch: 42 training_loss 0.11710959304124118 test_loss: 0.1385333776473999
epoch: 43 training_loss 0.11549729589372873 test_loss: 0.12945441007614136
epoch: 44 training_loss 0.11719762157648801 test_loss: 0.1295616626739502
epoch: 45 training_loss 0.12895503647625448 test_loss: 0.11594021320343018
epoch: 46 training_loss 0.11910381283611059 test_loss: 0.1355642318725586
epoch: 47 training_loss 0.11538713803514838 test_loss: 0.1318790078163147
epoch: 48 training_loss 0.11927247900515794 test_loss: 0.12434961795806884
epoch: 49 training_loss 0.11450284790247679 test_loss: 0.15163005590438844
epoch: 50 training_loss 0.12087256848812103 test_loss: 0.14659852981567384
epoch: 51 training_loss 0.11626288183033466 test_loss: 0.13098952770233155
epoch: 52 training_loss 0.11567719172686339 test_loss: 0.11034106016159058
epoch: 53 training_loss 0.12317207481712103 test_loss: 0.13046520948410034
epoch: 54 training_loss 0.1131575289182365 test_loss: 0.11411638259887695
epoch: 55 training_loss 0.12222370341420173 test_loss: 0.13930758237838745
epoch: 56 training_loss 0.11422221649438143 test_loss: 0.12349467277526856
epoch: 57 training_loss 0.11344245076179504 test_loss: 0.12576909065246583
epoch: 58 training_loss 0.11820464541204273 test_loss: 0.11272883415222168
epoch: 59 training_loss 0.115162708517164 test_loss: 0.14564206600189208
epoch: 60 training_loss 0.1148731978982687 test_loss: 0.13589086532592773
epoch: 61 training_loss 0.11800940867513418 test_loss: 0.13765746355056763
epoch: 62 training_loss 0.11748961109668016 test_loss: 0.12206809520721436
epoch: 63 training_loss 0.11619407813996077 test_loss: 0.1394164204597473
epoch: 64 training_loss 0.11423219598829747 test_loss: 0.13834036588668824
epoch: 65 training_loss 0.10978037603199482 test_loss: 0.11110868453979492
epoch: 66 training_loss 0.12116729523986577 test_loss: 0.13497005701065062
epoch: 67 training_loss 0.11685839291661977 test_loss: 0.13146051168441772
epoch: 68 training_loss 0.11257626112550496 test_loss: 0.14328501224517823
epoch: 69 training_loss 0.11115995284169912 test_loss: 0.12769242525100707
epoch: 70 training_loss 0.11775008458644151 test_loss: 0.1327337622642517
epoch: 71 training_loss 0.1203958173841238 test_loss: 0.12412818670272827
epoch: 72 training_loss 0.10815246023237705 test_loss: 0.1432529091835022
epoch: 73 training_loss 0.11920347608625889 test_loss: 0.13519834280014037
epoch: 74 training_loss 0.118611556366086 test_loss: 0.11966451406478881
epoch: 75 training_loss 0.11632502760738134 test_loss: 0.11714030504226684
epoch: 76 training_loss 0.11223664026707411 test_loss: 0.11688594818115235
epoch: 77 training_loss 0.10921913716942072 test_loss: 0.13763428926467897
epoch: 78 training_loss 0.10361039146780968 test_loss: 0.14683201313018798
epoch: 79 training_loss 0.11115388322621583 test_loss: 0.11079602241516114
epoch: 80 training_loss 0.1216028431802988 test_loss: 0.12487378120422363
epoch: 81 training_loss 0.11462046921253205 test_loss: 0.11409180164337158
epoch: 82 training_loss 0.11545176360756158 test_loss: 0.11356886625289916
epoch: 83 training_loss 0.12116494301706553 test_loss: 0.13173744678497315
epoch: 84 training_loss 0.114178219512105 test_loss: 0.11498560905456542
epoch: 85 training_loss 0.12257012967020273 test_loss: 0.11340751647949218
epoch: 86 training_loss 0.10315233455970883 test_loss: 0.1332203507423401
epoch: 87 training_loss 0.11411434274166822 test_loss: 0.12632731199264527
epoch: 88 training_loss 0.11724879585206509 test_loss: 0.1123661994934082
epoch: 89 training_loss 0.11646541096270084 test_loss: 0.10966686010360718
epoch: 90 training_loss 0.10108668295666576 test_loss: 0.13732969760894775
epoch: 91 training_loss 0.11398830279707908 test_loss: 0.13092190027236938
epoch: 92 training_loss 0.11595481077209115 test_loss: 0.1238824963569641
epoch: 93 training_loss 0.10743358749896288 test_loss: 0.1361055016517639
epoch: 94 training_loss 0.1285675670579076 test_loss: 0.1230273723602295
epoch: 95 training_loss 0.12593240242451428 test_loss: 0.13710477352142333
epoch: 96 training_loss 0.11364513665437698 test_loss: 0.13088586330413818
epoch: 97 training_loss 0.11294282589107751 test_loss: 0.106856369972229
epoch: 98 training_loss 0.11288947857916355 test_loss: 0.11613376140594482
epoch: 99 training_loss 0.10674380533397197 test_loss: 0.128928542137146
epoch: 100 training_loss 0.11017767049372196 test_loss: 0.15209052562713624
epoch: 101 training_loss 0.122771450355649 test_loss: 0.12775932550430297
epoch: 102 training_loss 0.11873845361173153 test_loss: 0.11107701063156128
epoch: 103 training_loss 0.11343440972268581 test_loss: 0.13621212244033815
epoch: 104 training_loss 0.1142563746124506 test_loss: 0.13231091499328612
epoch: 105 training_loss 0.11824881363660097 test_loss: 0.12769365310668945
epoch: 106 training_loss 0.1184197486564517 test_loss: 0.13276956081390381
epoch: 107 training_loss 0.11297446563839912 test_loss: 0.13143898248672486
epoch: 108 training_loss 0.11012013714760542 test_loss: 0.13063846826553344
epoch: 109 training_loss 0.119892648011446 test_loss: 0.1381392240524292
epoch: 110 training_loss 0.11601567008532583 test_loss: 0.11975024938583374
epoch: 111 training_loss 0.11036217026412487 test_loss: 0.1247740387916565
epoch: 112 training_loss 0.1168944289535284 test_loss: 0.11690678596496581
epoch: 113 training_loss 0.11196858629584312 test_loss: 0.13491348028182984
epoch: 114 training_loss 0.1201760072633624 test_loss: 0.11882256269454956
epoch: 115 training_loss 0.11600355062633753 test_loss: 0.11645904779434205
epoch: 116 training_loss 0.11722957246005535 test_loss: 0.13671523332595825
epoch: 117 training_loss 0.11642803752794861 test_loss: 0.13965870141983033
epoch: 118 training_loss 0.1095452181994915 test_loss: 0.13632242679595946
epoch: 119 training_loss 0.11267720490694046 test_loss: 0.12828879356384276
epoch: 120 training_loss 0.1189922184124589 test_loss: 0.11793956756591797
epoch: 121 training_loss 0.10133679453283548 test_loss: 0.12291580438613892
epoch: 122 training_loss 0.11294685086235404 test_loss: 0.1064754605293274
epoch: 123 training_loss 0.11448859490454197 test_loss: 0.1286543607711792
epoch: 124 training_loss 0.11045544486492873 test_loss: 0.11997904777526855
epoch: 125 training_loss 0.11226899525150656 test_loss: 0.12039464712142944
epoch: 126 training_loss 0.12210371434688568 test_loss: 0.1283273220062256
epoch: 127 training_loss 0.115069391541183 test_loss: 0.1436526894569397
epoch: 128 training_loss 0.11544793598353863 test_loss: 0.11907997131347656
epoch: 129 training_loss 0.11315767670050264 test_loss: 0.13129221200942992
epoch: 130 training_loss 0.11613377200439573 test_loss: 0.1285213351249695
epoch: 131 training_loss 0.11149500723928213 test_loss: 0.12021458148956299
epoch: 132 training_loss 0.10380480976775289 test_loss: 0.11654620170593262
epoch: 133 training_loss 0.111623666100204 test_loss: 0.12637988328933716
epoch: 134 training_loss 0.11204357147216797 test_loss: 0.1237145185470581
epoch: 135 training_loss 0.10737633913755416 test_loss: 0.11663614511489868
epoch: 136 training_loss 0.11514530804008245 test_loss: 0.12764995098114013
epoch: 137 training_loss 0.10746423970907927 test_loss: 0.13563787937164307
epoch: 138 training_loss 0.11728106014430523 test_loss: 0.13157219886779786
epoch: 139 training_loss 0.11183458583429456 test_loss: 0.12012779712677002
epoch: 140 training_loss 0.11094925776124001 test_loss: 0.14987678527832032
epoch: 141 training_loss 0.1122674897685647 test_loss: 0.12818851470947265
epoch: 142 training_loss 0.10935823187232017 test_loss: 0.12206135988235474
epoch: 143 training_loss 0.10966131642460823 test_loss: 0.124362051486969
epoch: 144 training_loss 0.1132994813285768 test_loss: 0.13174642324447633
epoch: 145 training_loss 0.11025257367640734 test_loss: 0.11523867845535278
epoch: 146 training_loss 0.1112012424133718 test_loss: 0.13186074495315553
epoch: 147 training_loss 0.10825309472158551 test_loss: 0.12364778518676758
epoch: 148 training_loss 0.10900618301704526 test_loss: 0.11795246601104736
epoch: 149 training_loss 0.11135318692773581 test_loss: 0.12311844825744629
epoch: 0 training_loss 0.2897667308151722 test_loss: 0.1890369772911072
epoch: 1 training_loss 0.1913009900599718 test_loss: 0.16861381530761718
epoch: 2 training_loss 0.18211555674672128 test_loss: 0.14913984537124633
epoch: 3 training_loss 0.1793167895823717 test_loss: 0.18209600448608398
epoch: 4 training_loss 0.16094698809087277 test_loss: 0.15212374925613403
epoch: 5 training_loss 0.15938316620886325 test_loss: 0.15679126977920532
epoch: 6 training_loss 0.15022034972906112 test_loss: 0.15524471998214723
epoch: 7 training_loss 0.1515955576300621 test_loss: 0.17243895530700684
epoch: 8 training_loss 0.14702399101108313 test_loss: 0.14564838409423828
epoch: 9 training_loss 0.13808989200741054 test_loss: 0.157622766494751
epoch: 10 training_loss 0.14981646664440632 test_loss: 0.15120829343795777
epoch: 11 training_loss 0.1345278749987483 test_loss: 0.1309548497200012
epoch: 12 training_loss 0.14410472586750983 test_loss: 0.1363752603530884
epoch: 13 training_loss 0.1189947796985507 test_loss: 0.13432310819625853
epoch: 14 training_loss 0.13280367735773324 test_loss: 0.1299477458000183
epoch: 15 training_loss 0.12300173837691546 test_loss: 0.15193814039230347
epoch: 16 training_loss 0.14266234260052443 test_loss: 0.13669838905334472
epoch: 17 training_loss 0.13452229134738444 test_loss: 0.14009463787078857
epoch: 18 training_loss 0.12867789782583713 test_loss: 0.1579538941383362
epoch: 19 training_loss 0.12312104500830173 test_loss: 0.14992066621780395
epoch: 20 training_loss 0.12471046358346939 test_loss: 0.15487134456634521
epoch: 21 training_loss 0.13440502185374498 test_loss: 0.13913687467575073
epoch: 22 training_loss 0.11836759854108095 test_loss: 0.14792277812957763
epoch: 23 training_loss 0.11996179977431894 test_loss: 0.11592098474502563
epoch: 24 training_loss 0.13206908674910664 test_loss: 0.1141932725906372
epoch: 25 training_loss 0.12858165878802538 test_loss: 0.1342068910598755
epoch: 26 training_loss 0.12895569356158376 test_loss: 0.13128993511199952
epoch: 27 training_loss 0.12178561236709357 test_loss: 0.1452265501022339
epoch: 28 training_loss 0.119131936468184 test_loss: 0.1068155288696289
epoch: 29 training_loss 0.12244459263980388 test_loss: 0.15207557678222655
epoch: 30 training_loss 0.12387331210076809 test_loss: 0.12867672443389894
epoch: 31 training_loss 0.1178733266890049 test_loss: 0.11665314435958862
epoch: 32 training_loss 0.1256104938313365 test_loss: 0.12939343452453614
epoch: 33 training_loss 0.12287635250017047 test_loss: 0.13461680412292482
epoch: 34 training_loss 0.11568189729005099 test_loss: 0.13016353845596312
epoch: 35 training_loss 0.12644724145531655 test_loss: 0.13746025562286376
epoch: 36 training_loss 0.12138772202655673 test_loss: 0.12706791162490844
epoch: 37 training_loss 0.12886107232421637 test_loss: 0.1263468623161316
epoch: 38 training_loss 0.12004253204911947 test_loss: 0.12792986631393433
epoch: 39 training_loss 0.11838103652000427 test_loss: 0.15819194316864013
epoch: 40 training_loss 0.12481016837060452 test_loss: 0.13339051008224487
epoch: 41 training_loss 0.12289786295965315 test_loss: 0.12792446613311767
epoch: 42 training_loss 0.12012250145897269 test_loss: 0.12079246044158935
epoch: 43 training_loss 0.11571988049894572 test_loss: 0.12219569683074952
epoch: 44 training_loss 0.12608633399009705 test_loss: 0.12031775712966919
epoch: 45 training_loss 0.11087222930043936 test_loss: 0.1367242693901062
epoch: 46 training_loss 0.12364252116531134 test_loss: 0.12572367191314698
epoch: 47 training_loss 0.12080536644905805 test_loss: 0.14301612377166747
epoch: 48 training_loss 0.11833757692947984 test_loss: 0.14138059616088866
epoch: 49 training_loss 0.12039734527468682 test_loss: 0.13414791822433472
epoch: 50 training_loss 0.11949477456510067 test_loss: 0.12289416790008545
epoch: 51 training_loss 0.1158885682746768 test_loss: 0.129373300075531
epoch: 52 training_loss 0.12293333925306797 test_loss: 0.12344385385513305
epoch: 53 training_loss 0.11880885016173125 test_loss: 0.12044934034347535
epoch: 54 training_loss 0.11882694367319345 test_loss: 0.12782769203186034
epoch: 55 training_loss 0.11719705998897552 test_loss: 0.13094716072082518
epoch: 56 training_loss 0.11528288945555687 test_loss: 0.13296031951904297
epoch: 57 training_loss 0.12104628205299378 test_loss: 0.13165291547775268
epoch: 58 training_loss 0.11023639377206564 test_loss: 0.13213006258010865
epoch: 59 training_loss 0.11869568895548582 test_loss: 0.140372371673584
epoch: 60 training_loss 0.12387468222528696 test_loss: 0.1358594298362732
epoch: 61 training_loss 0.11653210382908583 test_loss: 0.15159010887145996
epoch: 62 training_loss 0.11520356692373752 test_loss: 0.12639731168746948
epoch: 63 training_loss 0.1148342376947403 test_loss: 0.1349042296409607
epoch: 64 training_loss 0.11636621218174696 test_loss: 0.11990941762924194
epoch: 65 training_loss 0.11800425883382559 test_loss: 0.12417366504669189
epoch: 66 training_loss 0.11188642263412475 test_loss: 0.13928346633911132
epoch: 67 training_loss 0.1252239966765046 test_loss: 0.13237723112106323
epoch: 68 training_loss 0.11698272828012705 test_loss: 0.14171022176742554
epoch: 69 training_loss 0.11115620601922274 test_loss: 0.12911479473114013
epoch: 70 training_loss 0.1128686385601759 test_loss: 0.12571660280227662
epoch: 71 training_loss 0.11845794830471278 test_loss: 0.13680500984191896
epoch: 72 training_loss 0.11025285759940744 test_loss: 0.12839256525039672
epoch: 73 training_loss 0.11262771751731634 test_loss: 0.13095923662185668
epoch: 74 training_loss 0.11303770560771227 test_loss: 0.10578804016113282
epoch: 75 training_loss 0.11666667763143777 test_loss: 0.13111238479614257
epoch: 76 training_loss 0.11527055092155933 test_loss: 0.1381599187850952
epoch: 77 training_loss 0.11245085362344981 test_loss: 0.13242743015289307
epoch: 78 training_loss 0.11244160275906324 test_loss: 0.14256275892257692
epoch: 79 training_loss 0.11792100818827748 test_loss: 0.11982510089874268
epoch: 80 training_loss 0.10934170335531235 test_loss: 0.11892644166946412
epoch: 81 training_loss 0.11209575667977333 test_loss: 0.14191983938217162
epoch: 82 training_loss 0.11494014456868172 test_loss: 0.11926165819168091
epoch: 83 training_loss 0.1179670487344265 test_loss: 0.12529884576797484
epoch: 84 training_loss 0.1207802077755332 test_loss: 0.1335121989250183
epoch: 85 training_loss 0.12164095789194107 test_loss: 0.1351686954498291
epoch: 86 training_loss 0.11523109743371605 test_loss: 0.14165879487991334
epoch: 87 training_loss 0.11861795980483293 test_loss: 0.13728190660476686
epoch: 88 training_loss 0.12114194240421057 test_loss: 0.16402864456176758
epoch: 89 training_loss 0.11659101080149412 test_loss: 0.11407142877578735
epoch: 90 training_loss 0.12197039626538754 test_loss: 0.14078004360198976
epoch: 91 training_loss 0.11168088655918837 test_loss: 0.12750825881958008
epoch: 92 training_loss 0.11539945403113962 test_loss: 0.11042393445968628
epoch: 93 training_loss 0.11752810798585415 test_loss: 0.12109991312026977
epoch: 94 training_loss 0.11709263019263744 test_loss: 0.11991623640060425
epoch: 95 training_loss 0.11686978187412024 test_loss: 0.12815641164779662
epoch: 96 training_loss 0.11461221568286419 test_loss: 0.11644619703292847
epoch: 97 training_loss 0.11787761755287647 test_loss: 0.12349182367324829
epoch: 98 training_loss 0.11880873654037714 test_loss: 0.11580458879470826
epoch: 99 training_loss 0.11693798692896962 test_loss: 0.11780766248703003
epoch: 100 training_loss 0.11733375560492278 test_loss: 0.11865468025207519
epoch: 101 training_loss 0.12302003178745508 test_loss: 0.13902342319488525
epoch: 102 training_loss 0.11311425276100635 test_loss: 0.11000680923461914
epoch: 103 training_loss 0.11098082780838013 test_loss: 0.11955403089523316
epoch: 104 training_loss 0.11806137129664421 test_loss: 0.13040509223937988
epoch: 105 training_loss 0.11374216750264168 test_loss: 0.13293130397796632
epoch: 106 training_loss 0.11318641722202301 test_loss: 0.12996139526367187
epoch: 107 training_loss 0.11044071059674025 test_loss: 0.11958987712860107
epoch: 108 training_loss 0.11464977350085974 test_loss: 0.13332116603851318
epoch: 109 training_loss 0.11934759305790067 test_loss: 0.12458120584487915
epoch: 110 training_loss 0.11197286853566765 test_loss: 0.12073711156845093
epoch: 111 training_loss 0.11723557183519005 test_loss: 0.13959052562713622
epoch: 112 training_loss 0.11114544624462724 test_loss: 0.10675525665283203
epoch: 113 training_loss 0.11883236676454544 test_loss: 0.1297564387321472
epoch: 114 training_loss 0.10916822209954262 test_loss: 0.12809430360794066
epoch: 115 training_loss 0.1114766089618206 test_loss: 0.11171200275421142
epoch: 116 training_loss 0.10980453405529261 test_loss: 0.1240920901298523
epoch: 117 training_loss 0.1222905570268631 test_loss: 0.11936384439468384
epoch: 118 training_loss 0.11421964582055807 test_loss: 0.13730432987213134
epoch: 119 training_loss 0.10922156330198049 test_loss: 0.112386155128479
epoch: 120 training_loss 0.11431779060512781 test_loss: 0.12748394012451172
epoch: 121 training_loss 0.11651093076914548 test_loss: 0.1265074372291565
epoch: 122 training_loss 0.11064211491495371 test_loss: 0.12799060344696045
epoch: 123 training_loss 0.11677012596279383 test_loss: 0.1280719518661499
epoch: 124 training_loss 0.11392755245789886 test_loss: 0.11466569900512695
epoch: 125 training_loss 0.11216633388772607 test_loss: 0.1379926919937134
epoch: 126 training_loss 0.1112675098888576 test_loss: 0.12424169778823853
epoch: 127 training_loss 0.11259147699922323 test_loss: 0.10211024284362794
epoch: 128 training_loss 0.11117261512205005 test_loss: 0.1268715739250183
epoch: 129 training_loss 0.10550002822652459 test_loss: 0.11080876588821412
epoch: 130 training_loss 0.10977240145206452 test_loss: 0.1188174843788147
epoch: 131 training_loss 0.12063071500509977 test_loss: 0.13806384801864624
epoch: 132 training_loss 0.11659052085131406 test_loss: 0.11920696496963501
epoch: 133 training_loss 0.11989791030064226 test_loss: 0.12441202402114868
epoch: 134 training_loss 0.11519405420869588 test_loss: 0.12452502250671386
epoch: 135 training_loss 0.10911546602845192 test_loss: 0.12026731967926026
epoch: 136 training_loss 0.11205289976671338 test_loss: 0.11559407711029053
epoch: 137 training_loss 0.1107805935665965 test_loss: 0.1432901382446289
epoch: 138 training_loss 0.12138627041131259 test_loss: 0.13367947340011596
epoch: 139 training_loss 0.11918063953518868 test_loss: 0.13180805444717408
epoch: 140 training_loss 0.11890411423519254 test_loss: 0.10701123476028443
epoch: 141 training_loss 0.1143503700196743 test_loss: 0.13230885267257692
epoch: 142 training_loss 0.11910500958561897 test_loss: 0.15171914100646972
epoch: 143 training_loss 0.11055282164365053 test_loss: 0.11693024635314941
epoch: 144 training_loss 0.11274459652602672 test_loss: 0.11866557598114014
epoch: 145 training_loss 0.10548696227371693 test_loss: 0.1213563323020935
epoch: 146 training_loss 0.1142129822820425 test_loss: 0.12419886589050293
epoch: 147 training_loss 0.11173990778625012 test_loss: 0.13735792636871338
epoch: 148 training_loss 0.11281558971852064 test_loss: 0.14659675359725952
epoch: 149 training_loss 0.110431253220886 test_loss: 0.11348209381103516
episode: 0 training return: -1241.9724499585948
episode: 1 training return: -1089.278999900195
episode: 2 training return: -1142.0618677984535
episode: 3 training return: -1251.0151100883656
epoch: 1 test_true_pfm: 98.67794506650652 sim_pfm: -994.2474286043495
episode: 4 training return: -1142.9081680649122
episode: 5 training return: -1110.4738829453267
episode: 6 training return: -1326.2945799428308
episode: 7 training return: -1300.733779891964
epoch: 2 test_true_pfm: 81.20295047173768 sim_pfm: -998.7090174954852
episode: 8 training return: -1101.54773099138
episode: 9 training return: -1380.801410723442
episode: 10 training return: -1028.1744870855712
episode: 11 training return: -1061.5302296387165
epoch: 3 test_true_pfm: -12.639033768940967 sim_pfm: -991.9955045912596
episode: 12 training return: -1041.562624117413
episode: 13 training return: -1016.0908254254291
episode: 14 training return: -1015.6050883106748
episode: 15 training return: -1056.3953443303205
epoch: 4 test_true_pfm: -51.007735799157025 sim_pfm: -985.6271901149067
episode: 16 training return: -1020.3033707171984
episode: 17 training return: -1042.4572888240418
episode: 18 training return: -1118.565412631628
episode: 19 training return: -1010.8787244432709
epoch: 5 test_true_pfm: 3.3955183566862757 sim_pfm: -979.7091378447213
episode: 20 training return: -1010.8181797374821
episode: 21 training return: -1043.455789438058
episode: 22 training return: -1053.46535424259
episode: 23 training return: -1053.3116432604636
epoch: 6 test_true_pfm: -20.954213223423338 sim_pfm: -978.1567967321392
episode: 24 training return: -1021.9149715359606
episode: 25 training return: -1024.8092216031494
episode: 26 training return: -1077.5368515197642
episode: 27 training return: -1097.3386204339097
epoch: 7 test_true_pfm: -11.650222070980442 sim_pfm: -928.4916223692222
episode: 28 training return: -1041.1353988160133
episode: 29 training return: -1032.7742518813106
episode: 30 training return: -1016.8271639480168
episode: 31 training return: -1049.577582531891
epoch: 8 test_true_pfm: -40.60962840923665 sim_pfm: -939.2774916098873
episode: 32 training return: -998.4937387601577
episode: 33 training return: -1014.9708190726227
episode: 34 training return: -1006.3822116517039
episode: 35 training return: -1024.0111228277046
epoch: 9 test_true_pfm: -27.255568551429548 sim_pfm: -955.2938540384249
episode: 36 training return: -1018.0696831516415
episode: 37 training return: -1011.3984269736628
episode: 38 training return: -1017.5631823152297
episode: 39 training return: -1016.0448357884322
epoch: 10 test_true_pfm: -45.462273978018594 sim_pfm: -956.7709219167747
episode: 40 training return: -1017.6227689556263
episode: 41 training return: -1014.928999862886
episode: 42 training return: -1030.2600116945175
episode: 43 training return: -1003.2956558397475
epoch: 11 test_true_pfm: -50.87290064588672 sim_pfm: -960.5943059738835
episode: 44 training return: -1006.6176782077694
episode: 45 training return: -1002.9181964969858
episode: 46 training return: -1012.2565327515289
episode: 47 training return: -1012.9650803281071
epoch: 12 test_true_pfm: 88.87255277168192 sim_pfm: -975.5128114119833
episode: 48 training return: -1024.2832683799757
episode: 49 training return: -1003.3617911107015
episode: 50 training return: -1002.6663112280288
episode: 51 training return: -1012.3683489917621
epoch: 13 test_true_pfm: -36.882000613199644 sim_pfm: -984.8129158983766
episode: 52 training return: -998.0770104807524
episode: 53 training return: -996.2924969810529
episode: 54 training return: -1012.458481712854
episode: 55 training return: -1003.3449328964886
epoch: 14 test_true_pfm: -26.540852708017123 sim_pfm: -962.6128356022556
episode: 56 training return: -1008.3227420739706
episode: 57 training return: -1003.9322618280686
episode: 58 training return: -1011.7416061006841
episode: 59 training return: -996.5433164653163
epoch: 15 test_true_pfm: -2.598071146803718 sim_pfm: -986.0036986395677
episode: 60 training return: -1033.1274351965246
episode: 61 training return: -1015.9478112881501
episode: 62 training return: -995.2907067014951
episode: 63 training return: -1000.6019445320969
epoch: 16 test_true_pfm: 327.93414151520557 sim_pfm: -983.0767279163366
episode: 64 training return: -1013.1305011905114
episode: 65 training return: -1012.7167602511056
episode: 66 training return: -1016.7234376662409
episode: 67 training return: -1005.3720536719563
epoch: 17 test_true_pfm: 296.3218983309423 sim_pfm: -985.440598460717
episode: 68 training return: -1010.2405690592204
episode: 69 training return: -998.6273266314954
episode: 70 training return: -1014.1197241985378
episode: 71 training return: -1005.5900574317219
epoch: 18 test_true_pfm: 314.585919087524 sim_pfm: -983.9461054185252
episode: 72 training return: -1010.9794686337083
episode: 73 training return: -1000.7720829884456
episode: 74 training return: -1016.9196275326519
episode: 75 training return: -1030.8592752732354
epoch: 19 test_true_pfm: 137.07699136602363 sim_pfm: -991.8734506227851
episode: 76 training return: -1011.921511474989
episode: 77 training return: -989.6591214843108
episode: 78 training return: -1002.9420117839586
episode: 79 training return: -1007.448265818176
epoch: 20 test_true_pfm: -52.785805228669524 sim_pfm: -997.2692310437927
episode: 80 training return: -1002.1349972357036
episode: 81 training return: -992.5166815663583
episode: 82 training return: -996.3071773614392
episode: 83 training return: -995.8151199789748
epoch: 21 test_true_pfm: 179.96659704058825 sim_pfm: -995.092274650318
episode: 84 training return: -993.1749418803752
episode: 85 training return: -1002.3052708393855
episode: 86 training return: -1004.6888215098608
episode: 87 training return: -1007.3144068282833
epoch: 22 test_true_pfm: -85.40658693497858 sim_pfm: -996.6662321767958
episode: 88 training return: -998.7908772567405
episode: 89 training return: -1006.6112731930192
episode: 90 training return: -1003.9845430615986
episode: 91 training return: -999.1010075463602
epoch: 23 test_true_pfm: 207.4133525863509 sim_pfm: -992.1968564600735
episode: 92 training return: -1006.9856005090331
episode: 93 training return: -1008.3387341753565
episode: 94 training return: -994.0156924304885
episode: 95 training return: -1007.5395326923417
epoch: 24 test_true_pfm: 205.25057343619002 sim_pfm: -990.5455392472498
episode: 96 training return: -1002.8678989768056
episode: 97 training return: -1004.27622399196
episode: 98 training return: -996.7391280683877
episode: 99 training return: -1001.8689754454799
epoch: 25 test_true_pfm: 102.20735832758358 sim_pfm: -993.8606721095886
episode: 100 training return: -993.2910176604315
episode: 101 training return: -1002.2958959344381
episode: 102 training return: -1005.5240507705422
episode: 103 training return: -998.2708817375908
epoch: 26 test_true_pfm: 199.7164463246803 sim_pfm: -992.5350649688847
episode: 104 training return: -999.2542245922899
episode: 105 training return: -1004.557218611096
episode: 106 training return: -994.1767693161398
episode: 107 training return: -1002.9963158694296
epoch: 27 test_true_pfm: 182.33767288871988 sim_pfm: -991.7106584048247
episode: 108 training return: -987.3736253934962
episode: 109 training return: -999.5156469706234
episode: 110 training return: -1010.4434505440075
episode: 111 training return: -1005.6472586921777
epoch: 28 test_true_pfm: 99.38154173130465 sim_pfm: -988.7248128114647
episode: 112 training return: -1007.1951339938649
episode: 113 training return: -995.0810182154243
episode: 114 training return: -982.877665433255
episode: 115 training return: -999.0376977630631
epoch: 29 test_true_pfm: 40.922858013796876 sim_pfm: -986.9260853794823
episode: 116 training return: -1001.279224263957
episode: 117 training return: -996.9703455997281
episode: 118 training return: -1005.4938166000605
episode: 119 training return: -1000.4076337332245
epoch: 30 test_true_pfm: 73.90180849876317 sim_pfm: -984.9715574628132
episode: 120 training return: -999.9245488832329
episode: 121 training return: -1002.3178930731439
episode: 122 training return: -1002.5997933933197
episode: 123 training return: -1001.9952677992659
epoch: 31 test_true_pfm: 40.421403758248715 sim_pfm: -984.3235112243718
episode: 124 training return: -999.5411590920776
episode: 125 training return: -996.9786154063802
episode: 126 training return: -989.9403137999842
episode: 127 training return: -983.5931921567375
epoch: 32 test_true_pfm: 213.9501075468207 sim_pfm: -988.3969221589385
episode: 128 training return: -992.8431261979096
episode: 129 training return: -1007.2265040284151
episode: 130 training return: -997.7325078148971
episode: 131 training return: -1002.4264674116828
epoch: 33 test_true_pfm: 39.54992929853145 sim_pfm: -982.0241556447876
episode: 132 training return: -994.0844102652713
episode: 133 training return: -1004.7672602319167
episode: 134 training return: -998.398105960651
episode: 135 training return: -1003.4449133108448
epoch: 34 test_true_pfm: 29.852380326938903 sim_pfm: -1011.7023012396568
episode: 136 training return: -997.0998616725925
episode: 137 training return: -995.8562985001599
episode: 138 training return: -1002.1435639830942
episode: 139 training return: -991.0597404048357
epoch: 35 test_true_pfm: -23.789763912627503 sim_pfm: -999.8722151135595
episode: 140 training return: -1008.4826206013219
episode: 141 training return: -1005.8726123954027
episode: 142 training return: -989.8784057950838
episode: 143 training return: -1002.118503711421
epoch: 36 test_true_pfm: 28.51681595194819 sim_pfm: -1046.771519449103
episode: 144 training return: -996.8648245027141
episode: 145 training return: -1005.0593547675437
episode: 146 training return: -998.4868384230085
episode: 147 training return: -993.6728019388948
epoch: 37 test_true_pfm: -24.32044376324951 sim_pfm: -986.0180201353436
episode: 148 training return: -1005.6116253226088
episode: 149 training return: -997.7580999261345
episode: 150 training return: -999.0105440963622
episode: 151 training return: -995.5027047995544
epoch: 38 test_true_pfm: 51.50976578067975 sim_pfm: -1021.9711960157991
episode: 152 training return: -999.9566582000648
episode: 153 training return: -1003.5242285280748
episode: 154 training return: -1002.9713147604534
episode: 155 training return: -995.8965697387631
epoch: 39 test_true_pfm: 20.716018941705883 sim_pfm: -1020.5125753318775
episode: 156 training return: -983.6200298934226
episode: 157 training return: -997.9769520887473
episode: 158 training return: -988.4562261533107
episode: 159 training return: -993.4042212806191
epoch: 40 test_true_pfm: 76.43578430322219 sim_pfm: -981.5716512356197
episode: 160 training return: -1002.1636289085662
episode: 161 training return: -994.4129848037464
episode: 162 training return: -1000.4868917578618
episode: 163 training return: -977.0733648108144
epoch: 41 test_true_pfm: 39.39402990986491 sim_pfm: -984.4618435794204
episode: 164 training return: -997.7706595953953
episode: 165 training return: -992.2863714374605
episode: 166 training return: -999.5971743907681
episode: 167 training return: -982.0938153163078
epoch: 42 test_true_pfm: 26.435854481159655 sim_pfm: -997.3562039737877
episode: 168 training return: -998.5518103076467
episode: 169 training return: -997.385236120775
episode: 170 training return: -997.1454567097383
episode: 171 training return: -978.5223689094065
epoch: 43 test_true_pfm: 25.551133596437868 sim_pfm: -1008.0795738952328
episode: 172 training return: -997.4756105326537
episode: 173 training return: -999.6343291175297
episode: 174 training return: -982.8802638734388
episode: 175 training return: -993.3141372948598
epoch: 44 test_true_pfm: 1.9348334982481108 sim_pfm: -983.0229437385988
episode: 176 training return: -1002.7207277119645
episode: 177 training return: -1006.9328757034144
episode: 178 training return: -997.6287161299382
episode: 179 training return: -960.5484516963793
epoch: 45 test_true_pfm: 22.538814255808933 sim_pfm: -1032.0516682448406
episode: 180 training return: -1006.0926957757191
episode: 181 training return: -1005.6145691612936
episode: 182 training return: -998.9022310730691
episode: 183 training return: -998.0793827302182
epoch: 46 test_true_pfm: 12.857394911003894 sim_pfm: -980.9493009107499
episode: 184 training return: -996.0941025409593
episode: 185 training return: -990.2681982265096
episode: 186 training return: -977.2004513263264
episode: 187 training return: -984.653902315034
epoch: 47 test_true_pfm: 17.276804461958985 sim_pfm: -1004.336907973839
episode: 188 training return: -991.9842764593822
episode: 189 training return: -982.3425238451285
episode: 190 training return: -973.3951391802188
episode: 191 training return: -976.7646089386743
epoch: 48 test_true_pfm: 30.05427010349086 sim_pfm: -990.8263118659714
episode: 192 training return: -996.1297274100309
episode: 193 training return: -997.6052091527886
episode: 194 training return: -1001.5685799805287
episode: 195 training return: -992.6708999595543
epoch: 49 test_true_pfm: 23.07192496833891 sim_pfm: -986.9363121091437
episode: 196 training return: -995.3346181534847
episode: 197 training return: -989.0664523630007
episode: 198 training return: -991.0294877594224
episode: 199 training return: -1011.1093985325754
epoch: 50 test_true_pfm: 39.632597020939095 sim_pfm: -985.0698100681915
episode: 200 training return: -997.2709122007874
episode: 201 training return: -995.7082082335878
episode: 202 training return: -1000.1908276267752
episode: 203 training return: -994.6760656792561
epoch: 51 test_true_pfm: 64.709354250907 sim_pfm: -985.040000892761
episode: 204 training return: -993.1948749358589
episode: 205 training return: -998.8968792654478
episode: 206 training return: -983.6285708221175
episode: 207 training return: -1002.3437616889212
epoch: 52 test_true_pfm: 27.005907465283247 sim_pfm: -981.2889191154637
episode: 208 training return: -999.2628860127463
episode: 209 training return: -999.5612024992919
episode: 210 training return: -1008.1377776893362
episode: 211 training return: -993.3918779851999
epoch: 53 test_true_pfm: 38.717745413259856 sim_pfm: -951.5930085802578
episode: 212 training return: -983.3340079387915
episode: 213 training return: -998.5061954269314
episode: 214 training return: -1008.96441619797
episode: 215 training return: -984.6380849743681
epoch: 54 test_true_pfm: 100.95819208244745 sim_pfm: -974.3521301435144
episode: 216 training return: -992.3308554756301
episode: 217 training return: -994.8343499325014
episode: 218 training return: -995.0025015553105
episode: 219 training return: -996.1510780349049
epoch: 55 test_true_pfm: 48.80917369136023 sim_pfm: -989.2774697835627
episode: 220 training return: -1001.3011082506202
episode: 221 training return: -1000.9612209346977
episode: 222 training return: -978.7912687049048
episode: 223 training return: -982.3841371952961
epoch: 56 test_true_pfm: 31.13340279967285 sim_pfm: -1021.4349992539607
episode: 224 training return: -999.5623010576289
episode: 225 training return: -1003.6791591199265
episode: 226 training return: -1003.1472394451391
episode: 227 training return: -996.7698943277943
epoch: 57 test_true_pfm: 81.4518557889117 sim_pfm: -972.1070583461704
episode: 228 training return: -996.4960418363273
episode: 229 training return: -967.8025449639249
episode: 230 training return: -1006.8928941813103
episode: 231 training return: -994.1417288456954
epoch: 58 test_true_pfm: 70.41225365377962 sim_pfm: -973.7139396801557
episode: 232 training return: -987.3556189207926
episode: 233 training return: -990.5020435632787
episode: 234 training return: -987.2802912979619
episode: 235 training return: -995.7978895423013
epoch: 59 test_true_pfm: 93.31463636543715 sim_pfm: -980.8722508094612
episode: 236 training return: -976.5259983617846
episode: 237 training return: -998.4290311838821
episode: 238 training return: -973.2665865350368
episode: 239 training return: -998.3016675007444
epoch: 60 test_true_pfm: 157.49987677253182 sim_pfm: -956.7331512616903
episode: 240 training return: -997.5462476402439
episode: 241 training return: -990.0968555470126
episode: 242 training return: -989.4540454977238
episode: 243 training return: -993.7946460382233
epoch: 61 test_true_pfm: 56.11521193532915 sim_pfm: -966.3849665437571
episode: 244 training return: -960.593238374131
episode: 245 training return: -979.9070170608976
episode: 246 training return: -990.9134892598456
episode: 247 training return: -982.7557184695073
epoch: 62 test_true_pfm: 44.362225203636854 sim_pfm: -962.0303128575828
episode: 248 training return: -962.748178177591
episode: 249 training return: -1039.216050796578
episode: 250 training return: -985.2625373199525
episode: 251 training return: -991.9100126611512
epoch: 63 test_true_pfm: 90.76518515293434 sim_pfm: -906.3229544608911
episode: 252 training return: -992.3498977168446
episode: 253 training return: -992.3092315669355
episode: 254 training return: -990.2902256744783
episode: 255 training return: -991.0467184021485
epoch: 64 test_true_pfm: 43.364344421111845 sim_pfm: -976.4841238160049
episode: 256 training return: -992.5732930652028
episode: 257 training return: -972.8465065685801
episode: 258 training return: -1000.5662567919373
episode: 259 training return: -975.5915216025782
epoch: 65 test_true_pfm: -21.188049607795715 sim_pfm: -975.6560639211457
episode: 260 training return: -992.663417919648
episode: 261 training return: -989.9534479439055
episode: 262 training return: -987.9285763647665
episode: 263 training return: -995.246834333853
epoch: 66 test_true_pfm: 96.6195093023053 sim_pfm: -982.6881806284497
episode: 264 training return: -995.1452286949842
episode: 265 training return: -985.4676986874008
episode: 266 training return: -995.8552715052566
episode: 267 training return: -995.0018740945204
epoch: 67 test_true_pfm: 81.75812479779255 sim_pfm: -978.4272498748114
episode: 268 training return: -977.9706233468696
episode: 269 training return: -982.8142068530012
episode: 270 training return: -972.5479468009174
episode: 271 training return: -973.8847241402016
epoch: 68 test_true_pfm: 26.862286149061404 sim_pfm: -983.5430928466914
episode: 272 training return: -992.6208873707506
episode: 273 training return: -993.6143239794638
episode: 274 training return: -989.2547277248538
episode: 275 training return: -986.5855023505862
epoch: 69 test_true_pfm: -12.627174182995029 sim_pfm: -975.8168450315234
episode: 276 training return: -984.0729725760654
episode: 277 training return: -988.0885943071347
episode: 278 training return: -969.2718876295077
episode: 279 training return: -994.1723663938677
epoch: 70 test_true_pfm: 162.37449977793722 sim_pfm: -949.408527372656
episode: 280 training return: -968.3613590097372
episode: 281 training return: -995.5157333455918
episode: 282 training return: -998.0290683794959
episode: 283 training return: -999.3699432316607
epoch: 71 test_true_pfm: 26.573117684138566 sim_pfm: -917.225834632226
episode: 284 training return: -956.9177338198405
episode: 285 training return: -991.5217084118875
episode: 286 training return: -995.5315394596921
episode: 287 training return: -990.3756726717926
epoch: 72 test_true_pfm: 115.95175662602274 sim_pfm: -964.3908149794348
episode: 288 training return: -986.2355251231819
episode: 289 training return: -1002.1392415148146
episode: 290 training return: -977.9740396022435
episode: 291 training return: -987.3405293502149
epoch: 73 test_true_pfm: 66.4094435418125 sim_pfm: -982.2642147108107
episode: 292 training return: -1001.4638704786901
episode: 293 training return: -969.0628149992605
episode: 294 training return: -985.7817481039124
episode: 295 training return: -995.5670218423913
epoch: 74 test_true_pfm: 110.51548268025051 sim_pfm: -973.919269912563
episode: 296 training return: -991.9790572592472
episode: 297 training return: -997.648051551737
episode: 298 training return: -982.6083431065848
episode: 299 training return: -991.4134024450856
epoch: 75 test_true_pfm: 107.72277665696105 sim_pfm: -977.2684377805773
episode: 300 training return: -980.0113141947265
episode: 301 training return: -996.8007931706976
episode: 302 training return: -970.6402366482805
episode: 303 training return: -990.9261448151668
epoch: 76 test_true_pfm: -8.250950784680837 sim_pfm: -963.0286284189842
episode: 304 training return: -966.2809531097331
episode: 305 training return: -983.1813546874349
episode: 306 training return: -946.0353543244029
episode: 307 training return: -994.8252680665137
epoch: 77 test_true_pfm: 39.17055579566199 sim_pfm: -977.7615654486312
episode: 308 training return: -992.1819180186748
episode: 309 training return: -983.1315442265742
episode: 310 training return: -991.8714995262964
episode: 311 training return: -994.9704194887546
epoch: 78 test_true_pfm: 4.841675915359862 sim_pfm: -961.4426984598626
episode: 312 training return: -982.4718043987906
episode: 313 training return: -999.6970792589641
episode: 314 training return: -971.496948235768
episode: 315 training return: -972.57502591022
epoch: 79 test_true_pfm: 41.696509950877974 sim_pfm: -970.2142527359151
episode: 316 training return: -993.1522431934369
episode: 317 training return: -982.6257829876863
episode: 318 training return: -997.1474980414125
episode: 319 training return: -994.2040429796012
epoch: 80 test_true_pfm: 89.57646191589008 sim_pfm: -981.3370510592734
episode: 320 training return: -997.677052169036
episode: 321 training return: -969.2876320504574
episode: 322 training return: -946.3512385621963
episode: 323 training return: -1004.0205327710019
epoch: 81 test_true_pfm: 35.88478210242865 sim_pfm: -1015.2401696791802
episode: 324 training return: -980.6774806599982
episode: 325 training return: -962.0262855448799
episode: 326 training return: -995.0537322741511
episode: 327 training return: -976.7703884067029
epoch: 82 test_true_pfm: -39.191033461543725 sim_pfm: -974.1305542914341
episode: 328 training return: -987.0555429984757
episode: 329 training return: -984.2833837633088
episode: 330 training return: -979.8193188617587
episode: 331 training return: -991.7361510547172
epoch: 83 test_true_pfm: -21.625405579309938 sim_pfm: -985.0821977246511
episode: 332 training return: -996.2602635494608
episode: 333 training return: -996.1312704356144
episode: 334 training return: -999.8438988919977
episode: 335 training return: -993.458791804841
epoch: 84 test_true_pfm: -7.57853638577069 sim_pfm: -964.8987201803548
episode: 336 training return: -952.5531834744086
episode: 337 training return: -966.0503191145165
episode: 338 training return: -996.7749971199988
episode: 339 training return: -972.6419349107161
epoch: 85 test_true_pfm: 54.32571709587558 sim_pfm: -952.024339569882
episode: 340 training return: -997.1011602228753
episode: 341 training return: -984.5963546920002
episode: 342 training return: -947.2029269875226
episode: 343 training return: -970.210724144396
epoch: 86 test_true_pfm: 47.02387179989834 sim_pfm: -932.3752911490336
episode: 344 training return: -966.6891555703451
episode: 345 training return: -988.5576789472144
episode: 346 training return: -962.4034182996827
episode: 347 training return: -994.9088492000953
epoch: 87 test_true_pfm: 70.2896779740294 sim_pfm: -990.8668596173267
episode: 348 training return: -969.2490397089425
episode: 349 training return: -978.1460600722697
episode: 350 training return: -985.7809491209267
episode: 351 training return: -1001.1630850683406
epoch: 88 test_true_pfm: 49.911706805655136 sim_pfm: -969.8081238014056
episode: 352 training return: -983.0385568158789
episode: 353 training return: -993.4839580390585
episode: 354 training return: -962.9257840613327
episode: 355 training return: -961.8718074361951
epoch: 89 test_true_pfm: 34.788128005248744 sim_pfm: -1010.8264190553635
episode: 356 training return: -994.1929325615046
episode: 357 training return: -995.334133863626
episode: 358 training return: -951.008523094251
episode: 359 training return: -977.9748320263875
epoch: 90 test_true_pfm: -68.30055314120834 sim_pfm: -980.4018630257959
episode: 360 training return: -994.242340908969
episode: 361 training return: -996.9473866189416
episode: 362 training return: -998.4092312960926
episode: 363 training return: -987.4093225082643
epoch: 91 test_true_pfm: 43.086592779719545 sim_pfm: -1015.7137449049293
episode: 364 training return: -990.829443574041
episode: 365 training return: -983.9768135987836
episode: 366 training return: -980.8181514395858
episode: 367 training return: -981.0140440777877
epoch: 92 test_true_pfm: 24.334042223755123 sim_pfm: -1041.5757611067972
episode: 368 training return: -989.1448466420303
episode: 369 training return: -959.8283944887849
episode: 370 training return: -979.6213062641498
episode: 371 training return: -977.8684344413103
epoch: 93 test_true_pfm: 140.78297220895277 sim_pfm: -899.9072096365104
episode: 372 training return: -991.5777489836421
episode: 373 training return: -994.9088758160096
episode: 374 training return: -989.4875103096286
episode: 375 training return: -975.5750655151347
epoch: 94 test_true_pfm: 51.72405350336549 sim_pfm: -974.0600815348826
episode: 376 training return: -969.7234210827895
episode: 377 training return: -996.7934543739293
episode: 378 training return: -962.1135701927574
episode: 379 training return: -999.6975193208169
epoch: 95 test_true_pfm: 104.63286892684441 sim_pfm: -959.6750403637853
episode: 380 training return: -984.3996864583191
episode: 381 training return: -991.1236653576666
episode: 382 training return: -996.1093721614859
episode: 383 training return: -957.0310052350167
epoch: 96 test_true_pfm: 44.346501193142494 sim_pfm: -982.0518921982867
episode: 384 training return: -990.5921657455535
episode: 385 training return: -999.6115958711976
episode: 386 training return: -986.9138301163889
episode: 387 training return: -989.7426815364815
epoch: 97 test_true_pfm: 71.89142438376801 sim_pfm: -953.2285712391032
episode: 388 training return: -979.7377487883363
episode: 389 training return: -986.6417531214121
episode: 390 training return: -985.4019965127036
episode: 391 training return: -1004.9384412755942
epoch: 98 test_true_pfm: 35.128806132581104 sim_pfm: -945.7696793367608
episode: 392 training return: -977.9248599932297
episode: 393 training return: -995.1364151058225
episode: 394 training return: -996.7266956601001
episode: 395 training return: -970.7539690170902
epoch: 99 test_true_pfm: 50.60553001437884 sim_pfm: -978.0935321561025
episode: 396 training return: -1003.6988781664695
episode: 397 training return: -997.4974518830542
episode: 398 training return: -993.410976725702
episode: 399 training return: -993.6230514503189
epoch: 100 test_true_pfm: 62.62357209991877 sim_pfm: -960.4408464101331
episode: 400 training return: -969.450181423816
episode: 401 training return: -981.5357383356296
episode: 402 training return: -983.1961202829617
episode: 403 training return: -971.9257516807521
epoch: 101 test_true_pfm: 44.98612387698039 sim_pfm: -973.4096897180817
episode: 404 training return: -997.8971770202369
episode: 405 training return: -989.3252524112877
episode: 406 training return: -953.708338673581
episode: 407 training return: -974.8367985219041
epoch: 102 test_true_pfm: -12.230129025859561 sim_pfm: -1021.6565712230228
episode: 408 training return: -992.0151572368022
episode: 409 training return: -988.9437748521158
episode: 410 training return: -992.901867992551
episode: 411 training return: -980.9435508940829
epoch: 103 test_true_pfm: 73.12146966676005 sim_pfm: -976.8305187525726
episode: 412 training return: -957.429079212956
episode: 413 training return: -985.4298009058394
episode: 414 training return: -1002.4144648539278
episode: 415 training return: -980.2221456989204
epoch: 104 test_true_pfm: 9.976221154448917 sim_pfm: -963.6276225216735
episode: 416 training return: -987.5228639809794
episode: 417 training return: -975.4834030326401
episode: 418 training return: -994.5797935082627
episode: 419 training return: -978.9065260668024
epoch: 105 test_true_pfm: 191.66826990200335 sim_pfm: -965.1200740325106
episode: 420 training return: -982.3612606664005
episode: 421 training return: -978.7292633101483
episode: 422 training return: -992.3785623220064
episode: 423 training return: -992.4244623145377
epoch: 106 test_true_pfm: 53.78904611663023 sim_pfm: -975.2098120787778
episode: 424 training return: -997.5811060952984
episode: 425 training return: -991.1609968205005
episode: 426 training return: -977.3651436270922
episode: 427 training return: -988.6526728810823
epoch: 107 test_true_pfm: 88.6718942469111 sim_pfm: -985.3123338804735
episode: 428 training return: -1000.9796167333349
episode: 429 training return: -994.436061238267
episode: 430 training return: -973.3356306226323
episode: 431 training return: -996.1446559399947
epoch: 108 test_true_pfm: 70.4628498387849 sim_pfm: -970.1569639283472
episode: 432 training return: -990.521363365794
episode: 433 training return: -992.0125277572602
episode: 434 training return: -944.7551607805092
episode: 435 training return: -981.852882258118
epoch: 109 test_true_pfm: 36.25094196037916 sim_pfm: -981.0181642254273
episode: 436 training return: -990.5865715687964
episode: 437 training return: -990.881553115186
episode: 438 training return: -996.9066588352395
episode: 439 training return: -993.84668230199
epoch: 110 test_true_pfm: 36.37913585081606 sim_pfm: -1024.0453037262523
episode: 440 training return: -983.906359281295
episode: 441 training return: -994.3178450104408
episode: 442 training return: -975.4676638821829
episode: 443 training return: -1027.439109017128
epoch: 111 test_true_pfm: 53.79295200700773 sim_pfm: -963.1154079754357
episode: 444 training return: -973.8918558912502
episode: 445 training return: -998.1692744467658
episode: 446 training return: -993.7117124927594
episode: 447 training return: -989.1541279185176
epoch: 112 test_true_pfm: 57.06034768489815 sim_pfm: -978.8253335833019
episode: 448 training return: -986.3593826619614
episode: 449 training return: -965.0822197334571
episode: 450 training return: -985.321549147446
episode: 451 training return: -999.4420519683332
epoch: 113 test_true_pfm: 125.05275494723226 sim_pfm: -958.4199178440012
episode: 452 training return: -977.5683082509646
episode: 453 training return: -946.8844737801571
episode: 454 training return: -1002.3032027083927
episode: 455 training return: -966.2558760348248
epoch: 114 test_true_pfm: 121.60116733490007 sim_pfm: -976.6496464425936
episode: 456 training return: -966.7307861212363
episode: 457 training return: -999.3722335987121
episode: 458 training return: -984.0976474496351
episode: 459 training return: -991.4895744579477
epoch: 115 test_true_pfm: 35.40449035678714 sim_pfm: -1042.4913564666952
episode: 460 training return: -986.2781231348338
episode: 461 training return: -922.0751063190457
episode: 462 training return: -990.3049213005253
episode: 463 training return: -978.5454505909122
epoch: 116 test_true_pfm: 32.561776975737075 sim_pfm: -1032.6576987350834
episode: 464 training return: -993.3852259772189
episode: 465 training return: -999.0270767890996
episode: 466 training return: -993.8217894848514
episode: 467 training return: -997.504361677537
epoch: 117 test_true_pfm: 78.92695113554096 sim_pfm: -861.9830103417767
episode: 468 training return: -995.9300175571108
episode: 469 training return: -1001.2621847365978
episode: 470 training return: -992.4600212050185
episode: 471 training return: -963.9600487131363
epoch: 118 test_true_pfm: 120.55928346346313 sim_pfm: -964.5461215650415
episode: 472 training return: -998.6296315309977
episode: 473 training return: -966.2228373718754
episode: 474 training return: -986.662377193436
episode: 475 training return: -984.725627743609
epoch: 119 test_true_pfm: 195.76177105801818 sim_pfm: -968.5004063036253
episode: 476 training return: -986.0085181544722
episode: 477 training return: -989.5868313298544
episode: 478 training return: -930.7696412386375
episode: 479 training return: -950.1630578799999
epoch: 120 test_true_pfm: 126.80030666028959 sim_pfm: -1003.7401462036083
episode: 480 training return: -920.8923501457515
episode: 481 training return: -960.8192917972376
episode: 482 training return: -937.8537026362822
episode: 483 training return: -991.7118435180233
epoch: 121 test_true_pfm: 32.07719456545454 sim_pfm: -992.5772752476202
episode: 484 training return: -991.8343267465366
episode: 485 training return: -989.1784589561925
episode: 486 training return: -954.3364329070774
episode: 487 training return: -988.2498622545131
epoch: 122 test_true_pfm: 203.98634788692422 sim_pfm: -959.8840600690497
episode: 488 training return: -986.8017027190347
episode: 489 training return: -989.5643548785432
episode: 490 training return: -995.5485589873244
episode: 491 training return: -971.662555934856
epoch: 123 test_true_pfm: 92.1823916905651 sim_pfm: -993.268841035136
episode: 492 training return: -977.2436079577687
episode: 493 training return: -977.9253201067139
episode: 494 training return: -978.2196188219878
episode: 495 training return: -988.6241239474524
epoch: 124 test_true_pfm: 174.53227364979182 sim_pfm: -981.0564545422311
episode: 496 training return: -996.0234519773038
episode: 497 training return: -892.9885308222376
episode: 498 training return: -954.830934214209
episode: 499 training return: -996.7752462572269
epoch: 125 test_true_pfm: 130.01427316979127 sim_pfm: -963.9892993422473
episode: 500 training return: -981.4765301303249
episode: 501 training return: -939.7867970899534
episode: 502 training return: -998.4811571318726
episode: 503 training return: -988.7234653952621
epoch: 126 test_true_pfm: 152.05249211292747 sim_pfm: -931.4404827221307
episode: 504 training return: -982.5405499900928
episode: 505 training return: -926.4635427431615
episode: 506 training return: -1001.6766689496595
episode: 507 training return: -998.0898918832887
epoch: 127 test_true_pfm: 116.45897072080146 sim_pfm: -898.2095482475711
episode: 508 training return: -983.6400247660082
episode: 509 training return: -986.9770734455609
episode: 510 training return: -996.8354061462144
episode: 511 training return: -985.4693097494076
epoch: 128 test_true_pfm: 187.7051001973223 sim_pfm: -993.2631536036155
episode: 512 training return: -989.0468963137731
episode: 513 training return: -920.2700834459185
episode: 514 training return: -976.7109419920905
episode: 515 training return: -991.7014226810326
epoch: 129 test_true_pfm: 188.14108045643897 sim_pfm: -909.7339679439796
episode: 516 training return: -1000.9871398568123
episode: 517 training return: -918.3760759806904
episode: 518 training return: -960.3997391115323
episode: 519 training return: -982.2628558715015
epoch: 130 test_true_pfm: 157.70648832948274 sim_pfm: -958.5182980992962
episode: 520 training return: -978.4979077136248
episode: 521 training return: -973.9633565607844
episode: 522 training return: -972.6008523588612
episode: 523 training return: -993.5794284643101
epoch: 131 test_true_pfm: 291.387406191326 sim_pfm: -885.4715707634513
episode: 524 training return: -982.3887373290472
episode: 525 training return: -955.9872488306547
episode: 526 training return: -960.2785517592961
episode: 527 training return: -990.1034447123163
epoch: 132 test_true_pfm: 178.37372607858902 sim_pfm: -920.935445165359
episode: 528 training return: -974.0304934548637
episode: 529 training return: -978.7070859534027
episode: 530 training return: -988.9728079812559
episode: 531 training return: -967.3414470117242
epoch: 133 test_true_pfm: 79.30103522597118 sim_pfm: -966.1493380980829
episode: 532 training return: -959.0654754551197
episode: 533 training return: -981.3559932353569
episode: 534 training return: -996.562411883451
episode: 535 training return: -991.3254729438371
epoch: 134 test_true_pfm: 151.6324672764532 sim_pfm: -963.2423596964289
episode: 536 training return: -991.4131877897056
episode: 537 training return: -983.7816621415434
episode: 538 training return: -1000.9262585249023
episode: 539 training return: -990.7202031568953
epoch: 135 test_true_pfm: 90.22864825367277 sim_pfm: -1010.1428071100391
episode: 540 training return: -937.0890926009774
episode: 541 training return: -955.8942932906368
episode: 542 training return: -969.0438819338925
episode: 543 training return: -961.6842818675221
epoch: 136 test_true_pfm: 264.2141607387211 sim_pfm: -910.4364780197883
episode: 544 training return: -975.2485888136835
episode: 545 training return: -979.1010526693348
episode: 546 training return: -920.0056802976884
episode: 547 training return: -982.2633496619206
epoch: 137 test_true_pfm: 44.97517418267346 sim_pfm: -1008.6366147496328
episode: 548 training return: -982.3217471423296
episode: 549 training return: -989.8016186240899
episode: 550 training return: -994.687651848274
episode: 551 training return: -976.9414936955897
epoch: 138 test_true_pfm: 195.9989012673384 sim_pfm: -968.002211402824
episode: 552 training return: -959.7342533776422
episode: 553 training return: -987.7588221961636
episode: 554 training return: -969.6344209014054
episode: 555 training return: -989.820359217671
epoch: 139 test_true_pfm: 37.371337672887506 sim_pfm: -1055.450127407861
episode: 556 training return: -947.0483553522329
episode: 557 training return: -986.2079749863252
episode: 558 training return: -988.6612974392334
episode: 559 training return: -919.7632518440244
epoch: 140 test_true_pfm: 275.62877061098084 sim_pfm: -968.9324381361839
episode: 560 training return: -924.6956800146861
episode: 561 training return: -969.9415423500917
episode: 562 training return: -978.3614963549857
episode: 563 training return: -968.3282220069848
epoch: 141 test_true_pfm: 160.57279459529641 sim_pfm: -933.5838493927046
episode: 564 training return: -988.0843030478242
episode: 565 training return: -994.0313110062128
episode: 566 training return: -992.4400636225852
episode: 567 training return: -993.8519225048874
epoch: 142 test_true_pfm: 180.4689290230307 sim_pfm: -859.6976240122516
episode: 568 training return: -989.0370105915631
episode: 569 training return: -965.4392115347282
episode: 570 training return: -997.0967229603135
episode: 571 training return: -914.1674736763565
epoch: 143 test_true_pfm: 42.1987977421262 sim_pfm: -987.1766178766966
episode: 572 training return: -989.9822898636993
episode: 573 training return: -954.1545126036581
episode: 574 training return: -973.1457800189087
episode: 575 training return: -996.4126791249462
epoch: 144 test_true_pfm: 122.76584842464139 sim_pfm: -1035.6771603088248
episode: 576 training return: -939.0952118566274
episode: 577 training return: -952.5685278512042
episode: 578 training return: -992.8560381093009
episode: 579 training return: -994.632174636538
epoch: 145 test_true_pfm: 153.71097911470056 sim_pfm: -917.4436062210076
episode: 580 training return: -991.4978264366925
episode: 581 training return: -990.6325517920524
episode: 582 training return: -961.7445369257247
episode: 583 training return: -981.1375896137773
epoch: 146 test_true_pfm: 23.313094901340126 sim_pfm: -1008.4908671898173
episode: 584 training return: -978.2776055268741
episode: 585 training return: -1007.4069127110965
episode: 586 training return: -991.3833457507438
episode: 587 training return: -996.5936952430932
epoch: 147 test_true_pfm: 171.9893869945694 sim_pfm: -913.9702122928197
episode: 588 training return: -988.2643368139684
episode: 589 training return: -991.0393177382159
episode: 590 training return: -996.3034117894293
episode: 591 training return: -1001.8277714598216
epoch: 148 test_true_pfm: 31.67561881520435 sim_pfm: -979.5626488125489
episode: 592 training return: -984.1951641766772
episode: 593 training return: -971.9712750984359
episode: 594 training return: -963.3780603972624
episode: 595 training return: -986.0332191113007
epoch: 149 test_true_pfm: 134.5447654129864 sim_pfm: -898.7370600663588
episode: 596 training return: -990.879393698062
episode: 597 training return: -981.6286356006885
episode: 598 training return: -983.1814075495055
episode: 599 training return: -990.6232022866214
epoch: 150 test_true_pfm: 80.47181221099184 sim_pfm: -948.5988088998544
