['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'uncertainty', '--traj', 'expert', '--seed', '3', '--data', '100000']
epoch: 0 training_loss 0.21047038808465005 test_loss: 0.1539526104927063
epoch: 1 training_loss 0.14385156363248824 test_loss: 0.12750893831253052
epoch: 2 training_loss 0.12912790589034556 test_loss: 0.13857710361480713
epoch: 3 training_loss 0.12781973093748092 test_loss: 0.11319682598114014
epoch: 4 training_loss 0.12679011665284634 test_loss: 0.12903863191604614
epoch: 5 training_loss 0.12269998978823424 test_loss: 0.10714637041091919
epoch: 6 training_loss 0.12095413267612458 test_loss: 0.10595142841339111
epoch: 7 training_loss 0.12761275634169578 test_loss: 0.10285969972610473
epoch: 8 training_loss 0.12222223974764347 test_loss: 0.11242250204086304
epoch: 9 training_loss 0.11675384145230056 test_loss: 0.10893205404281617
epoch: 10 training_loss 0.11884963493794203 test_loss: 0.11473159790039063
epoch: 11 training_loss 0.1267536014318466 test_loss: 0.12355580329895019
epoch: 12 training_loss 0.12194492157548666 test_loss: 0.11655726432800292
epoch: 13 training_loss 0.12120817977935076 test_loss: 0.10666497945785522
epoch: 14 training_loss 0.1233337202295661 test_loss: 0.11159523725509643
epoch: 15 training_loss 0.11373835474252701 test_loss: 0.10875627994537354
epoch: 16 training_loss 0.114946518689394 test_loss: 0.1124037742614746
epoch: 17 training_loss 0.10807482548058032 test_loss: 0.11566736698150634
epoch: 18 training_loss 0.12116714000701904 test_loss: 0.11772389411926269
epoch: 19 training_loss 0.11440948311239481 test_loss: 0.123499596118927
epoch: 20 training_loss 0.11458083495497703 test_loss: 0.10650509595870972
epoch: 21 training_loss 0.11502938240766525 test_loss: 0.11459962129592896
epoch: 22 training_loss 0.11231704901903868 test_loss: 0.11206969022750854
epoch: 23 training_loss 0.12404118184000254 test_loss: 0.11809293031692505
epoch: 24 training_loss 0.11377169638872146 test_loss: 0.1045350432395935
epoch: 25 training_loss 0.11534132350236177 test_loss: 0.11864005327224732
epoch: 26 training_loss 0.11634947542101144 test_loss: 0.10960763692855835
epoch: 27 training_loss 0.11864441715180873 test_loss: 0.10969620943069458
epoch: 28 training_loss 0.1119152869656682 test_loss: 0.1255894899368286
epoch: 29 training_loss 0.11458308596163988 test_loss: 0.10497634410858155
epoch: 30 training_loss 0.11204618204385042 test_loss: 0.10852991342544556
epoch: 31 training_loss 0.11475676633417606 test_loss: 0.11884665489196777
epoch: 32 training_loss 0.11033679571002722 test_loss: 0.10408174991607666
epoch: 33 training_loss 0.11410742115229368 test_loss: 0.11343680620193482
epoch: 34 training_loss 0.11313941329717636 test_loss: 0.10207548141479492
epoch: 35 training_loss 0.11213805310428143 test_loss: 0.11969821453094483
epoch: 36 training_loss 0.1095383907109499 test_loss: 0.11108620166778564
epoch: 37 training_loss 0.11290445134043693 test_loss: 0.10197926759719848
epoch: 38 training_loss 0.11011653650552035 test_loss: 0.11861293315887451
epoch: 39 training_loss 0.11462823640555143 test_loss: 0.1202963948249817
epoch: 40 training_loss 0.12081190582364798 test_loss: 0.10275964736938477
epoch: 41 training_loss 0.11584714286029339 test_loss: 0.13131052255630493
epoch: 42 training_loss 0.11830796308815479 test_loss: 0.1172222375869751
epoch: 43 training_loss 0.1148937538638711 test_loss: 0.10813311338424683
epoch: 44 training_loss 0.11307201012969018 test_loss: 0.11567631959915162
epoch: 45 training_loss 0.12244012974202632 test_loss: 0.11061183214187623
epoch: 46 training_loss 0.11195893500000238 test_loss: 0.1046485185623169
epoch: 47 training_loss 0.11147303886711597 test_loss: 0.09948309659957885
epoch: 48 training_loss 0.11427308678627014 test_loss: 0.11894357204437256
epoch: 49 training_loss 0.11460179045796394 test_loss: 0.11267222166061401
epoch: 50 training_loss 0.10659496866166591 test_loss: 0.11756299734115601
epoch: 51 training_loss 0.12074543047696353 test_loss: 0.11455382108688354
epoch: 52 training_loss 0.11242128312587737 test_loss: 0.10225604772567749
epoch: 53 training_loss 0.11526390679180622 test_loss: 0.10997726917266845
epoch: 54 training_loss 0.11727772522717714 test_loss: 0.1115041732788086
epoch: 55 training_loss 0.1163710731267929 test_loss: 0.10325523614883422
epoch: 56 training_loss 0.11470498964190483 test_loss: 0.1126362919807434
epoch: 57 training_loss 0.115252755433321 test_loss: 0.11031111478805541
epoch: 58 training_loss 0.11366614930331707 test_loss: 0.11599243879318237
epoch: 59 training_loss 0.11365322858095168 test_loss: 0.10993055105209351
epoch: 60 training_loss 0.11423103857785463 test_loss: 0.11114789247512817
epoch: 61 training_loss 0.11110221648588776 test_loss: 0.10490583181381226
epoch: 62 training_loss 0.11053929902613163 test_loss: 0.11679551601409913
epoch: 63 training_loss 0.10836909096688033 test_loss: 0.10800971984863281
epoch: 64 training_loss 0.11543407093733549 test_loss: 0.10277377367019654
epoch: 65 training_loss 0.11508952047675848 test_loss: 0.1014379620552063
epoch: 66 training_loss 0.11452515788376331 test_loss: 0.11780236959457398
epoch: 67 training_loss 0.11035902723670006 test_loss: 0.11506911516189575
epoch: 68 training_loss 0.11930903028696775 test_loss: 0.10803784132003784
epoch: 69 training_loss 0.10822845079004764 test_loss: 0.0915973424911499
epoch: 70 training_loss 0.11103824064135552 test_loss: 0.10742989778518677
epoch: 71 training_loss 0.1111795812100172 test_loss: 0.10772302150726318
epoch: 72 training_loss 0.11259539019316435 test_loss: 0.10885821580886841
epoch: 73 training_loss 0.11682072971016169 test_loss: 0.11719048023223877
epoch: 74 training_loss 0.11274689085781574 test_loss: 0.10557307004928589
epoch: 75 training_loss 0.1104438279569149 test_loss: 0.11587330102920532
epoch: 76 training_loss 0.1110108920559287 test_loss: 0.10771278142929078
epoch: 77 training_loss 0.11063492003828287 test_loss: 0.10420583486557007
epoch: 78 training_loss 0.1083401358127594 test_loss: 0.11438699960708618
epoch: 79 training_loss 0.10894883988425136 test_loss: 0.10779476165771484
epoch: 80 training_loss 0.11435186121612788 test_loss: 0.10781311988830566
epoch: 81 training_loss 0.11240854233503342 test_loss: 0.11151589155197143
epoch: 82 training_loss 0.10742775477468967 test_loss: 0.10516645908355712
epoch: 83 training_loss 0.1096812766045332 test_loss: 0.10657492876052857
epoch: 84 training_loss 0.10885119013488292 test_loss: 0.12138489484786988
epoch: 85 training_loss 0.10751898430287837 test_loss: 0.11157429218292236
epoch: 86 training_loss 0.1149601561203599 test_loss: 0.10821713209152221
epoch: 87 training_loss 0.1181454623118043 test_loss: 0.11357862949371338
epoch: 88 training_loss 0.11183785945177079 test_loss: 0.10898725986480713
epoch: 89 training_loss 0.10952083799988031 test_loss: 0.10195038318634034
epoch: 90 training_loss 0.1103182217106223 test_loss: 0.11027404069900512
epoch: 91 training_loss 0.11313472528010607 test_loss: 0.1111524224281311
epoch: 92 training_loss 0.10884614989161491 test_loss: 0.11483362913131714
epoch: 93 training_loss 0.11073579266667366 test_loss: 0.10424600839614868
epoch: 94 training_loss 0.11159901291131974 test_loss: 0.11444641351699829
epoch: 95 training_loss 0.11092936269938945 test_loss: 0.10073190927505493
epoch: 96 training_loss 0.10706155888736248 test_loss: 0.10680474042892456
epoch: 97 training_loss 0.10775505213066935 test_loss: 0.10945701599121094
epoch: 98 training_loss 0.11492338858544826 test_loss: 0.10537891387939453
epoch: 99 training_loss 0.11027137961238623 test_loss: 0.1032423734664917
epoch: 100 training_loss 0.10921939559280873 test_loss: 0.11347745656967163
epoch: 101 training_loss 0.11054183576256037 test_loss: 0.10740879774093628
epoch: 102 training_loss 0.11093181908130646 test_loss: 0.09854368567466736
epoch: 103 training_loss 0.11159844491630792 test_loss: 0.10168360471725464
epoch: 104 training_loss 0.10954159799963235 test_loss: 0.10871331691741944
epoch: 105 training_loss 0.10655076388269663 test_loss: 0.09860132932662964
epoch: 106 training_loss 0.11010646790266038 test_loss: 0.10652152299880982
epoch: 107 training_loss 0.10796507854014635 test_loss: 0.10655995607376098
epoch: 108 training_loss 0.11134477898478508 test_loss: 0.10228782892227173
epoch: 109 training_loss 0.11104381952434778 test_loss: 0.10965288877487182
epoch: 110 training_loss 0.11852163057774305 test_loss: 0.1085923194885254
epoch: 111 training_loss 0.11148503713309765 test_loss: 0.1157943844795227
epoch: 112 training_loss 0.1088838866353035 test_loss: 0.11092959642410279
epoch: 113 training_loss 0.11818117368966341 test_loss: 0.10323699712753295
epoch: 114 training_loss 0.11299466665834189 test_loss: 0.10959233045578003
epoch: 115 training_loss 0.1126317708939314 test_loss: 0.09882222414016724
epoch: 116 training_loss 0.1098714230209589 test_loss: 0.10522288084030151
epoch: 117 training_loss 0.10894069865345955 test_loss: 0.09757631421089172
epoch: 118 training_loss 0.11187049198895693 test_loss: 0.10317684412002563
epoch: 119 training_loss 0.10951458666473628 test_loss: 0.09926369190216064
epoch: 120 training_loss 0.10598783172667027 test_loss: 0.11016979217529296
epoch: 121 training_loss 0.11069194659590721 test_loss: 0.09729729890823365
epoch: 122 training_loss 0.10729483913630247 test_loss: 0.12149481773376465
epoch: 123 training_loss 0.1091409520804882 test_loss: 0.1128316044807434
epoch: 124 training_loss 0.11133049950003623 test_loss: 0.1079687237739563
epoch: 125 training_loss 0.10299303341656924 test_loss: 0.10714422464370728
epoch: 126 training_loss 0.10699134714901447 test_loss: 0.10465577840805054
epoch: 127 training_loss 0.10961861655116081 test_loss: 0.10319066047668457
epoch: 128 training_loss 0.10675706524401903 test_loss: 0.10969207286834717
epoch: 129 training_loss 0.10896862480789422 test_loss: 0.10233359336853028
epoch: 130 training_loss 0.1107604131102562 test_loss: 0.11743707656860351
epoch: 131 training_loss 0.10690546501427889 test_loss: 0.11116236448287964
epoch: 132 training_loss 0.10687026556581258 test_loss: 0.11462616920471191
epoch: 133 training_loss 0.11031801089644432 test_loss: 0.10467672348022461
epoch: 134 training_loss 0.11204318810254335 test_loss: 0.11257188320159912
epoch: 135 training_loss 0.11334253001958132 test_loss: 0.08919584155082702
epoch: 136 training_loss 0.10321848761290311 test_loss: 0.09991779923439026
epoch: 137 training_loss 0.11226285383105278 test_loss: 0.10106165409088134
epoch: 138 training_loss 0.11401566993445159 test_loss: 0.10251613855361938
epoch: 139 training_loss 0.1146187923103571 test_loss: 0.1096901535987854
epoch: 140 training_loss 0.10929069429636001 test_loss: 0.10608936548233032
epoch: 141 training_loss 0.11425258796662092 test_loss: 0.10800015926361084
epoch: 142 training_loss 0.11324147805571556 test_loss: 0.11456460952758789
epoch: 143 training_loss 0.1082169209048152 test_loss: 0.10819355249404908
epoch: 144 training_loss 0.10728585485368967 test_loss: 0.10575857162475585
epoch: 145 training_loss 0.10734387800097465 test_loss: 0.1141398310661316
epoch: 146 training_loss 0.10959045495837927 test_loss: 0.10261449813842774
epoch: 147 training_loss 0.11260196164250375 test_loss: 0.1051396369934082
epoch: 148 training_loss 0.10631751280277968 test_loss: 0.108154559135437
epoch: 149 training_loss 0.10382974453270435 test_loss: 0.11152070760726929
epoch: 0 training_loss 0.19990686170756816 test_loss: 0.14717482328414916
epoch: 1 training_loss 0.14017603639513254 test_loss: 0.14243317842483522
epoch: 2 training_loss 0.1364017556607723 test_loss: 0.12685627937316896
epoch: 3 training_loss 0.1261956899613142 test_loss: 0.12257812023162842
epoch: 4 training_loss 0.13009279530495405 test_loss: 0.13465673923492433
epoch: 5 training_loss 0.12973382480442525 test_loss: 0.1093024492263794
epoch: 6 training_loss 0.12410416096448898 test_loss: 0.12678928375244142
epoch: 7 training_loss 0.12763532847166062 test_loss: 0.11220670938491821
epoch: 8 training_loss 0.11908852376043796 test_loss: 0.11698505878448487
epoch: 9 training_loss 0.1202934905514121 test_loss: 0.11606494188308716
epoch: 10 training_loss 0.11539171312004327 test_loss: 0.1262815475463867
epoch: 11 training_loss 0.12904455773532392 test_loss: 0.13813114166259766
epoch: 12 training_loss 0.11357621610164642 test_loss: 0.11676156520843506
epoch: 13 training_loss 0.11593542031943799 test_loss: 0.10249007940292358
epoch: 14 training_loss 0.1211814334988594 test_loss: 0.11572484970092774
epoch: 15 training_loss 0.12005110085010529 test_loss: 0.1182168960571289
epoch: 16 training_loss 0.12233523603528738 test_loss: 0.11322824954986573
epoch: 17 training_loss 0.11859519377350808 test_loss: 0.12037031650543213
epoch: 18 training_loss 0.11363984510302544 test_loss: 0.11132171154022216
epoch: 19 training_loss 0.11514355126768351 test_loss: 0.10995491743087768
epoch: 20 training_loss 0.12056166931986809 test_loss: 0.12556289434432982
epoch: 21 training_loss 0.11784477826207876 test_loss: 0.1263470768928528
epoch: 22 training_loss 0.12303364895284176 test_loss: 0.1089514970779419
epoch: 23 training_loss 0.11990358941257 test_loss: 0.11778783798217773
epoch: 24 training_loss 0.12079234696924686 test_loss: 0.10079635381698608
epoch: 25 training_loss 0.11902093823999166 test_loss: 0.11618572473526001
epoch: 26 training_loss 0.11937098413705825 test_loss: 0.11797088384628296
epoch: 27 training_loss 0.11592169221490621 test_loss: 0.11868284940719605
epoch: 28 training_loss 0.11556994076818228 test_loss: 0.1164480447769165
epoch: 29 training_loss 0.11668535728007555 test_loss: 0.1199065089225769
epoch: 30 training_loss 0.12002237729728221 test_loss: 0.10733072757720948
epoch: 31 training_loss 0.11107496447861194 test_loss: 0.11183539628982545
epoch: 32 training_loss 0.10801326543092728 test_loss: 0.13074761629104614
epoch: 33 training_loss 0.11017312534153462 test_loss: 0.11893779039382935
epoch: 34 training_loss 0.11979648228734732 test_loss: 0.10588339567184449
epoch: 35 training_loss 0.1148105152696371 test_loss: 0.11919152736663818
epoch: 36 training_loss 0.12137589447200298 test_loss: 0.11919301748275757
epoch: 37 training_loss 0.11636885043233633 test_loss: 0.11851937770843506
epoch: 38 training_loss 0.11482100833207369 test_loss: 0.11564395427703858
epoch: 39 training_loss 0.11693164128810167 test_loss: 0.10327435731887817
epoch: 40 training_loss 0.11548887435346841 test_loss: 0.11867705583572388
epoch: 41 training_loss 0.12354275591671467 test_loss: 0.11339633464813233
epoch: 42 training_loss 0.11711349308490754 test_loss: 0.10887830257415772
epoch: 43 training_loss 0.11477686151862145 test_loss: 0.12694203853607178
epoch: 44 training_loss 0.11477429237216712 test_loss: 0.099726003408432
epoch: 45 training_loss 0.11868498831987381 test_loss: 0.13313689231872558
epoch: 46 training_loss 0.11786737758666277 test_loss: 0.10808309316635131
epoch: 47 training_loss 0.11282994028180837 test_loss: 0.11205512285232544
epoch: 48 training_loss 0.11574902813881635 test_loss: 0.10329663753509521
epoch: 49 training_loss 0.11817019686102867 test_loss: 0.10970884561538696
epoch: 50 training_loss 0.11102246772497892 test_loss: 0.11874065399169922
epoch: 51 training_loss 0.11334828849881888 test_loss: 0.11679021120071412
epoch: 52 training_loss 0.11630106955766678 test_loss: 0.10353649854660034
epoch: 53 training_loss 0.10493820048868656 test_loss: 0.11678429841995239
epoch: 54 training_loss 0.11888549163937569 test_loss: 0.11543124914169312
epoch: 55 training_loss 0.11451739452779293 test_loss: 0.10716004371643066
epoch: 56 training_loss 0.11473973613232374 test_loss: 0.11697185039520264
epoch: 57 training_loss 0.11426234243437648 test_loss: 0.1161225438117981
epoch: 58 training_loss 0.10785736229270697 test_loss: 0.1144605040550232
epoch: 59 training_loss 0.11797114953398705 test_loss: 0.12346267700195312
epoch: 60 training_loss 0.11292400363832712 test_loss: 0.11521508693695068
epoch: 61 training_loss 0.11345872420817614 test_loss: 0.11657546758651734
epoch: 62 training_loss 0.11216842591762542 test_loss: 0.12262934446334839
epoch: 63 training_loss 0.11612170927226544 test_loss: 0.12398339509963989
epoch: 64 training_loss 0.11336684826761484 test_loss: 0.12153964042663574
epoch: 65 training_loss 0.10663341525942087 test_loss: 0.12583972215652467
epoch: 66 training_loss 0.11639412831515074 test_loss: 0.11529366970062256
epoch: 67 training_loss 0.11576489243656397 test_loss: 0.12193331718444825
epoch: 68 training_loss 0.11322502326220274 test_loss: 0.11347850561141967
epoch: 69 training_loss 0.11557140696793794 test_loss: 0.1138431191444397
epoch: 70 training_loss 0.10869125999510289 test_loss: 0.10982170104980468
epoch: 71 training_loss 0.10741173729300499 test_loss: 0.11626757383346557
epoch: 72 training_loss 0.11530667070299387 test_loss: 0.11442462205886841
epoch: 73 training_loss 0.10958106953650713 test_loss: 0.11034890413284301
epoch: 74 training_loss 0.11476682703942061 test_loss: 0.11849857568740844
epoch: 75 training_loss 0.10891429968178272 test_loss: 0.10500023365020753
epoch: 76 training_loss 0.11539598539471627 test_loss: 0.11520295143127442
epoch: 77 training_loss 0.10557237673550844 test_loss: 0.12637171745300294
epoch: 78 training_loss 0.11557280473411083 test_loss: 0.1133040428161621
epoch: 79 training_loss 0.11955893103033305 test_loss: 0.12096148729324341
epoch: 80 training_loss 0.11085179299116135 test_loss: 0.1053243637084961
epoch: 81 training_loss 0.11575813408941031 test_loss: 0.1245508074760437
epoch: 82 training_loss 0.1113128376379609 test_loss: 0.10311324596405029
epoch: 83 training_loss 0.10967657666653395 test_loss: 0.11948471069335938
epoch: 84 training_loss 0.11489874102175236 test_loss: 0.11512669324874877
epoch: 85 training_loss 0.112619623914361 test_loss: 0.10436614751815795
epoch: 86 training_loss 0.11373271014541388 test_loss: 0.12255630493164063
epoch: 87 training_loss 0.11519366569817066 test_loss: 0.1254047155380249
epoch: 88 training_loss 0.11596998244524002 test_loss: 0.10935503244400024
epoch: 89 training_loss 0.1101988173276186 test_loss: 0.11604880094528199
epoch: 90 training_loss 0.10267395529896021 test_loss: 0.12512603998184205
epoch: 91 training_loss 0.11077031407505274 test_loss: 0.1194527268409729
epoch: 92 training_loss 0.11624564979225398 test_loss: 0.10719106197357178
epoch: 93 training_loss 0.11214230444282293 test_loss: 0.10394848585128784
epoch: 94 training_loss 0.11008078649640084 test_loss: 0.10821173191070557
epoch: 95 training_loss 0.10743212088942528 test_loss: 0.12024677991867065
epoch: 96 training_loss 0.10635740093886853 test_loss: 0.123009192943573
epoch: 97 training_loss 0.11389558363705873 test_loss: 0.11699444055557251
epoch: 98 training_loss 0.1064439833164215 test_loss: 0.11296751499176025
epoch: 99 training_loss 0.10968304805457592 test_loss: 0.11009296178817748
epoch: 100 training_loss 0.10729853060096502 test_loss: 0.10966727733612061
epoch: 101 training_loss 0.11019212983548642 test_loss: 0.12331854104995728
epoch: 102 training_loss 0.1145397824421525 test_loss: 0.12582722902297974
epoch: 103 training_loss 0.10811648432165384 test_loss: 0.10630789995193482
epoch: 104 training_loss 0.10792792215943336 test_loss: 0.10913012027740479
epoch: 105 training_loss 0.11010467689484357 test_loss: 0.10633387565612792
epoch: 106 training_loss 0.11348590202629566 test_loss: 0.1144403338432312
epoch: 107 training_loss 0.11226306859403849 test_loss: 0.11145879030227661
epoch: 108 training_loss 0.11735069386661053 test_loss: 0.11080061197280884
epoch: 109 training_loss 0.11077297609299422 test_loss: 0.12280430793762206
epoch: 110 training_loss 0.11405838772654533 test_loss: 0.11844711303710938
epoch: 111 training_loss 0.11015675503760576 test_loss: 0.12179306745529175
epoch: 112 training_loss 0.10824226140975952 test_loss: 0.10687108039855957
epoch: 113 training_loss 0.1081159421429038 test_loss: 0.11487090587615967
epoch: 114 training_loss 0.10742860190570354 test_loss: 0.10814098119735718
epoch: 115 training_loss 0.10542336363345385 test_loss: 0.12116985321044922
epoch: 116 training_loss 0.11042663596570491 test_loss: 0.1029610276222229
epoch: 117 training_loss 0.11239581279456616 test_loss: 0.11867339611053467
epoch: 118 training_loss 0.10735112760215998 test_loss: 0.1212567925453186
epoch: 119 training_loss 0.11045679930597543 test_loss: 0.10844587087631226
epoch: 120 training_loss 0.11104992248117923 test_loss: 0.1177836537361145
epoch: 121 training_loss 0.11581282336264849 test_loss: 0.11092019081115723
epoch: 122 training_loss 0.10712505536153913 test_loss: 0.11940081119537353
epoch: 123 training_loss 0.11024269871413708 test_loss: 0.11730262041091918
epoch: 124 training_loss 0.11430805534124375 test_loss: 0.11427115201950074
epoch: 125 training_loss 0.11978711027652025 test_loss: 0.11451441049575806
epoch: 126 training_loss 0.1164085879176855 test_loss: 0.13542473316192627
epoch: 127 training_loss 0.10943713564425707 test_loss: 0.11614075899124146
epoch: 128 training_loss 0.10855240162461996 test_loss: 0.1131022572517395
epoch: 129 training_loss 0.11407554561272264 test_loss: 0.10159875154495239
epoch: 130 training_loss 0.1079676715284586 test_loss: 0.11522458791732788
epoch: 131 training_loss 0.10685387261211872 test_loss: 0.1150091290473938
epoch: 132 training_loss 0.11131737381219864 test_loss: 0.1161230444908142
epoch: 133 training_loss 0.1085463473200798 test_loss: 0.10294383764266968
epoch: 134 training_loss 0.11214207537472248 test_loss: 0.10324854850769043
epoch: 135 training_loss 0.1047986551746726 test_loss: 0.11334115266799927
epoch: 136 training_loss 0.11443778596818448 test_loss: 0.10371922254562378
epoch: 137 training_loss 0.10952358473092318 test_loss: 0.11839678287506103
epoch: 138 training_loss 0.11018999699503183 test_loss: 0.11242051124572754
epoch: 139 training_loss 0.11259872276335954 test_loss: 0.11984144449234009
epoch: 140 training_loss 0.10820688847452402 test_loss: 0.11223075389862061
epoch: 141 training_loss 0.11040883008390664 test_loss: 0.11037715673446655
epoch: 142 training_loss 0.10691703978925943 test_loss: 0.10903877019882202
epoch: 143 training_loss 0.1066497665271163 test_loss: 0.10917583703994752
epoch: 144 training_loss 0.11003208722919226 test_loss: 0.12124760150909424
epoch: 145 training_loss 0.1112390973418951 test_loss: 0.10919302701950073
epoch: 146 training_loss 0.11287018980830908 test_loss: 0.11446737051010132
epoch: 147 training_loss 0.11037003677338361 test_loss: 0.1161301612854004
epoch: 148 training_loss 0.116539272852242 test_loss: 0.11199742555618286
epoch: 149 training_loss 0.10998310729861259 test_loss: 0.11134727001190185
epoch: 0 training_loss 0.21654242418706418 test_loss: 0.15986753702163697
epoch: 1 training_loss 0.14232998073101044 test_loss: 0.13378068208694457
epoch: 2 training_loss 0.137856302857399 test_loss: 0.14133404493331908
epoch: 3 training_loss 0.1291944669932127 test_loss: 0.12324439287185669
epoch: 4 training_loss 0.12344903238117695 test_loss: 0.13581435680389403
epoch: 5 training_loss 0.1193703406304121 test_loss: 0.1247824788093567
epoch: 6 training_loss 0.12005047149956226 test_loss: 0.12741435766220094
epoch: 7 training_loss 0.1256673865765333 test_loss: 0.1320386528968811
epoch: 8 training_loss 0.12044221252202987 test_loss: 0.13438140153884887
epoch: 9 training_loss 0.11358852885663509 test_loss: 0.12859994173049927
epoch: 10 training_loss 0.11629526119679212 test_loss: 0.11191948652267455
epoch: 11 training_loss 0.1208701652288437 test_loss: 0.1205331802368164
epoch: 12 training_loss 0.12115895297378301 test_loss: 0.11819946765899658
epoch: 13 training_loss 0.11523969750851393 test_loss: 0.12889374494552613
epoch: 14 training_loss 0.11495351310819388 test_loss: 0.12955026626586913
epoch: 15 training_loss 0.11956801541149616 test_loss: 0.12163454294204712
epoch: 16 training_loss 0.12260409917682409 test_loss: 0.12355051040649415
epoch: 17 training_loss 0.11967499613761902 test_loss: 0.1159979224205017
epoch: 18 training_loss 0.11645430862903595 test_loss: 0.12661124467849733
epoch: 19 training_loss 0.11323523089289665 test_loss: 0.1388964295387268
epoch: 20 training_loss 0.11416289694607258 test_loss: 0.1188763976097107
epoch: 21 training_loss 0.11710716124624014 test_loss: 0.13939378261566163
epoch: 22 training_loss 0.11612976580858231 test_loss: 0.135193395614624
epoch: 23 training_loss 0.11493083573877812 test_loss: 0.1164246916770935
epoch: 24 training_loss 0.11914029609411955 test_loss: 0.12433018684387206
epoch: 25 training_loss 0.12061330284923315 test_loss: 0.12022837400436401
epoch: 26 training_loss 0.1169739630445838 test_loss: 0.12802774906158448
epoch: 27 training_loss 0.11419241484254598 test_loss: 0.13126168251037598
epoch: 28 training_loss 0.1128460480645299 test_loss: 0.12966346740722656
epoch: 29 training_loss 0.12047127339988947 test_loss: 0.13306392431259156
epoch: 30 training_loss 0.12297312386333942 test_loss: 0.11260867118835449
epoch: 31 training_loss 0.12019175909459591 test_loss: 0.12408254146575928
epoch: 32 training_loss 0.11254928838461638 test_loss: 0.1285109281539917
epoch: 33 training_loss 0.11272115942090749 test_loss: 0.11944549083709717
epoch: 34 training_loss 0.11902539659291506 test_loss: 0.1262676954269409
epoch: 35 training_loss 0.11646202374249696 test_loss: 0.12974604368209838
epoch: 36 training_loss 0.1163834223523736 test_loss: 0.12175158262252808
epoch: 37 training_loss 0.1164246965199709 test_loss: 0.12574546337127684
epoch: 38 training_loss 0.11523116394877433 test_loss: 0.12632960081100464
epoch: 39 training_loss 0.12020247858017682 test_loss: 0.1326223134994507
epoch: 40 training_loss 0.11639689780771732 test_loss: 0.12008124589920044
epoch: 41 training_loss 0.11522484965622425 test_loss: 0.11058299541473389
epoch: 42 training_loss 0.11581441471353174 test_loss: 0.12796896696090698
epoch: 43 training_loss 0.11796302691102029 test_loss: 0.11745543479919433
epoch: 44 training_loss 0.11325644429773092 test_loss: 0.1271335482597351
epoch: 45 training_loss 0.11916594069451093 test_loss: 0.11537103652954102
epoch: 46 training_loss 0.11198728166520595 test_loss: 0.13021337985992432
epoch: 47 training_loss 0.11794358354061842 test_loss: 0.13856366872787476
epoch: 48 training_loss 0.11402174934744835 test_loss: 0.12278109788894653
epoch: 49 training_loss 0.11209097132086754 test_loss: 0.12875962257385254
epoch: 50 training_loss 0.11228139732033014 test_loss: 0.12625867128372192
epoch: 51 training_loss 0.11509876325726509 test_loss: 0.13082796335220337
epoch: 52 training_loss 0.11877553638070822 test_loss: 0.13585281372070312
epoch: 53 training_loss 0.11435867980122566 test_loss: 0.11760283708572387
epoch: 54 training_loss 0.11309807896614074 test_loss: 0.12595633268356324
epoch: 55 training_loss 0.11471505366265773 test_loss: 0.11390899419784546
epoch: 56 training_loss 0.11458910439163446 test_loss: 0.13026273250579834
epoch: 57 training_loss 0.11611321546137333 test_loss: 0.13173537254333495
epoch: 58 training_loss 0.12237920049577951 test_loss: 0.12008976936340332
epoch: 59 training_loss 0.11530927091836929 test_loss: 0.12016400098800659
epoch: 60 training_loss 0.10832122709602117 test_loss: 0.1351724624633789
epoch: 61 training_loss 0.1122677431255579 test_loss: 0.11649506092071533
epoch: 62 training_loss 0.11416563764214516 test_loss: 0.12177326679229736
epoch: 63 training_loss 0.12193830687552691 test_loss: 0.11455260515213013
epoch: 64 training_loss 0.11753994330763817 test_loss: 0.1137465000152588
epoch: 65 training_loss 0.11533408794552087 test_loss: 0.1260641932487488
epoch: 66 training_loss 0.10668517656624317 test_loss: 0.11540095806121826
epoch: 67 training_loss 0.11398527037352324 test_loss: 0.10996675491333008
epoch: 68 training_loss 0.1124132964387536 test_loss: 0.12187711000442505
epoch: 69 training_loss 0.1164543892070651 test_loss: 0.12008870840072632
epoch: 70 training_loss 0.1179184952378273 test_loss: 0.12374000549316407
epoch: 71 training_loss 0.11241041574627161 test_loss: 0.12359933853149414
epoch: 72 training_loss 0.11952768038958311 test_loss: 0.12204122543334961
epoch: 73 training_loss 0.10975053176283836 test_loss: 0.12284724712371826
epoch: 74 training_loss 0.11776341564953327 test_loss: 0.1101646900177002
epoch: 75 training_loss 0.11352284163236619 test_loss: 0.12477644681930541
epoch: 76 training_loss 0.11055703885853291 test_loss: 0.1067468285560608
epoch: 77 training_loss 0.11615746062248945 test_loss: 0.11896482706069947
epoch: 78 training_loss 0.11193230602890253 test_loss: 0.12362762689590454
epoch: 79 training_loss 0.10920162472873926 test_loss: 0.12214291095733643
epoch: 80 training_loss 0.10445139292627573 test_loss: 0.11421800851821899
epoch: 81 training_loss 0.10791122861206531 test_loss: 0.11424447298049926
epoch: 82 training_loss 0.1100289599597454 test_loss: 0.11226532459259034
epoch: 83 training_loss 0.11092253129929304 test_loss: 0.10904145240783691
epoch: 84 training_loss 0.113896897546947 test_loss: 0.12946414947509766
epoch: 85 training_loss 0.11380268663167953 test_loss: 0.10998215675354003
epoch: 86 training_loss 0.1072088334709406 test_loss: 0.1260121703147888
epoch: 87 training_loss 0.11969980236142874 test_loss: 0.11797773838043213
epoch: 88 training_loss 0.11654610224068165 test_loss: 0.13300719261169433
epoch: 89 training_loss 0.10956303730607032 test_loss: 0.12969982624053955
epoch: 90 training_loss 0.11137641850858927 test_loss: 0.13067442178726196
epoch: 91 training_loss 0.11010149423032999 test_loss: 0.11300723552703858
epoch: 92 training_loss 0.11267655074596405 test_loss: 0.1215468168258667
epoch: 93 training_loss 0.11320536166429519 test_loss: 0.12541019916534424
epoch: 94 training_loss 0.10737446539103984 test_loss: 0.1314786911010742
epoch: 95 training_loss 0.1069376715272665 test_loss: 0.12612766027450562
epoch: 96 training_loss 0.10713931679725647 test_loss: 0.12132568359375
epoch: 97 training_loss 0.11312238954007625 test_loss: 0.11414639949798584
epoch: 98 training_loss 0.10978980027139187 test_loss: 0.12541621923446655
epoch: 99 training_loss 0.10929809708148241 test_loss: 0.11011524200439453
epoch: 100 training_loss 0.11325181361287832 test_loss: 0.11773470640182496
epoch: 101 training_loss 0.10735865298658609 test_loss: 0.12760541439056397
epoch: 102 training_loss 0.11497080631554127 test_loss: 0.12184333801269531
epoch: 103 training_loss 0.10806154813617468 test_loss: 0.12769687175750732
epoch: 104 training_loss 0.1122324276342988 test_loss: 0.12574658393859864
epoch: 105 training_loss 0.10621553286910057 test_loss: 0.1168520212173462
epoch: 106 training_loss 0.10906974397599697 test_loss: 0.11185888051986695
epoch: 107 training_loss 0.1081054401770234 test_loss: 0.12184348106384277
epoch: 108 training_loss 0.11007227420806885 test_loss: 0.11521310806274414
epoch: 109 training_loss 0.11252580609172583 test_loss: 0.1177220106124878
epoch: 110 training_loss 0.10929226867854595 test_loss: 0.12396852970123291
epoch: 111 training_loss 0.10768423840403557 test_loss: 0.11701095104217529
epoch: 112 training_loss 0.11015222869813442 test_loss: 0.10704256296157837
epoch: 113 training_loss 0.10918764226138591 test_loss: 0.12757054567337037
epoch: 114 training_loss 0.1179552486911416 test_loss: 0.11993534564971924
epoch: 115 training_loss 0.11016675654798747 test_loss: 0.13710554838180541
epoch: 116 training_loss 0.10792350817471742 test_loss: 0.13176199197769164
epoch: 117 training_loss 0.10889750171452761 test_loss: 0.12165093421936035
epoch: 118 training_loss 0.11199734538793564 test_loss: 0.1250338077545166
epoch: 119 training_loss 0.11671209003776312 test_loss: 0.1230971097946167
epoch: 120 training_loss 0.10675453439354897 test_loss: 0.11409826278686523
epoch: 121 training_loss 0.11299723532050848 test_loss: 0.1087357759475708
epoch: 122 training_loss 0.11035970319062471 test_loss: 0.14224389791488648
epoch: 123 training_loss 0.11268333308398723 test_loss: 0.11664344072341919
epoch: 124 training_loss 0.10822358224540948 test_loss: 0.11677330732345581
epoch: 125 training_loss 0.11178550362586975 test_loss: 0.11270111799240112
epoch: 126 training_loss 0.10911450985819102 test_loss: 0.1134257197380066
epoch: 127 training_loss 0.10947932511568069 test_loss: 0.1262684941291809
epoch: 128 training_loss 0.10740670848637819 test_loss: 0.11719671487808228
epoch: 129 training_loss 0.11516291815787554 test_loss: 0.11963788270950318
epoch: 130 training_loss 0.10762236904352904 test_loss: 0.11887325048446655
epoch: 131 training_loss 0.11174451559782028 test_loss: 0.11941416263580322
epoch: 132 training_loss 0.10321371637284756 test_loss: 0.11369143724441529
epoch: 133 training_loss 0.11304715670645236 test_loss: 0.10885783433914184
epoch: 134 training_loss 0.1144730769470334 test_loss: 0.11506749391555786
epoch: 135 training_loss 0.11695515371859073 test_loss: 0.12026973962783813
epoch: 136 training_loss 0.10932047117501498 test_loss: 0.11544243097305298
epoch: 137 training_loss 0.11165938723832369 test_loss: 0.11231937408447265
epoch: 138 training_loss 0.12015401601791381 test_loss: 0.12059905529022216
epoch: 139 training_loss 0.11324324078857899 test_loss: 0.11862046718597412
epoch: 140 training_loss 0.10989418879151344 test_loss: 0.11709762811660766
epoch: 141 training_loss 0.1155991581827402 test_loss: 0.12699884176254272
epoch: 142 training_loss 0.11058001838624477 test_loss: 0.11384203433990478
epoch: 143 training_loss 0.11283198222517968 test_loss: 0.11790937185287476
epoch: 144 training_loss 0.11072125464677811 test_loss: 0.12630035877227783
epoch: 145 training_loss 0.10759769342839717 test_loss: 0.1122393250465393
epoch: 146 training_loss 0.10736700709909201 test_loss: 0.11921166181564331
epoch: 147 training_loss 0.10481788571923971 test_loss: 0.11939141750335694
epoch: 148 training_loss 0.10914387658238411 test_loss: 0.12114925384521484
epoch: 149 training_loss 0.10608338430523873 test_loss: 0.1054571032524109
epoch: 0 training_loss 0.20601735144853592 test_loss: 0.1788394570350647
epoch: 1 training_loss 0.14162609681487084 test_loss: 0.13240770101547242
epoch: 2 training_loss 0.13487124312669038 test_loss: 0.12386034727096558
epoch: 3 training_loss 0.1272984079644084 test_loss: 0.1258534550666809
epoch: 4 training_loss 0.12934763383120298 test_loss: 0.12090514898300171
epoch: 5 training_loss 0.1275610827282071 test_loss: 0.12820563316345215
epoch: 6 training_loss 0.1221067652106285 test_loss: 0.12615580558776857
epoch: 7 training_loss 0.1286131665110588 test_loss: 0.13062211275100707
epoch: 8 training_loss 0.11934122398495674 test_loss: 0.12514643669128417
epoch: 9 training_loss 0.12285063289105892 test_loss: 0.1239364743232727
epoch: 10 training_loss 0.12277432046830654 test_loss: 0.11187045574188233
epoch: 11 training_loss 0.12810588516294957 test_loss: 0.12848343849182128
epoch: 12 training_loss 0.11592546686530113 test_loss: 0.12904008626937866
epoch: 13 training_loss 0.12161418180912734 test_loss: 0.12754344940185547
epoch: 14 training_loss 0.1232124599814415 test_loss: 0.11293261051177979
epoch: 15 training_loss 0.11743995472788811 test_loss: 0.11529462337493897
epoch: 16 training_loss 0.12251531451940537 test_loss: 0.11263914108276367
epoch: 17 training_loss 0.12285482320934533 test_loss: 0.12272591590881347
epoch: 18 training_loss 0.11963935703039169 test_loss: 0.12368717193603515
epoch: 19 training_loss 0.11604572776705027 test_loss: 0.11523102521896363
epoch: 20 training_loss 0.11844424493610858 test_loss: 0.11883647441864013
epoch: 21 training_loss 0.11873346112668515 test_loss: 0.13189327716827393
epoch: 22 training_loss 0.11849609442055226 test_loss: 0.12159632444381714
epoch: 23 training_loss 0.11773631915450096 test_loss: 0.11709890365600586
epoch: 24 training_loss 0.11919472008943557 test_loss: 0.11627986431121826
epoch: 25 training_loss 0.12093366403132677 test_loss: 0.11106396913528442
epoch: 26 training_loss 0.11712988521903753 test_loss: 0.11860421895980836
epoch: 27 training_loss 0.12184745978564024 test_loss: 0.11619387865066529
epoch: 28 training_loss 0.1146895670890808 test_loss: 0.11175621747970581
epoch: 29 training_loss 0.11798637930303812 test_loss: 0.11238719224929809
epoch: 30 training_loss 0.11479133259505034 test_loss: 0.12789063453674315
epoch: 31 training_loss 0.11162840455770492 test_loss: 0.12342600822448731
epoch: 32 training_loss 0.11747743215411902 test_loss: 0.11814066171646118
epoch: 33 training_loss 0.11899682894349098 test_loss: 0.11645898818969727
epoch: 34 training_loss 0.11115075819194317 test_loss: 0.10851149559020996
epoch: 35 training_loss 0.12408162683248519 test_loss: 0.12238562107086182
epoch: 36 training_loss 0.11997013825923204 test_loss: 0.12229174375534058
epoch: 37 training_loss 0.1144189177080989 test_loss: 0.11131391525268555
epoch: 38 training_loss 0.1143058330565691 test_loss: 0.1235529899597168
epoch: 39 training_loss 0.119760921895504 test_loss: 0.11363781690597534
epoch: 40 training_loss 0.11471120547503233 test_loss: 0.1208264708518982
epoch: 41 training_loss 0.11296277087181807 test_loss: 0.11940697431564332
epoch: 42 training_loss 0.11797640416771174 test_loss: 0.11642100811004638
epoch: 43 training_loss 0.11311107162386179 test_loss: 0.11478312015533447
epoch: 44 training_loss 0.11870053302496672 test_loss: 0.12587777376174927
epoch: 45 training_loss 0.12003042861819267 test_loss: 0.12253806591033936
epoch: 46 training_loss 0.11546110648661852 test_loss: 0.11965762376785279
epoch: 47 training_loss 0.11542627539485693 test_loss: 0.10166037082672119
epoch: 48 training_loss 0.11300084505230189 test_loss: 0.10285815000534057
epoch: 49 training_loss 0.10986603025346994 test_loss: 0.1254620909690857
epoch: 50 training_loss 0.10891883593052626 test_loss: 0.10817047357559204
epoch: 51 training_loss 0.11567875042557717 test_loss: 0.10622594356536866
epoch: 52 training_loss 0.1198799155652523 test_loss: 0.1260829210281372
epoch: 53 training_loss 0.11240316394716501 test_loss: 0.11855159997940064
epoch: 54 training_loss 0.12048039704561234 test_loss: 0.11998206377029419
epoch: 55 training_loss 0.11549749106168747 test_loss: 0.11034729480743408
epoch: 56 training_loss 0.11202781535685062 test_loss: 0.12144302129745484
epoch: 57 training_loss 0.12298613771796227 test_loss: 0.12189500331878662
epoch: 58 training_loss 0.11097694605588913 test_loss: 0.1102556824684143
epoch: 59 training_loss 0.11471699316054583 test_loss: 0.11237989664077759
epoch: 60 training_loss 0.10635309297591448 test_loss: 0.11738253831863403
epoch: 61 training_loss 0.10825156066566706 test_loss: 0.11992400884628296
epoch: 62 training_loss 0.11906567309051752 test_loss: 0.10107845067977905
epoch: 63 training_loss 0.11652416380122305 test_loss: 0.10590260028839112
epoch: 64 training_loss 0.11308983318507672 test_loss: 0.11703659296035766
epoch: 65 training_loss 0.11489297356456518 test_loss: 0.11950862407684326
epoch: 66 training_loss 0.1224723732471466 test_loss: 0.1163362741470337
epoch: 67 training_loss 0.10969017200171947 test_loss: 0.10778721570968627
epoch: 68 training_loss 0.11536059197038412 test_loss: 0.11205811500549316
epoch: 69 training_loss 0.11479797400534153 test_loss: 0.11783626079559326
epoch: 70 training_loss 0.11253957394510508 test_loss: 0.10837568044662475
epoch: 71 training_loss 0.11494168996810913 test_loss: 0.12026939392089844
epoch: 72 training_loss 0.11271442752331495 test_loss: 0.12581814527511598
epoch: 73 training_loss 0.1059425762295723 test_loss: 0.10734046697616577
epoch: 74 training_loss 0.10897763477638364 test_loss: 0.10784403085708619
epoch: 75 training_loss 0.11802647069096565 test_loss: 0.11154203414916992
epoch: 76 training_loss 0.11428840026259422 test_loss: 0.10048085451126099
epoch: 77 training_loss 0.11104083824902773 test_loss: 0.12090638875961304
epoch: 78 training_loss 0.1090404811874032 test_loss: 0.11762213706970215
epoch: 79 training_loss 0.11034216187894344 test_loss: 0.11186562776565552
epoch: 80 training_loss 0.1111133635416627 test_loss: 0.1089856743812561
epoch: 81 training_loss 0.1168005859106779 test_loss: 0.10822465419769287
epoch: 82 training_loss 0.10654405601322652 test_loss: 0.11442950963974
epoch: 83 training_loss 0.11772760044783354 test_loss: 0.10927708148956299
epoch: 84 training_loss 0.11134007081389427 test_loss: 0.11651449203491211
epoch: 85 training_loss 0.11077562801539897 test_loss: 0.10614806413650513
epoch: 86 training_loss 0.11113876692950725 test_loss: 0.12928788661956786
epoch: 87 training_loss 0.11685317195951939 test_loss: 0.10905505418777466
epoch: 88 training_loss 0.11446105364710092 test_loss: 0.10445938110351563
epoch: 89 training_loss 0.10716835681349039 test_loss: 0.11292999982833862
epoch: 90 training_loss 0.1164417226612568 test_loss: 0.11363424062728882
epoch: 91 training_loss 0.11241733632981778 test_loss: 0.11876980066299439
epoch: 92 training_loss 0.11547540124505758 test_loss: 0.11275608539581299
epoch: 93 training_loss 0.10876605426892638 test_loss: 0.11378623247146606
epoch: 94 training_loss 0.11500411730259658 test_loss: 0.12052747011184692
epoch: 95 training_loss 0.11299525044858455 test_loss: 0.11672489643096924
epoch: 96 training_loss 0.11380366306751967 test_loss: 0.10939297676086426
epoch: 97 training_loss 0.10663365688174962 test_loss: 0.13598625659942626
epoch: 98 training_loss 0.11495496768504382 test_loss: 0.11763696670532227
epoch: 99 training_loss 0.1158119897916913 test_loss: 0.11093039512634277
epoch: 100 training_loss 0.11782326623797416 test_loss: 0.10445386171340942
epoch: 101 training_loss 0.11509448133409023 test_loss: 0.11471318006515503
epoch: 102 training_loss 0.11352937642484903 test_loss: 0.11149499416351319
epoch: 103 training_loss 0.10940532144159079 test_loss: 0.10877913236618042
epoch: 104 training_loss 0.11951642833650113 test_loss: 0.11615046262741088
epoch: 105 training_loss 0.11324617892503738 test_loss: 0.1173203706741333
epoch: 106 training_loss 0.11689485840499401 test_loss: 0.11141089200973511
epoch: 107 training_loss 0.11500520847737788 test_loss: 0.1163981318473816
epoch: 108 training_loss 0.1151537862047553 test_loss: 0.10090280771255493
epoch: 109 training_loss 0.11459698230028152 test_loss: 0.11456983089447022
epoch: 110 training_loss 0.11872134935110808 test_loss: 0.11651077270507812
epoch: 111 training_loss 0.10955572098493577 test_loss: 0.11319574117660522
epoch: 112 training_loss 0.11192828476428986 test_loss: 0.12345333099365234
epoch: 113 training_loss 0.11763072777539492 test_loss: 0.10661826133728028
epoch: 114 training_loss 0.10984815228730441 test_loss: 0.11967884302139283
epoch: 115 training_loss 0.1075590731203556 test_loss: 0.117698073387146
epoch: 116 training_loss 0.11338319677859544 test_loss: 0.10561870336532593
epoch: 117 training_loss 0.11831267450004816 test_loss: 0.10092893838882447
epoch: 118 training_loss 0.11448729515075684 test_loss: 0.1230776309967041
epoch: 119 training_loss 0.11545401584357023 test_loss: 0.10954334735870361
epoch: 120 training_loss 0.11098097454756499 test_loss: 0.10891178846359253
epoch: 121 training_loss 0.1115927005186677 test_loss: 0.09757235050201415
epoch: 122 training_loss 0.10629202831536531 test_loss: 0.11598519086837769
epoch: 123 training_loss 0.11431312721222639 test_loss: 0.12599689960479737
epoch: 124 training_loss 0.11049824327230454 test_loss: 0.10935817956924439
epoch: 125 training_loss 0.10711746163666248 test_loss: 0.11799978017807007
epoch: 126 training_loss 0.1155916852876544 test_loss: 0.12068541049957275
epoch: 127 training_loss 0.11518912326544523 test_loss: 0.10219507217407227
epoch: 128 training_loss 0.11283270854502916 test_loss: 0.11329602003097534
epoch: 129 training_loss 0.11615126095712185 test_loss: 0.11164369583129882
epoch: 130 training_loss 0.11498428460210562 test_loss: 0.11685059070587159
epoch: 131 training_loss 0.11211298156529664 test_loss: 0.10716289281845093
epoch: 132 training_loss 0.11229592680931091 test_loss: 0.11185808181762695
epoch: 133 training_loss 0.10999140594154597 test_loss: 0.12245415449142456
epoch: 134 training_loss 0.11076807655394078 test_loss: 0.11387203931808472
epoch: 135 training_loss 0.1076093290001154 test_loss: 0.10915205478668213
epoch: 136 training_loss 0.10786367740482092 test_loss: 0.11046066284179687
epoch: 137 training_loss 0.10745788540691137 test_loss: 0.11283767223358154
epoch: 138 training_loss 0.11091134622693062 test_loss: 0.11983858346939087
epoch: 139 training_loss 0.11328483812510967 test_loss: 0.1189878225326538
epoch: 140 training_loss 0.11619802597910166 test_loss: 0.13253129720687867
epoch: 141 training_loss 0.11268268913030624 test_loss: 0.10766874551773072
epoch: 142 training_loss 0.11511569805443286 test_loss: 0.11099511384963989
epoch: 143 training_loss 0.10573341930285096 test_loss: 0.11584442853927612
epoch: 144 training_loss 0.10518851030617953 test_loss: 0.10954563617706299
epoch: 145 training_loss 0.11221470147371292 test_loss: 0.10456614494323731
epoch: 146 training_loss 0.1126858864352107 test_loss: 0.10469480752944946
epoch: 147 training_loss 0.11184276320040226 test_loss: 0.10135515928268432
epoch: 148 training_loss 0.1058189034089446 test_loss: 0.11309813261032105
epoch: 149 training_loss 0.1053846886754036 test_loss: 0.12303371429443359
episode: 0 training return: -895.3853230439105
episode: 1 training return: -866.6824594555683
episode: 2 training return: -935.2226993997828
episode: 3 training return: -862.9397485557507
epoch: 1 test_true_pfm: 28.528343194418284 sim_pfm: -2098.717625366703
episode: 4 training return: -957.2972763352564
episode: 5 training return: -892.8478343220132
episode: 6 training return: -757.0649547821788
episode: 7 training return: -747.0872573693433
epoch: 2 test_true_pfm: 23.774945761050628 sim_pfm: -509.2587623813648
episode: 8 training return: -1072.659796177261
episode: 9 training return: -960.6633518143775
episode: 10 training return: -634.469466255158
episode: 11 training return: -348.1768653723698
epoch: 3 test_true_pfm: 29.100786115308864 sim_pfm: -184.52996281078092
episode: 12 training return: -315.54155421358183
episode: 13 training return: -400.4282436896955
episode: 14 training return: -139.84008572161142
episode: 15 training return: -87.54010277864771
epoch: 4 test_true_pfm: 30.7741193551662 sim_pfm: -329.8963147929863
episode: 16 training return: -431.9487095812729
episode: 17 training return: -379.3226402132372
episode: 18 training return: -249.58102081382768
episode: 19 training return: -123.43491554079456
epoch: 5 test_true_pfm: 23.08505822044587 sim_pfm: -384.16669140403593
episode: 20 training return: -624.3320704797655
episode: 21 training return: -134.12172869092953
episode: 22 training return: -39.09600445405538
episode: 23 training return: 123.10892638820441
epoch: 6 test_true_pfm: 50.53806140637991 sim_pfm: -81.2844303907132
episode: 24 training return: 80.88918295573838
episode: 25 training return: -409.4773494974673
episode: 26 training return: -156.2991795849826
episode: 27 training return: -241.77190040796148
epoch: 7 test_true_pfm: 40.05292551766158 sim_pfm: -101.09211197971835
episode: 28 training return: -115.83125738111649
episode: 29 training return: 47.54937113902612
episode: 30 training return: -12.17431610881745
episode: 31 training return: -65.04724848025108
epoch: 8 test_true_pfm: -7.3537585206151705 sim_pfm: -220.95664271825552
episode: 32 training return: -196.4790896788495
episode: 33 training return: 138.92891228370493
episode: 34 training return: -179.75338539552166
episode: 35 training return: 36.776311814452335
epoch: 9 test_true_pfm: 40.90295698908899 sim_pfm: 208.91769684723369
episode: 36 training return: 257.1451022645619
episode: 37 training return: 279.2527311131933
episode: 38 training return: 346.22473064130907
episode: 39 training return: 346.034581996572
epoch: 10 test_true_pfm: -5.840354102681642 sim_pfm: 360.81509456887505
episode: 40 training return: 397.108754673302
episode: 41 training return: 433.0304412172392
episode: 42 training return: 427.6480159460628
episode: 43 training return: 436.1442097860894
epoch: 11 test_true_pfm: -9.133384014704193 sim_pfm: 436.5802618862881
episode: 44 training return: 432.3574437446885
episode: 45 training return: 378.95722634593176
episode: 46 training return: 418.2292971775219
episode: 47 training return: 389.7255551506061
epoch: 12 test_true_pfm: 15.108862566977376 sim_pfm: 413.51708331410464
episode: 48 training return: 388.10161701135377
episode: 49 training return: 387.7076540882025
episode: 50 training return: 398.1579354440222
episode: 51 training return: 455.25229105472494
epoch: 13 test_true_pfm: 5.802531101492213 sim_pfm: 466.1449922866508
episode: 52 training return: 450.5055523778774
episode: 53 training return: 477.2100154245036
episode: 54 training return: 507.64823648921384
episode: 55 training return: 494.0492369539463
epoch: 14 test_true_pfm: 20.263680965248213 sim_pfm: 499.3171485990796
episode: 56 training return: 508.9745621964213
episode: 57 training return: 446.28879761163074
episode: 58 training return: 533.1579507167912
episode: 59 training return: 534.6165175711217
epoch: 15 test_true_pfm: 24.886329926443263 sim_pfm: 522.5211345706499
episode: 60 training return: 515.7821014525034
episode: 61 training return: 539.6268917755258
episode: 62 training return: 551.0842817791522
episode: 63 training return: 551.5975214798533
epoch: 16 test_true_pfm: 33.651710305041966 sim_pfm: 553.0324735312936
episode: 64 training return: 564.7636393945513
episode: 65 training return: 541.5744046102307
episode: 66 training return: 532.2069431636733
episode: 67 training return: 543.4764742600685
epoch: 17 test_true_pfm: 31.046804899378877 sim_pfm: 529.1140717219124
episode: 68 training return: 503.0949660679158
episode: 69 training return: 532.7102973312378
episode: 70 training return: 532.8883566890946
episode: 71 training return: 538.2979520682056
epoch: 18 test_true_pfm: 35.22998309625439 sim_pfm: 562.515298307479
episode: 72 training return: 529.8503428184686
episode: 73 training return: 555.1556427604922
episode: 74 training return: 549.7044883302368
episode: 75 training return: 554.4945556331445
epoch: 19 test_true_pfm: 31.724239133429755 sim_pfm: 562.8105389283877
episode: 76 training return: 562.0990276402225
episode: 77 training return: 550.4649708585473
episode: 78 training return: 563.0066583441975
episode: 79 training return: 578.6175606572265
epoch: 20 test_true_pfm: 28.64587031814066 sim_pfm: 589.6192309647058
episode: 80 training return: 561.9773174127087
episode: 81 training return: 575.7523346340737
episode: 82 training return: 578.727421507972
episode: 83 training return: 575.6733791878638
epoch: 21 test_true_pfm: 26.685581417638115 sim_pfm: 626.4115549540417
episode: 84 training return: 581.0109870103621
episode: 85 training return: 587.0226631559452
episode: 86 training return: 580.933690053086
episode: 87 training return: 564.5381256835683
epoch: 22 test_true_pfm: 27.909239216450885 sim_pfm: 639.6254663240443
episode: 88 training return: 601.1452804597178
episode: 89 training return: 567.8407513205748
episode: 90 training return: 586.5753757807997
episode: 91 training return: 609.7573287321666
epoch: 23 test_true_pfm: 26.297501808446725 sim_pfm: 604.4965236372705
episode: 92 training return: 579.9427999619472
episode: 93 training return: 646.0499497845052
episode: 94 training return: 645.89832603575
episode: 95 training return: 633.9382948194277
epoch: 24 test_true_pfm: 26.65908972147947 sim_pfm: 737.6283711479145
episode: 96 training return: 647.8011037831409
episode: 97 training return: 668.2057929645407
episode: 98 training return: 669.6327694288987
episode: 99 training return: 615.0882162540823
epoch: 25 test_true_pfm: 21.7097180225663 sim_pfm: 673.2980314591597
episode: 100 training return: 613.5809581626143
episode: 101 training return: 426.8245118056492
episode: 102 training return: 549.6513576199646
episode: 103 training return: 492.90134000335394
epoch: 26 test_true_pfm: 26.349822501512865 sim_pfm: 782.9834566676197
episode: 104 training return: 592.3417889363617
episode: 105 training return: 579.6769719232054
episode: 106 training return: 491.5852617011287
episode: 107 training return: 561.8651298972156
epoch: 27 test_true_pfm: 27.289274210431593 sim_pfm: 837.633191339565
episode: 108 training return: 465.58605799555005
episode: 109 training return: 602.8283826495282
episode: 110 training return: 537.1709621657859
episode: 111 training return: 600.7061874903986
epoch: 28 test_true_pfm: 8.073236016753217 sim_pfm: 835.515881289714
episode: 112 training return: 687.723183445812
episode: 113 training return: 477.77713194691205
episode: 114 training return: 544.8019863292283
episode: 115 training return: 623.4769691497615
epoch: 29 test_true_pfm: 10.283248158737484 sim_pfm: 800.4231496922991
episode: 116 training return: 454.00194967710087
episode: 117 training return: 549.1278400174501
episode: 118 training return: 724.5553168524197
episode: 119 training return: 704.2896124685898
epoch: 30 test_true_pfm: -3.202034031260098 sim_pfm: 892.3024883837364
episode: 120 training return: 751.8301587419705
episode: 121 training return: 610.9736044338667
episode: 122 training return: 580.4097682507294
episode: 123 training return: 610.4487481083587
epoch: 31 test_true_pfm: 10.498933179491276 sim_pfm: 900.5571976469776
episode: 124 training return: 732.4251766159555
episode: 125 training return: 827.4669600641466
episode: 126 training return: 707.6908185170014
episode: 127 training return: 824.7938059512728
epoch: 32 test_true_pfm: -0.09658404363995743 sim_pfm: 898.7712219491526
episode: 128 training return: 478.36017340344466
episode: 129 training return: 804.2368018368174
episode: 130 training return: 758.8715683116212
episode: 131 training return: 597.9150267548165
epoch: 33 test_true_pfm: -0.6015924674674832 sim_pfm: 873.0643430208926
episode: 132 training return: 621.275151956205
episode: 133 training return: 643.2491883227494
episode: 134 training return: 684.8358325797658
episode: 135 training return: 692.7272860868601
epoch: 34 test_true_pfm: 1.6523002987859456 sim_pfm: 820.9101267289936
episode: 136 training return: 556.5055995470747
episode: 137 training return: 656.3949361278775
episode: 138 training return: 616.0641907405865
episode: 139 training return: 535.3856855389932
epoch: 35 test_true_pfm: -1.795229014239752 sim_pfm: 876.1199455652004
episode: 140 training return: 681.3243991022254
episode: 141 training return: 730.884954006219
episode: 142 training return: 740.4073313736624
episode: 143 training return: 602.167972844398
epoch: 36 test_true_pfm: -0.0362562915925567 sim_pfm: 885.7367500339427
episode: 144 training return: 704.4421362269143
episode: 145 training return: 490.6540434135417
episode: 146 training return: 349.19292501872906
episode: 147 training return: 508.44185810170967
epoch: 37 test_true_pfm: -2.0477574158108585 sim_pfm: 892.1277646717214
episode: 148 training return: 649.0719620460992
episode: 149 training return: 649.578239117273
episode: 150 training return: 472.63875999272295
episode: 151 training return: 408.6324835312822
epoch: 38 test_true_pfm: -0.7245519673053279 sim_pfm: 861.3327206830387
episode: 152 training return: 786.3218381273325
episode: 153 training return: 442.9444646076145
episode: 154 training return: 476.8448117557693
episode: 155 training return: 358.1649798585365
epoch: 39 test_true_pfm: 0.1847554124997969 sim_pfm: 846.0379640958469
episode: 156 training return: 465.6180783231673
episode: 157 training return: 755.3599722121775
episode: 158 training return: 736.5713209302589
episode: 159 training return: 736.1778345549529
epoch: 40 test_true_pfm: 6.710506892201645 sim_pfm: 842.0136522835616
episode: 160 training return: 634.1532555083063
episode: 161 training return: 730.5059180422962
episode: 162 training return: 563.8811554580861
episode: 163 training return: 725.941418488725
epoch: 41 test_true_pfm: 5.030821408409531 sim_pfm: 886.1364220172863
episode: 164 training return: 548.5871300566034
episode: 165 training return: 643.2319959740097
episode: 166 training return: 695.2542994937036
episode: 167 training return: 573.0209462051142
epoch: 42 test_true_pfm: 3.0592094038031155 sim_pfm: 831.7974276155786
episode: 168 training return: 624.9109686883316
episode: 169 training return: 634.1320033184007
episode: 170 training return: 381.55503021084826
episode: 171 training return: 673.3751379079818
epoch: 43 test_true_pfm: 1.1267656977430704 sim_pfm: 864.0320934847332
episode: 172 training return: 622.926940390824
episode: 173 training return: 652.798413699365
episode: 174 training return: 612.2927694654948
episode: 175 training return: 475.6598568578503
epoch: 44 test_true_pfm: 2.4775050337774567 sim_pfm: 904.6461102730348
episode: 176 training return: 467.7251171178204
episode: 177 training return: 604.0404542718701
episode: 178 training return: 482.8300584068142
episode: 179 training return: 629.9232929831766
epoch: 45 test_true_pfm: 0.639708642147739 sim_pfm: 826.8211224227103
episode: 180 training return: 415.0005378703276
episode: 181 training return: 631.4616384764081
episode: 182 training return: 656.9440590400166
episode: 183 training return: 469.70614052138876
epoch: 46 test_true_pfm: 12.057802451324267 sim_pfm: 857.9730416314694
episode: 184 training return: 661.1952871100808
episode: 185 training return: 606.052881406834
episode: 186 training return: 553.314989514328
episode: 187 training return: 519.4211604038669
epoch: 47 test_true_pfm: 8.69164240293425 sim_pfm: 880.3579363680432
episode: 188 training return: 490.6350372099316
episode: 189 training return: 795.5324895111219
episode: 190 training return: 398.13916523856324
episode: 191 training return: 583.0005454383745
epoch: 48 test_true_pfm: 2.7377010080490294 sim_pfm: 844.5736530784388
episode: 192 training return: 766.0676152280316
episode: 193 training return: 389.59985860989156
episode: 194 training return: 618.608206861234
episode: 195 training return: 486.38720167794435
epoch: 49 test_true_pfm: 10.109781337293166 sim_pfm: 858.753659573098
episode: 196 training return: 640.4532700629717
episode: 197 training return: 429.5219911136126
episode: 198 training return: 572.5054971289588
episode: 199 training return: 403.05656011970547
epoch: 50 test_true_pfm: 1.875315992013824 sim_pfm: 819.245410849346
episode: 200 training return: 557.3468647208932
episode: 201 training return: 606.913991380334
episode: 202 training return: 544.8979958322784
episode: 203 training return: 661.3035994676537
epoch: 51 test_true_pfm: 13.161651704721137 sim_pfm: 855.9492917076559
episode: 204 training return: 597.5073627475119
episode: 205 training return: 497.2279729350382
episode: 206 training return: 673.4525577564905
episode: 207 training return: 617.0999941496199
epoch: 52 test_true_pfm: 4.644297505655134 sim_pfm: 635.0264999928995
episode: 208 training return: 590.2945380369484
episode: 209 training return: 739.113517607033
episode: 210 training return: 741.8231079966838
episode: 211 training return: 650.7639285991925
epoch: 53 test_true_pfm: -4.197951200332435 sim_pfm: 880.4609230504553
episode: 212 training return: 717.5611583348636
episode: 213 training return: 555.0324539946681
episode: 214 training return: 543.0714297547851
episode: 215 training return: 624.6482073996254
epoch: 54 test_true_pfm: 1.0704134265219047 sim_pfm: 884.2719596615119
episode: 216 training return: 719.2909346412406
episode: 217 training return: 568.4173595892468
episode: 218 training return: 604.2398442548081
episode: 219 training return: 734.9508762487058
epoch: 55 test_true_pfm: 0.23890881168951844 sim_pfm: 850.7412634201121
episode: 220 training return: 672.4688200632229
episode: 221 training return: 664.316171561833
episode: 222 training return: 649.7262559716582
episode: 223 training return: 755.9228130105869
epoch: 56 test_true_pfm: 3.3145248867344153 sim_pfm: 815.4038250067197
episode: 224 training return: 750.5966990685029
episode: 225 training return: 697.8481202532929
episode: 226 training return: 658.2282298927024
episode: 227 training return: 671.9770390005968
epoch: 57 test_true_pfm: -0.7603250541713141 sim_pfm: 751.3356611049592
episode: 228 training return: 585.2124261911675
episode: 229 training return: 220.77514150843032
episode: 230 training return: 695.8615413275826
episode: 231 training return: 763.3895768936892
epoch: 58 test_true_pfm: 3.5881570519335413 sim_pfm: 862.4391945464329
episode: 232 training return: 674.581119288111
episode: 233 training return: 579.1638466487809
episode: 234 training return: 640.3454082808386
episode: 235 training return: 547.8114291378168
epoch: 59 test_true_pfm: 9.450481531839655 sim_pfm: 816.7946085895344
episode: 236 training return: 337.48460586098247
episode: 237 training return: 337.3182026746756
episode: 238 training return: 743.0228900732644
episode: 239 training return: 655.2766862428357
epoch: 60 test_true_pfm: 4.484311957005241 sim_pfm: 865.4825050951591
episode: 240 training return: 515.1514737647965
episode: 241 training return: 335.30112404827133
episode: 242 training return: 716.2190601555981
episode: 243 training return: 414.48687850833693
epoch: 61 test_true_pfm: 7.1544561394431145 sim_pfm: 893.1292414087153
episode: 244 training return: 713.9007803484483
episode: 245 training return: 410.9266497059257
episode: 246 training return: 404.6695772526552
episode: 247 training return: 595.3988226857997
epoch: 62 test_true_pfm: 5.4787897406017985 sim_pfm: 865.9063639270214
episode: 248 training return: 674.1255223528187
episode: 249 training return: 768.2520716911525
episode: 250 training return: 695.1079042587622
episode: 251 training return: 633.931568511662
epoch: 63 test_true_pfm: 8.58605623174127 sim_pfm: 855.93716224996
episode: 252 training return: 470.88217588031586
episode: 253 training return: 564.3832927581457
episode: 254 training return: 477.95975595025243
episode: 255 training return: 446.226058980133
epoch: 64 test_true_pfm: 0.21896644582590757 sim_pfm: 884.5166062591231
episode: 256 training return: 605.7288637848432
episode: 257 training return: 668.0044155797682
episode: 258 training return: 640.7435626678788
episode: 259 training return: 693.4827128897983
epoch: 65 test_true_pfm: 6.17395246903649 sim_pfm: 903.6346799712144
episode: 260 training return: 574.0467872241276
episode: 261 training return: 669.4347696619508
episode: 262 training return: 556.109105359257
episode: 263 training return: 686.5878014872444
epoch: 66 test_true_pfm: 1.524085533408454 sim_pfm: 901.1992574530237
episode: 264 training return: 563.6761892987403
episode: 265 training return: 680.9728931513496
episode: 266 training return: 507.3362845441564
episode: 267 training return: 533.7516576150347
epoch: 67 test_true_pfm: 1.1837256685433517 sim_pfm: 890.1566467376997
episode: 268 training return: 529.9570835495048
episode: 269 training return: 581.9786380584486
episode: 270 training return: 503.368207277883
episode: 271 training return: 430.8104269055363
epoch: 68 test_true_pfm: -1.306197184385756 sim_pfm: 848.0078242001788
episode: 272 training return: 471.5457008354586
episode: 273 training return: 617.902659256736
episode: 274 training return: 584.8776432840876
episode: 275 training return: 360.58539947152
epoch: 69 test_true_pfm: -0.9745536216027695 sim_pfm: 903.8479069456349
episode: 276 training return: 556.8084883734007
episode: 277 training return: 599.7322437642144
episode: 278 training return: 721.9593642224196
episode: 279 training return: 536.7316054800488
epoch: 70 test_true_pfm: 0.8774362582792561 sim_pfm: 900.660296550981
episode: 280 training return: 323.0806248277394
episode: 281 training return: 470.38809803117755
episode: 282 training return: 488.568146625625
episode: 283 training return: 562.6927276050184
epoch: 71 test_true_pfm: 5.717821631286139 sim_pfm: 868.4897368448426
episode: 284 training return: 371.8731031177405
episode: 285 training return: 475.9281523320161
episode: 286 training return: 650.3724883253922
episode: 287 training return: 571.1938339117157
epoch: 72 test_true_pfm: 1.5212957917462464 sim_pfm: 787.4410200650002
episode: 288 training return: 590.4147516973466
episode: 289 training return: 355.577399047871
episode: 290 training return: 441.2183936516125
episode: 291 training return: 466.55152979757594
epoch: 73 test_true_pfm: 0.7569040141681078 sim_pfm: 868.3791432111977
episode: 292 training return: 504.7496821774258
episode: 293 training return: 274.23481719918874
episode: 294 training return: 477.1780357881601
episode: 295 training return: 530.0646046154897
epoch: 74 test_true_pfm: 15.799807601719271 sim_pfm: 827.8310873507756
episode: 296 training return: 341.24168644365164
episode: 297 training return: 400.3829116736253
episode: 298 training return: 364.70146466848536
episode: 299 training return: 243.9006657338297
epoch: 75 test_true_pfm: 10.323859595523396 sim_pfm: 808.6130009868926
episode: 300 training return: 276.345356973814
episode: 301 training return: 588.3446426672143
episode: 302 training return: 337.2958818960929
episode: 303 training return: 646.121042444953
epoch: 76 test_true_pfm: 4.684085695240262 sim_pfm: 854.2518339881055
episode: 304 training return: 487.707465639205
episode: 305 training return: 527.7715350751716
episode: 306 training return: 598.6463850719306
episode: 307 training return: 552.6183513671016
epoch: 77 test_true_pfm: 1.7085037587290395 sim_pfm: 824.5270537280824
episode: 308 training return: 283.2066643263166
episode: 309 training return: 500.0746251903316
episode: 310 training return: 731.895628892815
episode: 311 training return: 630.2582605428254
epoch: 78 test_true_pfm: -0.36972010344789047 sim_pfm: 879.8160916973262
episode: 312 training return: 387.0646648494239
episode: 313 training return: 439.320456558497
episode: 314 training return: 535.3749447439383
episode: 315 training return: 433.4572904025733
epoch: 79 test_true_pfm: 12.015024761027876 sim_pfm: 760.3381343171893
episode: 316 training return: 393.9096357991151
episode: 317 training return: 352.7931928500732
episode: 318 training return: 430.13278266383156
episode: 319 training return: 542.3800314658697
epoch: 80 test_true_pfm: 4.71917855426628 sim_pfm: 600.1878375227188
episode: 320 training return: 539.0414159810632
episode: 321 training return: 486.800261223126
episode: 322 training return: 499.1711099731644
episode: 323 training return: 598.2361145632505
epoch: 81 test_true_pfm: 7.785427203925652 sim_pfm: 806.2203733543826
episode: 324 training return: 286.32713781538246
episode: 325 training return: 385.8742123766373
episode: 326 training return: 475.3665261479236
episode: 327 training return: 533.1278457539827
epoch: 82 test_true_pfm: 1.1812231586872106 sim_pfm: 833.4922473657656
episode: 328 training return: 564.5840919969899
episode: 329 training return: 541.518332650383
episode: 330 training return: 611.7439545794439
episode: 331 training return: 575.7752825360974
epoch: 83 test_true_pfm: 3.592148659839112 sim_pfm: 784.0839887725832
episode: 332 training return: 616.4226981252343
episode: 333 training return: 623.2824564258236
episode: 334 training return: 497.9554221655284
episode: 335 training return: 659.3910817097263
epoch: 84 test_true_pfm: 14.01088827067812 sim_pfm: 688.1832715562748
episode: 336 training return: 659.9507741737523
episode: 337 training return: 523.4623677042425
episode: 338 training return: 621.4334149106968
episode: 339 training return: 329.5767209302519
epoch: 85 test_true_pfm: 6.242564773981828 sim_pfm: 824.4059043425211
episode: 340 training return: 553.2542497824154
episode: 341 training return: 562.4275050807241
episode: 342 training return: 633.5159728062406
episode: 343 training return: 615.3532412283963
epoch: 86 test_true_pfm: 7.590696427706087 sim_pfm: 860.0987461432957
episode: 344 training return: 299.6528063407155
episode: 345 training return: 371.17122040011094
episode: 346 training return: 528.079242869121
episode: 347 training return: 616.3414908998362
epoch: 87 test_true_pfm: 23.08642272433023 sim_pfm: 730.7005317783721
episode: 348 training return: 568.9646592713211
episode: 349 training return: 422.0640657566525
episode: 350 training return: 550.7702541791368
episode: 351 training return: 532.8215232723701
epoch: 88 test_true_pfm: 3.114488906360193 sim_pfm: 884.4888948417247
episode: 352 training return: 619.8726381392879
episode: 353 training return: 585.2554204540143
episode: 354 training return: 531.8289032892195
episode: 355 training return: 187.4677111092707
epoch: 89 test_true_pfm: 10.333007025696576 sim_pfm: 835.0806964380208
episode: 356 training return: 319.6945285651324
episode: 357 training return: 607.8300084583964
episode: 358 training return: 303.6940623469846
episode: 359 training return: 488.61382618569706
epoch: 90 test_true_pfm: -1.1548503988233274 sim_pfm: 865.4970807471688
episode: 360 training return: 625.0674621309679
episode: 361 training return: 548.5773030745856
episode: 362 training return: 420.49909253619677
episode: 363 training return: 325.0597006681188
epoch: 91 test_true_pfm: -0.7645105293819373 sim_pfm: 896.3529155963615
episode: 364 training return: 616.8852066843015
episode: 365 training return: 712.935744640988
episode: 366 training return: 460.12916057402504
episode: 367 training return: 304.6340481502435
epoch: 92 test_true_pfm: 2.3272499510660523 sim_pfm: 888.8448068241232
episode: 368 training return: 362.76925144267767
episode: 369 training return: 613.7554208146443
episode: 370 training return: 543.0507806660759
episode: 371 training return: 538.5964405584568
epoch: 93 test_true_pfm: -0.23900092585292523 sim_pfm: 882.1937473952758
episode: 372 training return: 420.07772813138763
episode: 373 training return: 461.88149243243356
episode: 374 training return: 257.77218825615046
episode: 375 training return: 231.1521721472559
epoch: 94 test_true_pfm: -1.6473487929321486 sim_pfm: 889.2176319752974
episode: 376 training return: 613.1537794193803
episode: 377 training return: 527.1506834240672
episode: 378 training return: 375.78374254959226
episode: 379 training return: 509.3652060374622
epoch: 95 test_true_pfm: 2.595889449091558 sim_pfm: 853.987721584228
episode: 380 training return: 565.3576869134746
episode: 381 training return: 631.4876066784562
episode: 382 training return: 545.4582803657452
episode: 383 training return: 641.2319660134028
epoch: 96 test_true_pfm: -1.3386214071575249 sim_pfm: 890.5378597405448
episode: 384 training return: 599.3711919677365
episode: 385 training return: 430.1689468562286
episode: 386 training return: 412.9613192144832
episode: 387 training return: 645.3755235668655
epoch: 97 test_true_pfm: 0.4443661095092911 sim_pfm: 811.9583308167572
episode: 388 training return: 544.6895172157548
episode: 389 training return: 672.040871107649
episode: 390 training return: 665.2275667247133
episode: 391 training return: 673.0079553386512
epoch: 98 test_true_pfm: -1.3422303357934424 sim_pfm: 886.1997010122617
episode: 392 training return: 538.0832697912921
episode: 393 training return: 548.340776585117
episode: 394 training return: 420.6253959204615
episode: 395 training return: 486.1360065479776
epoch: 99 test_true_pfm: 5.202806965143934 sim_pfm: 862.4112497853812
episode: 396 training return: 564.981386520372
episode: 397 training return: 362.18617138534296
episode: 398 training return: 632.1276537772288
episode: 399 training return: 395.85090686590263
epoch: 100 test_true_pfm: 10.446137289226206 sim_pfm: 853.5677882383865
episode: 400 training return: 315.800940428559
episode: 401 training return: 389.465917747299
episode: 402 training return: 329.54336434015727
episode: 403 training return: 446.65722889900843
epoch: 101 test_true_pfm: 5.075383829384909 sim_pfm: 871.1048945855758
episode: 404 training return: 448.2553611410525
episode: 405 training return: 399.5830827389379
episode: 406 training return: 632.643286495625
episode: 407 training return: 374.1350696102228
epoch: 102 test_true_pfm: 2.0017475655077344 sim_pfm: 868.1287447124303
episode: 408 training return: 420.04135572599876
episode: 409 training return: 575.5054334052227
episode: 410 training return: 466.3958236642458
episode: 411 training return: 665.8571860640295
epoch: 103 test_true_pfm: 6.647919343068696 sim_pfm: 852.3063307958994
episode: 412 training return: 593.1794807886956
episode: 413 training return: 327.6418715656114
episode: 414 training return: 417.7313965738364
episode: 415 training return: 581.2425077107562
epoch: 104 test_true_pfm: 4.1888200416592465 sim_pfm: 815.0470433425974
episode: 416 training return: 343.1775938159615
episode: 417 training return: 306.555652429612
episode: 418 training return: 589.9018589347197
episode: 419 training return: 424.4361261745538
epoch: 105 test_true_pfm: 6.171054810365712 sim_pfm: 866.1882007309317
episode: 420 training return: 407.74849758161696
episode: 421 training return: 410.37976514523166
episode: 422 training return: 457.0901147587174
episode: 423 training return: 379.85377459033543
epoch: 106 test_true_pfm: 2.4799244759445385 sim_pfm: 809.6249236342331
episode: 424 training return: 368.7130905652197
episode: 425 training return: 265.77440450896944
episode: 426 training return: 509.75926826077676
episode: 427 training return: 374.1024741765126
epoch: 107 test_true_pfm: -0.4030541687565778 sim_pfm: 888.2024357464413
episode: 428 training return: 529.7227642196848
episode: 429 training return: 576.9512288020395
episode: 430 training return: 485.1960734957284
episode: 431 training return: 603.8132121843925
epoch: 108 test_true_pfm: -0.8927479144844217 sim_pfm: 875.2950319098209
episode: 432 training return: 420.65605176654697
episode: 433 training return: 393.9522336685104
episode: 434 training return: 349.68751886071374
episode: 435 training return: 480.16253425804064
epoch: 109 test_true_pfm: -0.7921228404711427 sim_pfm: 896.4898481853752
episode: 436 training return: 378.3739283050623
episode: 437 training return: 513.8060114809023
episode: 438 training return: 291.8266143985828
episode: 439 training return: 227.57213536236736
epoch: 110 test_true_pfm: 2.474634156317284 sim_pfm: 861.6460206602984
episode: 440 training return: 319.932172164773
episode: 441 training return: 473.878387343314
episode: 442 training return: 602.7628586262085
episode: 443 training return: 436.6490990395356
epoch: 111 test_true_pfm: -0.024543308065483504 sim_pfm: 889.2004452531703
episode: 444 training return: 516.6163117706122
episode: 445 training return: 515.4488442738996
episode: 446 training return: 496.5086236509307
episode: 447 training return: 633.14920272305
epoch: 112 test_true_pfm: -0.09001053517767592 sim_pfm: 892.717546708023
episode: 448 training return: 350.79985257570223
episode: 449 training return: 399.7479941697026
episode: 450 training return: 573.1382609897715
episode: 451 training return: 421.91307071589137
epoch: 113 test_true_pfm: 14.102404065384182 sim_pfm: 831.8948849012193
episode: 452 training return: 584.4129376577963
episode: 453 training return: 554.1353548682669
episode: 454 training return: 638.0927949064296
episode: 455 training return: 597.5286516833122
epoch: 114 test_true_pfm: 4.65784903467416 sim_pfm: 856.6421984360461
episode: 456 training return: 441.7840778349804
episode: 457 training return: 312.3073345243118
episode: 458 training return: 297.6327037041381
episode: 459 training return: 462.1461187688563
epoch: 115 test_true_pfm: -0.8345588443568099 sim_pfm: 892.2247192129287
episode: 460 training return: 376.13865099663127
episode: 461 training return: 637.0326785016792
episode: 462 training return: 387.4243921307316
episode: 463 training return: 585.4410666222967
epoch: 116 test_true_pfm: -1.769563473488578 sim_pfm: 852.3810566622769
episode: 464 training return: 370.2750452605152
episode: 465 training return: 494.64562950923903
episode: 466 training return: 644.0233272901181
episode: 467 training return: 564.1691306839234
epoch: 117 test_true_pfm: 1.809204636146956 sim_pfm: 858.4245350311691
episode: 468 training return: 362.8270024979204
episode: 469 training return: 594.8215088937862
episode: 470 training return: 548.0353475011932
episode: 471 training return: 555.922495467036
epoch: 118 test_true_pfm: 3.802848994827438 sim_pfm: 813.9613888697779
episode: 472 training return: 617.4913892178553
episode: 473 training return: 566.6634962341927
episode: 474 training return: 568.7777684011056
episode: 475 training return: 491.9018665619469
epoch: 119 test_true_pfm: 5.497705424775356 sim_pfm: 831.6630616393579
episode: 476 training return: 538.0346823353846
episode: 477 training return: 489.32759277286493
episode: 478 training return: 651.8640898249229
episode: 479 training return: 443.6461370683704
epoch: 120 test_true_pfm: 4.784769779437187 sim_pfm: 824.364555556295
episode: 480 training return: 466.6534115562996
episode: 481 training return: 486.56138881895436
episode: 482 training return: 397.6096781951855
episode: 483 training return: 441.60044328742396
epoch: 121 test_true_pfm: 0.9408617074154041 sim_pfm: 796.9343968924968
episode: 484 training return: 545.5000245423806
episode: 485 training return: 509.3192195793171
episode: 486 training return: 444.3651242202998
episode: 487 training return: 491.30766971768145
epoch: 122 test_true_pfm: 0.5033209451756426 sim_pfm: 892.5211392034462
episode: 488 training return: 536.1542716340439
episode: 489 training return: 374.13925891927397
episode: 490 training return: 549.9824481915737
episode: 491 training return: 406.4936309195118
epoch: 123 test_true_pfm: 3.7732004582520964 sim_pfm: 859.6338478967366
episode: 492 training return: 483.41539366000063
episode: 493 training return: 521.1571051704238
episode: 494 training return: 528.0766552332261
episode: 495 training return: 568.3078878741264
epoch: 124 test_true_pfm: 1.5655004029340405 sim_pfm: 860.7178394627978
episode: 496 training return: 412.4196057707054
episode: 497 training return: 385.37531328880135
episode: 498 training return: 505.8206076472089
episode: 499 training return: 622.4056782870649
epoch: 125 test_true_pfm: -0.972088161914782 sim_pfm: 881.1538575562494
episode: 500 training return: 510.99879006125644
episode: 501 training return: 433.77189608384333
episode: 502 training return: 424.7592557910787
episode: 503 training return: 558.1147322781467
epoch: 126 test_true_pfm: 5.360969868992285 sim_pfm: 838.6550860618366
episode: 504 training return: 311.6400543604289
episode: 505 training return: 416.1189975088172
episode: 506 training return: 382.14139815651885
episode: 507 training return: 472.21496179109084
epoch: 127 test_true_pfm: 3.4597438740597695 sim_pfm: 868.4449010260211
episode: 508 training return: 428.421423846449
episode: 509 training return: 460.91577964417473
episode: 510 training return: 515.409286288748
episode: 511 training return: 476.6558881391612
epoch: 128 test_true_pfm: 3.9508925895705502 sim_pfm: 875.9731528181252
episode: 512 training return: 486.81499398946346
episode: 513 training return: 345.80063003049673
episode: 514 training return: 484.42336896326697
episode: 515 training return: 469.22066959092245
epoch: 129 test_true_pfm: 3.261035385724007 sim_pfm: 841.8242749485338
episode: 516 training return: 424.56469908912084
episode: 517 training return: 393.41348340290864
episode: 518 training return: 429.5391037346173
episode: 519 training return: 432.17468546424305
epoch: 130 test_true_pfm: 2.7068908667479556 sim_pfm: 869.8418356143648
episode: 520 training return: 425.6477749363611
episode: 521 training return: 466.8966207836198
episode: 522 training return: 431.3402981534488
episode: 523 training return: 454.7538905532342
epoch: 131 test_true_pfm: 11.282748073495428 sim_pfm: 830.0426004366585
episode: 524 training return: 490.83853676219036
episode: 525 training return: 482.6996563828553
episode: 526 training return: 522.8697028650458
episode: 527 training return: 462.53796935309646
epoch: 132 test_true_pfm: 1.6644275389331782 sim_pfm: 839.0033708629901
episode: 528 training return: 428.62010732187974
episode: 529 training return: 441.2326389768133
episode: 530 training return: 426.33128446222787
episode: 531 training return: 489.33646443785545
epoch: 133 test_true_pfm: 5.013586528948754 sim_pfm: 819.0281040922637
episode: 532 training return: 615.6580031050386
episode: 533 training return: 526.2936526081185
episode: 534 training return: 516.0899135735297
episode: 535 training return: 576.6430795065208
epoch: 134 test_true_pfm: 5.12134549184322 sim_pfm: 846.3373949912095
episode: 536 training return: 583.9778551833309
episode: 537 training return: 456.19753447897085
episode: 538 training return: 632.7305768086278
episode: 539 training return: 495.9476968695644
epoch: 135 test_true_pfm: 12.131483986326478 sim_pfm: 839.7674790760772
episode: 540 training return: 490.64634868664893
episode: 541 training return: 488.5891469204989
episode: 542 training return: 409.4204078642179
episode: 543 training return: 407.8243708426991
epoch: 136 test_true_pfm: 2.010947705998997 sim_pfm: 851.2836297828744
episode: 544 training return: 462.4140917430127
episode: 545 training return: 369.1675524307309
episode: 546 training return: 440.63788125680196
episode: 547 training return: 541.4688689383677
epoch: 137 test_true_pfm: 4.909681391807957 sim_pfm: 869.3975852718124
episode: 548 training return: 394.84375216795115
episode: 549 training return: 415.2929367157025
episode: 550 training return: 389.6683849446102
episode: 551 training return: 490.4803934449856
epoch: 138 test_true_pfm: 6.479254499939705 sim_pfm: 821.3786864078018
episode: 552 training return: 445.03847223455045
episode: 553 training return: 398.71876398722907
episode: 554 training return: 440.35319210743074
episode: 555 training return: 438.5673324038076
epoch: 139 test_true_pfm: 1.9273128830848048 sim_pfm: 824.6788242144399
episode: 556 training return: 398.8457840119354
episode: 557 training return: 574.9946510013482
episode: 558 training return: 515.2210297637016
episode: 559 training return: 522.066771419488
epoch: 140 test_true_pfm: 0.25716491396930685 sim_pfm: 869.5623501230235
episode: 560 training return: 559.7130904792614
episode: 561 training return: 549.4558417217315
episode: 562 training return: 310.911898270726
episode: 563 training return: 389.6461263749908
epoch: 141 test_true_pfm: 2.80562957606115 sim_pfm: 871.7222376469241
episode: 564 training return: 515.2739858073827
episode: 565 training return: 357.67834666254646
episode: 566 training return: 392.257273137792
episode: 567 training return: 547.766530823107
epoch: 142 test_true_pfm: 1.9570931745579965 sim_pfm: 868.9151856483317
episode: 568 training return: 444.51783457490467
episode: 569 training return: 417.51936272135043
episode: 570 training return: 390.2301437111684
episode: 571 training return: 433.57382524752467
epoch: 143 test_true_pfm: 3.186962554477547 sim_pfm: 849.2593131920466
episode: 572 training return: 600.699683254415
episode: 573 training return: 612.5228326616012
episode: 574 training return: 642.4856075553479
episode: 575 training return: 635.7450638242344
epoch: 144 test_true_pfm: 3.109244656593618 sim_pfm: 816.7683055531838
episode: 576 training return: 388.98847818942176
episode: 577 training return: 412.4653904116977
episode: 578 training return: 459.1815647802679
episode: 579 training return: 454.1723548844447
epoch: 145 test_true_pfm: 0.5113644173639645 sim_pfm: 891.980929243661
episode: 580 training return: 397.605075048842
episode: 581 training return: 338.9281581622207
episode: 582 training return: 606.8362159944592
episode: 583 training return: 390.95275240814885
epoch: 146 test_true_pfm: 4.5318181902284795 sim_pfm: 817.5445691215828
episode: 584 training return: 443.9349042874017
episode: 585 training return: 334.43387489396906
episode: 586 training return: 621.4610362137875
episode: 587 training return: 392.5942814291947
epoch: 147 test_true_pfm: 7.380317055593466 sim_pfm: 789.942921575728
episode: 588 training return: 444.9244550725759
episode: 589 training return: 404.69788815217794
episode: 590 training return: 460.87993362195454
episode: 591 training return: 540.6392858293602
epoch: 148 test_true_pfm: 12.715354123598335 sim_pfm: 844.6859385605214
episode: 592 training return: 368.4065769524494
episode: 593 training return: 427.9602101358401
episode: 594 training return: 464.24437355105994
episode: 595 training return: 429.40894877093785
epoch: 149 test_true_pfm: 3.575238311700532 sim_pfm: 860.5868281742175
episode: 596 training return: 387.9166983595673
episode: 597 training return: 420.66170989813355
episode: 598 training return: 439.48221488020107
episode: 599 training return: 349.75699288424084
epoch: 150 test_true_pfm: 7.242071718255062 sim_pfm: 841.2124545111176
