['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'uncertainty', '--traj', 'expert', '--seed', '1', '--data', '100000']
epoch: 0 training_loss 0.2034082742780447 test_loss: 0.1407177686691284
epoch: 1 training_loss 0.1424675116688013 test_loss: 0.12597235441207885
epoch: 2 training_loss 0.13317495577037333 test_loss: 0.10439049005508423
epoch: 3 training_loss 0.13456699751317502 test_loss: 0.14425215721130372
epoch: 4 training_loss 0.12806033290922642 test_loss: 0.11017205715179443
epoch: 5 training_loss 0.12307026263326407 test_loss: 0.13293133974075316
epoch: 6 training_loss 0.127128691598773 test_loss: 0.11915422677993774
epoch: 7 training_loss 0.12156688708811998 test_loss: 0.1394512414932251
epoch: 8 training_loss 0.12099620390683413 test_loss: 0.12267825603485108
epoch: 9 training_loss 0.13059796884655953 test_loss: 0.10735068321228028
epoch: 10 training_loss 0.12035916551947594 test_loss: 0.10870851278305053
epoch: 11 training_loss 0.115105797983706 test_loss: 0.12017612457275391
epoch: 12 training_loss 0.12140065100044012 test_loss: 0.12171597480773926
epoch: 13 training_loss 0.11658111777156592 test_loss: 0.12693123817443847
epoch: 14 training_loss 0.11517351575195789 test_loss: 0.1130640149116516
epoch: 15 training_loss 0.12589836541563273 test_loss: 0.1036412000656128
epoch: 16 training_loss 0.11800630213692784 test_loss: 0.12349065542221069
epoch: 17 training_loss 0.11745048604905606 test_loss: 0.1257285952568054
epoch: 18 training_loss 0.1198675710707903 test_loss: 0.11461080312728882
epoch: 19 training_loss 0.11489693719893694 test_loss: 0.12111681699752808
epoch: 20 training_loss 0.11553198084235192 test_loss: 0.1191143274307251
epoch: 21 training_loss 0.12017443276941776 test_loss: 0.11483219861984253
epoch: 22 training_loss 0.11499250803142785 test_loss: 0.11008914709091186
epoch: 23 training_loss 0.11303615998476743 test_loss: 0.10222882032394409
epoch: 24 training_loss 0.1171328842639923 test_loss: 0.1176449179649353
epoch: 25 training_loss 0.11585017610341311 test_loss: 0.11244736909866333
epoch: 26 training_loss 0.1146726893633604 test_loss: 0.12336300611495972
epoch: 27 training_loss 0.11627040479332208 test_loss: 0.1041609764099121
epoch: 28 training_loss 0.11611450634896756 test_loss: 0.11623595952987671
epoch: 29 training_loss 0.11880553230643272 test_loss: 0.11685768365859986
epoch: 30 training_loss 0.11509199790656567 test_loss: 0.10392413139343262
epoch: 31 training_loss 0.11494688659906388 test_loss: 0.10718321800231934
epoch: 32 training_loss 0.1124966248869896 test_loss: 0.12510935068130494
epoch: 33 training_loss 0.1181497648358345 test_loss: 0.11368101835250854
epoch: 34 training_loss 0.11380388073623181 test_loss: 0.11082797050476074
epoch: 35 training_loss 0.11235682740807533 test_loss: 0.10943740606307983
epoch: 36 training_loss 0.11354494724422694 test_loss: 0.09685823917388917
epoch: 37 training_loss 0.11293374594300985 test_loss: 0.10779107809066772
epoch: 38 training_loss 0.11265705615282058 test_loss: 0.1194911003112793
epoch: 39 training_loss 0.10970835760235786 test_loss: 0.11980782747268677
epoch: 40 training_loss 0.1132501593604684 test_loss: 0.11746351718902588
epoch: 41 training_loss 0.11335165683180094 test_loss: 0.1053114652633667
epoch: 42 training_loss 0.11006049770861864 test_loss: 0.12674570083618164
epoch: 43 training_loss 0.11492913089692593 test_loss: 0.10443100929260254
epoch: 44 training_loss 0.11839216463267803 test_loss: 0.11606711149215698
epoch: 45 training_loss 0.11432618621736765 test_loss: 0.11502110958099365
epoch: 46 training_loss 0.11144589856266976 test_loss: 0.12086402177810669
epoch: 47 training_loss 0.11348217589780689 test_loss: 0.12607465982437133
epoch: 48 training_loss 0.11132536444813013 test_loss: 0.1139567255973816
epoch: 49 training_loss 0.1152410751208663 test_loss: 0.10723389387130737
epoch: 50 training_loss 0.10854682736098767 test_loss: 0.11347633600234985
epoch: 51 training_loss 0.11380432102829217 test_loss: 0.11739317178726197
epoch: 52 training_loss 0.11144319418817758 test_loss: 0.10355100631713868
epoch: 53 training_loss 0.11218866646289825 test_loss: 0.11220823526382447
epoch: 54 training_loss 0.1124146232008934 test_loss: 0.11929061412811279
epoch: 55 training_loss 0.11664177067577838 test_loss: 0.10495119094848633
epoch: 56 training_loss 0.11363802794367076 test_loss: 0.11352545022964478
epoch: 57 training_loss 0.10786512993276119 test_loss: 0.12080742120742798
epoch: 58 training_loss 0.1072025516629219 test_loss: 0.10979996919631958
epoch: 59 training_loss 0.10536101162433624 test_loss: 0.12180939912796021
epoch: 60 training_loss 0.11114228144288063 test_loss: 0.11887987852096557
epoch: 61 training_loss 0.11691521029919386 test_loss: 0.1071282148361206
epoch: 62 training_loss 0.11402336314320564 test_loss: 0.11623965501785279
epoch: 63 training_loss 0.11642670132219791 test_loss: 0.11196810007095337
epoch: 64 training_loss 0.11851528722792865 test_loss: 0.11291692256927491
epoch: 65 training_loss 0.11660794597119092 test_loss: 0.12318000793457032
epoch: 66 training_loss 0.11209350008517503 test_loss: 0.11017963886260987
epoch: 67 training_loss 0.11604767739772796 test_loss: 0.10694388151168824
epoch: 68 training_loss 0.11050536137074232 test_loss: 0.1154884934425354
epoch: 69 training_loss 0.10734462533146143 test_loss: 0.12149546146392823
epoch: 70 training_loss 0.11708551783114672 test_loss: 0.1137101411819458
epoch: 71 training_loss 0.10696385612711311 test_loss: 0.10951287746429443
epoch: 72 training_loss 0.10748947132378817 test_loss: 0.1113411545753479
epoch: 73 training_loss 0.1136968669295311 test_loss: 0.11563880443572998
epoch: 74 training_loss 0.11375895950943232 test_loss: 0.11141437292098999
epoch: 75 training_loss 0.10693693842738866 test_loss: 0.11447790861129761
epoch: 76 training_loss 0.11078436464071274 test_loss: 0.12345443964004517
epoch: 77 training_loss 0.10533983122557401 test_loss: 0.11446336507797242
epoch: 78 training_loss 0.10735770847648382 test_loss: 0.11483782529830933
epoch: 79 training_loss 0.11459694128483534 test_loss: 0.12677513360977172
epoch: 80 training_loss 0.10862682707607746 test_loss: 0.11102228164672852
epoch: 81 training_loss 0.1098204954713583 test_loss: 0.11337943077087402
epoch: 82 training_loss 0.11008091494441033 test_loss: 0.12278503179550171
epoch: 83 training_loss 0.11221400756388902 test_loss: 0.12977197170257568
epoch: 84 training_loss 0.11348277315497399 test_loss: 0.09846602082252502
epoch: 85 training_loss 0.10864104326814412 test_loss: 0.11628921031951904
epoch: 86 training_loss 0.11290822938084602 test_loss: 0.10086461305618286
epoch: 87 training_loss 0.10906853727996349 test_loss: 0.11300286054611205
epoch: 88 training_loss 0.1135883191600442 test_loss: 0.11134976148605347
epoch: 89 training_loss 0.100254484731704 test_loss: 0.10141332149505615
epoch: 90 training_loss 0.10828891862183809 test_loss: 0.10868599414825439
epoch: 91 training_loss 0.10852395083755255 test_loss: 0.12264149188995362
epoch: 92 training_loss 0.10670862529426813 test_loss: 0.10864135026931762
epoch: 93 training_loss 0.1106853374093771 test_loss: 0.12216721773147583
epoch: 94 training_loss 0.11361799266189337 test_loss: 0.10968514680862426
epoch: 95 training_loss 0.11290372714400292 test_loss: 0.12240313291549683
epoch: 96 training_loss 0.10803650494664907 test_loss: 0.11702717542648315
epoch: 97 training_loss 0.11009140610694886 test_loss: 0.10471954345703124
epoch: 98 training_loss 0.11326224148273469 test_loss: 0.11083581447601318
epoch: 99 training_loss 0.10817529253661633 test_loss: 0.11320933103561401
epoch: 100 training_loss 0.11454294517636299 test_loss: 0.10401369333267212
epoch: 101 training_loss 0.1128190479055047 test_loss: 0.10622174739837646
epoch: 102 training_loss 0.11108367878943681 test_loss: 0.11166330575942993
epoch: 103 training_loss 0.11438879258930683 test_loss: 0.10881930589675903
epoch: 104 training_loss 0.10836370028555393 test_loss: 0.11124368906021118
epoch: 105 training_loss 0.10955457650125026 test_loss: 0.10771427154541016
epoch: 106 training_loss 0.10987081974744797 test_loss: 0.1100288987159729
epoch: 107 training_loss 0.1085315315797925 test_loss: 0.10572856664657593
epoch: 108 training_loss 0.10845627479255199 test_loss: 0.10547157526016235
epoch: 109 training_loss 0.11006718620657921 test_loss: 0.11927570104598999
epoch: 110 training_loss 0.11344713687896729 test_loss: 0.12164472341537476
epoch: 111 training_loss 0.1048556649312377 test_loss: 0.11075708866119385
epoch: 112 training_loss 0.10768087651580573 test_loss: 0.1183435320854187
epoch: 113 training_loss 0.10808129280805588 test_loss: 0.11508765220642089
epoch: 114 training_loss 0.10856067083775997 test_loss: 0.10858933925628662
epoch: 115 training_loss 0.105623122677207 test_loss: 0.10756981372833252
epoch: 116 training_loss 0.1089216710627079 test_loss: 0.12518973350524903
epoch: 117 training_loss 0.11071098577231168 test_loss: 0.0961635410785675
epoch: 118 training_loss 0.1136164465919137 test_loss: 0.10561205148696899
epoch: 119 training_loss 0.11274672504514456 test_loss: 0.11347023248672486
epoch: 120 training_loss 0.10620691936463117 test_loss: 0.12494410276412964
epoch: 121 training_loss 0.11094932109117508 test_loss: 0.11218429803848266
epoch: 122 training_loss 0.11125102635473012 test_loss: 0.10688135623931885
epoch: 123 training_loss 0.11263523302972317 test_loss: 0.11780353784561157
epoch: 124 training_loss 0.10770274039357901 test_loss: 0.11324591636657715
epoch: 125 training_loss 0.10902751807123423 test_loss: 0.1017086386680603
epoch: 126 training_loss 0.1043092268705368 test_loss: 0.10850309133529663
epoch: 127 training_loss 0.1100039991736412 test_loss: 0.10617839097976685
epoch: 128 training_loss 0.11013172458857298 test_loss: 0.1141506314277649
epoch: 129 training_loss 0.11012537471950054 test_loss: 0.11222437620162964
epoch: 130 training_loss 0.10645019516348839 test_loss: 0.10721086263656616
epoch: 131 training_loss 0.11085622504353523 test_loss: 0.11550706624984741
epoch: 132 training_loss 0.11670186243951321 test_loss: 0.1019213080406189
epoch: 133 training_loss 0.10965357135981321 test_loss: 0.10871042013168335
epoch: 134 training_loss 0.11795373946428299 test_loss: 0.09925193190574647
epoch: 135 training_loss 0.1077859679237008 test_loss: 0.10971003770828247
epoch: 136 training_loss 0.11474603824317456 test_loss: 0.11806113719940185
epoch: 137 training_loss 0.10723225623369217 test_loss: 0.10840131044387817
epoch: 138 training_loss 0.10671623516827822 test_loss: 0.10899165868759156
epoch: 139 training_loss 0.11225147802382708 test_loss: 0.119399094581604
epoch: 140 training_loss 0.10815025374293327 test_loss: 0.11622211933135987
epoch: 141 training_loss 0.1090071988105774 test_loss: 0.1183133840560913
epoch: 142 training_loss 0.10670408960431814 test_loss: 0.11654874086380004
epoch: 143 training_loss 0.11125353718176484 test_loss: 0.12067307233810425
epoch: 144 training_loss 0.1132480376586318 test_loss: 0.11315717697143554
epoch: 145 training_loss 0.11345684085041284 test_loss: 0.1089969277381897
epoch: 146 training_loss 0.1089608022943139 test_loss: 0.11494498252868653
epoch: 147 training_loss 0.11242383938282728 test_loss: 0.1123235821723938
epoch: 148 training_loss 0.10974388249218464 test_loss: 0.09757452011108399
epoch: 149 training_loss 0.10691606275737285 test_loss: 0.11946700811386109
epoch: 0 training_loss 0.20588524892926216 test_loss: 0.16659438610076904
epoch: 1 training_loss 0.14331274941563607 test_loss: 0.13960729837417601
epoch: 2 training_loss 0.13547572240233421 test_loss: 0.12931786775588988
epoch: 3 training_loss 0.12573924116790294 test_loss: 0.14162352085113525
epoch: 4 training_loss 0.12731752548366784 test_loss: 0.12981388568878174
epoch: 5 training_loss 0.1225752466544509 test_loss: 0.12565006017684938
epoch: 6 training_loss 0.12260569423437119 test_loss: 0.1248626470565796
epoch: 7 training_loss 0.1221839739009738 test_loss: 0.12850364446640014
epoch: 8 training_loss 0.11785505261272192 test_loss: 0.12826513051986693
epoch: 9 training_loss 0.11925052739679813 test_loss: 0.1271403431892395
epoch: 10 training_loss 0.12305643871426582 test_loss: 0.11551529169082642
epoch: 11 training_loss 0.11229085836559534 test_loss: 0.12342636585235596
epoch: 12 training_loss 0.1154886469990015 test_loss: 0.1308612585067749
epoch: 13 training_loss 0.10963389411568641 test_loss: 0.1365692734718323
epoch: 14 training_loss 0.11519491270184518 test_loss: 0.13575735092163085
epoch: 15 training_loss 0.1104247759655118 test_loss: 0.1335506796836853
epoch: 16 training_loss 0.11939956560730934 test_loss: 0.12339482307434083
epoch: 17 training_loss 0.11681927844882012 test_loss: 0.12793532609939576
epoch: 18 training_loss 0.11634481068700551 test_loss: 0.12237274646759033
epoch: 19 training_loss 0.11063722129911184 test_loss: 0.13041356801986695
epoch: 20 training_loss 0.11057987589389086 test_loss: 0.13889360427856445
epoch: 21 training_loss 0.1062032474949956 test_loss: 0.14030566215515136
epoch: 22 training_loss 0.111677679233253 test_loss: 0.157833993434906
epoch: 23 training_loss 0.11985502142459153 test_loss: 0.12142660617828369
epoch: 24 training_loss 0.11873148635029793 test_loss: 0.12342441082000732
epoch: 25 training_loss 0.11685090761631728 test_loss: 0.12887325286865234
epoch: 26 training_loss 0.10766755495220423 test_loss: 0.1499793767929077
epoch: 27 training_loss 0.11881574284285307 test_loss: 0.11141132116317749
epoch: 28 training_loss 0.11746627997606993 test_loss: 0.1385634422302246
epoch: 29 training_loss 0.11325641456991434 test_loss: 0.12860864400863647
epoch: 30 training_loss 0.11228883471339941 test_loss: 0.12075945138931274
epoch: 31 training_loss 0.1124694810062647 test_loss: 0.12229262590408325
epoch: 32 training_loss 0.10546898450702429 test_loss: 0.13648498058319092
epoch: 33 training_loss 0.1095155580341816 test_loss: 0.13127646446228028
epoch: 34 training_loss 0.11089183829724789 test_loss: 0.13769183158874512
epoch: 35 training_loss 0.11359391160309315 test_loss: 0.12663543224334717
epoch: 36 training_loss 0.10810868985950947 test_loss: 0.12093185186386109
epoch: 37 training_loss 0.1169642298668623 test_loss: 0.13053550720214843
epoch: 38 training_loss 0.11355806641280651 test_loss: 0.14460937976837157
epoch: 39 training_loss 0.11504184205085038 test_loss: 0.13532434701919555
epoch: 40 training_loss 0.11844446305185556 test_loss: 0.11457339525222779
epoch: 41 training_loss 0.10618856627494097 test_loss: 0.13150643110275267
epoch: 42 training_loss 0.10865801708772778 test_loss: 0.1269049286842346
epoch: 43 training_loss 0.11184008534997701 test_loss: 0.12920385599136353
epoch: 44 training_loss 0.11055583942681552 test_loss: 0.12210400104522705
epoch: 45 training_loss 0.1118603878468275 test_loss: 0.12966468334197997
epoch: 46 training_loss 0.11402543012052774 test_loss: 0.1347681999206543
epoch: 47 training_loss 0.11466037884354591 test_loss: 0.13321080207824706
epoch: 48 training_loss 0.11250362202525138 test_loss: 0.13166762590408326
epoch: 49 training_loss 0.11034489747136832 test_loss: 0.13862528800964355
epoch: 50 training_loss 0.10228490367531777 test_loss: 0.12060041427612304
epoch: 51 training_loss 0.1072680002823472 test_loss: 0.12807092666625977
epoch: 52 training_loss 0.10546425443142653 test_loss: 0.12461291551589966
epoch: 53 training_loss 0.11297746598720551 test_loss: 0.12571051120758056
epoch: 54 training_loss 0.10976342288777233 test_loss: 0.14404160976409913
epoch: 55 training_loss 0.11541213639080525 test_loss: 0.1369829535484314
epoch: 56 training_loss 0.11082742977887391 test_loss: 0.12335480451583862
epoch: 57 training_loss 0.11142890732735396 test_loss: 0.1277337670326233
epoch: 58 training_loss 0.11035046387463808 test_loss: 0.13217655420303345
epoch: 59 training_loss 0.11190092511475086 test_loss: 0.12557506561279297
epoch: 60 training_loss 0.10875907998532057 test_loss: 0.12984594106674194
epoch: 61 training_loss 0.11257456820458174 test_loss: 0.12744156122207642
epoch: 62 training_loss 0.10879776641726494 test_loss: 0.10471906661987304
epoch: 63 training_loss 0.10667049527168274 test_loss: 0.1431592583656311
epoch: 64 training_loss 0.10375037208199502 test_loss: 0.11672700643539428
epoch: 65 training_loss 0.11252558935433626 test_loss: 0.11591267585754395
epoch: 66 training_loss 0.11190906558185816 test_loss: 0.12984694242477418
epoch: 67 training_loss 0.11292022801935672 test_loss: 0.1245302677154541
epoch: 68 training_loss 0.1141232468187809 test_loss: 0.13732378482818602
epoch: 69 training_loss 0.1117995484918356 test_loss: 0.12374073266983032
epoch: 70 training_loss 0.11081603109836578 test_loss: 0.12677216529846191
epoch: 71 training_loss 0.10910226676613093 test_loss: 0.11401269435882569
epoch: 72 training_loss 0.11009007424116135 test_loss: 0.11857229471206665
epoch: 73 training_loss 0.113665387108922 test_loss: 0.13493609428405762
epoch: 74 training_loss 0.11091727793216705 test_loss: 0.12200627326965333
epoch: 75 training_loss 0.11167719136923551 test_loss: 0.121444571018219
epoch: 76 training_loss 0.10754977837204933 test_loss: 0.11799713373184204
epoch: 77 training_loss 0.11070414330810309 test_loss: 0.12422047853469849
epoch: 78 training_loss 0.10649726457893849 test_loss: 0.12897368669509887
epoch: 79 training_loss 0.1168417563289404 test_loss: 0.11500318050384521
epoch: 80 training_loss 0.10817564941942692 test_loss: 0.12371031045913697
epoch: 81 training_loss 0.10991288155317307 test_loss: 0.14464287757873534
epoch: 82 training_loss 0.11448060180991888 test_loss: 0.1332340121269226
epoch: 83 training_loss 0.11434030663222075 test_loss: 0.12288180589675904
epoch: 84 training_loss 0.12008533418178559 test_loss: 0.14191774129867554
epoch: 85 training_loss 0.10855523033067584 test_loss: 0.12853615283966063
epoch: 86 training_loss 0.11047245569527149 test_loss: 0.12401961088180542
epoch: 87 training_loss 0.10858465742319823 test_loss: 0.12467341423034668
epoch: 88 training_loss 0.1129984325543046 test_loss: 0.12961108684539796
epoch: 89 training_loss 0.1127233037352562 test_loss: 0.13353897333145143
epoch: 90 training_loss 0.10469664443284273 test_loss: 0.12657322883605956
epoch: 91 training_loss 0.11186723403632641 test_loss: 0.1259975552558899
epoch: 92 training_loss 0.10766687087714671 test_loss: 0.12501367330551147
epoch: 93 training_loss 0.11448567770421506 test_loss: 0.11993893384933471
epoch: 94 training_loss 0.10974828975275158 test_loss: 0.10809839963912964
epoch: 95 training_loss 0.11077017609030008 test_loss: 0.1263789415359497
epoch: 96 training_loss 0.11078474223613739 test_loss: 0.12611287832260132
epoch: 97 training_loss 0.10799169655889272 test_loss: 0.1296068549156189
epoch: 98 training_loss 0.10797416474670171 test_loss: 0.12828670740127562
epoch: 99 training_loss 0.10647641088813543 test_loss: 0.12855910062789916
epoch: 100 training_loss 0.10792369114235044 test_loss: 0.12275638580322265
epoch: 101 training_loss 0.10792973171919584 test_loss: 0.12178912162780761
epoch: 102 training_loss 0.10834157403558492 test_loss: 0.12916580438613892
epoch: 103 training_loss 0.10979300636798144 test_loss: 0.1332331418991089
epoch: 104 training_loss 0.11023818250745535 test_loss: 0.13782566785812378
epoch: 105 training_loss 0.11241148740053176 test_loss: 0.12336649894714355
epoch: 106 training_loss 0.11170438643544912 test_loss: 0.1226344108581543
epoch: 107 training_loss 0.10447003971785307 test_loss: 0.12528543472290038
epoch: 108 training_loss 0.10548567328602075 test_loss: 0.14298415184020996
epoch: 109 training_loss 0.11570977456867695 test_loss: 0.12306219339370728
epoch: 110 training_loss 0.10970254272222518 test_loss: 0.12849985361099242
epoch: 111 training_loss 0.1053055964782834 test_loss: 0.13994823694229125
epoch: 112 training_loss 0.10955558359622955 test_loss: 0.1152052640914917
epoch: 113 training_loss 0.11183419786393642 test_loss: 0.13286534547805787
epoch: 114 training_loss 0.10969999302178621 test_loss: 0.11653848886489868
epoch: 115 training_loss 0.11051114924252033 test_loss: 0.13025692701339722
epoch: 116 training_loss 0.10613607335835695 test_loss: 0.1292857050895691
epoch: 117 training_loss 0.11036459356546402 test_loss: 0.13954613208770753
epoch: 118 training_loss 0.11065386585891247 test_loss: 0.1129810094833374
epoch: 119 training_loss 0.10917350593954325 test_loss: 0.13649241924285888
epoch: 120 training_loss 0.10825271725654602 test_loss: 0.1251831293106079
epoch: 121 training_loss 0.11189413506537677 test_loss: 0.13437594175338746
epoch: 122 training_loss 0.10738741464912892 test_loss: 0.1328486680984497
epoch: 123 training_loss 0.10785556867718697 test_loss: 0.11913518905639649
epoch: 124 training_loss 0.10232396226376295 test_loss: 0.1309869885444641
epoch: 125 training_loss 0.11312362976372242 test_loss: 0.12457479238510132
epoch: 126 training_loss 0.10948662932962179 test_loss: 0.12540024518966675
epoch: 127 training_loss 0.10459033027291298 test_loss: 0.1228641152381897
epoch: 128 training_loss 0.10970983870327472 test_loss: 0.11936682462692261
epoch: 129 training_loss 0.10906219564378261 test_loss: 0.12466533184051513
epoch: 130 training_loss 0.10210822869092226 test_loss: 0.1326175093650818
epoch: 131 training_loss 0.10893152285367251 test_loss: 0.12397804260253906
epoch: 132 training_loss 0.1044669308140874 test_loss: 0.1305135130882263
epoch: 133 training_loss 0.11206516832113265 test_loss: 0.13638521432876588
epoch: 134 training_loss 0.10896365072578192 test_loss: 0.13234566450119017
epoch: 135 training_loss 0.10825141835957766 test_loss: 0.13021645545959473
epoch: 136 training_loss 0.11496300306171178 test_loss: 0.11737449169158935
epoch: 137 training_loss 0.1058116315677762 test_loss: 0.12491998672485352
epoch: 138 training_loss 0.11193786978721619 test_loss: 0.1323685884475708
epoch: 139 training_loss 0.10995497670024633 test_loss: 0.13622583150863649
epoch: 140 training_loss 0.11032197553664445 test_loss: 0.12307190895080566
epoch: 141 training_loss 0.10616614319384098 test_loss: 0.13224403858184813
epoch: 142 training_loss 0.10724038736894727 test_loss: 0.13303327560424805
epoch: 143 training_loss 0.10916278049349785 test_loss: 0.13983075618743895
epoch: 144 training_loss 0.10641548410058022 test_loss: 0.12202713489532471
epoch: 145 training_loss 0.11011534448713065 test_loss: 0.11828736066818238
epoch: 146 training_loss 0.11211374264210462 test_loss: 0.1364741563796997
epoch: 147 training_loss 0.1044963650032878 test_loss: 0.11854608058929443
epoch: 148 training_loss 0.11181432630866767 test_loss: 0.12199316024780274
epoch: 149 training_loss 0.11182709597051144 test_loss: 0.13622571229934693
epoch: 0 training_loss 0.21150614701211454 test_loss: 0.15118671655654908
epoch: 1 training_loss 0.1480489632487297 test_loss: 0.13562127351760864
epoch: 2 training_loss 0.14180372342467307 test_loss: 0.12658358812332154
epoch: 3 training_loss 0.13712122075259686 test_loss: 0.12251832485198974
epoch: 4 training_loss 0.1211935480684042 test_loss: 0.12177773714065551
epoch: 5 training_loss 0.12664802577346562 test_loss: 0.13544806241989135
epoch: 6 training_loss 0.12416178993880748 test_loss: 0.12340813875198364
epoch: 7 training_loss 0.12695774089545012 test_loss: 0.13028310537338256
epoch: 8 training_loss 0.12247334085404873 test_loss: 0.12822986841201783
epoch: 9 training_loss 0.12590893983840942 test_loss: 0.13191912174224854
epoch: 10 training_loss 0.13214103892445564 test_loss: 0.12321549654006958
epoch: 11 training_loss 0.12740116998553275 test_loss: 0.12265723943710327
epoch: 12 training_loss 0.11822088956832885 test_loss: 0.11554478406906128
epoch: 13 training_loss 0.12278893418610096 test_loss: 0.11944454908370972
epoch: 14 training_loss 0.120587268024683 test_loss: 0.1165464997291565
epoch: 15 training_loss 0.1249879303574562 test_loss: 0.13968106508255004
epoch: 16 training_loss 0.1169282453879714 test_loss: 0.11265956163406372
epoch: 17 training_loss 0.11774906393140555 test_loss: 0.12955647706985474
epoch: 18 training_loss 0.11766494058072567 test_loss: 0.11697494983673096
epoch: 19 training_loss 0.11659605134278536 test_loss: 0.12693833112716674
epoch: 20 training_loss 0.1222573683783412 test_loss: 0.13501807451248168
epoch: 21 training_loss 0.12151898730546236 test_loss: 0.11955924034118652
epoch: 22 training_loss 0.12296410825103521 test_loss: 0.120521080493927
epoch: 23 training_loss 0.11978639926761389 test_loss: 0.12577741146087645
epoch: 24 training_loss 0.11444748114794492 test_loss: 0.11831387281417846
epoch: 25 training_loss 0.12053780686110258 test_loss: 0.11571625471115113
epoch: 26 training_loss 0.12210391402244568 test_loss: 0.12122695446014405
epoch: 27 training_loss 0.11849698707461358 test_loss: 0.11181812286376953
epoch: 28 training_loss 0.11883269343525171 test_loss: 0.11865978240966797
epoch: 29 training_loss 0.11368330676108598 test_loss: 0.11428468227386475
epoch: 30 training_loss 0.12675627741962672 test_loss: 0.1294563889503479
epoch: 31 training_loss 0.11513084586709738 test_loss: 0.11721614599227906
epoch: 32 training_loss 0.12200853515416384 test_loss: 0.1217616081237793
epoch: 33 training_loss 0.12104869440197945 test_loss: 0.12288477420806884
epoch: 34 training_loss 0.11600888662040233 test_loss: 0.11564360857009888
epoch: 35 training_loss 0.12006447438150644 test_loss: 0.1146273136138916
epoch: 36 training_loss 0.10966151624917984 test_loss: 0.12716587781906127
epoch: 37 training_loss 0.11782727848738432 test_loss: 0.11444718837738037
epoch: 38 training_loss 0.1211108474433422 test_loss: 0.12759774923324585
epoch: 39 training_loss 0.1229115853458643 test_loss: 0.10465279817581177
epoch: 40 training_loss 0.12099107090383768 test_loss: 0.11196082830429077
epoch: 41 training_loss 0.11996519163250924 test_loss: 0.12292389869689942
epoch: 42 training_loss 0.12331301078200341 test_loss: 0.11971490383148194
epoch: 43 training_loss 0.12288209579885007 test_loss: 0.1271521806716919
epoch: 44 training_loss 0.11891785461455584 test_loss: 0.11564095020294189
epoch: 45 training_loss 0.11593199525028468 test_loss: 0.11470949649810791
epoch: 46 training_loss 0.11928049188107252 test_loss: 0.11895657777786255
epoch: 47 training_loss 0.11006434287875891 test_loss: 0.11199018955230713
epoch: 48 training_loss 0.11406979657709598 test_loss: 0.11490771770477295
epoch: 49 training_loss 0.11642970409244299 test_loss: 0.10116174221038818
epoch: 50 training_loss 0.120693093650043 test_loss: 0.11548750400543213
epoch: 51 training_loss 0.12127379015088081 test_loss: 0.10519911050796509
epoch: 52 training_loss 0.11768365684896707 test_loss: 0.12030181884765626
epoch: 53 training_loss 0.12009939819574356 test_loss: 0.1138572335243225
epoch: 54 training_loss 0.11682561423629523 test_loss: 0.10861542224884033
epoch: 55 training_loss 0.1144981463626027 test_loss: 0.1273144245147705
epoch: 56 training_loss 0.12350165374577045 test_loss: 0.10597217082977295
epoch: 57 training_loss 0.11445512354373932 test_loss: 0.12052470445632935
epoch: 58 training_loss 0.12072877116501331 test_loss: 0.11134227514266967
epoch: 59 training_loss 0.11220851957798005 test_loss: 0.12546925544738768
epoch: 60 training_loss 0.12057879246771336 test_loss: 0.11978892087936402
epoch: 61 training_loss 0.12127123892307282 test_loss: 0.12095988988876342
epoch: 62 training_loss 0.11527105666697025 test_loss: 0.1087805151939392
epoch: 63 training_loss 0.11383543271571397 test_loss: 0.11708862781524658
epoch: 64 training_loss 0.11996458698064089 test_loss: 0.10715324878692627
epoch: 65 training_loss 0.11623349711298943 test_loss: 0.11335299015045167
epoch: 66 training_loss 0.12071748878806829 test_loss: 0.112736976146698
epoch: 67 training_loss 0.11525627192109823 test_loss: 0.11205105781555176
epoch: 68 training_loss 0.11519316766411065 test_loss: 0.11651148796081542
epoch: 69 training_loss 0.11630700804293155 test_loss: 0.11203672885894775
epoch: 70 training_loss 0.11484403103590012 test_loss: 0.11474487781524659
epoch: 71 training_loss 0.11205353546887636 test_loss: 0.10376681089401245
epoch: 72 training_loss 0.11147083476185798 test_loss: 0.11447644233703613
epoch: 73 training_loss 0.10715182214975356 test_loss: 0.11369669437408447
epoch: 74 training_loss 0.11648055005818606 test_loss: 0.11197034120559693
epoch: 75 training_loss 0.10964943043887615 test_loss: 0.10882633924484253
epoch: 76 training_loss 0.11213246721774339 test_loss: 0.11626440286636353
epoch: 77 training_loss 0.11898886136710644 test_loss: 0.11522071361541748
epoch: 78 training_loss 0.11549313724040985 test_loss: 0.11345571279525757
epoch: 79 training_loss 0.12053585819900035 test_loss: 0.1060765266418457
epoch: 80 training_loss 0.10982049889862537 test_loss: 0.11629613637924194
epoch: 81 training_loss 0.1186992284655571 test_loss: 0.1143069863319397
epoch: 82 training_loss 0.11476463161408901 test_loss: 0.12132585048675537
epoch: 83 training_loss 0.11473248336464166 test_loss: 0.12714987993240356
epoch: 84 training_loss 0.1171180496737361 test_loss: 0.11537153720855713
epoch: 85 training_loss 0.11713166415691376 test_loss: 0.11071096658706665
epoch: 86 training_loss 0.11592775315046311 test_loss: 0.11774601936340331
epoch: 87 training_loss 0.10698888942599297 test_loss: 0.12163389921188354
epoch: 88 training_loss 0.11415052093565464 test_loss: 0.11769152879714966
epoch: 89 training_loss 0.11405381985008717 test_loss: 0.11948859691619873
epoch: 90 training_loss 0.11581764493137597 test_loss: 0.11629581451416016
epoch: 91 training_loss 0.11354811049997807 test_loss: 0.12069956064224244
epoch: 92 training_loss 0.11491117294877767 test_loss: 0.10886815786361695
epoch: 93 training_loss 0.11409071575850248 test_loss: 0.11768945455551147
epoch: 94 training_loss 0.11392776265740395 test_loss: 0.11088523864746094
epoch: 95 training_loss 0.11376955971121788 test_loss: 0.11364805698394775
epoch: 96 training_loss 0.10412282973527909 test_loss: 0.10724296569824218
epoch: 97 training_loss 0.11850744314491748 test_loss: 0.11843265295028686
epoch: 98 training_loss 0.11102469108998775 test_loss: 0.11259328126907349
epoch: 99 training_loss 0.1166210925579071 test_loss: 0.10612529516220093
epoch: 100 training_loss 0.1136520253866911 test_loss: 0.11910219192504883
epoch: 101 training_loss 0.10906598623842001 test_loss: 0.1255979061126709
epoch: 102 training_loss 0.1125959586724639 test_loss: 0.1169021725654602
epoch: 103 training_loss 0.1124124063178897 test_loss: 0.12269887924194336
epoch: 104 training_loss 0.11136308498680592 test_loss: 0.11578530073165894
epoch: 105 training_loss 0.11136299032717943 test_loss: 0.11814604997634888
epoch: 106 training_loss 0.10706875532865524 test_loss: 0.1196069598197937
epoch: 107 training_loss 0.11110485211014748 test_loss: 0.11624714136123657
epoch: 108 training_loss 0.11720854047685862 test_loss: 0.11297944784164429
epoch: 109 training_loss 0.11182184997946024 test_loss: 0.10522147417068481
epoch: 110 training_loss 0.11618008119985461 test_loss: 0.12817419767379762
epoch: 111 training_loss 0.11451255600899458 test_loss: 0.1295612335205078
epoch: 112 training_loss 0.11549570236355067 test_loss: 0.12273178100585938
epoch: 113 training_loss 0.11741384916007519 test_loss: 0.120876145362854
epoch: 114 training_loss 0.11748718220740556 test_loss: 0.1093111515045166
epoch: 115 training_loss 0.11274364598095417 test_loss: 0.11187984943389892
epoch: 116 training_loss 0.11310235649347306 test_loss: 0.12252693176269532
epoch: 117 training_loss 0.11958445586264134 test_loss: 0.11203839778900146
epoch: 118 training_loss 0.11426123417913914 test_loss: 0.10424971580505371
epoch: 119 training_loss 0.11301241487264634 test_loss: 0.11655886173248291
epoch: 120 training_loss 0.11367428809404373 test_loss: 0.11578600406646729
epoch: 121 training_loss 0.11399647623300552 test_loss: 0.10849401950836182
epoch: 122 training_loss 0.11520663730800151 test_loss: 0.10573058128356934
epoch: 123 training_loss 0.11714514061808586 test_loss: 0.11334332227706909
epoch: 124 training_loss 0.11104626070708036 test_loss: 0.10479534864425659
epoch: 125 training_loss 0.11128758747130632 test_loss: 0.11060531139373779
epoch: 126 training_loss 0.12015291057527065 test_loss: 0.10354653596878052
epoch: 127 training_loss 0.11306687891483307 test_loss: 0.1151425838470459
epoch: 128 training_loss 0.11455290883779526 test_loss: 0.12193861007690429
epoch: 129 training_loss 0.11298268858343363 test_loss: 0.10996532440185547
epoch: 130 training_loss 0.1201478984951973 test_loss: 0.11013861894607543
epoch: 131 training_loss 0.11115963887423277 test_loss: 0.11342873573303222
epoch: 132 training_loss 0.10494422517716885 test_loss: 0.11947709321975708
epoch: 133 training_loss 0.11551040977239609 test_loss: 0.11082106828689575
epoch: 134 training_loss 0.10829065691679716 test_loss: 0.11827354431152344
epoch: 135 training_loss 0.10970500957220793 test_loss: 0.11679333448410034
epoch: 136 training_loss 0.11382145088165999 test_loss: 0.10918817520141602
epoch: 137 training_loss 0.11053542573004961 test_loss: 0.10575413703918457
epoch: 138 training_loss 0.11712923187762499 test_loss: 0.127779221534729
epoch: 139 training_loss 0.1088217206299305 test_loss: 0.11787889003753663
epoch: 140 training_loss 0.11100616447627544 test_loss: 0.10499069690704346
epoch: 141 training_loss 0.10896055720746517 test_loss: 0.1124841570854187
epoch: 142 training_loss 0.1144002564623952 test_loss: 0.10453294515609741
epoch: 143 training_loss 0.1153549599647522 test_loss: 0.11711592674255371
epoch: 144 training_loss 0.11911153472959995 test_loss: 0.10913578271865845
epoch: 145 training_loss 0.11172604985535145 test_loss: 0.10182383060455322
epoch: 146 training_loss 0.11166077315807342 test_loss: 0.12209265232086182
epoch: 147 training_loss 0.1103727800399065 test_loss: 0.11745604276657104
epoch: 148 training_loss 0.11855736646801233 test_loss: 0.11625102758407593
epoch: 149 training_loss 0.11387091103941202 test_loss: 0.10282496213912964
epoch: 0 training_loss 0.20736964263021945 test_loss: 0.1451059103012085
epoch: 1 training_loss 0.13967594116926194 test_loss: 0.16308053731918334
epoch: 2 training_loss 0.1442655122280121 test_loss: 0.13820358514785766
epoch: 3 training_loss 0.13488274533301592 test_loss: 0.12830603122711182
epoch: 4 training_loss 0.13205254051834345 test_loss: 0.13235892057418824
epoch: 5 training_loss 0.12866506446152925 test_loss: 0.15294467210769652
epoch: 6 training_loss 0.13022327598184347 test_loss: 0.12747364044189452
epoch: 7 training_loss 0.1291607091575861 test_loss: 0.13022079467773437
epoch: 8 training_loss 0.12964406345039606 test_loss: 0.12524601221084594
epoch: 9 training_loss 0.1219509569555521 test_loss: 0.1430269479751587
epoch: 10 training_loss 0.13109400127083062 test_loss: 0.11738911867141724
epoch: 11 training_loss 0.12105928529053926 test_loss: 0.12970902919769287
epoch: 12 training_loss 0.11855965454131365 test_loss: 0.12962164878845214
epoch: 13 training_loss 0.12016261447221041 test_loss: 0.13077168464660643
epoch: 14 training_loss 0.12190238453447819 test_loss: 0.1338127374649048
epoch: 15 training_loss 0.12062179416418076 test_loss: 0.11727322340011596
epoch: 16 training_loss 0.11963871471583844 test_loss: 0.12630975246429443
epoch: 17 training_loss 0.12187946788966655 test_loss: 0.12120239734649658
epoch: 18 training_loss 0.12418044790625572 test_loss: 0.12497087717056274
epoch: 19 training_loss 0.12513605806976558 test_loss: 0.11711350679397584
epoch: 20 training_loss 0.11714870359748603 test_loss: 0.11493363380432128
epoch: 21 training_loss 0.11370141863822937 test_loss: 0.12828452587127687
epoch: 22 training_loss 0.11786391898989677 test_loss: 0.12130658626556397
epoch: 23 training_loss 0.12399518355727196 test_loss: 0.1149178147315979
epoch: 24 training_loss 0.12107893742620945 test_loss: 0.12081634998321533
epoch: 25 training_loss 0.11871912352740764 test_loss: 0.1165877103805542
epoch: 26 training_loss 0.11573348093777895 test_loss: 0.12317852973937989
epoch: 27 training_loss 0.11458187110722065 test_loss: 0.11402736902236939
epoch: 28 training_loss 0.12400716688483954 test_loss: 0.1326759934425354
epoch: 29 training_loss 0.11730586893856526 test_loss: 0.1179616928100586
epoch: 30 training_loss 0.11570768266916275 test_loss: 0.1228334665298462
epoch: 31 training_loss 0.11002029497176409 test_loss: 0.11976820230484009
epoch: 32 training_loss 0.11917782112956048 test_loss: 0.11120978593826295
epoch: 33 training_loss 0.11618316978216171 test_loss: 0.13252156972885132
epoch: 34 training_loss 0.11612742945551872 test_loss: 0.10741627216339111
epoch: 35 training_loss 0.11368296407163143 test_loss: 0.11339917182922363
epoch: 36 training_loss 0.12041381109505891 test_loss: 0.11228088140487671
epoch: 37 training_loss 0.11058867257088423 test_loss: 0.13024680614471434
epoch: 38 training_loss 0.1188809059932828 test_loss: 0.12754788398742675
epoch: 39 training_loss 0.11695010393857956 test_loss: 0.12517869472503662
epoch: 40 training_loss 0.12059653237462044 test_loss: 0.12397011518478393
epoch: 41 training_loss 0.11399993356317281 test_loss: 0.11184099912643433
epoch: 42 training_loss 0.11314468510448933 test_loss: 0.11619099378585815
epoch: 43 training_loss 0.11931533101946115 test_loss: 0.10241514444351196
epoch: 44 training_loss 0.11498250473290682 test_loss: 0.12084953784942627
epoch: 45 training_loss 0.11424965485930443 test_loss: 0.110836660861969
epoch: 46 training_loss 0.11537620384246111 test_loss: 0.12168935537338257
epoch: 47 training_loss 0.11757108625024557 test_loss: 0.12317240238189697
epoch: 48 training_loss 0.11006155360490083 test_loss: 0.120918607711792
epoch: 49 training_loss 0.11551566146314145 test_loss: 0.11514565944671631
epoch: 50 training_loss 0.1242072769254446 test_loss: 0.11523422002792358
epoch: 51 training_loss 0.1132586694136262 test_loss: 0.11189708709716797
epoch: 52 training_loss 0.1246527510508895 test_loss: 0.11549317836761475
epoch: 53 training_loss 0.11685316666960716 test_loss: 0.11699775457382203
epoch: 54 training_loss 0.11587740276008844 test_loss: 0.12466727495193482
epoch: 55 training_loss 0.12482546254992485 test_loss: 0.11606689691543579
epoch: 56 training_loss 0.11363617293536663 test_loss: 0.12454780340194702
epoch: 57 training_loss 0.11455368679016828 test_loss: 0.10496129989624023
epoch: 58 training_loss 0.11713734891265631 test_loss: 0.11259336471557617
epoch: 59 training_loss 0.12446793746203184 test_loss: 0.11057758331298828
epoch: 60 training_loss 0.11471047580242157 test_loss: 0.10731836557388305
epoch: 61 training_loss 0.113504458963871 test_loss: 0.12057119607925415
epoch: 62 training_loss 0.11437399130314589 test_loss: 0.12221344709396362
epoch: 63 training_loss 0.11722175754606724 test_loss: 0.1225466251373291
epoch: 64 training_loss 0.10963033229112625 test_loss: 0.12154901027679443
epoch: 65 training_loss 0.11344576388597488 test_loss: 0.11878784894943237
epoch: 66 training_loss 0.11481489919126034 test_loss: 0.12148245573043823
epoch: 67 training_loss 0.11565733890980483 test_loss: 0.11769688129425049
epoch: 68 training_loss 0.11301606625318528 test_loss: 0.11443594694137574
epoch: 69 training_loss 0.1126198410987854 test_loss: 0.12659555673599243
epoch: 70 training_loss 0.11161997489631176 test_loss: 0.11151269674301148
epoch: 71 training_loss 0.11591399244964123 test_loss: 0.10657366514205932
epoch: 72 training_loss 0.11476700223982333 test_loss: 0.13128795623779296
epoch: 73 training_loss 0.10678281582891941 test_loss: 0.11363837718963624
epoch: 74 training_loss 0.11504150561988354 test_loss: 0.11771010160446167
epoch: 75 training_loss 0.11838212728500366 test_loss: 0.10988527536392212
epoch: 76 training_loss 0.11565573126077652 test_loss: 0.12135310173034668
epoch: 77 training_loss 0.11825485821813345 test_loss: 0.1171223759651184
epoch: 78 training_loss 0.11452017281204462 test_loss: 0.12040367126464843
epoch: 79 training_loss 0.11140643630176783 test_loss: 0.114021635055542
epoch: 80 training_loss 0.11410434149205685 test_loss: 0.11104481220245362
epoch: 81 training_loss 0.114186154268682 test_loss: 0.12239300012588501
epoch: 82 training_loss 0.11947376444935799 test_loss: 0.10809847116470336
epoch: 83 training_loss 0.1156836511567235 test_loss: 0.11461859941482544
epoch: 84 training_loss 0.10988873284310102 test_loss: 0.11959500312805176
epoch: 85 training_loss 0.11183457333594561 test_loss: 0.11898562908172608
epoch: 86 training_loss 0.10969911877065897 test_loss: 0.1182091236114502
epoch: 87 training_loss 0.10808079604059458 test_loss: 0.11502984762191773
epoch: 88 training_loss 0.11159317964687944 test_loss: 0.11106452941894532
epoch: 89 training_loss 0.11759970623999834 test_loss: 0.11013234853744507
epoch: 90 training_loss 0.11608196619898081 test_loss: 0.12001320123672485
epoch: 91 training_loss 0.11413747653365135 test_loss: 0.11237510442733764
epoch: 92 training_loss 0.11194672141224146 test_loss: 0.11755353212356567
epoch: 93 training_loss 0.11543025471270084 test_loss: 0.11996521949768066
epoch: 94 training_loss 0.11209663677960634 test_loss: 0.12805695533752443
epoch: 95 training_loss 0.11278189200907945 test_loss: 0.10787211656570435
epoch: 96 training_loss 0.11560577012598515 test_loss: 0.11604143381118774
epoch: 97 training_loss 0.11724687650799752 test_loss: 0.10759203433990479
epoch: 98 training_loss 0.11275711070746183 test_loss: 0.1174461841583252
epoch: 99 training_loss 0.11304237373173237 test_loss: 0.11649283170700073
epoch: 100 training_loss 0.11558746419847012 test_loss: 0.10226070880889893
epoch: 101 training_loss 0.11349468667060136 test_loss: 0.11864609718322754
epoch: 102 training_loss 0.10933047007769346 test_loss: 0.1084055781364441
epoch: 103 training_loss 0.11744891207665205 test_loss: 0.11030985116958618
epoch: 104 training_loss 0.11721537541598082 test_loss: 0.11646677255630493
epoch: 105 training_loss 0.10925801701843739 test_loss: 0.12410213947296142
epoch: 106 training_loss 0.11075434848666191 test_loss: 0.11905720233917236
epoch: 107 training_loss 0.1132449747249484 test_loss: 0.11688086986541749
epoch: 108 training_loss 0.11373331800103187 test_loss: 0.12814112901687622
epoch: 109 training_loss 0.11322597116231918 test_loss: 0.12232441902160644
epoch: 110 training_loss 0.11074927482753992 test_loss: 0.1074711799621582
epoch: 111 training_loss 0.11561433024704457 test_loss: 0.11886900663375854
epoch: 112 training_loss 0.10674875177443027 test_loss: 0.12182621955871582
epoch: 113 training_loss 0.11059294488281012 test_loss: 0.12616682052612305
epoch: 114 training_loss 0.11133346121758222 test_loss: 0.11470617055892944
epoch: 115 training_loss 0.11782076511532068 test_loss: 0.11183410882949829
epoch: 116 training_loss 0.11373637430369854 test_loss: 0.11792654991149902
epoch: 117 training_loss 0.11131356120109558 test_loss: 0.11436275243759156
epoch: 118 training_loss 0.1131121264025569 test_loss: 0.11089690923690795
epoch: 119 training_loss 0.11448115646839142 test_loss: 0.11401653289794922
epoch: 120 training_loss 0.1170905314385891 test_loss: 0.12862648963928222
epoch: 121 training_loss 0.11398632559925317 test_loss: 0.10350557565689086
epoch: 122 training_loss 0.1086709938198328 test_loss: 0.11716071367263795
epoch: 123 training_loss 0.11131201799958944 test_loss: 0.11898913383483886
epoch: 124 training_loss 0.11916442606598139 test_loss: 0.11117708683013916
epoch: 125 training_loss 0.10571071106940508 test_loss: 0.11482105255126954
epoch: 126 training_loss 0.11239563968032598 test_loss: 0.11670479774475098
epoch: 127 training_loss 0.10655437715351582 test_loss: 0.11646080017089844
epoch: 128 training_loss 0.11237287133932114 test_loss: 0.10760596990585328
epoch: 129 training_loss 0.10803410414606333 test_loss: 0.10818134546279908
epoch: 130 training_loss 0.11880676440894604 test_loss: 0.10670944452285766
epoch: 131 training_loss 0.11002467945218086 test_loss: 0.1095122218132019
epoch: 132 training_loss 0.10729994498193264 test_loss: 0.11279534101486206
epoch: 133 training_loss 0.10989148493856192 test_loss: 0.10354759693145751
epoch: 134 training_loss 0.11209719568490982 test_loss: 0.11870609521865845
epoch: 135 training_loss 0.10711403284221888 test_loss: 0.1091429591178894
epoch: 136 training_loss 0.10897985657677055 test_loss: 0.11750292778015137
epoch: 137 training_loss 0.11329864352941513 test_loss: 0.11327364444732665
epoch: 138 training_loss 0.11260212618857622 test_loss: 0.12362803220748901
epoch: 139 training_loss 0.10889861430972815 test_loss: 0.11345665454864502
epoch: 140 training_loss 0.1173378811776638 test_loss: 0.11246678829193116
epoch: 141 training_loss 0.11032854605466128 test_loss: 0.10568825006484986
epoch: 142 training_loss 0.10369244363158941 test_loss: 0.11110123395919799
epoch: 143 training_loss 0.11006402496248484 test_loss: 0.11778700351715088
epoch: 144 training_loss 0.1092317183315754 test_loss: 0.1079669713973999
epoch: 145 training_loss 0.1056516220420599 test_loss: 0.11461557149887085
epoch: 146 training_loss 0.10735502440482378 test_loss: 0.11177175045013428
epoch: 147 training_loss 0.10874006103724242 test_loss: 0.1218954086303711
epoch: 148 training_loss 0.11272157784551382 test_loss: 0.11262521743774415
epoch: 149 training_loss 0.10780740372836589 test_loss: 0.10328073501586914
episode: 0 training return: -1051.1116444730167
episode: 1 training return: -1256.7861236498775
episode: 2 training return: -1176.9841165387
episode: 3 training return: -1122.4889527468379
epoch: 1 test_true_pfm: 27.420330346997 sim_pfm: 35.4391549578087
episode: 4 training return: -950.7232189335174
episode: 5 training return: -807.6575063579006
episode: 6 training return: -1198.1530826098963
episode: 7 training return: -1659.120788183273
epoch: 2 test_true_pfm: 20.522356572485503 sim_pfm: 51.30877361036837
episode: 8 training return: -1326.2335477724214
episode: 9 training return: -1423.821415030205
episode: 10 training return: -102.43856349119893
episode: 11 training return: -15.748947223017588
epoch: 3 test_true_pfm: 26.317383789713006 sim_pfm: 62.023567663523224
episode: 12 training return: 4.715363235313917
episode: 13 training return: -38.10208411165544
episode: 14 training return: -357.1404424355644
episode: 15 training return: -763.4483676081979
epoch: 4 test_true_pfm: 39.41689394973001 sim_pfm: -49.491521913284906
episode: 16 training return: -183.83136989756184
episode: 17 training return: -34.81791072573536
episode: 18 training return: -283.321101902601
episode: 19 training return: -372.76158789200804
epoch: 5 test_true_pfm: 1.185142145679644 sim_pfm: -328.4823409169826
episode: 20 training return: -318.2425584237979
episode: 21 training return: -177.29530963426112
episode: 22 training return: -255.4904689956514
episode: 23 training return: -177.84685587040673
epoch: 6 test_true_pfm: 34.34243514897661 sim_pfm: -160.6647127872158
episode: 24 training return: -210.69371668393796
episode: 25 training return: -192.03528052909869
episode: 26 training return: -121.81004759512686
episode: 27 training return: -87.38523293254816
epoch: 7 test_true_pfm: 55.7500213724489 sim_pfm: -19.154124054618684
episode: 28 training return: -78.4957638228858
episode: 29 training return: -58.34054591062291
episode: 30 training return: -58.565072787085114
episode: 31 training return: -98.49456701490921
epoch: 8 test_true_pfm: 44.1638458592584 sim_pfm: -98.56508946268569
episode: 32 training return: -147.37532644274302
episode: 33 training return: -170.33992624293933
episode: 34 training return: -25.52671438166176
episode: 35 training return: -25.96017090184176
epoch: 9 test_true_pfm: 33.893671863844624 sim_pfm: 16.79619532870775
episode: 36 training return: -18.814246496517196
episode: 37 training return: -35.50756630497307
episode: 38 training return: -81.56344001175125
episode: 39 training return: -34.917681580989196
epoch: 10 test_true_pfm: 38.11540027369547 sim_pfm: 29.66431086580389
episode: 40 training return: -42.990972280009935
episode: 41 training return: -58.12464749219159
episode: 42 training return: -73.61567950084704
episode: 43 training return: -77.7934366169212
epoch: 11 test_true_pfm: 29.101073954497316 sim_pfm: 2.0267629410072336
episode: 44 training return: -44.90737691069258
episode: 45 training return: 46.32707304592002
episode: 46 training return: -86.22139847874918
episode: 47 training return: -12.803573721904371
epoch: 12 test_true_pfm: 14.2999529018837 sim_pfm: 255.93889227440476
episode: 48 training return: 362.04849819807964
episode: 49 training return: 139.10951348151187
episode: 50 training return: -6.0673979833974325
episode: 51 training return: -5.317090730487802
epoch: 13 test_true_pfm: 18.391530289323043 sim_pfm: 36.02045834280436
episode: 52 training return: -168.62182113050415
episode: 53 training return: -99.17966574302861
episode: 54 training return: -47.162350739733064
episode: 55 training return: -384.0175775335344
epoch: 14 test_true_pfm: 16.834633753653193 sim_pfm: -610.3892010470183
episode: 56 training return: -54.156201212025074
episode: 57 training return: 0.3682911024574056
episode: 58 training return: 135.29392167075602
episode: 59 training return: 27.799127968049902
epoch: 15 test_true_pfm: 22.880683951129846 sim_pfm: 264.23666734191886
episode: 60 training return: 173.1871510387543
episode: 61 training return: 185.51875964442524
episode: 62 training return: 204.58015893666112
episode: 63 training return: 453.4993842227759
epoch: 16 test_true_pfm: 21.748198267960067 sim_pfm: 472.685237843922
episode: 64 training return: 384.3005966856742
episode: 65 training return: 573.7696621546667
episode: 66 training return: 498.268386497494
episode: 67 training return: -1604.8306324136488
epoch: 17 test_true_pfm: 33.97363990646129 sim_pfm: -2035.6051112737473
episode: 68 training return: -501.605739399817
episode: 69 training return: -885.3361643648896
episode: 70 training return: -215.59120622450524
episode: 71 training return: 571.358674420889
epoch: 18 test_true_pfm: -37.43242820561895 sim_pfm: 626.4565203876757
episode: 72 training return: 603.283103262653
episode: 73 training return: 696.4844468876729
episode: 74 training return: -35.23941056920692
episode: 75 training return: 111.32432829315763
epoch: 19 test_true_pfm: 4.154124498316126 sim_pfm: 729.8545031994097
episode: 76 training return: 698.844024915697
episode: 77 training return: 779.7740663435469
episode: 78 training return: 807.1795814216556
episode: 79 training return: 830.429963580028
epoch: 20 test_true_pfm: 0.6875532242238966 sim_pfm: 873.0463991780155
episode: 80 training return: 863.8800423768502
episode: 81 training return: 750.3461472924547
episode: 82 training return: 773.6239736202912
episode: 83 training return: 781.1580854410416
epoch: 21 test_true_pfm: -0.8784141045040336 sim_pfm: 913.6028400623934
episode: 84 training return: 870.0734926316918
episode: 85 training return: 883.4845577830963
episode: 86 training return: 894.7430844652821
episode: 87 training return: 830.5341102911166
epoch: 22 test_true_pfm: -5.294479917902974 sim_pfm: 855.4343644584626
episode: 88 training return: 840.4993397637904
episode: 89 training return: 858.969936614909
episode: 90 training return: 716.5191952328281
episode: 91 training return: 608.3568180644041
epoch: 23 test_true_pfm: 2.5623261431419926 sim_pfm: 896.3997567854324
episode: 92 training return: 751.4479550886119
episode: 93 training return: 864.0311966825336
episode: 94 training return: 852.1508803780074
episode: 95 training return: 839.0099861721513
epoch: 24 test_true_pfm: 3.8432667777596548 sim_pfm: 858.929750255993
episode: 96 training return: 776.1833259775657
episode: 97 training return: 844.5120232123068
episode: 98 training return: 891.927257076243
episode: 99 training return: 893.4215945979222
epoch: 25 test_true_pfm: 3.7939843797280766 sim_pfm: 930.1303266238992
episode: 100 training return: 871.5317242427101
episode: 101 training return: 864.2832670745744
episode: 102 training return: 838.3115382262016
episode: 103 training return: 833.5011733764807
epoch: 26 test_true_pfm: -4.041431486657936 sim_pfm: 886.3114103764353
episode: 104 training return: 785.7706031795097
episode: 105 training return: 810.9609231429091
episode: 106 training return: 816.1643951800446
episode: 107 training return: 842.8007063785934
epoch: 27 test_true_pfm: 1.6668614976259004 sim_pfm: 898.9769176059414
episode: 108 training return: 898.0946119361681
episode: 109 training return: 813.1100709880795
episode: 110 training return: 790.3336035052912
episode: 111 training return: 795.4463669683166
epoch: 28 test_true_pfm: -7.820278457397869 sim_pfm: 834.1391046727365
episode: 112 training return: 812.4835186294804
episode: 113 training return: 822.4301381472217
episode: 114 training return: 827.2212193466903
episode: 115 training return: 802.666546100495
epoch: 29 test_true_pfm: -3.842861112549854 sim_pfm: 917.1785395379936
episode: 116 training return: 832.3326815564797
episode: 117 training return: 879.0203452317932
episode: 118 training return: 860.2479847814056
episode: 119 training return: 753.0723688809431
epoch: 30 test_true_pfm: -6.2713807003373265 sim_pfm: 882.5545064393446
episode: 120 training return: 833.3491365397065
episode: 121 training return: 752.5425665677467
episode: 122 training return: 829.3245718618218
episode: 123 training return: 878.7376300873059
epoch: 31 test_true_pfm: -7.167762683760827 sim_pfm: 908.5049413540324
episode: 124 training return: 839.224963112106
episode: 125 training return: 844.5528840460379
episode: 126 training return: 757.7411490318252
episode: 127 training return: 802.8104825771792
epoch: 32 test_true_pfm: 22.114061792748654 sim_pfm: 888.8820142996635
episode: 128 training return: 742.4717438399126
episode: 129 training return: 837.2231673517712
episode: 130 training return: 845.5277123907097
episode: 131 training return: 871.760365527615
epoch: 33 test_true_pfm: -11.063567666925945 sim_pfm: 898.3202942379492
episode: 132 training return: 791.855253219154
episode: 133 training return: 859.0315311529223
episode: 134 training return: 796.9135600122335
episode: 135 training return: 753.0354617865567
epoch: 34 test_true_pfm: 19.784608898307564 sim_pfm: 737.2451210036658
episode: 136 training return: 719.5193040084647
episode: 137 training return: 780.4723494319729
episode: 138 training return: 788.1449738149101
episode: 139 training return: 846.4365248975381
epoch: 35 test_true_pfm: 11.100652804667119 sim_pfm: 900.1921595854168
episode: 140 training return: 760.4346317868951
episode: 141 training return: 867.5186412295144
episode: 142 training return: 874.400002280748
episode: 143 training return: 794.576631800732
epoch: 36 test_true_pfm: 15.679857667820167 sim_pfm: 878.334819871493
episode: 144 training return: 820.2879945707089
episode: 145 training return: 847.5419154070394
episode: 146 training return: 762.7258739899643
episode: 147 training return: 677.6460911447698
epoch: 37 test_true_pfm: 12.148336572803668 sim_pfm: 933.6171082975686
episode: 148 training return: 800.6423263114161
episode: 149 training return: 878.0634478977315
episode: 150 training return: 786.7760317689758
episode: 151 training return: 773.6166857645281
epoch: 38 test_true_pfm: -0.16734398557820465 sim_pfm: 920.2261062658317
episode: 152 training return: 730.4706085426699
episode: 153 training return: 806.8363988327549
episode: 154 training return: 884.0511080269155
episode: 155 training return: 728.8728327166212
epoch: 39 test_true_pfm: 5.056525191014822 sim_pfm: 855.3464517083037
episode: 156 training return: 868.9650406143661
episode: 157 training return: 821.9037113865382
episode: 158 training return: 846.3890477697504
episode: 159 training return: 753.10920948386
epoch: 40 test_true_pfm: 9.085550418246854 sim_pfm: 900.4325648958841
episode: 160 training return: 764.3841992502406
episode: 161 training return: 764.0108584576412
episode: 162 training return: 806.0914945378021
episode: 163 training return: 809.5745454732488
epoch: 41 test_true_pfm: 1.2688349919134807 sim_pfm: 906.5629566130795
episode: 164 training return: 812.2044508544055
episode: 165 training return: 811.1749685170795
episode: 166 training return: 771.825495945468
episode: 167 training return: 825.7839958779483
epoch: 42 test_true_pfm: 12.01969357912667 sim_pfm: 817.7412669300955
episode: 168 training return: 725.0421875814403
episode: 169 training return: 748.4491914142806
episode: 170 training return: 828.7047754277555
episode: 171 training return: 820.6389374010402
epoch: 43 test_true_pfm: 4.450023072835657 sim_pfm: 868.0655190683207
episode: 172 training return: 771.3804536901348
episode: 173 training return: 726.5246068506096
episode: 174 training return: 811.3018264060106
episode: 175 training return: 785.6552723435335
epoch: 44 test_true_pfm: 0.6236590991225853 sim_pfm: 877.1206770280312
episode: 176 training return: 834.9993632815972
episode: 177 training return: 856.9989369485284
episode: 178 training return: 781.0066489491472
episode: 179 training return: 781.810322898957
epoch: 45 test_true_pfm: 2.4647054767979517 sim_pfm: 915.1452358737737
episode: 180 training return: 869.028933285166
episode: 181 training return: 806.4433265612532
episode: 182 training return: 873.5483448554204
episode: 183 training return: 854.5309715296876
epoch: 46 test_true_pfm: -5.756293592425017 sim_pfm: 910.4607777876421
episode: 184 training return: 683.281320887903
episode: 185 training return: 765.5207440116428
episode: 186 training return: 752.7367014361623
episode: 187 training return: 769.7570797603289
epoch: 47 test_true_pfm: 17.06073298424162 sim_pfm: 913.6770423229489
episode: 188 training return: 724.1780680406221
episode: 189 training return: 773.4143360701869
episode: 190 training return: 724.5118065613675
episode: 191 training return: 700.8310029638278
epoch: 48 test_true_pfm: 1.689904776245669 sim_pfm: 888.4050313380385
episode: 192 training return: 772.8420222231447
episode: 193 training return: 761.5546465175181
episode: 194 training return: 810.5460491900939
episode: 195 training return: 743.7794542272146
epoch: 49 test_true_pfm: -12.937553991843274 sim_pfm: 902.7013398058662
episode: 196 training return: 816.1815382702499
episode: 197 training return: 735.2734977381086
episode: 198 training return: 814.4515771022197
episode: 199 training return: 792.4885620036503
epoch: 50 test_true_pfm: -2.594492491965481 sim_pfm: 835.8110254522253
episode: 200 training return: 726.7214521394923
episode: 201 training return: 780.7639227672521
episode: 202 training return: 763.3020550003189
episode: 203 training return: 797.9298336255696
epoch: 51 test_true_pfm: 11.060065401432178 sim_pfm: 884.5729438697983
episode: 204 training return: 729.93150246323
episode: 205 training return: 763.3492128719486
episode: 206 training return: 814.144201356899
episode: 207 training return: 697.4641162393264
epoch: 52 test_true_pfm: -2.034592132851027 sim_pfm: 840.2430418131368
episode: 208 training return: 746.5271766225841
episode: 209 training return: 713.6746123078542
episode: 210 training return: 737.953387047325
episode: 211 training return: 710.9380199899387
epoch: 53 test_true_pfm: -9.407637769639143 sim_pfm: 861.6820716166034
episode: 212 training return: 788.484189694886
episode: 213 training return: 731.7015168970895
episode: 214 training return: 717.2745760255794
episode: 215 training return: 727.0640725602807
epoch: 54 test_true_pfm: -16.483004615312204 sim_pfm: 895.7158752057567
episode: 216 training return: 708.2823247286555
episode: 217 training return: 787.7627148079536
episode: 218 training return: 782.9494987610155
episode: 219 training return: 749.0947263126681
epoch: 55 test_true_pfm: -5.049040363597623 sim_pfm: 883.9191030168134
episode: 220 training return: 725.8496031555161
episode: 221 training return: 772.4315397253146
episode: 222 training return: 760.4128762798223
episode: 223 training return: 781.7829672171977
epoch: 56 test_true_pfm: 4.663382691164789 sim_pfm: 915.7772690134905
episode: 224 training return: 769.407681402174
episode: 225 training return: 778.4477857960507
episode: 226 training return: 787.152720341308
episode: 227 training return: 702.4931175023925
epoch: 57 test_true_pfm: 16.236104497269352 sim_pfm: 886.673431224896
episode: 228 training return: 797.8214041469562
episode: 229 training return: 724.5380534486771
episode: 230 training return: 808.2720944271872
episode: 231 training return: 838.9733674867863
epoch: 58 test_true_pfm: 12.347329591085636 sim_pfm: 906.1715174246774
episode: 232 training return: 768.3520019382904
episode: 233 training return: 780.011183153605
episode: 234 training return: 778.1735002555731
episode: 235 training return: 832.6473135283633
epoch: 59 test_true_pfm: 7.051383837373568 sim_pfm: 902.9760399328441
episode: 236 training return: 762.9505818998667
episode: 237 training return: 697.3770123431381
episode: 238 training return: 814.2796754078635
episode: 239 training return: 871.7066796882889
epoch: 60 test_true_pfm: 1.038550456179936 sim_pfm: 927.1098876475262
episode: 240 training return: 801.3061516672169
episode: 241 training return: 892.5533890300072
episode: 242 training return: 874.1058397154496
episode: 243 training return: 725.7519190983081
epoch: 61 test_true_pfm: -5.895270860599661 sim_pfm: 905.4570161196947
episode: 244 training return: 829.1040170450511
episode: 245 training return: 719.0885198111417
episode: 246 training return: 730.9393142086924
episode: 247 training return: 742.5253810109136
epoch: 62 test_true_pfm: -9.118329088144359 sim_pfm: 893.9080721715296
episode: 248 training return: 772.1532460573395
episode: 249 training return: 782.9018499500081
episode: 250 training return: 804.0922634529886
episode: 251 training return: 696.1793434962719
epoch: 63 test_true_pfm: -7.601591095051188 sim_pfm: 893.1302506563812
episode: 252 training return: 788.7190780413481
episode: 253 training return: 797.8745289167555
episode: 254 training return: 697.4590155662485
episode: 255 training return: 705.9123296531053
epoch: 64 test_true_pfm: 2.812455513476902 sim_pfm: 874.2923736460689
episode: 256 training return: 777.5545744812506
episode: 257 training return: 710.5413568199963
episode: 258 training return: 762.1949712954175
episode: 259 training return: 765.5677324325625
epoch: 65 test_true_pfm: -3.2358072355726932 sim_pfm: 852.8246939006418
episode: 260 training return: 704.4195896137622
episode: 261 training return: 725.5186822352067
episode: 262 training return: 763.2768851278511
episode: 263 training return: 748.2142544841792
epoch: 66 test_true_pfm: 1.9079039428765576 sim_pfm: 863.9506776075286
episode: 264 training return: 712.9525347601327
episode: 265 training return: 815.7270042674703
episode: 266 training return: 805.7905190871236
episode: 267 training return: 778.065307640983
epoch: 67 test_true_pfm: -7.117388439079013 sim_pfm: 864.0096433640359
episode: 268 training return: 782.219995480452
episode: 269 training return: 794.638276560429
episode: 270 training return: 796.2731363196167
episode: 271 training return: 817.3737266281572
epoch: 68 test_true_pfm: 11.011038497765933 sim_pfm: 846.9462401941782
episode: 272 training return: 788.5357407612489
episode: 273 training return: 764.6921567052433
episode: 274 training return: 746.3763133932592
episode: 275 training return: 785.4374605288361
epoch: 69 test_true_pfm: 16.287173188772577 sim_pfm: 894.2523002640231
episode: 276 training return: 815.5327633968816
episode: 277 training return: 790.2885328243441
episode: 278 training return: 739.5695235211267
episode: 279 training return: 750.7214773709636
epoch: 70 test_true_pfm: 1.9832071751481757 sim_pfm: 765.8157170403059
episode: 280 training return: 765.9426466471699
episode: 281 training return: 689.5857179180822
episode: 282 training return: 788.8945019812074
episode: 283 training return: 717.27023891555
epoch: 71 test_true_pfm: 6.86783912393941 sim_pfm: 817.3993727094085
episode: 284 training return: 729.8042922886281
episode: 285 training return: 760.7994115611222
episode: 286 training return: 735.2651469373183
episode: 287 training return: 715.0854702905151
epoch: 72 test_true_pfm: -3.278265839723833 sim_pfm: 817.619017170334
episode: 288 training return: 744.0593603487578
episode: 289 training return: 706.2548273193792
episode: 290 training return: 715.252011914022
episode: 291 training return: 716.1697302644177
epoch: 73 test_true_pfm: 0.2444535543776619 sim_pfm: 821.6078162033846
episode: 292 training return: 767.0311733552907
episode: 293 training return: 731.75203478714
episode: 294 training return: 812.9919674848973
episode: 295 training return: 721.5343264822737
epoch: 74 test_true_pfm: 5.269795977026403 sim_pfm: 879.3408822540638
episode: 296 training return: 696.5725034485586
episode: 297 training return: 808.0091145785868
episode: 298 training return: 650.837358997155
episode: 299 training return: 848.9867091042406
epoch: 75 test_true_pfm: -3.1056182981221268 sim_pfm: 857.8143442508848
episode: 300 training return: 842.9313884649466
episode: 301 training return: 690.4584473621418
episode: 302 training return: 726.7525011025103
episode: 303 training return: 731.4435471293838
epoch: 76 test_true_pfm: 5.949288384555243 sim_pfm: 891.6418417887402
episode: 304 training return: 832.3081430304518
episode: 305 training return: 777.0751093402677
episode: 306 training return: 884.3461400904037
episode: 307 training return: 810.9553923013365
epoch: 77 test_true_pfm: 6.8256606409027345 sim_pfm: 924.5917587405278
episode: 308 training return: 811.6544526201742
episode: 309 training return: 820.3258400054091
episode: 310 training return: 889.0141566548382
episode: 311 training return: 791.7791673301605
epoch: 78 test_true_pfm: -10.498064597525433 sim_pfm: 886.3106350664473
episode: 312 training return: 811.3176327670475
episode: 313 training return: 827.7752164193347
episode: 314 training return: 763.7937909002114
episode: 315 training return: 742.7929435987315
epoch: 79 test_true_pfm: 1.488144326920454 sim_pfm: 897.5293422658726
episode: 316 training return: 809.1146711693796
episode: 317 training return: 773.0334016432627
episode: 318 training return: 799.8450559992483
episode: 319 training return: 719.5118531509142
epoch: 80 test_true_pfm: 5.767938772325115 sim_pfm: 873.5504281512074
episode: 320 training return: 754.6564757436465
episode: 321 training return: 743.2804324055128
episode: 322 training return: 747.8213653755498
episode: 323 training return: 754.1208584335013
epoch: 81 test_true_pfm: -1.8009236358278258 sim_pfm: 874.1365539062647
episode: 324 training return: 759.2600384309499
episode: 325 training return: 754.692596881804
episode: 326 training return: 761.5823198177976
episode: 327 training return: 838.6221045795476
epoch: 82 test_true_pfm: 7.237768344148094 sim_pfm: 900.1053574172332
episode: 328 training return: 731.0494296012346
episode: 329 training return: 723.202878340254
episode: 330 training return: 810.4074383978515
episode: 331 training return: 865.996716960302
epoch: 83 test_true_pfm: 6.9730849142250335 sim_pfm: 927.9837515157226
episode: 332 training return: 886.4599191406869
episode: 333 training return: 779.1299970513309
episode: 334 training return: 802.0664214158278
episode: 335 training return: 872.7027039226264
epoch: 84 test_true_pfm: 11.446990334725651 sim_pfm: 893.3412580252518
episode: 336 training return: 847.9551181905616
episode: 337 training return: 796.0248961907363
episode: 338 training return: 803.0597615013096
episode: 339 training return: 823.0663241309569
epoch: 85 test_true_pfm: -3.569257916570585 sim_pfm: 856.4326824946532
episode: 340 training return: 831.8824537274171
episode: 341 training return: 812.4429528813886
episode: 342 training return: 762.7675162269554
episode: 343 training return: 802.9612262241286
epoch: 86 test_true_pfm: -2.6027562293716255 sim_pfm: 875.2850001918941
episode: 344 training return: 806.890880860043
episode: 345 training return: 782.0117717328684
episode: 346 training return: 780.7233053206683
episode: 347 training return: 782.0232363935125
epoch: 87 test_true_pfm: 14.921961095182727 sim_pfm: 883.1469286825761
episode: 348 training return: 819.2766684183974
episode: 349 training return: 766.9201389987693
episode: 350 training return: 830.4390740311786
episode: 351 training return: 809.1657538436142
epoch: 88 test_true_pfm: 15.588808532967125 sim_pfm: 885.7034147797604
episode: 352 training return: 694.808722512647
episode: 353 training return: 732.8566379871639
episode: 354 training return: 724.4225803339879
episode: 355 training return: 790.5927650313175
epoch: 89 test_true_pfm: 8.699777555105252 sim_pfm: 874.3107287342025
episode: 356 training return: 732.9826720000242
episode: 357 training return: 779.3062834711291
episode: 358 training return: 806.5038424151226
episode: 359 training return: 770.5297633615321
epoch: 90 test_true_pfm: 1.4853765514140729 sim_pfm: 869.5833540586015
episode: 360 training return: 731.4499449904911
episode: 361 training return: 802.511509733649
episode: 362 training return: 791.6872868739935
episode: 363 training return: 786.7185249620994
epoch: 91 test_true_pfm: 10.874792305911047 sim_pfm: 912.8474194079575
episode: 364 training return: 798.480187090278
episode: 365 training return: 773.9351410534766
episode: 366 training return: 882.198166155227
episode: 367 training return: 805.1715832413915
epoch: 92 test_true_pfm: 13.201777354248074 sim_pfm: 893.8540316053301
episode: 368 training return: 821.7593663770544
episode: 369 training return: 797.4928900150736
episode: 370 training return: 831.3138070092126
episode: 371 training return: 797.5508971499008
epoch: 93 test_true_pfm: 6.406318809913342 sim_pfm: 909.5497474521977
episode: 372 training return: 791.9331301832876
episode: 373 training return: 826.9889430463479
episode: 374 training return: 826.1676268901044
episode: 375 training return: 783.1906196756902
epoch: 94 test_true_pfm: 1.9905102983547844 sim_pfm: 898.5420190687086
episode: 376 training return: 798.7108928211449
episode: 377 training return: 829.2182258098538
episode: 378 training return: 860.3707049618882
episode: 379 training return: 794.704279961919
epoch: 95 test_true_pfm: 13.324177301525044 sim_pfm: 920.2135740135997
episode: 380 training return: 854.9984353476642
episode: 381 training return: 862.1136789165532
episode: 382 training return: 788.1338288498781
episode: 383 training return: 786.3606240577035
epoch: 96 test_true_pfm: 8.188058639805604 sim_pfm: 907.0803644616968
episode: 384 training return: 821.3686142263177
episode: 385 training return: 820.724154840414
episode: 386 training return: 830.7358846211199
episode: 387 training return: 848.9379476226147
epoch: 97 test_true_pfm: 6.7785036821234685 sim_pfm: 873.8851183682533
episode: 388 training return: 840.489588544311
episode: 389 training return: 789.0808350946866
episode: 390 training return: 792.9337685758566
episode: 391 training return: 786.6316206752601
epoch: 98 test_true_pfm: -1.7771386449247835 sim_pfm: 877.363044675653
episode: 392 training return: 688.1170301769273
episode: 393 training return: 770.2216227921182
episode: 394 training return: 768.0174355303479
episode: 395 training return: 818.242349738138
epoch: 99 test_true_pfm: 1.0639714880625402 sim_pfm: 902.1566042796742
episode: 396 training return: 781.2834586678038
episode: 397 training return: 785.2143307613009
episode: 398 training return: 762.7894795126566
episode: 399 training return: 835.3288474177527
epoch: 100 test_true_pfm: -11.188462270298373 sim_pfm: 866.4998355179177
episode: 400 training return: 810.2632578420679
episode: 401 training return: 802.6949361377373
episode: 402 training return: 826.9324574961574
episode: 403 training return: 790.7516181065657
epoch: 101 test_true_pfm: -5.425944366502607 sim_pfm: 886.0817469333251
episode: 404 training return: 827.9368661778511
episode: 405 training return: 803.12544534426
episode: 406 training return: 778.2501032899214
episode: 407 training return: 805.0221888496552
epoch: 102 test_true_pfm: -9.161379056750198 sim_pfm: 880.162365530213
episode: 408 training return: 797.5993745570029
episode: 409 training return: 777.819686977384
episode: 410 training return: 758.4704140017137
episode: 411 training return: 751.678273090823
epoch: 103 test_true_pfm: -5.766768629717139 sim_pfm: 880.7078719384857
episode: 412 training return: 836.9924407181364
episode: 413 training return: 764.6830758969206
episode: 414 training return: 847.2486174067781
episode: 415 training return: 714.6979124940618
epoch: 104 test_true_pfm: -3.8351319450781767 sim_pfm: 895.3618094630223
episode: 416 training return: 816.710439357438
episode: 417 training return: 691.0967567647806
episode: 418 training return: 808.5513202958683
episode: 419 training return: 728.6628944953311
epoch: 105 test_true_pfm: 5.583056316231969 sim_pfm: 897.2498565490954
episode: 420 training return: 793.7272974238724
episode: 421 training return: 839.7617383873404
episode: 422 training return: 831.7181436504584
episode: 423 training return: 801.6649089502308
epoch: 106 test_true_pfm: -2.939034569581751 sim_pfm: 899.5630854859944
episode: 424 training return: 754.81399400768
episode: 425 training return: 839.7896904743534
episode: 426 training return: 824.0208608498316
episode: 427 training return: 798.7077070570772
epoch: 107 test_true_pfm: -1.5756676880178453 sim_pfm: 903.9659121768433
episode: 428 training return: 813.9273566968528
episode: 429 training return: 791.1199021496873
episode: 430 training return: 759.3642528821143
episode: 431 training return: 747.7040280234334
epoch: 108 test_true_pfm: 4.2691410027244645 sim_pfm: 882.0927965123756
episode: 432 training return: 709.8015815125708
episode: 433 training return: 787.907739690749
episode: 434 training return: 824.7809817711765
episode: 435 training return: 874.0234535666076
epoch: 109 test_true_pfm: 3.086268647460662 sim_pfm: 891.6316416025029
episode: 436 training return: 876.1125219628706
episode: 437 training return: 822.367448431598
episode: 438 training return: 763.2392057901815
episode: 439 training return: 751.3632052295608
epoch: 110 test_true_pfm: 1.352073196788171 sim_pfm: 852.5364884665129
episode: 440 training return: 764.0772156624394
episode: 441 training return: 810.5564826868647
episode: 442 training return: 756.9844641144543
episode: 443 training return: 762.4726674151173
epoch: 111 test_true_pfm: 12.435874645131985 sim_pfm: 900.5211466198309
episode: 444 training return: 795.7817816953485
episode: 445 training return: 767.0868732409137
episode: 446 training return: 243.55638526788647
episode: 447 training return: 773.0682077288943
epoch: 112 test_true_pfm: 6.46578331357645 sim_pfm: 894.0929616958222
episode: 448 training return: 751.7573212947568
episode: 449 training return: 703.6447463433839
episode: 450 training return: 763.9646083640066
episode: 451 training return: 788.4768111461387
epoch: 113 test_true_pfm: 4.518617952067747 sim_pfm: 889.7503858457646
episode: 452 training return: 848.2143400640446
episode: 453 training return: 875.841715267557
episode: 454 training return: 690.5022143173695
episode: 455 training return: 876.3927994896125
epoch: 114 test_true_pfm: 5.15872267138594 sim_pfm: 897.6636506135486
episode: 456 training return: 768.0833550203816
episode: 457 training return: 815.5774672067572
episode: 458 training return: 805.091547566244
episode: 459 training return: 803.0917241957133
epoch: 115 test_true_pfm: -0.27379559729209646 sim_pfm: 826.5795734521546
episode: 460 training return: 825.1776491788447
episode: 461 training return: 780.8210008162171
episode: 462 training return: 767.4678196384893
episode: 463 training return: 710.0301298274048
epoch: 116 test_true_pfm: 21.058691528217235 sim_pfm: 890.8600271218784
episode: 464 training return: 807.9648500266628
episode: 465 training return: 800.7198710897877
episode: 466 training return: 742.9790862114946
episode: 467 training return: 790.5765836103318
epoch: 117 test_true_pfm: 4.61173371427434 sim_pfm: 873.7472236793963
episode: 468 training return: 725.2758025908454
episode: 469 training return: 777.1898485696005
episode: 470 training return: 747.7420841373535
episode: 471 training return: 785.9193008127934
epoch: 118 test_true_pfm: 11.62315032799992 sim_pfm: 916.7469643306262
episode: 472 training return: 768.2192592807884
episode: 473 training return: 807.5040104284262
episode: 474 training return: 761.7684408985932
episode: 475 training return: 709.9980165829717
epoch: 119 test_true_pfm: 9.723663814519657 sim_pfm: 937.1741026618787
episode: 476 training return: 737.9216706913282
episode: 477 training return: 750.0718198406354
episode: 478 training return: 808.2155569994779
episode: 479 training return: 762.3080269483843
epoch: 120 test_true_pfm: 7.699935271491126 sim_pfm: 938.9663134478742
episode: 480 training return: 812.9083420501315
episode: 481 training return: 782.3584137887268
episode: 482 training return: 814.8512589407687
episode: 483 training return: 830.7333908365857
epoch: 121 test_true_pfm: 7.936944345614258 sim_pfm: 909.2710699194926
episode: 484 training return: 838.1457508392526
episode: 485 training return: 873.0874671273845
episode: 486 training return: 815.6148546632866
episode: 487 training return: 802.0526806577167
epoch: 122 test_true_pfm: 2.534646094025878 sim_pfm: 914.483764968733
episode: 488 training return: 834.8705677859492
episode: 489 training return: 769.8573357763933
episode: 490 training return: 768.1624600014809
episode: 491 training return: 768.5446935772223
epoch: 123 test_true_pfm: 6.171397553995565 sim_pfm: 850.5331253332668
episode: 492 training return: 722.6185922140666
episode: 493 training return: 713.0600026512775
episode: 494 training return: 745.2997173839123
episode: 495 training return: 791.8711951160502
epoch: 124 test_true_pfm: -8.198245247373922 sim_pfm: 848.2084069474811
episode: 496 training return: 774.5754878817461
episode: 497 training return: 728.1278334424157
episode: 498 training return: 759.26201381695
episode: 499 training return: 779.9944965497591
epoch: 125 test_true_pfm: -15.772504070824942 sim_pfm: 847.0649993092993
episode: 500 training return: 714.3389428062144
episode: 501 training return: 762.3924950928464
episode: 502 training return: 791.7623156007654
episode: 503 training return: 786.8051496908574
epoch: 126 test_true_pfm: -4.089217866694367 sim_pfm: 892.7347977938656
episode: 504 training return: 766.829783605352
episode: 505 training return: 735.2125631269356
episode: 506 training return: 837.7039803764229
episode: 507 training return: 771.7274252574202
epoch: 127 test_true_pfm: 5.313915384027643 sim_pfm: 887.4022771392567
episode: 508 training return: 781.5951084947565
episode: 509 training return: 812.3612233668683
episode: 510 training return: 746.9013801982483
episode: 511 training return: 764.7856790547545
epoch: 128 test_true_pfm: 13.435815775733932 sim_pfm: 898.3351811533606
episode: 512 training return: 749.7437791742639
episode: 513 training return: 740.9182878712257
episode: 514 training return: 754.1571642009394
episode: 515 training return: 690.54178861246
epoch: 129 test_true_pfm: 2.343145714387015 sim_pfm: 849.1748568192731
episode: 516 training return: 700.054301435399
episode: 517 training return: 727.02636821748
episode: 518 training return: 746.204358791211
episode: 519 training return: 745.2210815696183
epoch: 130 test_true_pfm: -9.216002260810734 sim_pfm: 879.5360250555466
episode: 520 training return: 714.9743303206264
episode: 521 training return: 702.8042867221934
episode: 522 training return: 743.5795496953733
episode: 523 training return: 650.1389292708313
epoch: 131 test_true_pfm: 18.076149454230563 sim_pfm: 892.6143464134435
episode: 524 training return: 698.7481719592424
episode: 525 training return: 783.6952213682802
episode: 526 training return: 737.5524651970817
episode: 527 training return: 734.3675891369965
epoch: 132 test_true_pfm: 3.12743300307548 sim_pfm: 899.3802843982587
episode: 528 training return: 716.6918344473447
episode: 529 training return: 728.5785776810116
episode: 530 training return: 779.213053525831
episode: 531 training return: 748.2394651324736
epoch: 133 test_true_pfm: -6.5099770784910405 sim_pfm: 890.3471468226444
episode: 532 training return: 741.3767347188935
episode: 533 training return: 773.5557477451032
episode: 534 training return: 725.8105955342072
episode: 535 training return: 727.8040370737614
epoch: 134 test_true_pfm: -8.256214988863201 sim_pfm: 909.8210944586247
episode: 536 training return: 740.8963476000206
episode: 537 training return: 794.9748518283465
episode: 538 training return: 774.0055335455314
episode: 539 training return: 800.4556560135786
epoch: 135 test_true_pfm: -1.8952320803288842 sim_pfm: 868.6073525712924
episode: 540 training return: 723.7779589977015
episode: 541 training return: 733.3214271091801
episode: 542 training return: 755.1746423002169
episode: 543 training return: 773.6500116368454
epoch: 136 test_true_pfm: 11.062468391803618 sim_pfm: 897.3297813575366
episode: 544 training return: 749.1428568886453
episode: 545 training return: 776.2861838526368
episode: 546 training return: 777.7391346802107
episode: 547 training return: 726.8915385548452
epoch: 137 test_true_pfm: 15.008667809922514 sim_pfm: 911.3003587994065
episode: 548 training return: 716.9107788147423
episode: 549 training return: 715.4614787827783
episode: 550 training return: 762.7426637503244
episode: 551 training return: 772.6743882371985
epoch: 138 test_true_pfm: 8.69864405278165 sim_pfm: 928.9097121902194
episode: 552 training return: 729.7123821057552
episode: 553 training return: 794.1701793935997
episode: 554 training return: 837.5091895843691
episode: 555 training return: 795.0101705170098
epoch: 139 test_true_pfm: -8.284289615973652 sim_pfm: 857.6174784365073
episode: 556 training return: 743.8137156012477
episode: 557 training return: 782.915760963396
episode: 558 training return: 675.7965285084648
episode: 559 training return: 834.8348653772106
epoch: 140 test_true_pfm: -10.167247347200634 sim_pfm: 906.687287284647
episode: 560 training return: 813.8243324402328
episode: 561 training return: 736.8178556113802
episode: 562 training return: 783.2984264034903
episode: 563 training return: 854.8956410095731
epoch: 141 test_true_pfm: 7.08025840052845 sim_pfm: 880.5036585010934
episode: 564 training return: 827.654299705533
episode: 565 training return: 775.3883697689446
episode: 566 training return: 856.2306698826226
episode: 567 training return: 734.7306987643675
epoch: 142 test_true_pfm: 4.121804771241992 sim_pfm: 900.8867841990134
episode: 568 training return: 837.9641172423917
episode: 569 training return: 780.0893714681658
episode: 570 training return: 807.9562857654197
episode: 571 training return: 699.5178249284155
epoch: 143 test_true_pfm: -10.286212740909871 sim_pfm: 891.1585219934998
episode: 572 training return: 742.2834860854256
episode: 573 training return: 792.7568175000944
episode: 574 training return: 821.7089382582319
episode: 575 training return: 750.8141347832589
epoch: 144 test_true_pfm: 0.6464313765423298 sim_pfm: 890.2126474306735
episode: 576 training return: 792.6168441219895
episode: 577 training return: 749.9259950367252
episode: 578 training return: 821.8429405403748
episode: 579 training return: 704.0784598384779
epoch: 145 test_true_pfm: -0.2843934347074194 sim_pfm: 891.1072633848971
episode: 580 training return: 736.8713799111035
episode: 581 training return: 788.7330826822237
episode: 582 training return: 730.0845973011856
episode: 583 training return: 800.1932196301352
epoch: 146 test_true_pfm: -7.600697098314649 sim_pfm: 875.3718571073043
episode: 584 training return: 751.8507917396959
episode: 585 training return: 694.9633828739129
episode: 586 training return: 779.5940050473396
episode: 587 training return: 703.9025737136742
epoch: 147 test_true_pfm: -5.840282418409839 sim_pfm: 903.0257262699503
episode: 588 training return: 823.1563633640724
episode: 589 training return: 711.3106633299737
episode: 590 training return: 747.7229996955455
episode: 591 training return: 757.74250102149
epoch: 148 test_true_pfm: -13.626438832483775 sim_pfm: 844.0606137057885
episode: 592 training return: 775.220319404388
episode: 593 training return: 729.2259183640051
episode: 594 training return: 720.833732117062
episode: 595 training return: 772.5218522022644
epoch: 149 test_true_pfm: 10.401411295068685 sim_pfm: 890.814523699168
episode: 596 training return: 753.3422432169326
episode: 597 training return: 790.8819588593772
episode: 598 training return: 787.6492723775968
episode: 599 training return: 810.7157307287754
epoch: 150 test_true_pfm: 10.469262468001647 sim_pfm: 899.3323115375742
