['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'uncertainty', '--traj', 'expert', '--seed', '4', '--data', '100000']
epoch: 0 training_loss 0.3393136519193649 test_loss: 0.23054375648498535
epoch: 1 training_loss 0.21903861716389655 test_loss: 0.2216575860977173
epoch: 2 training_loss 0.19403567738831043 test_loss: 0.18255300521850587
epoch: 3 training_loss 0.16550031788647174 test_loss: 0.19020806550979613
epoch: 4 training_loss 0.16218782633543014 test_loss: 0.15737035274505615
epoch: 5 training_loss 0.14756801702082156 test_loss: 0.14975539445877076
epoch: 6 training_loss 0.1586144284904003 test_loss: 0.15198915004730223
epoch: 7 training_loss 0.1407920304313302 test_loss: 0.13136684894561768
epoch: 8 training_loss 0.13608975544571877 test_loss: 0.1517820954322815
epoch: 9 training_loss 0.14105900317430498 test_loss: 0.13997316360473633
epoch: 10 training_loss 0.13036410383880137 test_loss: 0.1340625524520874
epoch: 11 training_loss 0.13198170602321624 test_loss: 0.12066096067428589
epoch: 12 training_loss 0.1335413271933794 test_loss: 0.13983309268951416
epoch: 13 training_loss 0.12987154006958007 test_loss: 0.1306563973426819
epoch: 14 training_loss 0.13049943469464778 test_loss: 0.14553877115249633
epoch: 15 training_loss 0.12645974341779948 test_loss: 0.13910167217254638
epoch: 16 training_loss 0.12779155489057303 test_loss: 0.12664257287979125
epoch: 17 training_loss 0.12286637334153055 test_loss: 0.14806504249572755
epoch: 18 training_loss 0.12285280961543321 test_loss: 0.1259841203689575
epoch: 19 training_loss 0.12048841029405594 test_loss: 0.12290644645690918
epoch: 20 training_loss 0.1235766176879406 test_loss: 0.11706008911132812
epoch: 21 training_loss 0.11584009688347578 test_loss: 0.10904542207717896
epoch: 22 training_loss 0.12428058851510286 test_loss: 0.12794331312179566
epoch: 23 training_loss 0.12257564540952444 test_loss: 0.12747472524642944
epoch: 24 training_loss 0.1201241298764944 test_loss: 0.12775771617889403
epoch: 25 training_loss 0.11669086940586566 test_loss: 0.1071651816368103
epoch: 26 training_loss 0.12244017906486988 test_loss: 0.12596335411071777
epoch: 27 training_loss 0.11929714553058147 test_loss: 0.11407909393310547
epoch: 28 training_loss 0.11630869496613741 test_loss: 0.1236683487892151
epoch: 29 training_loss 0.12142749588936567 test_loss: 0.12547355890274048
epoch: 30 training_loss 0.12024538844823837 test_loss: 0.13011244535446168
epoch: 31 training_loss 0.11332355158403516 test_loss: 0.13062806129455568
epoch: 32 training_loss 0.1166150851547718 test_loss: 0.11698158979415893
epoch: 33 training_loss 0.11051583014428616 test_loss: 0.13933614492416382
epoch: 34 training_loss 0.11917052641510964 test_loss: 0.12632900476455688
epoch: 35 training_loss 0.1208960623666644 test_loss: 0.1204652190208435
epoch: 36 training_loss 0.11101762726902961 test_loss: 0.11299142837524415
epoch: 37 training_loss 0.11556581161916256 test_loss: 0.13061344623565674
epoch: 38 training_loss 0.124258555136621 test_loss: 0.11914435625076295
epoch: 39 training_loss 0.11431696340441704 test_loss: 0.12218488454818725
epoch: 40 training_loss 0.11725916981697082 test_loss: 0.11843841075897217
epoch: 41 training_loss 0.11242405477911234 test_loss: 0.12539139986038209
epoch: 42 training_loss 0.11591984134167432 test_loss: 0.10738308429718017
epoch: 43 training_loss 0.1109403525851667 test_loss: 0.11250942945480347
epoch: 44 training_loss 0.11903946097940206 test_loss: 0.11762290000915528
epoch: 45 training_loss 0.11053686425089836 test_loss: 0.11166361570358277
epoch: 46 training_loss 0.11427166994661092 test_loss: 0.1265385627746582
epoch: 47 training_loss 0.11815948463976383 test_loss: 0.11263889074325562
epoch: 48 training_loss 0.10898109652101994 test_loss: 0.1245790958404541
epoch: 49 training_loss 0.11361103858798742 test_loss: 0.12036175727844238
epoch: 50 training_loss 0.11341885792091488 test_loss: 0.11843276023864746
epoch: 51 training_loss 0.11465701205655933 test_loss: 0.11140201091766358
epoch: 52 training_loss 0.11484462432563305 test_loss: 0.11064471006393432
epoch: 53 training_loss 0.11217293623834848 test_loss: 0.11866700649261475
epoch: 54 training_loss 0.11452197298407554 test_loss: 0.10988000631332398
epoch: 55 training_loss 0.11550664182752371 test_loss: 0.10012228488922119
epoch: 56 training_loss 0.10623307079076767 test_loss: 0.1271888256072998
epoch: 57 training_loss 0.11438959904015064 test_loss: 0.11313912868499756
epoch: 58 training_loss 0.1147860685735941 test_loss: 0.12469117641448975
epoch: 59 training_loss 0.11389962665736675 test_loss: 0.11133513450622559
epoch: 60 training_loss 0.1128516311571002 test_loss: 0.1077775239944458
epoch: 61 training_loss 0.10915355237200856 test_loss: 0.11940248012542724
epoch: 62 training_loss 0.10806451968848706 test_loss: 0.13862121105194092
epoch: 63 training_loss 0.10909898422658443 test_loss: 0.09918848276138306
epoch: 64 training_loss 0.11100921103730797 test_loss: 0.12390358448028564
epoch: 65 training_loss 0.11073254708200693 test_loss: 0.1323412537574768
epoch: 66 training_loss 0.11729835469275712 test_loss: 0.11532732248306274
epoch: 67 training_loss 0.11314120389521122 test_loss: 0.11331837177276612
epoch: 68 training_loss 0.10569538496434688 test_loss: 0.1306278347969055
epoch: 69 training_loss 0.10693750280886888 test_loss: 0.11457381248474122
epoch: 70 training_loss 0.10912838708609343 test_loss: 0.1172722339630127
epoch: 71 training_loss 0.11884131759405137 test_loss: 0.10018517971038818
epoch: 72 training_loss 0.11025401107966899 test_loss: 0.09889113306999206
epoch: 73 training_loss 0.11108564972877502 test_loss: 0.10171453952789307
epoch: 74 training_loss 0.10575144194066524 test_loss: 0.1142256498336792
epoch: 75 training_loss 0.1146757658943534 test_loss: 0.11425362825393677
epoch: 76 training_loss 0.1055851075425744 test_loss: 0.10933533906936646
epoch: 77 training_loss 0.09976821959018707 test_loss: 0.11696467399597169
epoch: 78 training_loss 0.11345094744116067 test_loss: 0.10430999994277954
epoch: 79 training_loss 0.10983284004032612 test_loss: 0.10749092102050781
epoch: 80 training_loss 0.10859347078949214 test_loss: 0.10798437595367431
epoch: 81 training_loss 0.11505698554217815 test_loss: 0.10885539054870605
epoch: 82 training_loss 0.11655150882899762 test_loss: 0.11282474994659424
epoch: 83 training_loss 0.0996307997033 test_loss: 0.11802594661712647
epoch: 84 training_loss 0.114719408005476 test_loss: 0.10560948848724365
epoch: 85 training_loss 0.10860675603151321 test_loss: 0.12073990106582641
epoch: 86 training_loss 0.11832596138119697 test_loss: 0.11643574237823487
epoch: 87 training_loss 0.10811016395688057 test_loss: 0.12777713537216187
epoch: 88 training_loss 0.10658896684646607 test_loss: 0.10604461431503295
epoch: 89 training_loss 0.10643834413960576 test_loss: 0.10653070211410523
epoch: 90 training_loss 0.10464929658919572 test_loss: 0.09838885664939881
epoch: 91 training_loss 0.11077258076518774 test_loss: 0.11058132648468018
epoch: 92 training_loss 0.10692705925554037 test_loss: 0.11718829870223998
epoch: 93 training_loss 0.10952144853770733 test_loss: 0.12198173999786377
epoch: 94 training_loss 0.10805328853428364 test_loss: 0.1160531759262085
epoch: 95 training_loss 0.11852515064179897 test_loss: 0.10297904014587403
epoch: 96 training_loss 0.10460830142721533 test_loss: 0.10160762071609497
epoch: 97 training_loss 0.1055224895477295 test_loss: 0.12201344966888428
epoch: 98 training_loss 0.11258451540023089 test_loss: 0.10360962152481079
epoch: 99 training_loss 0.10606163930147887 test_loss: 0.10786521434783936
epoch: 100 training_loss 0.10618549281731247 test_loss: 0.11903274059295654
epoch: 101 training_loss 0.11575798194855452 test_loss: 0.115418541431427
epoch: 102 training_loss 0.11351370368152856 test_loss: 0.1116440773010254
epoch: 103 training_loss 0.10989878483116627 test_loss: 0.10688242912292481
epoch: 104 training_loss 0.10562809325754642 test_loss: 0.12142994403839111
epoch: 105 training_loss 0.10980289150029421 test_loss: 0.10492761135101318
epoch: 106 training_loss 0.1137331748008728 test_loss: 0.10426861047744751
epoch: 107 training_loss 0.10888348272070289 test_loss: 0.11069680452346801
epoch: 108 training_loss 0.10616887673735619 test_loss: 0.11083046197891236
epoch: 109 training_loss 0.11101231385022402 test_loss: 0.10317919254302979
epoch: 110 training_loss 0.10771437767893076 test_loss: 0.1384728193283081
epoch: 111 training_loss 0.10645357120782137 test_loss: 0.09629576802253723
epoch: 112 training_loss 0.09993661925196648 test_loss: 0.11402944326400757
epoch: 113 training_loss 0.10698106426745653 test_loss: 0.12375266551971435
epoch: 114 training_loss 0.1047648505680263 test_loss: 0.09991883635520935
epoch: 115 training_loss 0.10981836784631013 test_loss: 0.10041953325271606
epoch: 116 training_loss 0.10340724255889654 test_loss: 0.09435499906539917
epoch: 117 training_loss 0.10882456589490175 test_loss: 0.10090509653091431
epoch: 118 training_loss 0.10353812051005662 test_loss: 0.11888995170593261
epoch: 119 training_loss 0.10661142023280262 test_loss: 0.09424170851707458
epoch: 120 training_loss 0.10519128711894155 test_loss: 0.10452520847320557
epoch: 121 training_loss 0.09956219747662544 test_loss: 0.10259634256362915
epoch: 122 training_loss 0.11420508341863751 test_loss: 0.11783565282821655
epoch: 123 training_loss 0.11377309141680599 test_loss: 0.10949885845184326
epoch: 124 training_loss 0.10684767136350275 test_loss: 0.10447356700897217
epoch: 125 training_loss 0.10895440645515919 test_loss: 0.11063448190689087
epoch: 126 training_loss 0.09839438959956169 test_loss: 0.12809704542160033
epoch: 127 training_loss 0.10729425307363272 test_loss: 0.1088498592376709
epoch: 128 training_loss 0.1067443267814815 test_loss: 0.12228630781173706
epoch: 129 training_loss 0.10649291682988406 test_loss: 0.11860353946685791
epoch: 130 training_loss 0.10819607205688954 test_loss: 0.11730391979217529
epoch: 131 training_loss 0.10788141660392285 test_loss: 0.11037251949310303
epoch: 132 training_loss 0.10899466270580888 test_loss: 0.09119347333908082
epoch: 133 training_loss 0.10649015935137868 test_loss: 0.11571528911590576
epoch: 134 training_loss 0.10383916780352592 test_loss: 0.1120070457458496
epoch: 135 training_loss 0.10033078549429775 test_loss: 0.09134462475776672
epoch: 136 training_loss 0.1036327825859189 test_loss: 0.10385216474533081
epoch: 137 training_loss 0.10915826920419931 test_loss: 0.10510537624359131
epoch: 138 training_loss 0.11021926883608103 test_loss: 0.10426037311553955
epoch: 139 training_loss 0.10107505217194557 test_loss: 0.10273566246032714
epoch: 140 training_loss 0.09938674811273814 test_loss: 0.10201213359832764
epoch: 141 training_loss 0.11632636716589331 test_loss: 0.09726749658584595
epoch: 142 training_loss 0.10795299861580133 test_loss: 0.10318737030029297
epoch: 143 training_loss 0.11261985797435045 test_loss: 0.11465961933135986
epoch: 144 training_loss 0.10140902183949947 test_loss: 0.1147172212600708
epoch: 145 training_loss 0.1000619823858142 test_loss: 0.11423002481460572
epoch: 146 training_loss 0.10854648554697632 test_loss: 0.11104838848114014
epoch: 147 training_loss 0.1099310576915741 test_loss: 0.10578078031539917
epoch: 148 training_loss 0.10698009202256799 test_loss: 0.117763090133667
epoch: 149 training_loss 0.10339000843465328 test_loss: 0.11316198110580444
epoch: 0 training_loss 0.32355052798986433 test_loss: 0.2689321279525757
epoch: 1 training_loss 0.24245937123894692 test_loss: 0.2082148313522339
epoch: 2 training_loss 0.18786068320274352 test_loss: 0.18007246255874634
epoch: 3 training_loss 0.17437192499637605 test_loss: 0.16325693130493163
epoch: 4 training_loss 0.15842921130359172 test_loss: 0.1828268885612488
epoch: 5 training_loss 0.1558964766561985 test_loss: 0.15016242265701293
epoch: 6 training_loss 0.14872412972152232 test_loss: 0.15383075475692748
epoch: 7 training_loss 0.14183330085128545 test_loss: 0.15052647590637208
epoch: 8 training_loss 0.13703690961003304 test_loss: 0.1712191343307495
epoch: 9 training_loss 0.1354547692462802 test_loss: 0.1426815629005432
epoch: 10 training_loss 0.13286749489605426 test_loss: 0.14002941846847533
epoch: 11 training_loss 0.1306962262839079 test_loss: 0.12260123491287231
epoch: 12 training_loss 0.12868608359247447 test_loss: 0.12822579145431517
epoch: 13 training_loss 0.1259926925972104 test_loss: 0.13522140979766845
epoch: 14 training_loss 0.12724491443485023 test_loss: 0.13323874473571778
epoch: 15 training_loss 0.12246344298124313 test_loss: 0.12730927467346193
epoch: 16 training_loss 0.12478455044329166 test_loss: 0.1355469584465027
epoch: 17 training_loss 0.12219760220497847 test_loss: 0.13819862604141236
epoch: 18 training_loss 0.11687240418046713 test_loss: 0.13664621114730835
epoch: 19 training_loss 0.11847939401865006 test_loss: 0.12690988779067994
epoch: 20 training_loss 0.10832817770540715 test_loss: 0.13404557704925538
epoch: 21 training_loss 0.1152742687985301 test_loss: 0.13427164554595947
epoch: 22 training_loss 0.12179241970181465 test_loss: 0.13688840866088867
epoch: 23 training_loss 0.11792622778564692 test_loss: 0.12495145797729493
epoch: 24 training_loss 0.11841811690479517 test_loss: 0.13781026601791382
epoch: 25 training_loss 0.11440276872366667 test_loss: 0.11182751655578613
epoch: 26 training_loss 0.11605906475335359 test_loss: 0.12629729509353638
epoch: 27 training_loss 0.11248755373060704 test_loss: 0.1152327299118042
epoch: 28 training_loss 0.11398308016359807 test_loss: 0.1299980640411377
epoch: 29 training_loss 0.11523445814847946 test_loss: 0.11994191408157348
epoch: 30 training_loss 0.10996024832129478 test_loss: 0.12772445678710936
epoch: 31 training_loss 0.1109260631352663 test_loss: 0.1274105429649353
epoch: 32 training_loss 0.11188699072226882 test_loss: 0.1290611982345581
epoch: 33 training_loss 0.11018172532320022 test_loss: 0.12926777601242065
epoch: 34 training_loss 0.11001862399280071 test_loss: 0.13884106874465943
epoch: 35 training_loss 0.11247652757912874 test_loss: 0.13699560165405272
epoch: 36 training_loss 0.11524578582495451 test_loss: 0.1251575231552124
epoch: 37 training_loss 0.10345157818868757 test_loss: 0.12093424797058105
epoch: 38 training_loss 0.10935757987201214 test_loss: 0.11015667915344238
epoch: 39 training_loss 0.10810033164918423 test_loss: 0.12104352712631225
epoch: 40 training_loss 0.10810159724205733 test_loss: 0.10920515060424804
epoch: 41 training_loss 0.11314583890140056 test_loss: 0.125790536403656
epoch: 42 training_loss 0.1108771600946784 test_loss: 0.1251833438873291
epoch: 43 training_loss 0.11940073676407337 test_loss: 0.11443958282470704
epoch: 44 training_loss 0.11146014019846916 test_loss: 0.11544482707977295
epoch: 45 training_loss 0.10256153177469969 test_loss: 0.10355720520019532
epoch: 46 training_loss 0.10996974561363458 test_loss: 0.12592020034790039
epoch: 47 training_loss 0.10371906822547317 test_loss: 0.11653162240982055
epoch: 48 training_loss 0.11567016873508691 test_loss: 0.12734920978546144
epoch: 49 training_loss 0.10921080593019723 test_loss: 0.1214604377746582
epoch: 50 training_loss 0.10413996793329716 test_loss: 0.11633602380752564
epoch: 51 training_loss 0.10405623253434897 test_loss: 0.1206099271774292
epoch: 52 training_loss 0.10700679684057832 test_loss: 0.11712080240249634
epoch: 53 training_loss 0.10695811811834574 test_loss: 0.11469533443450927
epoch: 54 training_loss 0.11548841763287783 test_loss: 0.11527650356292725
epoch: 55 training_loss 0.11079688936471939 test_loss: 0.14120007753372193
epoch: 56 training_loss 0.11186201848089695 test_loss: 0.1286672592163086
epoch: 57 training_loss 0.10785457899793982 test_loss: 0.1340896964073181
epoch: 58 training_loss 0.10676299542188644 test_loss: 0.11941801309585572
epoch: 59 training_loss 0.11464781794697046 test_loss: 0.11467294692993164
epoch: 60 training_loss 0.10711043467745185 test_loss: 0.11691792011260986
epoch: 61 training_loss 0.11046504516154527 test_loss: 0.11736153364181519
epoch: 62 training_loss 0.10465475379489363 test_loss: 0.11398156881332397
epoch: 63 training_loss 0.10593583263456821 test_loss: 0.12884317636489867
epoch: 64 training_loss 0.11148308254778386 test_loss: 0.11066014766693115
epoch: 65 training_loss 0.10326287932693959 test_loss: 0.10926235914230346
epoch: 66 training_loss 0.11288232360035182 test_loss: 0.12197628021240234
epoch: 67 training_loss 0.11086299542337656 test_loss: 0.10215456485748291
epoch: 68 training_loss 0.10315993659198285 test_loss: 0.12231984138488769
epoch: 69 training_loss 0.10449387539178133 test_loss: 0.12303667068481446
epoch: 70 training_loss 0.1146899850293994 test_loss: 0.1305549383163452
epoch: 71 training_loss 0.10323664661496877 test_loss: 0.10080126523971558
epoch: 72 training_loss 0.10545044537633658 test_loss: 0.1094598650932312
epoch: 73 training_loss 0.1106938636302948 test_loss: 0.1156505823135376
epoch: 74 training_loss 0.10776199715211987 test_loss: 0.10876721143722534
epoch: 75 training_loss 0.11373738959431648 test_loss: 0.12182923555374145
epoch: 76 training_loss 0.10938787616789342 test_loss: 0.11964298486709594
epoch: 77 training_loss 0.10744079500436783 test_loss: 0.10867692232131958
epoch: 78 training_loss 0.1104671284928918 test_loss: 0.11375399827957153
epoch: 79 training_loss 0.11186685781925916 test_loss: 0.12272821664810181
epoch: 80 training_loss 0.10461524032056331 test_loss: 0.11835474967956543
epoch: 81 training_loss 0.11285856556147338 test_loss: 0.12919070720672607
epoch: 82 training_loss 0.11084926495328545 test_loss: 0.1222541332244873
epoch: 83 training_loss 0.10193705718964338 test_loss: 0.10811437368392944
epoch: 84 training_loss 0.10666331257671118 test_loss: 0.10455387830734253
epoch: 85 training_loss 0.10730630643665791 test_loss: 0.11616318225860596
epoch: 86 training_loss 0.10853458795696497 test_loss: 0.13114912509918214
epoch: 87 training_loss 0.1082282361574471 test_loss: 0.12057244777679443
epoch: 88 training_loss 0.1038832663372159 test_loss: 0.12861785888671876
epoch: 89 training_loss 0.10565450429916382 test_loss: 0.1244084358215332
epoch: 90 training_loss 0.10160702237859369 test_loss: 0.11841683387756348
epoch: 91 training_loss 0.10264020193368197 test_loss: 0.1160574197769165
epoch: 92 training_loss 0.10754525288939476 test_loss: 0.10965093374252319
epoch: 93 training_loss 0.10479451414197684 test_loss: 0.11095744371414185
epoch: 94 training_loss 0.1043498588912189 test_loss: 0.13692910671234132
epoch: 95 training_loss 0.10749142153188586 test_loss: 0.11894989013671875
epoch: 96 training_loss 0.10178474551066756 test_loss: 0.10250394344329834
epoch: 97 training_loss 0.09951059270650148 test_loss: 0.11364017724990845
epoch: 98 training_loss 0.09916443953290582 test_loss: 0.10331602096557617
epoch: 99 training_loss 0.10274931719526649 test_loss: 0.10687369108200073
epoch: 100 training_loss 0.10544274426996708 test_loss: 0.11720539331436157
epoch: 101 training_loss 0.11202349536120891 test_loss: 0.11722921133041382
epoch: 102 training_loss 0.11227294813841582 test_loss: 0.11767927408218384
epoch: 103 training_loss 0.10650797113776207 test_loss: 0.1225196123123169
epoch: 104 training_loss 0.10968790732324124 test_loss: 0.11599427461624146
epoch: 105 training_loss 0.09567220179364085 test_loss: 0.10792638063430786
epoch: 106 training_loss 0.10997970884665847 test_loss: 0.12713993787765504
epoch: 107 training_loss 0.10873217469081282 test_loss: 0.11890594959259033
epoch: 108 training_loss 0.10952811915427446 test_loss: 0.11622246503829955
epoch: 109 training_loss 0.10579620435833931 test_loss: 0.11533901691436768
epoch: 110 training_loss 0.0989420773088932 test_loss: 0.10841237306594849
epoch: 111 training_loss 0.11370171330869198 test_loss: 0.1149977445602417
epoch: 112 training_loss 0.1034220153838396 test_loss: 0.11413371562957764
epoch: 113 training_loss 0.11533974193036556 test_loss: 0.12557133436203002
epoch: 114 training_loss 0.10665635511279106 test_loss: 0.09582718610763549
epoch: 115 training_loss 0.10898159928619862 test_loss: 0.11445655822753906
epoch: 116 training_loss 0.10199453938752413 test_loss: 0.11061619520187378
epoch: 117 training_loss 0.1093559604138136 test_loss: 0.10281213521957397
epoch: 118 training_loss 0.09978634670376778 test_loss: 0.12497416734695435
epoch: 119 training_loss 0.10661558184772729 test_loss: 0.11824226379394531
epoch: 120 training_loss 0.10397247672080993 test_loss: 0.10556751489639282
epoch: 121 training_loss 0.10974280688911676 test_loss: 0.11593660116195678
epoch: 122 training_loss 0.1040926300548017 test_loss: 0.11137541532516479
epoch: 123 training_loss 0.10638182815164328 test_loss: 0.11481211185455323
epoch: 124 training_loss 0.1061088129132986 test_loss: 0.10676702260971069
epoch: 125 training_loss 0.09879945691674948 test_loss: 0.12114189863204956
epoch: 126 training_loss 0.10753848787397147 test_loss: 0.12152608633041381
epoch: 127 training_loss 0.104207109734416 test_loss: 0.1193621277809143
epoch: 128 training_loss 0.10339196285232902 test_loss: 0.12976648807525634
epoch: 129 training_loss 0.10679819241166115 test_loss: 0.11429486274719239
epoch: 130 training_loss 0.10888159435242414 test_loss: 0.12460280656814575
epoch: 131 training_loss 0.10638081513345242 test_loss: 0.11779932975769043
epoch: 132 training_loss 0.10146237464621663 test_loss: 0.11732041835784912
epoch: 133 training_loss 0.09893967116251588 test_loss: 0.10572923421859741
epoch: 134 training_loss 0.10035103533416986 test_loss: 0.12753349542617798
epoch: 135 training_loss 0.11385813288390637 test_loss: 0.11611065864562989
epoch: 136 training_loss 0.10575088346377015 test_loss: 0.11020476818084717
epoch: 137 training_loss 0.10547564901411534 test_loss: 0.11090584993362426
epoch: 138 training_loss 0.10357509210705756 test_loss: 0.1187060832977295
epoch: 139 training_loss 0.10619797840714455 test_loss: 0.1087624192237854
epoch: 140 training_loss 0.10678449098020792 test_loss: 0.11086657047271728
epoch: 141 training_loss 0.10463884938508272 test_loss: 0.12678371667861937
epoch: 142 training_loss 0.09997788755223155 test_loss: 0.10467572212219238
epoch: 143 training_loss 0.10063365545123816 test_loss: 0.10375394821166992
epoch: 144 training_loss 0.10410249087959528 test_loss: 0.11305232048034668
epoch: 145 training_loss 0.09967183727771044 test_loss: 0.11539757251739502
epoch: 146 training_loss 0.10285975497215986 test_loss: 0.10523411035537719
epoch: 147 training_loss 0.097829894348979 test_loss: 0.1204066514968872
epoch: 148 training_loss 0.0994866761751473 test_loss: 0.1161685824394226
epoch: 149 training_loss 0.09676197394728661 test_loss: 0.11269670724868774
epoch: 0 training_loss 0.35319168254733085 test_loss: 0.23617446422576904
epoch: 1 training_loss 0.22659480519592762 test_loss: 0.19756492376327514
epoch: 2 training_loss 0.18229565724730493 test_loss: 0.18130276203155518
epoch: 3 training_loss 0.1671296153217554 test_loss: 0.18535408973693848
epoch: 4 training_loss 0.1640748955309391 test_loss: 0.15380425453186036
epoch: 5 training_loss 0.1495467036217451 test_loss: 0.13676538467407226
epoch: 6 training_loss 0.14266965977847576 test_loss: 0.13939957618713378
epoch: 7 training_loss 0.13824346140027047 test_loss: 0.1282614588737488
epoch: 8 training_loss 0.13387613803148268 test_loss: 0.12157895565032958
epoch: 9 training_loss 0.12252844750881195 test_loss: 0.14446325302124025
epoch: 10 training_loss 0.13012237563729287 test_loss: 0.1169123649597168
epoch: 11 training_loss 0.12320058025419713 test_loss: 0.12118598222732543
epoch: 12 training_loss 0.12206248253583908 test_loss: 0.11570590734481812
epoch: 13 training_loss 0.12191215541213751 test_loss: 0.11660615205764771
epoch: 14 training_loss 0.12459160406142473 test_loss: 0.12406262159347534
epoch: 15 training_loss 0.118910514973104 test_loss: 0.1236613154411316
epoch: 16 training_loss 0.12272678028792143 test_loss: 0.11546108722686768
epoch: 17 training_loss 0.11709567740559577 test_loss: 0.12213969230651855
epoch: 18 training_loss 0.11950017545372248 test_loss: 0.1260100245475769
epoch: 19 training_loss 0.11775193650275469 test_loss: 0.1101395845413208
epoch: 20 training_loss 0.12027964688837528 test_loss: 0.12178699970245362
epoch: 21 training_loss 0.12388847060501576 test_loss: 0.11199724674224854
epoch: 22 training_loss 0.11752812389284373 test_loss: 0.11840535402297973
epoch: 23 training_loss 0.11782631687819958 test_loss: 0.1280278205871582
epoch: 24 training_loss 0.11006868585944175 test_loss: 0.1052865982055664
epoch: 25 training_loss 0.11290788054466247 test_loss: 0.10493837594985962
epoch: 26 training_loss 0.11800510067492724 test_loss: 0.11306676864624024
epoch: 27 training_loss 0.10307517677545547 test_loss: 0.112275230884552
epoch: 28 training_loss 0.1157971939817071 test_loss: 0.11543784141540528
epoch: 29 training_loss 0.1170741654932499 test_loss: 0.11688772439956666
epoch: 30 training_loss 0.11736013583838939 test_loss: 0.10183988809585572
epoch: 31 training_loss 0.10268325090408326 test_loss: 0.11685742139816284
epoch: 32 training_loss 0.11687965590506792 test_loss: 0.11651766300201416
epoch: 33 training_loss 0.1132508996874094 test_loss: 0.11182886362075806
epoch: 34 training_loss 0.11600665345788003 test_loss: 0.1092922568321228
epoch: 35 training_loss 0.1033633871935308 test_loss: 0.10643265247344971
epoch: 36 training_loss 0.10928136806935072 test_loss: 0.12019969224929809
epoch: 37 training_loss 0.11015704829245805 test_loss: 0.11918578147888184
epoch: 38 training_loss 0.10401185296475887 test_loss: 0.11560547351837158
epoch: 39 training_loss 0.11377430353313685 test_loss: 0.11826294660568237
epoch: 40 training_loss 0.11039113510400057 test_loss: 0.11125133037567139
epoch: 41 training_loss 0.10466680334880948 test_loss: 0.10442601442337036
epoch: 42 training_loss 0.1116956214979291 test_loss: 0.11356904506683349
epoch: 43 training_loss 0.11760298818349839 test_loss: 0.10737403631210327
epoch: 44 training_loss 0.11485858667641878 test_loss: 0.1184626579284668
epoch: 45 training_loss 0.11180127039551735 test_loss: 0.10406429767608642
epoch: 46 training_loss 0.10827994760125875 test_loss: 0.10905206203460693
epoch: 47 training_loss 0.10443171970546246 test_loss: 0.11459052562713623
epoch: 48 training_loss 0.10463418625295162 test_loss: 0.11150585412979126
epoch: 49 training_loss 0.0961693274974823 test_loss: 0.1033218264579773
epoch: 50 training_loss 0.11435232497751713 test_loss: 0.09582346677780151
epoch: 51 training_loss 0.10339271180331706 test_loss: 0.11712765693664551
epoch: 52 training_loss 0.1099449871480465 test_loss: 0.11310393810272217
epoch: 53 training_loss 0.1023609465174377 test_loss: 0.10052214860916138
epoch: 54 training_loss 0.10265067435801029 test_loss: 0.0994240939617157
epoch: 55 training_loss 0.1064572499319911 test_loss: 0.1286892533302307
epoch: 56 training_loss 0.1008473795093596 test_loss: 0.09428426623344421
epoch: 57 training_loss 0.10679238446056843 test_loss: 0.1244891881942749
epoch: 58 training_loss 0.10686318032443523 test_loss: 0.10680408477783203
epoch: 59 training_loss 0.10857419986277819 test_loss: 0.10173957347869873
epoch: 60 training_loss 0.10356304988265037 test_loss: 0.10466623306274414
epoch: 61 training_loss 0.11012286715209484 test_loss: 0.10574095249176026
epoch: 62 training_loss 0.10460595846176148 test_loss: 0.1079213261604309
epoch: 63 training_loss 0.10482981923967599 test_loss: 0.09641035199165345
epoch: 64 training_loss 0.10895594976842403 test_loss: 0.10818164348602295
epoch: 65 training_loss 0.10110920775681734 test_loss: 0.09447303414344788
epoch: 66 training_loss 0.10757180839776993 test_loss: 0.1198285698890686
epoch: 67 training_loss 0.11173628106713294 test_loss: 0.10332729816436767
epoch: 68 training_loss 0.10349253065884113 test_loss: 0.10014544725418091
epoch: 69 training_loss 0.1034037558361888 test_loss: 0.10766465663909912
epoch: 70 training_loss 0.10347971884533763 test_loss: 0.10432584285736084
epoch: 71 training_loss 0.10391074512153864 test_loss: 0.10873333215713502
epoch: 72 training_loss 0.1010572973638773 test_loss: 0.10683193206787109
epoch: 73 training_loss 0.10174879122525454 test_loss: 0.11624538898468018
epoch: 74 training_loss 0.09997692873701453 test_loss: 0.0996210515499115
epoch: 75 training_loss 0.10988134805113077 test_loss: 0.10085567235946655
epoch: 76 training_loss 0.11412530690431595 test_loss: 0.11604545116424561
epoch: 77 training_loss 0.10532062564045191 test_loss: 0.11077029705047607
epoch: 78 training_loss 0.10599646370857954 test_loss: 0.10075875520706176
epoch: 79 training_loss 0.09879320353269577 test_loss: 0.09700633287429809
epoch: 80 training_loss 0.10587124824523926 test_loss: 0.10634217262268067
epoch: 81 training_loss 0.10960395999252796 test_loss: 0.10385855436325073
epoch: 82 training_loss 0.1084885348752141 test_loss: 0.1037889838218689
epoch: 83 training_loss 0.10847790734842419 test_loss: 0.09626514315605164
epoch: 84 training_loss 0.10693599138408899 test_loss: 0.11808006763458252
epoch: 85 training_loss 0.10662011552602052 test_loss: 0.0934731662273407
epoch: 86 training_loss 0.10332659307867288 test_loss: 0.09191128611564636
epoch: 87 training_loss 0.1033545521646738 test_loss: 0.11341414451599122
epoch: 88 training_loss 0.11051034156233072 test_loss: 0.10926598310470581
epoch: 89 training_loss 0.09946705886162817 test_loss: 0.09913955926895142
epoch: 90 training_loss 0.09746477134525776 test_loss: 0.09905946254730225
epoch: 91 training_loss 0.10424387351609767 test_loss: 0.11267073154449463
epoch: 92 training_loss 0.09754381064325571 test_loss: 0.09878267049789428
epoch: 93 training_loss 0.10501974944025277 test_loss: 0.09371359348297119
epoch: 94 training_loss 0.10739121910184622 test_loss: 0.09671210646629333
epoch: 95 training_loss 0.09626462381333113 test_loss: 0.11406631469726562
epoch: 96 training_loss 0.10008895361796022 test_loss: 0.0868632435798645
epoch: 97 training_loss 0.10286200452595949 test_loss: 0.10296814441680908
epoch: 98 training_loss 0.0987265020608902 test_loss: 0.10456998348236084
epoch: 99 training_loss 0.11060222320258617 test_loss: 0.10267257690429688
epoch: 100 training_loss 0.10170219089835882 test_loss: 0.10602266788482666
epoch: 101 training_loss 0.10191850990056991 test_loss: 0.09301083087921143
epoch: 102 training_loss 0.10706271093338728 test_loss: 0.09823694229125976
epoch: 103 training_loss 0.10299122866243124 test_loss: 0.09882708191871643
epoch: 104 training_loss 0.10174663761630655 test_loss: 0.09779312014579773
epoch: 105 training_loss 0.09951324073597789 test_loss: 0.10074037313461304
epoch: 106 training_loss 0.10836416199803352 test_loss: 0.09875147342681885
epoch: 107 training_loss 0.094303935803473 test_loss: 0.10223503112792968
epoch: 108 training_loss 0.10486787680536508 test_loss: 0.10733884572982788
epoch: 109 training_loss 0.10091160330921412 test_loss: 0.09665836691856385
epoch: 110 training_loss 0.1035483242571354 test_loss: 0.09852566123008728
epoch: 111 training_loss 0.10111850986257195 test_loss: 0.10459045171737671
epoch: 112 training_loss 0.09980408262461424 test_loss: 0.10478558540344238
epoch: 113 training_loss 0.10609426295384765 test_loss: 0.11795487403869628
epoch: 114 training_loss 0.09939306646585465 test_loss: 0.0909566581249237
epoch: 115 training_loss 0.11215174362063408 test_loss: 0.09628180265426636
epoch: 116 training_loss 0.10419356528669596 test_loss: 0.0782367467880249
epoch: 117 training_loss 0.10644739156588912 test_loss: 0.09559218883514405
epoch: 118 training_loss 0.09926679041236638 test_loss: 0.10911892652511597
epoch: 119 training_loss 0.09654591340571642 test_loss: 0.09393996000289917
epoch: 120 training_loss 0.09107000313699246 test_loss: 0.11131248474121094
epoch: 121 training_loss 0.09414506159722805 test_loss: 0.0895493507385254
epoch: 122 training_loss 0.1033335586450994 test_loss: 0.1020275592803955
epoch: 123 training_loss 0.09766535000875592 test_loss: 0.09907464981079102
epoch: 124 training_loss 0.10051222171634436 test_loss: 0.10083786249160767
epoch: 125 training_loss 0.10617983773350716 test_loss: 0.10334099531173706
epoch: 126 training_loss 0.09890103694051504 test_loss: 0.10037064552307129
epoch: 127 training_loss 0.09799938727170229 test_loss: 0.1103399634361267
epoch: 128 training_loss 0.09469370698556304 test_loss: 0.10263311862945557
epoch: 129 training_loss 0.10400794705376029 test_loss: 0.11540638208389283
epoch: 130 training_loss 0.10328151781111955 test_loss: 0.09692238569259644
epoch: 131 training_loss 0.10099115887656808 test_loss: 0.10680330991744995
epoch: 132 training_loss 0.09951667945832014 test_loss: 0.10783182382583618
epoch: 133 training_loss 0.09215502050705254 test_loss: 0.10563229322433472
epoch: 134 training_loss 0.10253981171175837 test_loss: 0.0952635645866394
epoch: 135 training_loss 0.09779389837756752 test_loss: 0.11449031829833985
epoch: 136 training_loss 0.09855372201651334 test_loss: 0.08372684121131897
epoch: 137 training_loss 0.10250975474715233 test_loss: 0.1269722104072571
epoch: 138 training_loss 0.10275663834065199 test_loss: 0.10345652103424072
epoch: 139 training_loss 0.09948039371520281 test_loss: 0.1012343406677246
epoch: 140 training_loss 0.09750528842210769 test_loss: 0.10681842565536499
epoch: 141 training_loss 0.10574048608541489 test_loss: 0.10368233919143677
epoch: 142 training_loss 0.09717032419517636 test_loss: 0.10609196424484253
epoch: 143 training_loss 0.1011639472283423 test_loss: 0.09442251920700073
epoch: 144 training_loss 0.10572025220841169 test_loss: 0.11194107532501221
epoch: 145 training_loss 0.10025461759418249 test_loss: 0.09866570830345153
epoch: 146 training_loss 0.09846198676154018 test_loss: 0.09949839115142822
epoch: 147 training_loss 0.10060929249972105 test_loss: 0.10806632041931152
epoch: 148 training_loss 0.09590293064713479 test_loss: 0.10444748401641846
epoch: 149 training_loss 0.10105737622827292 test_loss: 0.08412175178527832
epoch: 0 training_loss 0.3194800181686878 test_loss: 0.23994529247283936
epoch: 1 training_loss 0.22665150284767152 test_loss: 0.22784221172332764
epoch: 2 training_loss 0.18629601381719113 test_loss: 0.1769225239753723
epoch: 3 training_loss 0.1757195284962654 test_loss: 0.17646204233169555
epoch: 4 training_loss 0.17180584624409675 test_loss: 0.15760667324066163
epoch: 5 training_loss 0.15240514531731605 test_loss: 0.14802663326263427
epoch: 6 training_loss 0.13864773638546468 test_loss: 0.16388627290725707
epoch: 7 training_loss 0.14000219836831093 test_loss: 0.14361692667007447
epoch: 8 training_loss 0.14602897398173809 test_loss: 0.125495445728302
epoch: 9 training_loss 0.1330210740491748 test_loss: 0.14742631912231446
epoch: 10 training_loss 0.13943548496812583 test_loss: 0.12408217191696166
epoch: 11 training_loss 0.12888379093259572 test_loss: 0.1327309012413025
epoch: 12 training_loss 0.1349132602289319 test_loss: 0.1430643081665039
epoch: 13 training_loss 0.11952970631420612 test_loss: 0.1209145188331604
epoch: 14 training_loss 0.123591578155756 test_loss: 0.14282565116882323
epoch: 15 training_loss 0.11668938744813204 test_loss: 0.11092073917388916
epoch: 16 training_loss 0.11776824845001102 test_loss: 0.1330593466758728
epoch: 17 training_loss 0.11778565399348735 test_loss: 0.11817975044250488
epoch: 18 training_loss 0.1253947375714779 test_loss: 0.11266727447509765
epoch: 19 training_loss 0.11672222778201104 test_loss: 0.12562676668167114
epoch: 20 training_loss 0.1172049792855978 test_loss: 0.11631977558135986
epoch: 21 training_loss 0.1246698223426938 test_loss: 0.11883809566497802
epoch: 22 training_loss 0.11061161119490862 test_loss: 0.12641093730926514
epoch: 23 training_loss 0.12330909047275782 test_loss: 0.1214680790901184
epoch: 24 training_loss 0.11239386346191167 test_loss: 0.11523348093032837
epoch: 25 training_loss 0.12260761357843876 test_loss: 0.12436320781707763
epoch: 26 training_loss 0.11143794845789672 test_loss: 0.1413271903991699
epoch: 27 training_loss 0.11862449422478676 test_loss: 0.12111867666244507
epoch: 28 training_loss 0.11400040235370397 test_loss: 0.11582717895507813
epoch: 29 training_loss 0.11937699265778065 test_loss: 0.11733323335647583
epoch: 30 training_loss 0.10895184982568025 test_loss: 0.12476069927215576
epoch: 31 training_loss 0.11697534320876002 test_loss: 0.11719481945037842
epoch: 32 training_loss 0.12142236022278667 test_loss: 0.10519907474517823
epoch: 33 training_loss 0.11020140100270509 test_loss: 0.13189845085144042
epoch: 34 training_loss 0.10304316814988851 test_loss: 0.10780245065689087
epoch: 35 training_loss 0.10447413736954331 test_loss: 0.10968097448348998
epoch: 36 training_loss 0.11589055426418782 test_loss: 0.11502766609191895
epoch: 37 training_loss 0.10917464829981327 test_loss: 0.12299288511276245
epoch: 38 training_loss 0.12266789775341749 test_loss: 0.11741849184036254
epoch: 39 training_loss 0.1071902633830905 test_loss: 0.11503961086273193
epoch: 40 training_loss 0.11496731206774712 test_loss: 0.1145711064338684
epoch: 41 training_loss 0.10800051521509886 test_loss: 0.1140981674194336
epoch: 42 training_loss 0.1081244183704257 test_loss: 0.10526882410049439
epoch: 43 training_loss 0.11622298639267684 test_loss: 0.13572653532028198
epoch: 44 training_loss 0.11238251622766256 test_loss: 0.1216777205467224
epoch: 45 training_loss 0.11264717545360327 test_loss: 0.11236578226089478
epoch: 46 training_loss 0.1070828389748931 test_loss: 0.10917216539382935
epoch: 47 training_loss 0.11992420256137848 test_loss: 0.13578591346740723
epoch: 48 training_loss 0.11349832076579332 test_loss: 0.10186108350753784
epoch: 49 training_loss 0.11433429479598999 test_loss: 0.11249638795852661
epoch: 50 training_loss 0.10829759135842323 test_loss: 0.10568423271179199
epoch: 51 training_loss 0.10923254748806357 test_loss: 0.10732798576354981
epoch: 52 training_loss 0.11138821545988321 test_loss: 0.10966242551803589
epoch: 53 training_loss 0.10686499174684286 test_loss: 0.1146215558052063
epoch: 54 training_loss 0.11333901632577181 test_loss: 0.11070420742034912
epoch: 55 training_loss 0.11286731615662575 test_loss: 0.1151473879814148
epoch: 56 training_loss 0.10788762375712395 test_loss: 0.11863982677459717
epoch: 57 training_loss 0.10949387203902006 test_loss: 0.12359038591384888
epoch: 58 training_loss 0.10613960243761539 test_loss: 0.11219648122787476
epoch: 59 training_loss 0.10213773878291249 test_loss: 0.11379785537719726
epoch: 60 training_loss 0.10350050460547208 test_loss: 0.10967409610748291
epoch: 61 training_loss 0.10871387012302876 test_loss: 0.10864441394805908
epoch: 62 training_loss 0.11348998365923763 test_loss: 0.10826292037963867
epoch: 63 training_loss 0.11243879355490208 test_loss: 0.11635135412216187
epoch: 64 training_loss 0.10978136198595166 test_loss: 0.1137877345085144
epoch: 65 training_loss 0.10949521133676171 test_loss: 0.10604262351989746
epoch: 66 training_loss 0.11313363697379827 test_loss: 0.0966804563999176
epoch: 67 training_loss 0.11174590200185776 test_loss: 0.10204430818557739
epoch: 68 training_loss 0.11740745598450303 test_loss: 0.1090777039527893
epoch: 69 training_loss 0.10796291820704937 test_loss: 0.11479201316833496
epoch: 70 training_loss 0.10926564592868089 test_loss: 0.11266416311264038
epoch: 71 training_loss 0.10575827609747648 test_loss: 0.09995233416557311
epoch: 72 training_loss 0.11065750170499086 test_loss: 0.12324609756469726
epoch: 73 training_loss 0.10749148342758418 test_loss: 0.11016428470611572
epoch: 74 training_loss 0.10367434930056334 test_loss: 0.10697975158691406
epoch: 75 training_loss 0.11541021049022675 test_loss: 0.11150945425033569
epoch: 76 training_loss 0.11146540431305767 test_loss: 0.12324217557907105
epoch: 77 training_loss 0.10821537144482135 test_loss: 0.10188398361206055
epoch: 78 training_loss 0.1030435948446393 test_loss: 0.10289347171783447
epoch: 79 training_loss 0.1099428252875805 test_loss: 0.11105967760086059
epoch: 80 training_loss 0.10562994157895446 test_loss: 0.12459471225738525
epoch: 81 training_loss 0.10931568659842014 test_loss: 0.11145153045654296
epoch: 82 training_loss 0.11149989478290082 test_loss: 0.1165496826171875
epoch: 83 training_loss 0.10441959029063583 test_loss: 0.11328389644622802
epoch: 84 training_loss 0.10366089316084981 test_loss: 0.12109966278076172
epoch: 85 training_loss 0.11059401331469416 test_loss: 0.11528487205505371
epoch: 86 training_loss 0.11086444806307555 test_loss: 0.10627877712249756
epoch: 87 training_loss 0.10599056255072355 test_loss: 0.11742351055145264
epoch: 88 training_loss 0.10542210221290588 test_loss: 0.1256684422492981
epoch: 89 training_loss 0.10739137765020132 test_loss: 0.10952718257904052
epoch: 90 training_loss 0.10575507342815399 test_loss: 0.10716512203216552
epoch: 91 training_loss 0.11198300827294588 test_loss: 0.12570761442184447
epoch: 92 training_loss 0.1031713105738163 test_loss: 0.11097382307052613
epoch: 93 training_loss 0.10353721491992474 test_loss: 0.11455016136169434
epoch: 94 training_loss 0.10806474428623915 test_loss: 0.11648470163345337
epoch: 95 training_loss 0.10922704573720693 test_loss: 0.11332684755325317
epoch: 96 training_loss 0.1022426388785243 test_loss: 0.10946331024169922
epoch: 97 training_loss 0.10736665900796652 test_loss: 0.12169408798217773
epoch: 98 training_loss 0.10323448646813631 test_loss: 0.12499409914016724
epoch: 99 training_loss 0.11218553856015205 test_loss: 0.09946801066398621
epoch: 100 training_loss 0.11244608383625745 test_loss: 0.1100354552268982
epoch: 101 training_loss 0.1115271194651723 test_loss: 0.11433576345443726
epoch: 102 training_loss 0.10684429086744786 test_loss: 0.11153579950332641
epoch: 103 training_loss 0.1126319245994091 test_loss: 0.09789877533912658
epoch: 104 training_loss 0.1077211869135499 test_loss: 0.0985498309135437
epoch: 105 training_loss 0.112196154743433 test_loss: 0.1101081132888794
epoch: 106 training_loss 0.10124121632426977 test_loss: 0.11133692264556885
epoch: 107 training_loss 0.1055099057033658 test_loss: 0.10832362174987793
epoch: 108 training_loss 0.11310485135763884 test_loss: 0.1009061336517334
epoch: 109 training_loss 0.10093639887869359 test_loss: 0.11220245361328125
epoch: 110 training_loss 0.11148873262107373 test_loss: 0.1079513669013977
epoch: 111 training_loss 0.11093210004270077 test_loss: 0.1024702548980713
epoch: 112 training_loss 0.10704646080732345 test_loss: 0.12138922214508056
epoch: 113 training_loss 0.10787805363535881 test_loss: 0.10836156606674194
epoch: 114 training_loss 0.11017293298617005 test_loss: 0.10118587017059326
epoch: 115 training_loss 0.10693915572017432 test_loss: 0.11728208065032959
epoch: 116 training_loss 0.09751840949058532 test_loss: 0.11874545812606811
epoch: 117 training_loss 0.10525080146268011 test_loss: 0.11703701019287109
epoch: 118 training_loss 0.10700564961880446 test_loss: 0.10565000772476196
epoch: 119 training_loss 0.09941813167184592 test_loss: 0.11356177330017089
epoch: 120 training_loss 0.11097424816340208 test_loss: 0.11016898155212403
epoch: 121 training_loss 0.1098236496374011 test_loss: 0.1240807056427002
epoch: 122 training_loss 0.10469090592116118 test_loss: 0.0895050585269928
epoch: 123 training_loss 0.09860577542334795 test_loss: 0.12038208246231079
epoch: 124 training_loss 0.10207838788628579 test_loss: 0.11284495592117309
epoch: 125 training_loss 0.11283448196947575 test_loss: 0.10291532278060914
epoch: 126 training_loss 0.10516477441415191 test_loss: 0.11283305883407593
epoch: 127 training_loss 0.10712008211761713 test_loss: 0.12131770849227905
epoch: 128 training_loss 0.11733940284699201 test_loss: 0.09680787324905396
epoch: 129 training_loss 0.10638639029115439 test_loss: 0.10151946544647217
epoch: 130 training_loss 0.10741784498095512 test_loss: 0.10263057947158813
epoch: 131 training_loss 0.10264761745929718 test_loss: 0.11388320922851562
epoch: 132 training_loss 0.10096127085387707 test_loss: 0.1248057723045349
epoch: 133 training_loss 0.10197285338304937 test_loss: 0.11315580606460571
epoch: 134 training_loss 0.10548484610393644 test_loss: 0.09474559426307679
epoch: 135 training_loss 0.10778703765943647 test_loss: 0.11835933923721313
epoch: 136 training_loss 0.10613328460603952 test_loss: 0.10599184036254883
epoch: 137 training_loss 0.10352665282785893 test_loss: 0.10577058792114258
epoch: 138 training_loss 0.10158390425145626 test_loss: 0.11554168462753296
epoch: 139 training_loss 0.10215126853436232 test_loss: 0.101992666721344
epoch: 140 training_loss 0.10535923067480325 test_loss: 0.10995142459869385
epoch: 141 training_loss 0.1081792152300477 test_loss: 0.11663326025009155
epoch: 142 training_loss 0.10495878595858812 test_loss: 0.10252430438995361
epoch: 143 training_loss 0.1022877287492156 test_loss: 0.1123517632484436
epoch: 144 training_loss 0.09966898363083601 test_loss: 0.10627361536026
epoch: 145 training_loss 0.10019121767953038 test_loss: 0.11125391721725464
epoch: 146 training_loss 0.1059851596876979 test_loss: 0.09939440488815307
epoch: 147 training_loss 0.10208011589944363 test_loss: 0.1169356346130371
epoch: 148 training_loss 0.09731103114783764 test_loss: 0.10977343320846558
epoch: 149 training_loss 0.09809219755232335 test_loss: 0.11383273601531982
episode: 0 training return: -999.9981869644952
episode: 1 training return: -999.997978641947
episode: 2 training return: -999.9981685329492
episode: 3 training return: -999.997518099858
epoch: 1 test_true_pfm: -0.3654985708878628 sim_pfm: -999.9397044651761
episode: 4 training return: -999.9952164837715
episode: 5 training return: -999.9981779194136
episode: 6 training return: -999.9980422778502
episode: 7 training return: -999.9979800011696
epoch: 2 test_true_pfm: -0.2993325985816709 sim_pfm: -999.9416370219684
episode: 8 training return: -999.9973904979472
episode: 9 training return: -999.9964594330268
episode: 10 training return: -999.9969562675644
episode: 11 training return: -999.9976444077988
epoch: 3 test_true_pfm: -0.7374630889141699 sim_pfm: -999.9400957112376
episode: 12 training return: -999.9984757981215
episode: 13 training return: -999.9984960492686
episode: 14 training return: -999.9983224943976
episode: 15 training return: -999.9986352591036
epoch: 4 test_true_pfm: -0.5445475834884778 sim_pfm: -999.9400641999988
episode: 16 training return: -999.9978213220888
episode: 17 training return: -999.9980353461817
episode: 18 training return: -999.9984319491753
episode: 19 training return: -999.9970571024093
epoch: 5 test_true_pfm: -0.8742987666380806 sim_pfm: -999.9395548209121
episode: 20 training return: -999.9946372342343
episode: 21 training return: -999.9971802514787
episode: 22 training return: -999.9983320517088
episode: 23 training return: -999.9976473368724
epoch: 6 test_true_pfm: -0.9744401165113246 sim_pfm: -999.9393736015185
episode: 24 training return: -999.9976762460649
episode: 25 training return: -999.9982157342059
episode: 26 training return: -999.9988525969914
episode: 27 training return: -999.9973932625143
epoch: 7 test_true_pfm: -0.5312610763681543 sim_pfm: -999.9392567326446
episode: 28 training return: -999.9982314672229
episode: 29 training return: -999.9973271597049
episode: 30 training return: -999.994967538224
episode: 31 training return: -999.997754177423
epoch: 8 test_true_pfm: 0.056869780527711876 sim_pfm: -999.9399362628619
episode: 32 training return: -999.9971895211903
episode: 33 training return: -999.9981439863687
episode: 34 training return: -999.9978576824802
episode: 35 training return: -999.9985838106966
epoch: 9 test_true_pfm: 0.11756787485988124 sim_pfm: -999.9379938451849
episode: 36 training return: -999.9980836442204
episode: 37 training return: -999.9976282540945
episode: 38 training return: -999.9980705822377
episode: 39 training return: -999.9961663118626
epoch: 10 test_true_pfm: 0.1495852555184524 sim_pfm: -999.9394939697335
episode: 40 training return: -999.997576227127
episode: 41 training return: -999.9987396719906
episode: 42 training return: -999.9983013987755
episode: 43 training return: -999.9974135857587
epoch: 11 test_true_pfm: -0.7082414980662709 sim_pfm: -999.9400622099012
episode: 44 training return: -999.9981857537923
episode: 45 training return: -999.9977003097393
episode: 46 training return: -999.9992285577885
episode: 47 training return: -999.9988083275482
epoch: 12 test_true_pfm: 0.018436244725067764 sim_pfm: -999.9405923031754
episode: 48 training return: -999.9985589913815
episode: 49 training return: -999.9968563337272
episode: 50 training return: -999.9927502101424
episode: 51 training return: -999.9962567613065
epoch: 13 test_true_pfm: -0.039389462221696446 sim_pfm: -999.9407704767224
episode: 52 training return: -999.997188643618
episode: 53 training return: -999.9820086253021
episode: 54 training return: -999.9977472202448
episode: 55 training return: -999.9972426118403
epoch: 14 test_true_pfm: -0.7787996043028761 sim_pfm: -999.9396955961334
episode: 56 training return: -999.9975855353905
episode: 57 training return: -999.9983635054747
episode: 58 training return: -999.9982781122736
episode: 59 training return: -999.9985903370807
epoch: 15 test_true_pfm: 0.11600298270408027 sim_pfm: -999.9385804533767
episode: 60 training return: -999.9976492543427
episode: 61 training return: -999.9986128802624
episode: 62 training return: -999.9971569415152
episode: 63 training return: -999.9984322152632
epoch: 16 test_true_pfm: -0.09032266338939558 sim_pfm: -999.9394955415845
episode: 64 training return: -999.9985603129213
episode: 65 training return: -999.9990863632698
episode: 66 training return: -999.9968248073596
episode: 67 training return: -999.9983528117897
epoch: 17 test_true_pfm: -0.4778075924961574 sim_pfm: -999.9408429922165
episode: 68 training return: -999.9977901254148
episode: 69 training return: -999.9978896424766
episode: 70 training return: -999.9982003992591
episode: 71 training return: -999.9968554951664
epoch: 18 test_true_pfm: -0.13708585375323748 sim_pfm: -999.9401509453379
episode: 72 training return: -999.9989648967518
episode: 73 training return: -999.9977319244405
episode: 74 training return: -999.9976988054707
episode: 75 training return: -999.9979334252617
epoch: 19 test_true_pfm: -0.6105255450400552 sim_pfm: -999.9388333719511
episode: 76 training return: -999.9976525358328
episode: 77 training return: -999.9960676089484
episode: 78 training return: -999.997683295002
episode: 79 training return: -999.9896659466974
epoch: 20 test_true_pfm: 0.19409863829583554 sim_pfm: -999.9400376261825
episode: 80 training return: -999.998231562544
episode: 81 training return: -999.9974701127289
episode: 82 training return: -999.9976436696933
episode: 83 training return: -999.9965959175579
epoch: 21 test_true_pfm: 0.34906365694396974 sim_pfm: -999.9394979411969
episode: 84 training return: -999.9971831104294
episode: 85 training return: -999.9987543999871
episode: 86 training return: -999.9966362321323
episode: 87 training return: -999.9993162551949
epoch: 22 test_true_pfm: 0.4124144855608991 sim_pfm: -999.9394747556454
episode: 88 training return: -999.9971243651153
episode: 89 training return: -999.9983265944232
episode: 90 training return: -999.9979424864845
episode: 91 training return: -999.9991752436927
epoch: 23 test_true_pfm: -0.44976216396776786 sim_pfm: -999.9393136607013
episode: 92 training return: -999.9992407045809
episode: 93 training return: -999.9981713447038
episode: 94 training return: -999.9983669020814
episode: 95 training return: -999.990645445064
epoch: 24 test_true_pfm: 0.15014500907613496 sim_pfm: -999.9393184510362
episode: 96 training return: -999.9977159036983
episode: 97 training return: -999.9974870710953
episode: 98 training return: -999.9972874883646
episode: 99 training return: -999.9983033730696
epoch: 25 test_true_pfm: 0.05782028461384423 sim_pfm: -999.9388035652602
episode: 100 training return: -999.9978901031001
episode: 101 training return: -999.9976278236919
episode: 102 training return: -999.9987533163983
episode: 103 training return: -999.9984873228271
epoch: 26 test_true_pfm: -0.46903372334631926 sim_pfm: -999.9413368290934
episode: 104 training return: -999.9984302785106
episode: 105 training return: -999.9989247503278
episode: 106 training return: -999.9989600813641
episode: 107 training return: -999.998252129277
epoch: 27 test_true_pfm: -0.04723917326174421 sim_pfm: -999.9393562050055
episode: 108 training return: -999.9971101501013
episode: 109 training return: -999.9987420065701
episode: 110 training return: -999.9981690992689
episode: 111 training return: -999.9986911664959
epoch: 28 test_true_pfm: -0.6826858619545301 sim_pfm: -999.9398402114483
episode: 112 training return: -999.9981434992987
episode: 113 training return: -999.9970968717943
episode: 114 training return: -999.9834925352144
episode: 115 training return: -999.9951254454247
epoch: 29 test_true_pfm: -0.9376524356422867 sim_pfm: -999.9397419635811
episode: 116 training return: -999.9979758564211
episode: 117 training return: -999.997310342708
episode: 118 training return: -999.9971938085138
episode: 119 training return: -999.998572106921
epoch: 30 test_true_pfm: -0.5540597324060472 sim_pfm: -999.9386115401397
episode: 120 training return: -999.9993595372335
episode: 121 training return: -999.9983380584416
episode: 122 training return: -999.9984906169155
episode: 123 training return: -999.9982197066415
epoch: 31 test_true_pfm: -0.43883908684740924 sim_pfm: -999.939341741548
episode: 124 training return: -999.9977061420655
episode: 125 training return: -999.9987660927997
episode: 126 training return: -999.9982256018044
episode: 127 training return: -999.998759544013
epoch: 32 test_true_pfm: -0.07010647117022874 sim_pfm: -999.9380276227124
episode: 128 training return: -999.9978379002023
episode: 129 training return: -999.9987687308502
episode: 130 training return: -999.9980420823604
episode: 131 training return: -999.997985984212
epoch: 33 test_true_pfm: -0.3245026318934824 sim_pfm: -999.941369332379
episode: 132 training return: -999.9980943332772
episode: 133 training return: -999.9993970130354
episode: 134 training return: -999.9958333285497
episode: 135 training return: -999.9975751815833
epoch: 34 test_true_pfm: -0.8259202410086529 sim_pfm: -999.9390744613283
episode: 136 training return: -999.9993538809279
episode: 137 training return: -999.9994015433675
episode: 138 training return: -999.9990585895653
episode: 139 training return: -999.9972443708604
epoch: 35 test_true_pfm: -0.4166892668805319 sim_pfm: -999.94029595631
episode: 140 training return: -999.9981452377604
episode: 141 training return: -999.9965566671395
episode: 142 training return: -999.9993288849417
episode: 143 training return: -999.9991252847611
epoch: 36 test_true_pfm: -0.36200376728015576 sim_pfm: -999.9402260323714
episode: 144 training return: -999.9969306590916
episode: 145 training return: -999.9983522264268
episode: 146 training return: -999.99936301646
episode: 147 training return: -999.9994304240839
epoch: 37 test_true_pfm: -0.24619575238536853 sim_pfm: -999.9393872896098
episode: 148 training return: -999.9980908422311
episode: 149 training return: -999.9981521361099
episode: 150 training return: -999.9982507120849
episode: 151 training return: -999.9979537682827
epoch: 38 test_true_pfm: 0.375674032511131 sim_pfm: -999.9391126592699
episode: 152 training return: -999.9991634628541
episode: 153 training return: -999.9984826665997
episode: 154 training return: -999.9987996301287
episode: 155 training return: -999.9952027446055
epoch: 39 test_true_pfm: -0.17638868868034088 sim_pfm: -999.9405514157721
episode: 156 training return: -999.9988905281665
episode: 157 training return: -999.9977431482192
episode: 158 training return: -999.9986658692591
episode: 159 training return: -999.996925053603
epoch: 40 test_true_pfm: -0.7759573550091821 sim_pfm: -999.9388621003612
episode: 160 training return: -999.997735408188
episode: 161 training return: -999.9983123619578
episode: 162 training return: -999.9988725238991
episode: 163 training return: -999.996967597285
epoch: 41 test_true_pfm: -0.9645638823524774 sim_pfm: -999.9392093205603
episode: 164 training return: -999.9978996088826
episode: 165 training return: -999.9977312169185
episode: 166 training return: -999.9953862652139
episode: 167 training return: -999.9970844267979
epoch: 42 test_true_pfm: -0.8480550794873801 sim_pfm: -999.9397151161214
episode: 168 training return: -999.9978870583694
episode: 169 training return: -999.9983240036801
episode: 170 training return: -999.9942850510741
episode: 171 training return: -999.9984775614628
epoch: 43 test_true_pfm: -0.7899979935572587 sim_pfm: -999.9404448904603
episode: 172 training return: -999.9983217615991
episode: 173 training return: -999.9920434279012
episode: 174 training return: -999.9950512170676
episode: 175 training return: -999.9988913157131
epoch: 44 test_true_pfm: -0.22002885035868838 sim_pfm: -999.9400064492114
episode: 176 training return: -999.997571254494
episode: 177 training return: -999.9993981829223
episode: 178 training return: -999.9979273533278
episode: 179 training return: -999.9976521908405
epoch: 45 test_true_pfm: -0.1127392422493053 sim_pfm: -999.9380955117596
episode: 180 training return: -999.999023576233
episode: 181 training return: -999.9987623134299
episode: 182 training return: -999.9972079861117
episode: 183 training return: -999.9987454645327
epoch: 46 test_true_pfm: -0.3956057964918136 sim_pfm: -999.9406615734842
episode: 184 training return: -999.9969829067417
episode: 185 training return: -999.9991402033179
episode: 186 training return: -999.9987916896016
episode: 187 training return: -999.9975611257569
epoch: 47 test_true_pfm: 0.05495863280190902 sim_pfm: -999.9400486383728
episode: 188 training return: -999.9975075910784
episode: 189 training return: -999.9969291383137
episode: 190 training return: -999.9978597334614
episode: 191 training return: -999.9968969373446
epoch: 48 test_true_pfm: 0.35215629524936926 sim_pfm: -999.9392290441482
episode: 192 training return: -999.998589781333
episode: 193 training return: -999.9991923268884
episode: 194 training return: -999.9987078156369
episode: 195 training return: -999.9983988147276
epoch: 49 test_true_pfm: -0.10330774841839423 sim_pfm: -999.9405392867599
episode: 196 training return: -999.9974311934387
episode: 197 training return: -999.9979162065334
episode: 198 training return: -999.9986018272507
episode: 199 training return: -999.9985841947579
epoch: 50 test_true_pfm: -0.16467453743923896 sim_pfm: -999.9406733537802
episode: 200 training return: -999.9977501416935
episode: 201 training return: -999.9968721534826
episode: 202 training return: -999.9988030368967
episode: 203 training return: -999.9977265628057
epoch: 51 test_true_pfm: 0.4758643431826047 sim_pfm: -999.9395019211939
episode: 204 training return: -999.9983601379796
episode: 205 training return: -999.9980554457978
episode: 206 training return: -999.9976306857608
episode: 207 training return: -999.9983373133075
epoch: 52 test_true_pfm: 0.47817329638991274 sim_pfm: -999.9397196353722
episode: 208 training return: -999.9982919259581
episode: 209 training return: -999.9961309777151
episode: 210 training return: -999.9978946504095
episode: 211 training return: -999.998504630845
epoch: 53 test_true_pfm: -0.08264998401031991 sim_pfm: -999.9392488744589
episode: 212 training return: -999.9990288786225
episode: 213 training return: -999.9979959782775
episode: 214 training return: -999.9973341247083
episode: 215 training return: -999.9978673685582
epoch: 54 test_true_pfm: -0.12415253354060135 sim_pfm: -999.9387892342261
episode: 216 training return: -999.9963625252839
episode: 217 training return: -999.9983349819356
episode: 218 training return: -999.9970214739636
episode: 219 training return: -999.9982063616994
epoch: 55 test_true_pfm: -0.9032021001909095 sim_pfm: -999.939966479753
episode: 220 training return: -999.997280633395
episode: 221 training return: -999.9993717359373
episode: 222 training return: -999.9990083356734
episode: 223 training return: -999.9984477861199
epoch: 56 test_true_pfm: -0.4051451274156279 sim_pfm: -999.9395407119291
episode: 224 training return: -999.9974591868885
episode: 225 training return: -999.9973336198893
episode: 226 training return: -999.9979306434101
episode: 227 training return: -999.9993031413269
epoch: 57 test_true_pfm: 0.03642454846295332 sim_pfm: -999.9396353969773
episode: 228 training return: -999.9991441855864
episode: 229 training return: -999.9937239269626
episode: 230 training return: -999.9984652969123
episode: 231 training return: -999.9976827498975
epoch: 58 test_true_pfm: 0.19786574785678437 sim_pfm: -999.9393439257065
episode: 232 training return: -999.9970599044807
episode: 233 training return: -999.999255937304
episode: 234 training return: -999.998815064042
episode: 235 training return: -999.9978868343193
epoch: 59 test_true_pfm: -0.42050574612013064 sim_pfm: -999.9399030380795
episode: 236 training return: -999.9970300541687
episode: 237 training return: -999.9970105729786
episode: 238 training return: -999.9977875787055
episode: 239 training return: -999.997337112507
epoch: 60 test_true_pfm: -0.44695235176541304 sim_pfm: -999.9400072785771
episode: 240 training return: -999.9987145153019
episode: 241 training return: -999.999411872715
episode: 242 training return: -999.997761609367
episode: 243 training return: -999.9982089956984
epoch: 61 test_true_pfm: 0.07823711614761097 sim_pfm: -999.9401720420095
episode: 244 training return: -999.9971745796846
episode: 245 training return: -999.998709601017
episode: 246 training return: -999.9981917341157
episode: 247 training return: -999.9985364152101
epoch: 62 test_true_pfm: -0.082736596310031 sim_pfm: -999.9383506732078
episode: 248 training return: -999.9984285276208
episode: 249 training return: -999.9965076722898
episode: 250 training return: -999.9985901010664
episode: 251 training return: -999.9983104025149
epoch: 63 test_true_pfm: -0.27267044266435975 sim_pfm: -999.9399680207979
episode: 252 training return: -999.9980067443994
episode: 253 training return: -999.9966980830399
episode: 254 training return: -999.9919991042492
episode: 255 training return: -999.9985983891272
epoch: 64 test_true_pfm: -0.22133813859738813 sim_pfm: -999.9381885211525
episode: 256 training return: -999.9957534874559
episode: 257 training return: -999.9984874624274
episode: 258 training return: -999.9986607683862
episode: 259 training return: -999.9907362683515
epoch: 65 test_true_pfm: 0.872253293647641 sim_pfm: -999.9407021982697
episode: 260 training return: -999.9979012304752
episode: 261 training return: -999.9980737826771
episode: 262 training return: -999.9960760416433
episode: 263 training return: -999.997597383067
epoch: 66 test_true_pfm: -0.14349383225888832 sim_pfm: -999.9397540474247
episode: 264 training return: -999.9985320957362
episode: 265 training return: -999.9969130015656
episode: 266 training return: -999.9977808888684
episode: 267 training return: -999.9952214380388
epoch: 67 test_true_pfm: -0.5869160264740275 sim_pfm: -999.9385257947374
episode: 268 training return: -999.998193327115
episode: 269 training return: -999.9976932724277
episode: 270 training return: -999.9990543769873
episode: 271 training return: -999.9975249509718
epoch: 68 test_true_pfm: -0.06153131851317747 sim_pfm: -999.9385683450737
episode: 272 training return: -999.9988345379776
episode: 273 training return: -999.9975840757212
episode: 274 training return: -999.9943606636393
episode: 275 training return: -999.9983223991778
epoch: 69 test_true_pfm: -1.2587677900740408 sim_pfm: -999.9393191824006
episode: 276 training return: -999.9945869926652
episode: 277 training return: -999.9990101257096
episode: 278 training return: -999.9973164188702
episode: 279 training return: -999.998554419872
epoch: 70 test_true_pfm: 0.08223030152044292 sim_pfm: -999.9391039586664
episode: 280 training return: -999.998454143361
episode: 281 training return: -999.9981320902592
episode: 282 training return: -999.9858638933295
episode: 283 training return: -999.9662312453062
epoch: 71 test_true_pfm: -0.4184189653472939 sim_pfm: -999.9390163081445
episode: 284 training return: -999.9989350736623
episode: 285 training return: -999.9978864733885
episode: 286 training return: -999.9975422610097
episode: 287 training return: -999.9981720378851
epoch: 72 test_true_pfm: 0.207963728547218 sim_pfm: -999.9393821369789
episode: 288 training return: -999.9984813391154
episode: 289 training return: -999.9976722392304
episode: 290 training return: -999.9937757378468
episode: 291 training return: -999.9990210811615
epoch: 73 test_true_pfm: -0.6188080114421384 sim_pfm: -999.9399309740703
episode: 292 training return: -999.9988594966715
episode: 293 training return: -999.996568752594
episode: 294 training return: -999.9973546700174
episode: 295 training return: -999.9972630828039
epoch: 74 test_true_pfm: -0.5456161061050097 sim_pfm: -999.9395556177486
episode: 296 training return: -999.9951202500101
episode: 297 training return: -999.9984200731502
episode: 298 training return: -999.997277415168
episode: 299 training return: -999.9983966639358
epoch: 75 test_true_pfm: -0.18741189757218948 sim_pfm: -999.9398493141385
episode: 300 training return: -999.998201620923
episode: 301 training return: -999.9963851867004
episode: 302 training return: -999.9955706668695
episode: 303 training return: -999.9984317857421
epoch: 76 test_true_pfm: -0.013918198273674895 sim_pfm: -999.9389014064851
episode: 304 training return: -999.998672953815
episode: 305 training return: -999.9974652485856
episode: 306 training return: -999.9990471995407
episode: 307 training return: -999.9972740940112
epoch: 77 test_true_pfm: -0.5154201669293191 sim_pfm: -999.939000728783
episode: 308 training return: -999.995973164241
episode: 309 training return: -999.991235573909
episode: 310 training return: -999.9990231116575
episode: 311 training return: -999.9976217306972
epoch: 78 test_true_pfm: -0.2941525578084534 sim_pfm: -999.9389585866542
episode: 312 training return: -999.9979398235416
episode: 313 training return: -999.9934099193895
episode: 314 training return: -999.9981133041921
episode: 315 training return: -999.9979429619264
epoch: 79 test_true_pfm: -1.125347929137332 sim_pfm: -999.9391855847465
episode: 316 training return: -999.9980443287668
episode: 317 training return: -999.9985905125915
episode: 318 training return: -999.9982517769214
episode: 319 training return: -999.9971532959381
epoch: 80 test_true_pfm: -0.24568298738112068 sim_pfm: -999.9390541827966
episode: 320 training return: -999.9973821633
episode: 321 training return: -999.9976479156658
episode: 322 training return: -999.9968640978861
episode: 323 training return: -999.9981592592845
epoch: 81 test_true_pfm: -0.5166464453325786 sim_pfm: -999.9378945479953
episode: 324 training return: -999.9969425571918
episode: 325 training return: -999.9975596896408
episode: 326 training return: -999.9983764113331
episode: 327 training return: -999.9981370354722
epoch: 82 test_true_pfm: -0.720116925006255 sim_pfm: -999.9389555035656
episode: 328 training return: -999.9960854789216
episode: 329 training return: -999.9940057675008
episode: 330 training return: -999.9808016224335
episode: 331 training return: -999.9995734845118
epoch: 83 test_true_pfm: 0.1390258793883792 sim_pfm: -999.9399187627718
episode: 332 training return: -999.9979958659184
episode: 333 training return: -999.9974325488876
episode: 334 training return: -999.9975857748188
episode: 335 training return: -999.9987364377295
epoch: 84 test_true_pfm: -0.40489322507674846 sim_pfm: -999.9377086250364
episode: 336 training return: -999.9982515186551
episode: 337 training return: -999.9977087467646
episode: 338 training return: -999.9983238440734
episode: 339 training return: -999.9979042729021
epoch: 85 test_true_pfm: -0.4572006755833893 sim_pfm: -999.93875712715
episode: 340 training return: -999.9970868512366
episode: 341 training return: -999.9978243720539
episode: 342 training return: -999.9986358647758
episode: 343 training return: -999.9961539055954
epoch: 86 test_true_pfm: -0.43440066664326343 sim_pfm: -999.9391088298827
episode: 344 training return: -999.9970158081547
episode: 345 training return: -999.9986327345875
episode: 346 training return: -999.998937832402
episode: 347 training return: -999.9959946432986
epoch: 87 test_true_pfm: 0.4707083822397668 sim_pfm: -999.9396190361125
episode: 348 training return: -999.9987367822615
episode: 349 training return: -999.9975612873983
episode: 350 training return: -999.9987775483539
episode: 351 training return: -999.9964382015409
epoch: 88 test_true_pfm: -0.8592918696721813 sim_pfm: -999.9394434207052
episode: 352 training return: -999.998209694747
episode: 353 training return: -999.9995024898224
episode: 354 training return: -999.9984893493549
episode: 355 training return: -999.9983794734835
epoch: 89 test_true_pfm: -0.8648187132271987 sim_pfm: -999.9389278290854
episode: 356 training return: -999.9983502179387
episode: 357 training return: -999.9980807905567
episode: 358 training return: -999.9987229146857
episode: 359 training return: -999.9983319311987
epoch: 90 test_true_pfm: 0.03186514981571067 sim_pfm: -999.9386872150775
episode: 360 training return: -999.9985324391535
episode: 361 training return: -999.9990960629641
episode: 362 training return: -999.9985239911134
episode: 363 training return: -999.9980521622624
epoch: 91 test_true_pfm: -0.5434765327291228 sim_pfm: -999.9395091842948
episode: 364 training return: -999.9977944937506
episode: 365 training return: -999.9975586039668
episode: 366 training return: -999.9983774897592
episode: 367 training return: -999.9981002341063
epoch: 92 test_true_pfm: -0.030388198980078624 sim_pfm: -999.939455021391
episode: 368 training return: -999.9984727819309
episode: 369 training return: -999.9985704930538
episode: 370 training return: -999.99818427326
episode: 371 training return: -999.9990966698647
epoch: 93 test_true_pfm: -0.1693344496325286 sim_pfm: -999.9388011268743
episode: 372 training return: -999.9968522080469
episode: 373 training return: -999.9981717021652
episode: 374 training return: -999.9974968305241
episode: 375 training return: -999.9983328178181
epoch: 94 test_true_pfm: -0.8046134230813747 sim_pfm: -999.9412627407564
episode: 376 training return: -999.9980809233983
episode: 377 training return: -999.9956603731522
episode: 378 training return: -999.9979149412295
episode: 379 training return: -999.9978735251821
epoch: 95 test_true_pfm: -0.49614525746914145 sim_pfm: -999.9398284704804
episode: 380 training return: -999.9985769862143
episode: 381 training return: -999.9990804030689
episode: 382 training return: -999.9945048708394
episode: 383 training return: -999.9959034471802
epoch: 96 test_true_pfm: -0.60672280096363 sim_pfm: -999.9390765037741
episode: 384 training return: -999.9974798830725
episode: 385 training return: -999.997781090501
episode: 386 training return: -999.9983589183951
episode: 387 training return: -999.9977978904107
epoch: 97 test_true_pfm: -0.18930197083318168 sim_pfm: -999.9382458818877
episode: 388 training return: -999.9982434503843
episode: 389 training return: -999.9985801547695
episode: 390 training return: -999.998319066353
episode: 391 training return: -999.9971987415507
epoch: 98 test_true_pfm: -0.04296692890313177 sim_pfm: -999.9389685571487
episode: 392 training return: -999.965810062882
episode: 393 training return: -999.9990719396903
episode: 394 training return: -999.9985027632365
episode: 395 training return: -999.996103738868
epoch: 99 test_true_pfm: -0.4870539358877392 sim_pfm: -999.9398769225321
episode: 396 training return: -999.9982230944021
episode: 397 training return: -999.9980880088094
episode: 398 training return: -999.9936719152946
episode: 399 training return: -999.9953000150889
epoch: 100 test_true_pfm: 0.3470017256100135 sim_pfm: -999.9395477982007
episode: 400 training return: -999.9958696251399
episode: 401 training return: -999.999576934722
episode: 402 training return: -999.9985555394957
episode: 403 training return: -999.9992375037139
epoch: 101 test_true_pfm: 0.24963243007108896 sim_pfm: -999.9385411818429
episode: 404 training return: -999.9981506798948
episode: 405 training return: -999.9979431083889
episode: 406 training return: -999.9954912834985
episode: 407 training return: -999.9939337537633
epoch: 102 test_true_pfm: -0.963756795076435 sim_pfm: -999.9379669324525
episode: 408 training return: -999.9979118663633
episode: 409 training return: -999.9983128149449
episode: 410 training return: -999.9981213480097
episode: 411 training return: -999.9977899099352
epoch: 103 test_true_pfm: 0.044837420319934575 sim_pfm: -999.939736403713
episode: 412 training return: -999.9979114931972
episode: 413 training return: -999.9980296505037
episode: 414 training return: -999.9980203819987
episode: 415 training return: -999.9975491687233
epoch: 104 test_true_pfm: 0.0622099276040279 sim_pfm: -999.9395682494937
episode: 416 training return: -999.9975820908602
episode: 417 training return: -999.9981316636221
episode: 418 training return: -999.9993488052195
episode: 419 training return: -999.9968885818643
epoch: 105 test_true_pfm: 0.02017790197817178 sim_pfm: -999.9398362962088
episode: 420 training return: -999.99841506186
episode: 421 training return: -999.9976752637917
episode: 422 training return: -999.9983922398678
episode: 423 training return: -999.9985418368973
epoch: 106 test_true_pfm: -0.542110735265422 sim_pfm: -999.9390126035076
episode: 424 training return: -999.9938101483241
episode: 425 training return: -999.9979105477487
episode: 426 training return: -999.9981367025723
episode: 427 training return: -999.9983479705062
epoch: 107 test_true_pfm: -0.443887543118669 sim_pfm: -999.9397833974223
episode: 428 training return: -999.9972204545445
episode: 429 training return: -999.9960631773704
episode: 430 training return: -999.9937329217515
episode: 431 training return: -999.99765265743
epoch: 108 test_true_pfm: -0.24082572979520397 sim_pfm: -999.9396639559799
episode: 432 training return: -999.9977219929268
episode: 433 training return: -999.9977093715571
episode: 434 training return: -999.998622066004
episode: 435 training return: -999.9961168352747
epoch: 109 test_true_pfm: -1.0535131825166606 sim_pfm: -999.9394630853377
episode: 436 training return: -999.9985048850044
episode: 437 training return: -999.9975579710789
episode: 438 training return: -999.9990483481371
episode: 439 training return: -999.9977916535129
epoch: 110 test_true_pfm: -1.175851997489424 sim_pfm: -999.9387121126175
episode: 440 training return: -999.9984267046871
episode: 441 training return: -999.9980472836411
episode: 442 training return: -999.9960795328648
episode: 443 training return: -999.9983780690326
epoch: 111 test_true_pfm: -0.4133444019696881 sim_pfm: -999.9396772476726
episode: 444 training return: -999.9977339603485
episode: 445 training return: -999.9980360388756
episode: 446 training return: -999.9987449635641
episode: 447 training return: -999.9972144996356
epoch: 112 test_true_pfm: -0.7115636542457612 sim_pfm: -999.9397117649269
episode: 448 training return: -999.9979214429117
episode: 449 training return: -999.9985194676108
episode: 450 training return: -999.9973418847344
episode: 451 training return: -999.9986005833993
epoch: 113 test_true_pfm: 0.34501910670349156 sim_pfm: -999.9397074671336
episode: 452 training return: -999.996751395169
episode: 453 training return: -999.99899156144
episode: 454 training return: -999.9992815396017
episode: 455 training return: -999.9993967301758
epoch: 114 test_true_pfm: -0.40474091900606196 sim_pfm: -999.9402638610914
episode: 456 training return: -999.9926947974692
episode: 457 training return: -999.9973986973084
episode: 458 training return: -999.9980185509307
episode: 459 training return: -999.9983548183295
epoch: 115 test_true_pfm: -0.06803395645535455 sim_pfm: -999.9383470924885
episode: 460 training return: -999.9972500111554
episode: 461 training return: -999.9985418806741
episode: 462 training return: -999.998130589959
episode: 463 training return: -999.998024509126
epoch: 116 test_true_pfm: -0.14382829796046184 sim_pfm: -999.9394084785285
episode: 464 training return: -999.9942269798076
episode: 465 training return: -999.9979406334758
episode: 466 training return: -999.9987542035996
episode: 467 training return: -999.9986810499075
epoch: 117 test_true_pfm: -0.6275228302227319 sim_pfm: -999.9397290748889
episode: 468 training return: -999.9980939429706
episode: 469 training return: -999.9972093159662
episode: 470 training return: -999.9964683884084
episode: 471 training return: -999.9976154854655
epoch: 118 test_true_pfm: 0.2507767378206807 sim_pfm: -999.9380499977939
episode: 472 training return: -999.9983217056574
episode: 473 training return: -999.9993667156598
episode: 474 training return: -999.9978297019509
episode: 475 training return: -999.998059088174
epoch: 119 test_true_pfm: -0.6744485606035515 sim_pfm: -999.938445695374
episode: 476 training return: -999.9988426870489
episode: 477 training return: -999.9974908942033
episode: 478 training return: -999.9975948639959
episode: 479 training return: -999.9988389769505
epoch: 120 test_true_pfm: 0.10402344621458608 sim_pfm: -999.9384461157891
episode: 480 training return: -999.9989771402853
episode: 481 training return: -999.9985532265804
episode: 482 training return: -999.9896102020768
episode: 483 training return: -999.9980245104056
epoch: 121 test_true_pfm: -0.06503362592124946 sim_pfm: -999.9402071911908
episode: 484 training return: -999.9980108178431
episode: 485 training return: -999.9990718279786
episode: 486 training return: -999.9989537598739
episode: 487 training return: -999.996987009459
epoch: 122 test_true_pfm: -1.2657261666906043 sim_pfm: -999.9383860299685
episode: 488 training return: -999.9955609598303
episode: 489 training return: -999.9976487202973
episode: 490 training return: -999.9974330780019
episode: 491 training return: -999.9976077866389
epoch: 123 test_true_pfm: 0.42997557938942293 sim_pfm: -999.939307344877
episode: 492 training return: -999.9950069309524
episode: 493 training return: -999.99825225037
episode: 494 training return: -999.9978170406874
episode: 495 training return: -999.9973640343285
epoch: 124 test_true_pfm: -0.2284157794321557 sim_pfm: -999.9388811581912
episode: 496 training return: -999.9985736886599
episode: 497 training return: -999.9990559517718
episode: 498 training return: -999.9982629869753
episode: 499 training return: -999.9980721987938
epoch: 125 test_true_pfm: -0.6613405219880458 sim_pfm: -999.9395913576809
episode: 500 training return: -999.9984187998116
episode: 501 training return: -999.997893879226
episode: 502 training return: -999.998347769425
episode: 503 training return: -999.9892673477426
epoch: 126 test_true_pfm: -0.1838899903946618 sim_pfm: -999.940521763804
episode: 504 training return: -999.9988682876408
episode: 505 training return: -999.996045472634
episode: 506 training return: -999.996465827584
episode: 507 training return: -999.9959094951924
epoch: 127 test_true_pfm: -0.6973243288302625 sim_pfm: -999.9403998244485
episode: 508 training return: -999.9972341481642
episode: 509 training return: -999.9977305418341
episode: 510 training return: -999.9982268754133
episode: 511 training return: -999.9968290648588
epoch: 128 test_true_pfm: -0.01911282295200528 sim_pfm: -999.9396197646662
episode: 512 training return: -999.9977250954012
episode: 513 training return: -999.9977167202219
episode: 514 training return: -999.9983792806632
episode: 515 training return: -999.9988918978567
epoch: 129 test_true_pfm: -0.912811844700459 sim_pfm: -999.9404931728305
episode: 516 training return: -999.9982935945652
episode: 517 training return: -999.9914765380696
episode: 518 training return: -999.9979170837642
episode: 519 training return: -999.9978685528558
epoch: 130 test_true_pfm: -0.4611518418490472 sim_pfm: -999.9385156213381
episode: 520 training return: -999.998170561586
episode: 521 training return: -999.9981470159023
episode: 522 training return: -999.9969937376748
episode: 523 training return: -999.9984688923647
epoch: 131 test_true_pfm: -0.6866969353806547 sim_pfm: -999.9399906186791
episode: 524 training return: -999.991317256408
episode: 525 training return: -999.9949603120366
episode: 526 training return: -999.9956273046225
episode: 527 training return: -999.998415276133
epoch: 132 test_true_pfm: -0.2715794920805417 sim_pfm: -999.9398064505816
episode: 528 training return: -999.995314977361
episode: 529 training return: -999.9977440597467
episode: 530 training return: -999.9979920613498
episode: 531 training return: -999.9966387865811
epoch: 133 test_true_pfm: -0.4873851424094218 sim_pfm: -999.9395825604611
episode: 532 training return: -999.9980276298888
episode: 533 training return: -999.9980387237945
episode: 534 training return: -999.9981540917394
episode: 535 training return: -999.99699262942
epoch: 134 test_true_pfm: -0.007292219690719952 sim_pfm: -999.9398548968569
episode: 536 training return: -999.9984462345867
episode: 537 training return: -999.9979124523868
episode: 538 training return: -999.9984261996478
episode: 539 training return: -999.9978253306858
epoch: 135 test_true_pfm: -0.15247607690514176 sim_pfm: -999.9393448544664
episode: 540 training return: -999.9984477187454
episode: 541 training return: -999.9965722847937
episode: 542 training return: -999.9984325849197
episode: 543 training return: -999.9962654702546
epoch: 136 test_true_pfm: -0.12478162590741686 sim_pfm: -999.9398900184902
episode: 544 training return: -999.9969254071356
episode: 545 training return: -999.9981517772283
episode: 546 training return: -999.9976721171594
episode: 547 training return: -999.9984123986056
epoch: 137 test_true_pfm: -0.7214999394608684 sim_pfm: -999.9398940456371
episode: 548 training return: -999.9970309046918
episode: 549 training return: -999.9987703963408
episode: 550 training return: -999.9984560137054
episode: 551 training return: -999.9984555959024
epoch: 138 test_true_pfm: -0.5404969191317587 sim_pfm: -999.93890937302
episode: 552 training return: -999.9957834542652
episode: 553 training return: -999.9976987221288
episode: 554 training return: -999.9986458790228
episode: 555 training return: -999.964304592652
epoch: 139 test_true_pfm: 0.08429125697231032 sim_pfm: -999.9388742773621
episode: 556 training return: -999.9978512332086
episode: 557 training return: -999.9976031733214
episode: 558 training return: -999.9989507411531
episode: 559 training return: -999.9976303811991
epoch: 140 test_true_pfm: 0.3155344514791157 sim_pfm: -999.9395071581816
episode: 560 training return: -999.9984708857519
episode: 561 training return: -999.9978102536586
episode: 562 training return: -999.9980564164457
episode: 563 training return: -999.9986988175352
epoch: 141 test_true_pfm: 0.06228797934362235 sim_pfm: -999.9386219119982
episode: 564 training return: -999.9977543474597
episode: 565 training return: -999.9988484700376
episode: 566 training return: -999.9982233617811
episode: 567 training return: -999.9981006599752
epoch: 142 test_true_pfm: 0.29652302423794386 sim_pfm: -999.9396126993356
episode: 568 training return: -999.9993385301939
episode: 569 training return: -999.9979398699116
episode: 570 training return: -999.9980781161229
episode: 571 training return: -999.9964951738356
epoch: 143 test_true_pfm: -0.15550251705016985 sim_pfm: -999.9392891588328
episode: 572 training return: -999.998339437573
episode: 573 training return: -999.9983876836882
episode: 574 training return: -999.9980142358573
episode: 575 training return: -999.9988051605774
epoch: 144 test_true_pfm: -0.7206308394596653 sim_pfm: -999.9396149435719
episode: 576 training return: -999.9957054998326
episode: 577 training return: -999.9960827572854
episode: 578 training return: -999.9994703371117
episode: 579 training return: -999.9969277916749
epoch: 145 test_true_pfm: -0.10121175524561128 sim_pfm: -999.9382611262026
episode: 580 training return: -999.9981997440243
episode: 581 training return: -999.9968922550445
episode: 582 training return: -999.9984194031906
episode: 583 training return: -999.9902741347681
epoch: 146 test_true_pfm: -0.8227637684093856 sim_pfm: -999.9379670400043
episode: 584 training return: -999.9984106418543
episode: 585 training return: -999.9982534914062
episode: 586 training return: -999.9985258318477
episode: 587 training return: -999.9976429245386
epoch: 147 test_true_pfm: -0.3209302329246878 sim_pfm: -999.9380382906699
episode: 588 training return: -999.998409689167
episode: 589 training return: -999.995069049114
episode: 590 training return: -999.9982353067361
episode: 591 training return: -999.9986974573754
epoch: 148 test_true_pfm: 0.19170993384572652 sim_pfm: -999.9391334897668
episode: 592 training return: -999.9987307016594
episode: 593 training return: -999.9974857627852
episode: 594 training return: -999.997933767264
episode: 595 training return: -999.9987389538614
epoch: 149 test_true_pfm: -0.22603979337478322 sim_pfm: -999.9387135429838
episode: 596 training return: -999.9979680760781
episode: 597 training return: -999.9979223449844
episode: 598 training return: -999.9985850393905
episode: 599 training return: -999.9989988322084
epoch: 150 test_true_pfm: -0.2687818137836628 sim_pfm: -999.9377745054207
