['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'uncertainty', '--traj', 'expert', '--seed', '2', '--data', '100000']
epoch: 0 training_loss 0.3271654622256756 test_loss: 0.26108641624450685
epoch: 1 training_loss 0.22963012613356112 test_loss: 0.2341917037963867
epoch: 2 training_loss 0.1916405227780342 test_loss: 0.1830083727836609
epoch: 3 training_loss 0.18346368923783302 test_loss: 0.17505735158920288
epoch: 4 training_loss 0.16922896407544613 test_loss: 0.15777332782745362
epoch: 5 training_loss 0.15428661845624447 test_loss: 0.1665241003036499
epoch: 6 training_loss 0.15238698482513427 test_loss: 0.1591796398162842
epoch: 7 training_loss 0.13489337135106325 test_loss: 0.12355575561523438
epoch: 8 training_loss 0.14346682131290436 test_loss: 0.11724271774291992
epoch: 9 training_loss 0.1347762206941843 test_loss: 0.1410732865333557
epoch: 10 training_loss 0.1317437496036291 test_loss: 0.1575346827507019
epoch: 11 training_loss 0.1348121100664139 test_loss: 0.153517484664917
epoch: 12 training_loss 0.12639915321022271 test_loss: 0.15083141326904298
epoch: 13 training_loss 0.12618598978966475 test_loss: 0.14136910438537598
epoch: 14 training_loss 0.11507413117215037 test_loss: 0.1247252106666565
epoch: 15 training_loss 0.12163935407996178 test_loss: 0.14604464769363404
epoch: 16 training_loss 0.12861129131168128 test_loss: 0.14201194047927856
epoch: 17 training_loss 0.12099170312285423 test_loss: 0.13226134777069093
epoch: 18 training_loss 0.11731571737676859 test_loss: 0.12255053520202637
epoch: 19 training_loss 0.11577807869762183 test_loss: 0.12187834978103637
epoch: 20 training_loss 0.12603537198156117 test_loss: 0.12490110397338867
epoch: 21 training_loss 0.12168917756527663 test_loss: 0.13143936395645142
epoch: 22 training_loss 0.11870036555454135 test_loss: 0.1229485273361206
epoch: 23 training_loss 0.1122007180377841 test_loss: 0.11619706153869629
epoch: 24 training_loss 0.12334123689681292 test_loss: 0.10628623962402343
epoch: 25 training_loss 0.11449700318276883 test_loss: 0.11627756357192993
epoch: 26 training_loss 0.12354435447603464 test_loss: 0.12190796136856079
epoch: 27 training_loss 0.11814538329839706 test_loss: 0.11290371417999268
epoch: 28 training_loss 0.11703643314540386 test_loss: 0.10902769565582275
epoch: 29 training_loss 0.11681832984089852 test_loss: 0.12592024803161622
epoch: 30 training_loss 0.11716587204486131 test_loss: 0.11707488298416138
epoch: 31 training_loss 0.11455167915672064 test_loss: 0.1256558418273926
epoch: 32 training_loss 0.11400518387556076 test_loss: 0.12819098234176635
epoch: 33 training_loss 0.10878014579415321 test_loss: 0.10308232307434081
epoch: 34 training_loss 0.11124791219830513 test_loss: 0.12217470407485961
epoch: 35 training_loss 0.11154550544917584 test_loss: 0.1008703351020813
epoch: 36 training_loss 0.11704412233084441 test_loss: 0.10696941614151001
epoch: 37 training_loss 0.11350154224783182 test_loss: 0.1395850419998169
epoch: 38 training_loss 0.11120660573244096 test_loss: 0.13100221157073974
epoch: 39 training_loss 0.1136413947492838 test_loss: 0.12042595148086548
epoch: 40 training_loss 0.10463338401168584 test_loss: 0.13449807167053224
epoch: 41 training_loss 0.1195284677669406 test_loss: 0.11522647142410278
epoch: 42 training_loss 0.11484886277467013 test_loss: 0.11710474491119385
epoch: 43 training_loss 0.11081927794963121 test_loss: 0.12184015512466431
epoch: 44 training_loss 0.10986851762980222 test_loss: 0.11430146694183349
epoch: 45 training_loss 0.10470165340229869 test_loss: 0.1369473457336426
epoch: 46 training_loss 0.10780830873176456 test_loss: 0.11431601047515869
epoch: 47 training_loss 0.1071101763099432 test_loss: 0.11307281255722046
epoch: 48 training_loss 0.12428840108215809 test_loss: 0.11806962490081788
epoch: 49 training_loss 0.11270491365343333 test_loss: 0.13046826124191285
epoch: 50 training_loss 0.11365277200937271 test_loss: 0.10429275035858154
epoch: 51 training_loss 0.11226636368781329 test_loss: 0.1296466112136841
epoch: 52 training_loss 0.10449933469295501 test_loss: 0.11723886728286743
epoch: 53 training_loss 0.11123038280755282 test_loss: 0.11250437498092651
epoch: 54 training_loss 0.10528503097593785 test_loss: 0.13926684856414795
epoch: 55 training_loss 0.11262468922883272 test_loss: 0.12710647583007811
epoch: 56 training_loss 0.11370272267609835 test_loss: 0.1172647476196289
epoch: 57 training_loss 0.10501885067671538 test_loss: 0.11813024282455445
epoch: 58 training_loss 0.09650564014911651 test_loss: 0.11942435503005981
epoch: 59 training_loss 0.11418403482064604 test_loss: 0.10418362617492676
epoch: 60 training_loss 0.10790530005469919 test_loss: 0.11999430656433105
epoch: 61 training_loss 0.10661275565624237 test_loss: 0.11283810138702392
epoch: 62 training_loss 0.10667553920298815 test_loss: 0.11775857210159302
epoch: 63 training_loss 0.10851977255195379 test_loss: 0.11024872064590455
epoch: 64 training_loss 0.10544779654592276 test_loss: 0.10516207218170166
epoch: 65 training_loss 0.1060211405530572 test_loss: 0.12938417196273805
epoch: 66 training_loss 0.1100442960485816 test_loss: 0.10874292850494385
epoch: 67 training_loss 0.10829870097339153 test_loss: 0.11879009008407593
epoch: 68 training_loss 0.10748033810406923 test_loss: 0.1189163327217102
epoch: 69 training_loss 0.10639140576124191 test_loss: 0.11212043762207032
epoch: 70 training_loss 0.1097991132363677 test_loss: 0.11753751039505005
epoch: 71 training_loss 0.11049217479303479 test_loss: 0.11001954078674317
epoch: 72 training_loss 0.1142608987353742 test_loss: 0.1137892484664917
epoch: 73 training_loss 0.10845956090837718 test_loss: 0.10994132757186889
epoch: 74 training_loss 0.11603459665551781 test_loss: 0.1276857852935791
epoch: 75 training_loss 0.10937055133283138 test_loss: 0.11164828538894653
epoch: 76 training_loss 0.11257860746234655 test_loss: 0.1138033390045166
epoch: 77 training_loss 0.11151909373700619 test_loss: 0.11491175889968872
epoch: 78 training_loss 0.1089116001315415 test_loss: 0.11906353235244752
epoch: 79 training_loss 0.12046998839825392 test_loss: 0.12259262800216675
epoch: 80 training_loss 0.1075043236836791 test_loss: 0.09668106436729432
epoch: 81 training_loss 0.10476681327447296 test_loss: 0.11628127098083496
epoch: 82 training_loss 0.09859046556055545 test_loss: 0.11305946111679077
epoch: 83 training_loss 0.10214717866852879 test_loss: 0.1105538010597229
epoch: 84 training_loss 0.11007938202470541 test_loss: 0.11327712535858155
epoch: 85 training_loss 0.10551063781604171 test_loss: 0.10532703399658203
epoch: 86 training_loss 0.10083383303135633 test_loss: 0.11189537048339844
epoch: 87 training_loss 0.1116229746490717 test_loss: 0.10130013227462768
epoch: 88 training_loss 0.10868738766759634 test_loss: 0.1119794487953186
epoch: 89 training_loss 0.11293150592595338 test_loss: 0.11264290809631347
epoch: 90 training_loss 0.11326603099703789 test_loss: 0.11485586166381836
epoch: 91 training_loss 0.11281939327716828 test_loss: 0.1278071403503418
epoch: 92 training_loss 0.10497404333204031 test_loss: 0.11768153905868531
epoch: 93 training_loss 0.10747581657022237 test_loss: 0.11352313756942749
epoch: 94 training_loss 0.11437026102095843 test_loss: 0.11240173578262329
epoch: 95 training_loss 0.10693715572357178 test_loss: 0.1231043815612793
epoch: 96 training_loss 0.10719842590391636 test_loss: 0.10720217227935791
epoch: 97 training_loss 0.10705493142828346 test_loss: 0.11858911514282226
epoch: 98 training_loss 0.1091262138262391 test_loss: 0.11957868337631225
epoch: 99 training_loss 0.10984598819166422 test_loss: 0.11995325088500977
epoch: 100 training_loss 0.10315093820914627 test_loss: 0.09949086308479309
epoch: 101 training_loss 0.11395494068041444 test_loss: 0.11707836389541626
epoch: 102 training_loss 0.10360286282375455 test_loss: 0.12231465578079223
epoch: 103 training_loss 0.10421972367912531 test_loss: 0.11744807958602906
epoch: 104 training_loss 0.10809758208692073 test_loss: 0.12269481420516967
epoch: 105 training_loss 0.10288001565262675 test_loss: 0.11801000833511352
epoch: 106 training_loss 0.11019260551780462 test_loss: 0.1068576455116272
epoch: 107 training_loss 0.10824644058942795 test_loss: 0.11576794385910034
epoch: 108 training_loss 0.10714574817568064 test_loss: 0.10775185823440551
epoch: 109 training_loss 0.10904459230601787 test_loss: 0.13451626300811767
epoch: 110 training_loss 0.10257890140637756 test_loss: 0.13076326847076417
epoch: 111 training_loss 0.11298370961099863 test_loss: 0.10945502519607545
epoch: 112 training_loss 0.10385254554450513 test_loss: 0.11408982276916504
epoch: 113 training_loss 0.10047704540193081 test_loss: 0.12376290559768677
epoch: 114 training_loss 0.10276958471164108 test_loss: 0.1065176248550415
epoch: 115 training_loss 0.10265608891844749 test_loss: 0.10869458913803101
epoch: 116 training_loss 0.10429315509274602 test_loss: 0.10243653059005738
epoch: 117 training_loss 0.10585091628134251 test_loss: 0.12757976055145265
epoch: 118 training_loss 0.11082544891163706 test_loss: 0.11898237466812134
epoch: 119 training_loss 0.11009884182363748 test_loss: 0.10040479898452759
epoch: 120 training_loss 0.09686650220304728 test_loss: 0.11769455671310425
epoch: 121 training_loss 0.10200217297300697 test_loss: 0.12486952543258667
epoch: 122 training_loss 0.10115732653066516 test_loss: 0.10415700674057007
epoch: 123 training_loss 0.10547055816277862 test_loss: 0.11645123958587647
epoch: 124 training_loss 0.1139804950170219 test_loss: 0.10812178850173951
epoch: 125 training_loss 0.10800592575222254 test_loss: 0.12722563743591309
epoch: 126 training_loss 0.10952493607997894 test_loss: 0.12579787969589235
epoch: 127 training_loss 0.1047797941789031 test_loss: 0.11289299726486206
epoch: 128 training_loss 0.10017852226272225 test_loss: 0.11419347524642945
epoch: 129 training_loss 0.10919108316302299 test_loss: 0.1115102767944336
epoch: 130 training_loss 0.10254421735182405 test_loss: 0.12102633714675903
epoch: 131 training_loss 0.1071706636622548 test_loss: 0.10995988845825196
epoch: 132 training_loss 0.1047666079737246 test_loss: 0.10061269998550415
epoch: 133 training_loss 0.10437285099178553 test_loss: 0.11812006235122681
epoch: 134 training_loss 0.10776451097801328 test_loss: 0.11402406692504882
epoch: 135 training_loss 0.09944188639521599 test_loss: 0.11742656230926514
epoch: 136 training_loss 0.10339569520205259 test_loss: 0.11485811471939086
epoch: 137 training_loss 0.10729586977511645 test_loss: 0.13040558099746705
epoch: 138 training_loss 0.10617331389337778 test_loss: 0.09911218285560608
epoch: 139 training_loss 0.1019855347275734 test_loss: 0.11331440210342407
epoch: 140 training_loss 0.10198194710537792 test_loss: 0.12354618310928345
epoch: 141 training_loss 0.10887669930234552 test_loss: 0.13002562522888184
epoch: 142 training_loss 0.10944972988218069 test_loss: 0.11542410850524902
epoch: 143 training_loss 0.10744351539760828 test_loss: 0.10200592279434204
epoch: 144 training_loss 0.10135971970856189 test_loss: 0.11990898847579956
epoch: 145 training_loss 0.1067699245736003 test_loss: 0.10990761518478394
epoch: 146 training_loss 0.10491537392139434 test_loss: 0.11789112091064453
epoch: 147 training_loss 0.10842323709279299 test_loss: 0.12551883459091187
epoch: 148 training_loss 0.1002660608664155 test_loss: 0.12345240116119385
epoch: 149 training_loss 0.10313522834330797 test_loss: 0.1330735683441162
epoch: 0 training_loss 0.33117636740207673 test_loss: 0.2697002410888672
epoch: 1 training_loss 0.2266714771091938 test_loss: 0.19931988716125487
epoch: 2 training_loss 0.20243631526827813 test_loss: 0.20542533397674562
epoch: 3 training_loss 0.18031032167375088 test_loss: 0.1799108862876892
epoch: 4 training_loss 0.15375026248395443 test_loss: 0.17930500507354735
epoch: 5 training_loss 0.15944225303828716 test_loss: 0.15791126489639282
epoch: 6 training_loss 0.13828291215002536 test_loss: 0.14462085962295532
epoch: 7 training_loss 0.1403375533223152 test_loss: 0.15194082260131836
epoch: 8 training_loss 0.14135486017912627 test_loss: 0.1540001630783081
epoch: 9 training_loss 0.13635417852550746 test_loss: 0.13234201669692994
epoch: 10 training_loss 0.1210684148594737 test_loss: 0.1314069628715515
epoch: 11 training_loss 0.12844544485211373 test_loss: 0.1362586498260498
epoch: 12 training_loss 0.13661880142986774 test_loss: 0.13836041688919068
epoch: 13 training_loss 0.1271165058761835 test_loss: 0.12467455863952637
epoch: 14 training_loss 0.13013043500483035 test_loss: 0.13180601596832275
epoch: 15 training_loss 0.11895555876195431 test_loss: 0.13852176666259766
epoch: 16 training_loss 0.11601415444165468 test_loss: 0.14104006290435792
epoch: 17 training_loss 0.12725065782666206 test_loss: 0.12320981025695801
epoch: 18 training_loss 0.12191223315894603 test_loss: 0.12749779224395752
epoch: 19 training_loss 0.12067105494439602 test_loss: 0.12609878778457642
epoch: 20 training_loss 0.11371746987104416 test_loss: 0.14160428047180176
epoch: 21 training_loss 0.1234040399454534 test_loss: 0.11762865781784057
epoch: 22 training_loss 0.11192047707736492 test_loss: 0.12473173141479492
epoch: 23 training_loss 0.11815543159842491 test_loss: 0.10822440385818481
epoch: 24 training_loss 0.11701033778488636 test_loss: 0.13116577863693238
epoch: 25 training_loss 0.11235164139419794 test_loss: 0.13374773263931275
epoch: 26 training_loss 0.11365999335423112 test_loss: 0.12156369686126708
epoch: 27 training_loss 0.1137946710176766 test_loss: 0.12812941074371337
epoch: 28 training_loss 0.11914538525044918 test_loss: 0.12360280752182007
epoch: 29 training_loss 0.11135994963347912 test_loss: 0.12918050289154054
epoch: 30 training_loss 0.11739790663123131 test_loss: 0.14661425352096558
epoch: 31 training_loss 0.11069050947204233 test_loss: 0.11997807025909424
epoch: 32 training_loss 0.1073201872035861 test_loss: 0.1297391891479492
epoch: 33 training_loss 0.11840326454490423 test_loss: 0.12826355695724487
epoch: 34 training_loss 0.10609861273318529 test_loss: 0.12080605030059814
epoch: 35 training_loss 0.11617385745048522 test_loss: 0.12583472728729247
epoch: 36 training_loss 0.10661135535687208 test_loss: 0.13087356090545654
epoch: 37 training_loss 0.1190874507278204 test_loss: 0.13182499408721923
epoch: 38 training_loss 0.10157790705561638 test_loss: 0.12399213314056397
epoch: 39 training_loss 0.10651102667674422 test_loss: 0.10101832151412964
epoch: 40 training_loss 0.10798355907201768 test_loss: 0.11181609630584717
epoch: 41 training_loss 0.10681150868535041 test_loss: 0.12321135997772217
epoch: 42 training_loss 0.10989798080176115 test_loss: 0.12164509296417236
epoch: 43 training_loss 0.09868041679263115 test_loss: 0.12755892276763917
epoch: 44 training_loss 0.1083438740670681 test_loss: 0.12697360515594483
epoch: 45 training_loss 0.1119644033163786 test_loss: 0.1129384994506836
epoch: 46 training_loss 0.11345071829855442 test_loss: 0.1281217098236084
epoch: 47 training_loss 0.11970365829765797 test_loss: 0.12225559949874878
epoch: 48 training_loss 0.10185388823971152 test_loss: 0.1259610414505005
epoch: 49 training_loss 0.10812439633533359 test_loss: 0.10709208250045776
epoch: 50 training_loss 0.10557409293949604 test_loss: 0.13046016693115234
epoch: 51 training_loss 0.10987380307167768 test_loss: 0.11319446563720703
epoch: 52 training_loss 0.10697110310196876 test_loss: 0.1305640459060669
epoch: 53 training_loss 0.10814574286341667 test_loss: 0.12960184812545777
epoch: 54 training_loss 0.10197878988459706 test_loss: 0.13139299154281617
epoch: 55 training_loss 0.10934731835499406 test_loss: 0.1132277488708496
epoch: 56 training_loss 0.09663870574906469 test_loss: 0.11464388370513916
epoch: 57 training_loss 0.11338231306523085 test_loss: 0.10953055620193482
epoch: 58 training_loss 0.11442194022238254 test_loss: 0.12086975574493408
epoch: 59 training_loss 0.1106924293935299 test_loss: 0.1311139225959778
epoch: 60 training_loss 0.10782947812229395 test_loss: 0.11553469896316529
epoch: 61 training_loss 0.10524719314649701 test_loss: 0.11926565170288086
epoch: 62 training_loss 0.11574394132941962 test_loss: 0.12013496160507202
epoch: 63 training_loss 0.1043147225305438 test_loss: 0.1117040753364563
epoch: 64 training_loss 0.10559750149026513 test_loss: 0.11829948425292969
epoch: 65 training_loss 0.10883261065930128 test_loss: 0.12048109769821166
epoch: 66 training_loss 0.10585088357329368 test_loss: 0.13591243028640748
epoch: 67 training_loss 0.1072791924327612 test_loss: 0.11040509939193725
epoch: 68 training_loss 0.10702877234667539 test_loss: 0.11881022453308106
epoch: 69 training_loss 0.10273686915636063 test_loss: 0.10126917362213135
epoch: 70 training_loss 0.10799709632992745 test_loss: 0.11567168235778809
epoch: 71 training_loss 0.1074561657011509 test_loss: 0.10987447500228882
epoch: 72 training_loss 0.1193136577680707 test_loss: 0.10980384349822998
epoch: 73 training_loss 0.10674244303256274 test_loss: 0.11362935304641723
epoch: 74 training_loss 0.10257944222539664 test_loss: 0.1177522897720337
epoch: 75 training_loss 0.10542621940374375 test_loss: 0.12149043083190918
epoch: 76 training_loss 0.11174540888518095 test_loss: 0.1195984125137329
epoch: 77 training_loss 0.11080006832256913 test_loss: 0.1175830602645874
epoch: 78 training_loss 0.10351263768970967 test_loss: 0.10846353769302368
epoch: 79 training_loss 0.10300489194691181 test_loss: 0.10256326198577881
epoch: 80 training_loss 0.10636670779436827 test_loss: 0.10636190176010132
epoch: 81 training_loss 0.11050887122750282 test_loss: 0.11287187337875366
epoch: 82 training_loss 0.10445559114217758 test_loss: 0.11340445280075073
epoch: 83 training_loss 0.09693665117025375 test_loss: 0.1287290334701538
epoch: 84 training_loss 0.1010521131940186 test_loss: 0.11170917749404907
epoch: 85 training_loss 0.10347745209932327 test_loss: 0.1270635485649109
epoch: 86 training_loss 0.11174704145640135 test_loss: 0.11187355518341065
epoch: 87 training_loss 0.10279716618359089 test_loss: 0.11732354164123535
epoch: 88 training_loss 0.10203026128932834 test_loss: 0.13600071668624877
epoch: 89 training_loss 0.10742718689143657 test_loss: 0.11646709442138672
epoch: 90 training_loss 0.09863419540226459 test_loss: 0.10478349924087524
epoch: 91 training_loss 0.09842488683760166 test_loss: 0.11406859159469604
epoch: 92 training_loss 0.10349354159086943 test_loss: 0.12166796922683716
epoch: 93 training_loss 0.10379015268757939 test_loss: 0.09963138699531555
epoch: 94 training_loss 0.1017991085536778 test_loss: 0.12148939371109009
epoch: 95 training_loss 0.09579257545992732 test_loss: 0.13372373580932617
epoch: 96 training_loss 0.10177846312522888 test_loss: 0.1312041163444519
epoch: 97 training_loss 0.11070264350622892 test_loss: 0.1304085373878479
epoch: 98 training_loss 0.1073930312693119 test_loss: 0.11696637868881225
epoch: 99 training_loss 0.11241675980389118 test_loss: 0.11328163146972656
epoch: 100 training_loss 0.10640799406915903 test_loss: 0.11578168869018554
epoch: 101 training_loss 0.10035281533375383 test_loss: 0.1113236665725708
epoch: 102 training_loss 0.1007236815430224 test_loss: 0.11750490665435791
epoch: 103 training_loss 0.09800083230249584 test_loss: 0.12189943790435791
epoch: 104 training_loss 0.11039257720112801 test_loss: 0.10805855989456177
epoch: 105 training_loss 0.10281224049627781 test_loss: 0.11759840250015259
epoch: 106 training_loss 0.1041110748052597 test_loss: 0.11602182388305664
epoch: 107 training_loss 0.10941019881516695 test_loss: 0.12267125844955444
epoch: 108 training_loss 0.09930563118308783 test_loss: 0.12000083923339844
epoch: 109 training_loss 0.1118780622445047 test_loss: 0.11170963048934937
epoch: 110 training_loss 0.10631879325956106 test_loss: 0.09988089203834534
epoch: 111 training_loss 0.10386782901361585 test_loss: 0.10874239206314087
epoch: 112 training_loss 0.09681865697726608 test_loss: 0.12560670375823973
epoch: 113 training_loss 0.09540694113820791 test_loss: 0.11864707469940186
epoch: 114 training_loss 0.09977428931742907 test_loss: 0.12488142251968384
epoch: 115 training_loss 0.10029135642573238 test_loss: 0.12733663320541383
epoch: 116 training_loss 0.10418479416519404 test_loss: 0.11938288211822509
epoch: 117 training_loss 0.10402720356360078 test_loss: 0.12260867357254028
epoch: 118 training_loss 0.10290334083139896 test_loss: 0.11559064388275146
epoch: 119 training_loss 0.11759863682091236 test_loss: 0.12040345668792725
epoch: 120 training_loss 0.10892865762114524 test_loss: 0.12963709831237794
epoch: 121 training_loss 0.10234791630879045 test_loss: 0.1055903673171997
epoch: 122 training_loss 0.10374548241496086 test_loss: 0.12314618825912475
epoch: 123 training_loss 0.10988836105912923 test_loss: 0.10587379932403565
epoch: 124 training_loss 0.09674097303301096 test_loss: 0.11647193431854248
epoch: 125 training_loss 0.10723807509988546 test_loss: 0.11949393749237061
epoch: 126 training_loss 0.10441890154033899 test_loss: 0.11480122804641724
epoch: 127 training_loss 0.10764961924403905 test_loss: 0.10768684148788452
epoch: 128 training_loss 0.09671593861654401 test_loss: 0.11714110374450684
epoch: 129 training_loss 0.09738530054688453 test_loss: 0.12122212648391724
epoch: 130 training_loss 0.10464682076126337 test_loss: 0.1160963535308838
epoch: 131 training_loss 0.10754043135792017 test_loss: 0.11639913320541381
epoch: 132 training_loss 0.10228298529982567 test_loss: 0.12503746747970582
epoch: 133 training_loss 0.10386677674949168 test_loss: 0.09303147196769715
epoch: 134 training_loss 0.09620472682639956 test_loss: 0.12208305597305298
epoch: 135 training_loss 0.10416110180318355 test_loss: 0.12100938558578492
epoch: 136 training_loss 0.10882975462824106 test_loss: 0.11811071634292603
epoch: 137 training_loss 0.10595186319202185 test_loss: 0.11446378231048585
epoch: 138 training_loss 0.10336202520877123 test_loss: 0.13051228523254393
epoch: 139 training_loss 0.10494248483330011 test_loss: 0.1178119421005249
epoch: 140 training_loss 0.10531746312975883 test_loss: 0.12250577211380005
epoch: 141 training_loss 0.10204237235710024 test_loss: 0.11904301643371581
epoch: 142 training_loss 0.10048825621604919 test_loss: 0.10875824689865113
epoch: 143 training_loss 0.09811139941215515 test_loss: 0.10787324905395508
epoch: 144 training_loss 0.10182575941085815 test_loss: 0.10249273777008057
epoch: 145 training_loss 0.10398294636979699 test_loss: 0.12105027437210084
epoch: 146 training_loss 0.10194392088800669 test_loss: 0.11402519941329955
epoch: 147 training_loss 0.10485227566212416 test_loss: 0.11168391704559326
epoch: 148 training_loss 0.10019561722874641 test_loss: 0.12041276693344116
epoch: 149 training_loss 0.10235629176720977 test_loss: 0.11412056684494018
epoch: 0 training_loss 0.33392304986715315 test_loss: 0.23369784355163575
epoch: 1 training_loss 0.21953470341861248 test_loss: 0.18690720796585084
epoch: 2 training_loss 0.1820824120938778 test_loss: 0.1845136523246765
epoch: 3 training_loss 0.17247045807540418 test_loss: 0.16542991399765014
epoch: 4 training_loss 0.14912543717771767 test_loss: 0.15774673223495483
epoch: 5 training_loss 0.14906350538134575 test_loss: 0.1562819242477417
epoch: 6 training_loss 0.13548989936709405 test_loss: 0.1362539291381836
epoch: 7 training_loss 0.13401687763631343 test_loss: 0.1277593493461609
epoch: 8 training_loss 0.13055882580578326 test_loss: 0.13022158145904542
epoch: 9 training_loss 0.1301730553805828 test_loss: 0.135942542552948
epoch: 10 training_loss 0.12918894536793232 test_loss: 0.14302874803543092
epoch: 11 training_loss 0.12048734717071057 test_loss: 0.1343923568725586
epoch: 12 training_loss 0.11830088745802642 test_loss: 0.12295869588851929
epoch: 13 training_loss 0.12098783697932959 test_loss: 0.14860297441482545
epoch: 14 training_loss 0.1205057854950428 test_loss: 0.1271885395050049
epoch: 15 training_loss 0.11698639884591103 test_loss: 0.14560465812683104
epoch: 16 training_loss 0.11100187331438065 test_loss: 0.1389790654182434
epoch: 17 training_loss 0.11234799768775701 test_loss: 0.11194376945495606
epoch: 18 training_loss 0.11810184724628925 test_loss: 0.1256645679473877
epoch: 19 training_loss 0.11485352203249931 test_loss: 0.12872815132141113
epoch: 20 training_loss 0.11559193536639213 test_loss: 0.10707275867462158
epoch: 21 training_loss 0.11065407052636146 test_loss: 0.1111764669418335
epoch: 22 training_loss 0.11442756976932288 test_loss: 0.10950149297714233
epoch: 23 training_loss 0.11465311802923679 test_loss: 0.11509824991226196
epoch: 24 training_loss 0.1108121108263731 test_loss: 0.11155223846435547
epoch: 25 training_loss 0.10811216566711664 test_loss: 0.13794249296188354
epoch: 26 training_loss 0.11279062386602164 test_loss: 0.11858670711517334
epoch: 27 training_loss 0.1069826347567141 test_loss: 0.12839431762695314
epoch: 28 training_loss 0.10735485408455134 test_loss: 0.11643530130386352
epoch: 29 training_loss 0.10431127823889255 test_loss: 0.12423506975173951
epoch: 30 training_loss 0.10956951346248388 test_loss: 0.12505688667297363
epoch: 31 training_loss 0.10567660849541426 test_loss: 0.1092142939567566
epoch: 32 training_loss 0.11091789681464434 test_loss: 0.12000224590301514
epoch: 33 training_loss 0.11044472403824329 test_loss: 0.11502364873886109
epoch: 34 training_loss 0.10781617034226657 test_loss: 0.12344937324523926
epoch: 35 training_loss 0.1125796115025878 test_loss: 0.13320995569229127
epoch: 36 training_loss 0.10717907104641199 test_loss: 0.12418826818466186
epoch: 37 training_loss 0.10293204613029956 test_loss: 0.10697793960571289
epoch: 38 training_loss 0.10809878259897232 test_loss: 0.1300106167793274
epoch: 39 training_loss 0.10677250461652875 test_loss: 0.11468908786773682
epoch: 40 training_loss 0.10696687374264002 test_loss: 0.11498056650161743
epoch: 41 training_loss 0.10246788438409567 test_loss: 0.11710882186889648
epoch: 42 training_loss 0.10712099539116025 test_loss: 0.1196484923362732
epoch: 43 training_loss 0.1024904676526785 test_loss: 0.11246877908706665
epoch: 44 training_loss 0.1017322924733162 test_loss: 0.11280025243759155
epoch: 45 training_loss 0.11217319775372743 test_loss: 0.11071804761886597
epoch: 46 training_loss 0.10849270470440388 test_loss: 0.11454838514328003
epoch: 47 training_loss 0.10518949635326863 test_loss: 0.11687039136886597
epoch: 48 training_loss 0.10156581193208694 test_loss: 0.1318157434463501
epoch: 49 training_loss 0.10892744112759828 test_loss: 0.11511223316192627
epoch: 50 training_loss 0.1097636092081666 test_loss: 0.11810271739959717
epoch: 51 training_loss 0.1008332996442914 test_loss: 0.12713491916656494
epoch: 52 training_loss 0.10930663619190455 test_loss: 0.10896061658859253
epoch: 53 training_loss 0.10241022232919932 test_loss: 0.11803041696548462
epoch: 54 training_loss 0.0989843962341547 test_loss: 0.12366992235183716
epoch: 55 training_loss 0.10235490940511227 test_loss: 0.11319297552108765
epoch: 56 training_loss 0.1008935135230422 test_loss: 0.11513307094573974
epoch: 57 training_loss 0.10896153382956981 test_loss: 0.11237776279449463
epoch: 58 training_loss 0.10595411010086536 test_loss: 0.12332336902618408
epoch: 59 training_loss 0.10207180116325616 test_loss: 0.1117885947227478
epoch: 60 training_loss 0.10765993919223547 test_loss: 0.10817656517028809
epoch: 61 training_loss 0.10735988441854716 test_loss: 0.10530003309249877
epoch: 62 training_loss 0.10268230427056552 test_loss: 0.12600256204605104
epoch: 63 training_loss 0.10450487207621335 test_loss: 0.12629708051681518
epoch: 64 training_loss 0.10181127924472094 test_loss: 0.10319733619689941
epoch: 65 training_loss 0.10033631823956966 test_loss: 0.12196476459503174
epoch: 66 training_loss 0.10227454943582416 test_loss: 0.10574653148651122
epoch: 67 training_loss 0.10355299465358257 test_loss: 0.1048163890838623
epoch: 68 training_loss 0.10350848179310561 test_loss: 0.1205831527709961
epoch: 69 training_loss 0.10437054743990302 test_loss: 0.10934033393859863
epoch: 70 training_loss 0.09840561185032129 test_loss: 0.12462453842163086
epoch: 71 training_loss 0.10252011202275753 test_loss: 0.1040075659751892
epoch: 72 training_loss 0.10555562974885106 test_loss: 0.11586085557937623
epoch: 73 training_loss 0.09520236641168595 test_loss: 0.1055057168006897
epoch: 74 training_loss 0.10183918468654156 test_loss: 0.10271438360214233
epoch: 75 training_loss 0.11728788841515779 test_loss: 0.09940699934959411
epoch: 76 training_loss 0.10587813175283373 test_loss: 0.1079781174659729
epoch: 77 training_loss 0.10790974281728268 test_loss: 0.11571764945983887
epoch: 78 training_loss 0.10844820842146874 test_loss: 0.13107811212539672
epoch: 79 training_loss 0.09726030990481377 test_loss: 0.12237067222595215
epoch: 80 training_loss 0.10555652257055044 test_loss: 0.125371253490448
epoch: 81 training_loss 0.1014867963641882 test_loss: 0.12363473176956177
epoch: 82 training_loss 0.09862703768536449 test_loss: 0.11948648691177369
epoch: 83 training_loss 0.10305831030011177 test_loss: 0.12464112043380737
epoch: 84 training_loss 0.10299820667132735 test_loss: 0.10403140783309936
epoch: 85 training_loss 0.10787418957799673 test_loss: 0.11005533933639526
epoch: 86 training_loss 0.09532532583922147 test_loss: 0.11733275651931763
epoch: 87 training_loss 0.1021600279211998 test_loss: 0.12411072254180908
epoch: 88 training_loss 0.09966754103079438 test_loss: 0.0998145580291748
epoch: 89 training_loss 0.10068500520661473 test_loss: 0.12273091077804565
epoch: 90 training_loss 0.10255790337920188 test_loss: 0.10664619207382202
epoch: 91 training_loss 0.09766716685146093 test_loss: 0.12525132894515992
epoch: 92 training_loss 0.10069757252931595 test_loss: 0.11521047353744507
epoch: 93 training_loss 0.10289175223559141 test_loss: 0.11454458236694336
epoch: 94 training_loss 0.09385865358635784 test_loss: 0.12767260074615477
epoch: 95 training_loss 0.10743016060441732 test_loss: 0.1223233699798584
epoch: 96 training_loss 0.10280309733003377 test_loss: 0.11818480491638184
epoch: 97 training_loss 0.0952040559053421 test_loss: 0.112479567527771
epoch: 98 training_loss 0.09560391006991267 test_loss: 0.12213301658630371
epoch: 99 training_loss 0.10316091045737266 test_loss: 0.11603296995162964
epoch: 100 training_loss 0.09792316012084484 test_loss: 0.11334260702133178
epoch: 101 training_loss 0.1042243236489594 test_loss: 0.11458126306533814
epoch: 102 training_loss 0.10649928949773312 test_loss: 0.11914106607437133
epoch: 103 training_loss 0.10426931656897068 test_loss: 0.09205477833747863
epoch: 104 training_loss 0.10843190401792527 test_loss: 0.11741377115249634
epoch: 105 training_loss 0.09534008555114269 test_loss: 0.11539355516433716
epoch: 106 training_loss 0.10797723514959216 test_loss: 0.10460128784179687
epoch: 107 training_loss 0.1074428435973823 test_loss: 0.10510351657867431
epoch: 108 training_loss 0.1092621998861432 test_loss: 0.11482541561126709
epoch: 109 training_loss 0.09967648718506097 test_loss: 0.10032063722610474
epoch: 110 training_loss 0.10055020116269589 test_loss: 0.13152544498443602
epoch: 111 training_loss 0.1035594578832388 test_loss: 0.10432894229888916
epoch: 112 training_loss 0.10080440118908882 test_loss: 0.10505828857421876
epoch: 113 training_loss 0.09735487287864089 test_loss: 0.11280403137207032
epoch: 114 training_loss 0.09903119791299105 test_loss: 0.12580565214157105
epoch: 115 training_loss 0.10049660008400679 test_loss: 0.10940432548522949
epoch: 116 training_loss 0.09691732592880725 test_loss: 0.11567426919937134
epoch: 117 training_loss 0.09598322506994009 test_loss: 0.12011537551879883
epoch: 118 training_loss 0.09447300966829061 test_loss: 0.12289916276931763
epoch: 119 training_loss 0.1011432177759707 test_loss: 0.11777586936950683
epoch: 120 training_loss 0.09583819925785064 test_loss: 0.1083258032798767
epoch: 121 training_loss 0.09743796298280358 test_loss: 0.12707580327987672
epoch: 122 training_loss 0.091034424463287 test_loss: 0.10980011224746704
epoch: 123 training_loss 0.10598631951957942 test_loss: 0.1137290358543396
epoch: 124 training_loss 0.0924405363202095 test_loss: 0.11766542196273803
epoch: 125 training_loss 0.10462092038244009 test_loss: 0.101669442653656
epoch: 126 training_loss 0.10191586455330252 test_loss: 0.12582056522369384
epoch: 127 training_loss 0.10366191325709223 test_loss: 0.11483587026596069
epoch: 128 training_loss 0.09900750365108252 test_loss: 0.1146545648574829
epoch: 129 training_loss 0.10155730124562978 test_loss: 0.10721631050109863
epoch: 130 training_loss 0.09791255831718444 test_loss: 0.11178597211837768
epoch: 131 training_loss 0.09705777123570442 test_loss: 0.11987615823745727
epoch: 132 training_loss 0.10436418406665325 test_loss: 0.10373733043670655
epoch: 133 training_loss 0.09608237862586975 test_loss: 0.11451176404953003
epoch: 134 training_loss 0.09835379660129547 test_loss: 0.11664631366729736
epoch: 135 training_loss 0.10458821481093765 test_loss: 0.11988080739974975
epoch: 136 training_loss 0.09889838006347418 test_loss: 0.10371692180633545
epoch: 137 training_loss 0.09980935310944915 test_loss: 0.10050500631332397
epoch: 138 training_loss 0.09412091577425599 test_loss: 0.10834391117095947
epoch: 139 training_loss 0.10691613931208849 test_loss: 0.13266520500183104
epoch: 140 training_loss 0.09584339298307895 test_loss: 0.11949326992034912
epoch: 141 training_loss 0.10127640010789037 test_loss: 0.10132042169570923
epoch: 142 training_loss 0.0974764009937644 test_loss: 0.11534451246261597
epoch: 143 training_loss 0.09734257645905017 test_loss: 0.09906513094902039
epoch: 144 training_loss 0.09347402952611446 test_loss: 0.10638624429702759
epoch: 145 training_loss 0.10471672110259533 test_loss: 0.10999433994293213
epoch: 146 training_loss 0.09971563227474689 test_loss: 0.11196615695953369
epoch: 147 training_loss 0.0914657223597169 test_loss: 0.131479811668396
epoch: 148 training_loss 0.09838330373167992 test_loss: 0.11630007028579711
epoch: 149 training_loss 0.10020920608192682 test_loss: 0.10049643516540527
epoch: 0 training_loss 0.3350700144469738 test_loss: 0.24347429275512694
epoch: 1 training_loss 0.22033592373132704 test_loss: 0.21283578872680664
epoch: 2 training_loss 0.18505985021591187 test_loss: 0.171577525138855
epoch: 3 training_loss 0.17060649037361145 test_loss: 0.18098018169403077
epoch: 4 training_loss 0.15936485260725022 test_loss: 0.13900700807571412
epoch: 5 training_loss 0.14686861097812653 test_loss: 0.16596333980560302
epoch: 6 training_loss 0.1402578891813755 test_loss: 0.13854562044143676
epoch: 7 training_loss 0.13310709163546564 test_loss: 0.14624451398849486
epoch: 8 training_loss 0.12958613827824592 test_loss: 0.12181994915008545
epoch: 9 training_loss 0.13198929883539676 test_loss: 0.11873759031295776
epoch: 10 training_loss 0.1276830689609051 test_loss: 0.13572827577590943
epoch: 11 training_loss 0.12424143943935632 test_loss: 0.13416676521301268
epoch: 12 training_loss 0.11666026430204511 test_loss: 0.1130907416343689
epoch: 13 training_loss 0.11950766734778881 test_loss: 0.12200489044189453
epoch: 14 training_loss 0.12485665306448937 test_loss: 0.13077433109283448
epoch: 15 training_loss 0.12331446304917336 test_loss: 0.12671867609024048
epoch: 16 training_loss 0.12046811770647764 test_loss: 0.12564762830734252
epoch: 17 training_loss 0.11866456601768732 test_loss: 0.13067375421524047
epoch: 18 training_loss 0.12045457474887371 test_loss: 0.11890286207199097
epoch: 19 training_loss 0.11839591622352601 test_loss: 0.11032689809799194
epoch: 20 training_loss 0.11499647064134479 test_loss: 0.12794780731201172
epoch: 21 training_loss 0.11803180541843176 test_loss: 0.12302722930908203
epoch: 22 training_loss 0.11630873281508684 test_loss: 0.13285475969314575
epoch: 23 training_loss 0.10789888110011817 test_loss: 0.12733640670776367
epoch: 24 training_loss 0.11810144186019897 test_loss: 0.10890922546386719
epoch: 25 training_loss 0.11688128367066383 test_loss: 0.11534347534179687
epoch: 26 training_loss 0.12152820467948913 test_loss: 0.11732122898101807
epoch: 27 training_loss 0.11803204014897346 test_loss: 0.11159515380859375
epoch: 28 training_loss 0.11704706406220794 test_loss: 0.1066313624382019
epoch: 29 training_loss 0.11547552961856127 test_loss: 0.1239511251449585
epoch: 30 training_loss 0.11577285151928664 test_loss: 0.11638556718826294
epoch: 31 training_loss 0.1199820590019226 test_loss: 0.12764832973480225
epoch: 32 training_loss 0.1170009395852685 test_loss: 0.11072663068771363
epoch: 33 training_loss 0.1048004275932908 test_loss: 0.12374553680419922
epoch: 34 training_loss 0.11328348442912102 test_loss: 0.11477922201156616
epoch: 35 training_loss 0.10893975980579854 test_loss: 0.11489237546920776
epoch: 36 training_loss 0.10645979380235077 test_loss: 0.11621778011322022
epoch: 37 training_loss 0.11276799853891134 test_loss: 0.10872844457626343
epoch: 38 training_loss 0.11489826023578643 test_loss: 0.12404921054840087
epoch: 39 training_loss 0.11052466180175542 test_loss: 0.11968225240707397
epoch: 40 training_loss 0.1139425366744399 test_loss: 0.11628570556640624
epoch: 41 training_loss 0.11688204612582923 test_loss: 0.12206264734268188
epoch: 42 training_loss 0.1153276401385665 test_loss: 0.11001350879669189
epoch: 43 training_loss 0.11557706637308002 test_loss: 0.1211735486984253
epoch: 44 training_loss 0.10850210040807724 test_loss: 0.09911953806877136
epoch: 45 training_loss 0.11218871716409921 test_loss: 0.09860989451408386
epoch: 46 training_loss 0.10488016083836556 test_loss: 0.11872378587722779
epoch: 47 training_loss 0.11307789351791143 test_loss: 0.10477927923202515
epoch: 48 training_loss 0.10964691795408726 test_loss: 0.1338935136795044
epoch: 49 training_loss 0.11308370932936668 test_loss: 0.10500741004943848
epoch: 50 training_loss 0.10383280646055937 test_loss: 0.11189247369766235
epoch: 51 training_loss 0.11016284830868245 test_loss: 0.11114697456359864
epoch: 52 training_loss 0.11083850998431444 test_loss: 0.11379086971282959
epoch: 53 training_loss 0.12307554915547371 test_loss: 0.11382554769515991
epoch: 54 training_loss 0.1075842396169901 test_loss: 0.10407192707061767
epoch: 55 training_loss 0.11620852148160338 test_loss: 0.09665326476097107
epoch: 56 training_loss 0.10891066901385785 test_loss: 0.10247550010681153
epoch: 57 training_loss 0.11902450695633889 test_loss: 0.12288091182708741
epoch: 58 training_loss 0.11059928005561233 test_loss: 0.09874833226203919
epoch: 59 training_loss 0.10929218031466008 test_loss: 0.09628273844718933
epoch: 60 training_loss 0.09802965074777603 test_loss: 0.09948281049728394
epoch: 61 training_loss 0.11068716261535883 test_loss: 0.11073286533355713
epoch: 62 training_loss 0.1116656545549631 test_loss: 0.11377847194671631
epoch: 63 training_loss 0.10723768439143896 test_loss: 0.10282295942306519
epoch: 64 training_loss 0.11373370304703713 test_loss: 0.10932409763336182
epoch: 65 training_loss 0.10340899316594004 test_loss: 0.10800501108169555
epoch: 66 training_loss 0.1097175794839859 test_loss: 0.12704602479934693
epoch: 67 training_loss 0.11109246671199799 test_loss: 0.1114655613899231
epoch: 68 training_loss 0.10504211006686091 test_loss: 0.12467100620269775
epoch: 69 training_loss 0.10626163945533335 test_loss: 0.10675089359283448
epoch: 70 training_loss 0.10244973912835122 test_loss: 0.11462509632110596
epoch: 71 training_loss 0.11185557689517736 test_loss: 0.12020429372787475
epoch: 72 training_loss 0.10924703568220138 test_loss: 0.12404600381851197
epoch: 73 training_loss 0.10745091207325458 test_loss: 0.12754082679748535
epoch: 74 training_loss 0.10836266718804836 test_loss: 0.10900653600692749
epoch: 75 training_loss 0.11102885261178017 test_loss: 0.12864671945571898
epoch: 76 training_loss 0.10441208275035024 test_loss: 0.12092136144638062
epoch: 77 training_loss 0.1044780045747757 test_loss: 0.11278725862503051
epoch: 78 training_loss 0.10975404761731625 test_loss: 0.10848196744918823
epoch: 79 training_loss 0.10702248150482774 test_loss: 0.09817806482315064
epoch: 80 training_loss 0.11416453596204519 test_loss: 0.10423167943954467
epoch: 81 training_loss 0.09962147084996104 test_loss: 0.10017719268798828
epoch: 82 training_loss 0.11168998289853334 test_loss: 0.11253556013107299
epoch: 83 training_loss 0.10311233814805747 test_loss: 0.11620422601699829
epoch: 84 training_loss 0.11432693745940924 test_loss: 0.10763314962387086
epoch: 85 training_loss 0.10936485160142183 test_loss: 0.11380729675292969
epoch: 86 training_loss 0.10897991357371212 test_loss: 0.10500366687774658
epoch: 87 training_loss 0.1090063813328743 test_loss: 0.10629887580871582
epoch: 88 training_loss 0.10168490558862686 test_loss: 0.09782952666282654
epoch: 89 training_loss 0.10861155319958925 test_loss: 0.11452286243438721
epoch: 90 training_loss 0.10594305053353309 test_loss: 0.10233442783355713
epoch: 91 training_loss 0.10516916204243898 test_loss: 0.09637835621833801
epoch: 92 training_loss 0.1145562332496047 test_loss: 0.11130274534225464
epoch: 93 training_loss 0.10264145553112031 test_loss: 0.11057491302490234
epoch: 94 training_loss 0.10633420653641223 test_loss: 0.10684747695922851
epoch: 95 training_loss 0.11003753105178475 test_loss: 0.12843869924545287
epoch: 96 training_loss 0.10679521784186363 test_loss: 0.09932413697242737
epoch: 97 training_loss 0.1065338322520256 test_loss: 0.10415476560592651
epoch: 98 training_loss 0.11056878168135881 test_loss: 0.1065186858177185
epoch: 99 training_loss 0.09972018691711128 test_loss: 0.11939282417297363
epoch: 100 training_loss 0.10614065289497375 test_loss: 0.10896862745285034
epoch: 101 training_loss 0.10599750146269799 test_loss: 0.11456702947616577
epoch: 102 training_loss 0.10249133113771677 test_loss: 0.11345859766006469
epoch: 103 training_loss 0.10492756189778447 test_loss: 0.11180421113967895
epoch: 104 training_loss 0.10867096722126007 test_loss: 0.10141547918319702
epoch: 105 training_loss 0.1080863525532186 test_loss: 0.10891907215118408
epoch: 106 training_loss 0.10267056399956345 test_loss: 0.11454617977142334
epoch: 107 training_loss 0.1072213757224381 test_loss: 0.10705291032791138
epoch: 108 training_loss 0.11086633630096912 test_loss: 0.09239306449890136
epoch: 109 training_loss 0.11326184459030628 test_loss: 0.11241068840026855
epoch: 110 training_loss 0.1062707569822669 test_loss: 0.09072626829147339
epoch: 111 training_loss 0.11105971023440361 test_loss: 0.1180616021156311
epoch: 112 training_loss 0.11200958909466863 test_loss: 0.10625967979431153
epoch: 113 training_loss 0.10799890220165252 test_loss: 0.10711442232131958
epoch: 114 training_loss 0.10111887503415345 test_loss: 0.1246761441230774
epoch: 115 training_loss 0.10628344871103763 test_loss: 0.11612672805786133
epoch: 116 training_loss 0.10676098110154271 test_loss: 0.11030066013336182
epoch: 117 training_loss 0.10853660445660353 test_loss: 0.10782363414764404
epoch: 118 training_loss 0.10818932242691517 test_loss: 0.10635228157043457
epoch: 119 training_loss 0.10425302322953939 test_loss: 0.11308457851409912
epoch: 120 training_loss 0.11725039597600699 test_loss: 0.09775617122650146
epoch: 121 training_loss 0.10890447849407792 test_loss: 0.10100260972976685
epoch: 122 training_loss 0.10789251586422324 test_loss: 0.1023787260055542
epoch: 123 training_loss 0.11307710504159331 test_loss: 0.09781666398048401
epoch: 124 training_loss 0.10697542546316981 test_loss: 0.11759920120239258
epoch: 125 training_loss 0.1063148763217032 test_loss: 0.096021968126297
epoch: 126 training_loss 0.10904691565781832 test_loss: 0.10530921220779418
epoch: 127 training_loss 0.10396306777372956 test_loss: 0.10196468830108643
epoch: 128 training_loss 0.11296585366129876 test_loss: 0.10665332078933716
epoch: 129 training_loss 0.10247030153870583 test_loss: 0.1084405779838562
epoch: 130 training_loss 0.10690002573654056 test_loss: 0.11832151412963868
epoch: 131 training_loss 0.10508677747100592 test_loss: 0.10899364948272705
epoch: 132 training_loss 0.10266459682956337 test_loss: 0.1018067479133606
epoch: 133 training_loss 0.10698957156389952 test_loss: 0.09926862716674804
epoch: 134 training_loss 0.11429453037679195 test_loss: 0.11859571933746338
epoch: 135 training_loss 0.10117438036948442 test_loss: 0.11378278732299804
epoch: 136 training_loss 0.10904443372040987 test_loss: 0.11071088314056396
epoch: 137 training_loss 0.10040196392685174 test_loss: 0.11153714656829834
epoch: 138 training_loss 0.10307367756962776 test_loss: 0.11608650684356689
epoch: 139 training_loss 0.10830994963645935 test_loss: 0.10292717218399047
epoch: 140 training_loss 0.10334541730582714 test_loss: 0.11027261018753051
epoch: 141 training_loss 0.10361536055803298 test_loss: 0.11606017351150513
epoch: 142 training_loss 0.10384700208902359 test_loss: 0.10662691593170166
epoch: 143 training_loss 0.10037718322128057 test_loss: 0.11085047721862792
epoch: 144 training_loss 0.10185198768973351 test_loss: 0.13290053606033325
epoch: 145 training_loss 0.09638722877949477 test_loss: 0.10916929244995117
epoch: 146 training_loss 0.1042097045853734 test_loss: 0.12109042406082153
epoch: 147 training_loss 0.10006777435541153 test_loss: 0.10842007398605347
epoch: 148 training_loss 0.10497950457036495 test_loss: 0.10946646928787232
epoch: 149 training_loss 0.1094695894792676 test_loss: 0.11455165147781372
episode: 0 training return: -999.9714241415562
episode: 1 training return: -999.9747466502395
episode: 2 training return: -999.9919262071356
episode: 3 training return: -999.9817923488665
epoch: 1 test_true_pfm: 0.3970819842050499 sim_pfm: -999.9581825073014
episode: 4 training return: -999.995781467983
episode: 5 training return: -999.9854721710487
episode: 6 training return: -999.9672762452647
episode: 7 training return: -999.9786119669291
epoch: 2 test_true_pfm: -0.7287766222723097 sim_pfm: -999.9573940856802
episode: 8 training return: -999.9763091418982
episode: 9 training return: -999.9911163365491
episode: 10 training return: -999.989598419381
episode: 11 training return: -999.9925234878505
epoch: 3 test_true_pfm: -0.5417528005384249 sim_pfm: -999.9538529483789
episode: 12 training return: -999.9917860990753
episode: 13 training return: -999.9744384822695
episode: 14 training return: -999.9963149940513
episode: 15 training return: -999.9798715148538
epoch: 4 test_true_pfm: -0.27934900361160314 sim_pfm: -999.954991265147
episode: 16 training return: -999.972579602174
episode: 17 training return: -999.9815411803045
episode: 18 training return: -999.9728600167413
episode: 19 training return: -999.9883115260644
epoch: 5 test_true_pfm: -0.21458340941889317 sim_pfm: -999.9574465535879
episode: 20 training return: -999.9847822181806
episode: 21 training return: -999.9888034394951
episode: 22 training return: -999.9709040550727
episode: 23 training return: -999.9746120999109
epoch: 6 test_true_pfm: -0.8016527182231515 sim_pfm: -999.9583743395461
episode: 24 training return: -999.9765107954012
episode: 25 training return: -999.9536404392718
episode: 26 training return: -999.947292304725
episode: 27 training return: -999.9707376916521
epoch: 7 test_true_pfm: -0.2953120529173986 sim_pfm: -999.957244153716
episode: 28 training return: -999.9684319464147
episode: 29 training return: -999.9909000265275
episode: 30 training return: -999.978449234037
episode: 31 training return: -999.9863166283253
epoch: 8 test_true_pfm: -0.28892492009588117 sim_pfm: -999.9570105703364
episode: 32 training return: -999.9900556182358
episode: 33 training return: -999.9929873543048
episode: 34 training return: -999.9795948143283
episode: 35 training return: -999.993607410421
epoch: 9 test_true_pfm: -1.007214383383796 sim_pfm: -999.9550956861746
episode: 36 training return: -999.9809629853955
episode: 37 training return: -999.9872660338093
episode: 38 training return: -999.9914351768683
episode: 39 training return: -999.9863263801356
epoch: 10 test_true_pfm: 0.06177705646010503 sim_pfm: -999.9567878169587
episode: 40 training return: -999.9746287477069
episode: 41 training return: -999.9549977251892
episode: 42 training return: -999.964380743814
episode: 43 training return: -999.9672147211296
epoch: 11 test_true_pfm: -0.9164676569094151 sim_pfm: -999.9547286721123
episode: 44 training return: -999.9916666611035
episode: 45 training return: -999.9751637509424
episode: 46 training return: -999.9864004678731
episode: 47 training return: -999.9934911913692
epoch: 12 test_true_pfm: 0.006649453959985063 sim_pfm: -999.9563115563686
episode: 48 training return: -999.9705745149602
episode: 49 training return: -999.9839185603898
episode: 50 training return: -999.9886524152068
episode: 51 training return: -999.9765067407196
epoch: 13 test_true_pfm: -1.131743479923532 sim_pfm: -999.9562593676441
episode: 52 training return: -999.9880428289864
episode: 53 training return: -999.995134355205
episode: 54 training return: -999.970741092515
episode: 55 training return: -999.9961756315687
epoch: 14 test_true_pfm: 0.03322816399116635 sim_pfm: -999.9553431883056
episode: 56 training return: -999.9946657908271
episode: 57 training return: -999.9804908220693
episode: 58 training return: -999.9908481877884
episode: 59 training return: -999.987142419555
epoch: 15 test_true_pfm: -0.91251678299707 sim_pfm: -999.9577119306745
episode: 60 training return: -999.9911749887557
episode: 61 training return: -999.9907166067841
episode: 62 training return: -999.9827136691033
episode: 63 training return: -999.9836306146865
epoch: 16 test_true_pfm: 0.42058992026451514 sim_pfm: -999.9554907924963
episode: 64 training return: -999.9588094268887
episode: 65 training return: -999.9725194899025
episode: 66 training return: -999.9896835993461
episode: 67 training return: -999.9906733929971
epoch: 17 test_true_pfm: -0.4588373731421826 sim_pfm: -999.9545428999886
episode: 68 training return: -999.9944387318442
episode: 69 training return: -999.9759496928186
episode: 70 training return: -999.9935780320869
episode: 71 training return: -999.9908529825641
epoch: 18 test_true_pfm: -0.12963039986584143 sim_pfm: -999.9560938853559
episode: 72 training return: -999.9819909331113
episode: 73 training return: -999.9932482074347
episode: 74 training return: -999.983637977728
episode: 75 training return: -999.9868790354
epoch: 19 test_true_pfm: -0.7441071109720198 sim_pfm: -999.9550912828137
episode: 76 training return: -999.9726375711186
episode: 77 training return: -999.9891102229863
episode: 78 training return: -999.9760075530029
episode: 79 training return: -999.9941305404492
epoch: 20 test_true_pfm: -0.14990365207553458 sim_pfm: -999.9560056259161
episode: 80 training return: -999.992793785579
episode: 81 training return: -999.985887494738
episode: 82 training return: -999.9814457151955
episode: 83 training return: -999.9728756716828
epoch: 21 test_true_pfm: 0.17163068160434855 sim_pfm: -999.9576621261436
episode: 84 training return: -999.9887429471739
episode: 85 training return: -999.9781457235988
episode: 86 training return: -999.9791191268286
episode: 87 training return: -999.9670114478434
epoch: 22 test_true_pfm: -0.5355590437025698 sim_pfm: -999.9561398701065
episode: 88 training return: -999.9914085098982
episode: 89 training return: -999.9844953087804
episode: 90 training return: -999.9721112059746
episode: 91 training return: -999.9840864694617
epoch: 23 test_true_pfm: -0.8839722597366145 sim_pfm: -999.9532727401161
episode: 92 training return: -999.9765265791309
episode: 93 training return: -999.9881224163134
episode: 94 training return: -999.983652459018
episode: 95 training return: -999.9906664262129
epoch: 24 test_true_pfm: 0.12776927529581647 sim_pfm: -999.9551236255298
episode: 96 training return: -999.9860594015444
episode: 97 training return: -999.994638926094
episode: 98 training return: -999.9915848114288
episode: 99 training return: -999.993879310328
epoch: 25 test_true_pfm: 0.1932484843985711 sim_pfm: -999.9564282714408
episode: 100 training return: -999.9822349744575
episode: 101 training return: -999.9762377269724
episode: 102 training return: -999.9655667815202
episode: 103 training return: -999.9654787123361
epoch: 26 test_true_pfm: -0.39296376023098495 sim_pfm: -999.9562981794073
episode: 104 training return: -999.9854328477244
episode: 105 training return: -999.989616881821
episode: 106 training return: -999.9876321424962
episode: 107 training return: -999.9620102617213
epoch: 27 test_true_pfm: 0.1766772700972287 sim_pfm: -999.9550545407179
episode: 108 training return: -999.984131368723
episode: 109 training return: -999.9800157027588
episode: 110 training return: -999.9741217527871
episode: 111 training return: -999.9865082254763
epoch: 28 test_true_pfm: 0.7351066293731021 sim_pfm: -999.9567478697358
episode: 112 training return: -999.9891269953017
episode: 113 training return: -999.9732904164854
episode: 114 training return: -999.9901558419845
episode: 115 training return: -999.9891309320915
epoch: 29 test_true_pfm: -0.8151798697965957 sim_pfm: -999.9552802948134
episode: 116 training return: -999.9812260870978
episode: 117 training return: -999.9752943732677
episode: 118 training return: -999.9788495145756
episode: 119 training return: -999.9923577530509
epoch: 30 test_true_pfm: -1.0410467418759766 sim_pfm: -999.9548499097094
episode: 120 training return: -999.9928440558679
episode: 121 training return: -999.9785904674446
episode: 122 training return: -999.9769571896707
episode: 123 training return: -999.9724306062166
epoch: 31 test_true_pfm: -0.14126645133290375 sim_pfm: -999.9555899785929
episode: 124 training return: -999.9922428413449
episode: 125 training return: -999.9874024283878
episode: 126 training return: -999.9954036005181
episode: 127 training return: -999.9957652568445
epoch: 32 test_true_pfm: -0.5526112950509954 sim_pfm: -999.9546153894757
episode: 128 training return: -999.9915920361523
episode: 129 training return: -999.9903274534773
episode: 130 training return: -999.9686383621053
episode: 131 training return: -999.9806618210218
epoch: 33 test_true_pfm: 0.4486975127938257 sim_pfm: -999.9570010361344
episode: 132 training return: -999.9816262830766
episode: 133 training return: -999.962521150516
episode: 134 training return: -999.9753728841976
episode: 135 training return: -999.9886138194223
epoch: 34 test_true_pfm: -0.3860596424455592 sim_pfm: -999.955577629518
episode: 136 training return: -999.9859386935142
episode: 137 training return: -999.9816544647024
episode: 138 training return: -999.9952838162726
episode: 139 training return: -999.9877970760327
epoch: 35 test_true_pfm: 0.043516896572796816 sim_pfm: -999.9559954376851
episode: 140 training return: -999.9515847240232
episode: 141 training return: -999.9923532422806
episode: 142 training return: -999.9336103184473
episode: 143 training return: -999.9875725588723
epoch: 36 test_true_pfm: -0.17640323419117646 sim_pfm: -999.9549182877969
episode: 144 training return: -999.9832416500732
episode: 145 training return: -999.9869490846461
episode: 146 training return: -999.976410268191
episode: 147 training return: -999.9640939392426
epoch: 37 test_true_pfm: 0.2288249576636426 sim_pfm: -999.9570810852396
episode: 148 training return: -999.9895961831317
episode: 149 training return: -999.9865489246232
episode: 150 training return: -999.9722610655673
episode: 151 training return: -999.9879977684408
epoch: 38 test_true_pfm: -0.4910319916130215 sim_pfm: -999.9554390324724
episode: 152 training return: -999.9594020890459
episode: 153 training return: -999.9906967376216
episode: 154 training return: -999.9644087233886
episode: 155 training return: -999.9932869343431
epoch: 39 test_true_pfm: -1.1765476928170575 sim_pfm: -999.9539185704599
episode: 156 training return: -999.994438972947
episode: 157 training return: -999.9958891569887
episode: 158 training return: -999.9721070298933
episode: 159 training return: -999.9777534716626
epoch: 40 test_true_pfm: -0.3165520234018342 sim_pfm: -999.9547837035366
episode: 160 training return: -999.9936729828789
episode: 161 training return: -999.9853798649673
episode: 162 training return: -999.9822415666595
episode: 163 training return: -999.9937757791309
epoch: 41 test_true_pfm: 0.2432567219208339 sim_pfm: -999.9555068935597
episode: 164 training return: -999.9899461462815
episode: 165 training return: -1000.0027764728915
episode: 166 training return: -999.9862731790257
episode: 167 training return: -999.971899615665
epoch: 42 test_true_pfm: -0.37857905383836005 sim_pfm: -999.9561188440995
episode: 168 training return: -999.9897553273046
episode: 169 training return: -999.9962185984464
episode: 170 training return: -999.9893040218691
episode: 171 training return: -999.9757311697641
epoch: 43 test_true_pfm: -0.3211260933944545 sim_pfm: -999.9550670346412
episode: 172 training return: -999.9824907838647
episode: 173 training return: -999.9785320003444
episode: 174 training return: -999.8828575184433
episode: 175 training return: -999.968950799602
epoch: 44 test_true_pfm: -0.5795366429962484 sim_pfm: -999.9562956973965
episode: 176 training return: -999.9952594682957
episode: 177 training return: -999.9916671228807
episode: 178 training return: -999.9947212816642
episode: 179 training return: -999.9675992797967
epoch: 45 test_true_pfm: 0.2810683069498842 sim_pfm: -999.9557492115122
episode: 180 training return: -999.974743172954
episode: 181 training return: -999.9804563303087
episode: 182 training return: -999.9767399114385
episode: 183 training return: -999.9678964634319
epoch: 46 test_true_pfm: -0.4290518896826836 sim_pfm: -999.9555042520111
episode: 184 training return: -999.9918855316049
episode: 185 training return: -999.9933355383346
episode: 186 training return: -999.9830564710418
episode: 187 training return: -999.9748732281446
epoch: 47 test_true_pfm: 0.14335245753320175 sim_pfm: -999.9544323772678
episode: 188 training return: -999.9640912836827
episode: 189 training return: -999.9834170973467
episode: 190 training return: -999.9835644597891
episode: 191 training return: -999.9608825685378
epoch: 48 test_true_pfm: -0.5772105788309644 sim_pfm: -999.9552819998917
episode: 192 training return: -999.9921198131225
episode: 193 training return: -999.9800333173081
episode: 194 training return: -999.954501629743
episode: 195 training return: -999.9822964548669
epoch: 49 test_true_pfm: -0.3598984144964732 sim_pfm: -999.9551215324936
episode: 196 training return: -999.9934230274723
episode: 197 training return: -999.9832851226051
episode: 198 training return: -999.9944969248112
episode: 199 training return: -999.9757390539671
epoch: 50 test_true_pfm: -0.3552932976725966 sim_pfm: -999.9560612674906
episode: 200 training return: -999.981662121635
episode: 201 training return: -999.9798308443749
episode: 202 training return: -999.9893512338218
episode: 203 training return: -999.9942420273317
epoch: 51 test_true_pfm: -0.4628071771314481 sim_pfm: -999.9554513094703
episode: 204 training return: -999.9780069127354
episode: 205 training return: -999.9812567152374
episode: 206 training return: -999.9874157592966
episode: 207 training return: -999.9832543707172
epoch: 52 test_true_pfm: -0.08177152090766286 sim_pfm: -999.9560685379691
episode: 208 training return: -999.9863926103751
episode: 209 training return: -999.9881640484427
episode: 210 training return: -999.9874115301209
episode: 211 training return: -999.9818309159456
epoch: 53 test_true_pfm: 0.04504361977872481 sim_pfm: -999.956677992444
episode: 212 training return: -999.9939238139519
episode: 213 training return: -999.9937977734627
episode: 214 training return: -999.9870395429027
episode: 215 training return: -999.9936706658966
epoch: 54 test_true_pfm: -0.26155283993951844 sim_pfm: -999.9567322722445
episode: 216 training return: -999.9823478208049
episode: 217 training return: -999.9878645825763
episode: 218 training return: -999.9724989877683
episode: 219 training return: -999.9875754028405
epoch: 55 test_true_pfm: 0.5335562389210415 sim_pfm: -999.9555769301654
episode: 220 training return: -999.9966542796649
episode: 221 training return: -999.9890722968693
episode: 222 training return: -999.9783753496093
episode: 223 training return: -999.9647596120095
epoch: 56 test_true_pfm: -0.16245702678994892 sim_pfm: -999.9543199444342
episode: 224 training return: -999.9777910043908
episode: 225 training return: -999.9882795322836
episode: 226 training return: -999.9739450910109
episode: 227 training return: -999.9792749416617
epoch: 57 test_true_pfm: -0.24767427848976944 sim_pfm: -999.955260298503
episode: 228 training return: -999.9957526210497
episode: 229 training return: -999.978770216176
episode: 230 training return: -999.990603257198
episode: 231 training return: -999.9853924065558
epoch: 58 test_true_pfm: -1.3901607322809992 sim_pfm: -999.9552005238037
episode: 232 training return: -999.9881012915915
episode: 233 training return: -1000.0082540077623
episode: 234 training return: -999.9794545092474
episode: 235 training return: -999.9849139599914
epoch: 59 test_true_pfm: -0.6166835140768526 sim_pfm: -999.9545724626454
episode: 236 training return: -999.9820548982407
episode: 237 training return: -1000.7783506684607
episode: 238 training return: -999.9893650205022
episode: 239 training return: -999.9821863451608
epoch: 60 test_true_pfm: -0.0878989522602175 sim_pfm: -999.9552944083985
episode: 240 training return: -999.9804448416155
episode: 241 training return: -999.9786650266002
episode: 242 training return: -999.9877068163955
episode: 243 training return: -999.9792533387952
epoch: 61 test_true_pfm: -0.34610698849886384 sim_pfm: -999.9554930568992
episode: 244 training return: -999.984686410066
episode: 245 training return: -999.9881083615198
episode: 246 training return: -1000.3826109356155
episode: 247 training return: -999.9954476000154
epoch: 62 test_true_pfm: -0.20226664105911965 sim_pfm: -999.95852009616
episode: 248 training return: -999.9862315884058
episode: 249 training return: -999.9934295186449
episode: 250 training return: -999.9805798769538
episode: 251 training return: -999.9781860909609
epoch: 63 test_true_pfm: -0.6265094893972346 sim_pfm: -999.9558680510512
episode: 252 training return: -999.9870918797442
episode: 253 training return: -999.9761870557584
episode: 254 training return: -999.9682563140469
episode: 255 training return: -999.9587122114506
epoch: 64 test_true_pfm: -0.4330505875369337 sim_pfm: -999.9559167032775
episode: 256 training return: -999.9512321485424
episode: 257 training return: -999.9731568244945
episode: 258 training return: -999.9924663056331
episode: 259 training return: -999.9579882356408
epoch: 65 test_true_pfm: -0.17024113474980326 sim_pfm: -999.9534117512461
episode: 260 training return: -999.9896154097346
episode: 261 training return: -999.9723391882478
episode: 262 training return: -999.9886630918877
episode: 263 training return: -999.9750221447156
epoch: 66 test_true_pfm: -0.6272335831753568 sim_pfm: -999.9546952808952
episode: 264 training return: -1000.1174486583067
episode: 265 training return: -999.985927808263
episode: 266 training return: -999.9707612230383
episode: 267 training return: -999.9808066249741
epoch: 67 test_true_pfm: -0.09540526814418905 sim_pfm: -999.9554949049962
episode: 268 training return: -999.9864543137398
episode: 269 training return: -999.9891577797399
episode: 270 training return: -999.9626727498968
episode: 271 training return: -999.9924845072887
epoch: 68 test_true_pfm: 0.17574181551927484 sim_pfm: -999.9572207386085
episode: 272 training return: -999.9722796625491
episode: 273 training return: -999.9698200370004
episode: 274 training return: -999.9745436296823
episode: 275 training return: -999.9652790116401
epoch: 69 test_true_pfm: 0.4336170655168828 sim_pfm: -999.9559684699549
episode: 276 training return: -999.9650821062578
episode: 277 training return: -999.9884487208521
episode: 278 training return: -999.9932554922477
episode: 279 training return: -999.9744676063532
epoch: 70 test_true_pfm: 0.02680028351203532 sim_pfm: -999.9555170185771
episode: 280 training return: -999.9854990543455
episode: 281 training return: -999.9780021900102
episode: 282 training return: -999.972758368487
episode: 283 training return: -999.9473540627379
epoch: 71 test_true_pfm: -0.250244360585134 sim_pfm: -999.9559258466129
episode: 284 training return: -999.9812169920851
episode: 285 training return: -999.9668744989556
episode: 286 training return: -999.9620408592393
episode: 287 training return: -999.9686511387529
epoch: 72 test_true_pfm: 0.41459401118718026 sim_pfm: -999.9568036685981
episode: 288 training return: -999.989761928245
episode: 289 training return: -999.973820478337
episode: 290 training return: -999.9875213454112
episode: 291 training return: -999.9857210212497
epoch: 73 test_true_pfm: -0.5843615892183269 sim_pfm: -999.9570696892838
episode: 292 training return: -999.991974364533
episode: 293 training return: -999.9789948883553
episode: 294 training return: -999.9875692395508
episode: 295 training return: -999.9882972540505
epoch: 74 test_true_pfm: 0.15567503294183058 sim_pfm: -999.9570916847616
episode: 296 training return: -999.9825995022311
episode: 297 training return: -999.9742955728042
episode: 298 training return: -999.9952781589644
episode: 299 training return: -999.9956979192164
epoch: 75 test_true_pfm: 0.40441416012864756 sim_pfm: -999.9552666505975
episode: 300 training return: -999.9770231704135
episode: 301 training return: -999.9881120092177
episode: 302 training return: -999.964727976678
episode: 303 training return: -999.9706986183654
epoch: 76 test_true_pfm: -1.1843200891674637 sim_pfm: -999.9551890927222
episode: 304 training return: -999.9542682242928
episode: 305 training return: -999.9844641360438
episode: 306 training return: -999.9632545184436
episode: 307 training return: -999.9904579445529
epoch: 77 test_true_pfm: -0.5559810565745268 sim_pfm: -999.9555240860997
episode: 308 training return: -999.9887410212065
episode: 309 training return: -999.9762269294253
episode: 310 training return: -999.9809514910548
episode: 311 training return: -999.9841188036966
epoch: 78 test_true_pfm: -0.2838938535999702 sim_pfm: -999.9549403004975
episode: 312 training return: -999.9859316325278
episode: 313 training return: -999.9873175707409
episode: 314 training return: -999.9664443849638
episode: 315 training return: -999.9924940677629
epoch: 79 test_true_pfm: -0.5302842529289599 sim_pfm: -999.955040354666
episode: 316 training return: -999.9745165380007
episode: 317 training return: -999.9743833415447
episode: 318 training return: -999.9715138863323
episode: 319 training return: -999.9784255485838
epoch: 80 test_true_pfm: -0.07576348570727105 sim_pfm: -999.9560542809076
episode: 320 training return: -999.9588569016792
episode: 321 training return: -999.9926680272573
episode: 322 training return: -999.98618275478
episode: 323 training return: -999.9831449554106
epoch: 81 test_true_pfm: -0.3644471372002762 sim_pfm: -999.9559779716511
episode: 324 training return: -999.9821187243318
episode: 325 training return: -999.9829443098266
episode: 326 training return: -999.9658086100545
episode: 327 training return: -999.9846547085713
epoch: 82 test_true_pfm: -0.4013461055449154 sim_pfm: -999.956195112075
episode: 328 training return: -999.9637774252794
episode: 329 training return: -999.9741351756204
episode: 330 training return: -999.9945214693859
episode: 331 training return: -999.9615345409504
epoch: 83 test_true_pfm: -0.26721390659402083 sim_pfm: -999.9567611662177
episode: 332 training return: -999.9900166639729
episode: 333 training return: -999.9680694051477
episode: 334 training return: -999.9911727859584
episode: 335 training return: -999.9957872466557
epoch: 84 test_true_pfm: -0.30958674643995676 sim_pfm: -999.9532383276863
episode: 336 training return: -999.9875867052282
episode: 337 training return: -999.9840199839617
episode: 338 training return: -999.9943095554554
episode: 339 training return: -1000.3335622080806
epoch: 85 test_true_pfm: -0.3994576503009022 sim_pfm: -999.9548610202229
episode: 340 training return: -999.9875875327618
episode: 341 training return: -999.9835321123853
episode: 342 training return: -999.9569463392835
episode: 343 training return: -999.9693758482805
epoch: 86 test_true_pfm: 0.09927940625191554 sim_pfm: -999.9568322257359
episode: 344 training return: -999.9900575131194
episode: 345 training return: -999.9797516439891
episode: 346 training return: -999.9928894572862
episode: 347 training return: -999.9652970333254
epoch: 87 test_true_pfm: -0.4541534229097069 sim_pfm: -999.9560969235232
episode: 348 training return: -999.9949779544744
episode: 349 training return: -999.9954064287737
episode: 350 training return: -999.9715961731969
episode: 351 training return: -999.9944052161137
epoch: 88 test_true_pfm: -0.08586116181516446 sim_pfm: -999.9545079160529
episode: 352 training return: -999.9700550614007
episode: 353 training return: -999.964993999655
episode: 354 training return: -999.9891077328003
episode: 355 training return: -999.9907223453681
epoch: 89 test_true_pfm: -0.39526964806469195 sim_pfm: -999.9547559090553
episode: 356 training return: -999.978016511377
episode: 357 training return: -1000.0014617569516
episode: 358 training return: -999.9818476608287
episode: 359 training return: -999.9732457871304
epoch: 90 test_true_pfm: 0.05775998523506689 sim_pfm: -999.9546701541226
episode: 360 training return: -999.9628988888014
episode: 361 training return: -1001.5210627083133
episode: 362 training return: -999.9743638418605
episode: 363 training return: -999.9675767358547
epoch: 91 test_true_pfm: -0.31916341604640786 sim_pfm: -999.9547396780509
episode: 364 training return: -999.9891774554608
episode: 365 training return: -999.9382335270318
episode: 366 training return: -999.9741971452157
episode: 367 training return: -999.9636049709538
epoch: 92 test_true_pfm: -0.14265097583099176 sim_pfm: -999.9566210712964
episode: 368 training return: -999.9803897089205
episode: 369 training return: -999.9934353507173
episode: 370 training return: -999.9894354741965
episode: 371 training return: -1000.0080582666218
epoch: 93 test_true_pfm: -0.2803876128206341 sim_pfm: -999.954105124018
episode: 372 training return: -999.9895081201516
episode: 373 training return: -999.9866784501814
episode: 374 training return: -999.9813006883022
episode: 375 training return: -999.9877111532293
epoch: 94 test_true_pfm: 0.22511589978724947 sim_pfm: -999.9566191000528
episode: 376 training return: -1000.0217542402667
episode: 377 training return: -999.9790581593685
episode: 378 training return: -1000.1476696032037
episode: 379 training return: -999.9722295572276
epoch: 95 test_true_pfm: -0.03004959136992998 sim_pfm: -999.9548668636968
episode: 380 training return: -999.9845883801136
episode: 381 training return: -999.9761440805161
episode: 382 training return: -999.9697144535758
episode: 383 training return: -999.9914685906825
epoch: 96 test_true_pfm: -0.1949753175943085 sim_pfm: -999.9563739777714
episode: 384 training return: -999.9948851895591
episode: 385 training return: -999.9932600923729
episode: 386 training return: -1000.0387546913963
episode: 387 training return: -999.994713547026
epoch: 97 test_true_pfm: -0.3809492193296016 sim_pfm: -999.9534844022677
episode: 388 training return: -999.9810503167523
episode: 389 training return: -999.9917630258309
episode: 390 training return: -999.9560128118757
episode: 391 training return: -999.9700762543158
epoch: 98 test_true_pfm: -0.2808141334635572 sim_pfm: -999.9592004188706
episode: 392 training return: -999.9877912056501
episode: 393 training return: -999.9838267022877
episode: 394 training return: -999.9752562223193
episode: 395 training return: -999.9766244599393
epoch: 99 test_true_pfm: -0.047349363065267935 sim_pfm: -999.9579492953217
episode: 396 training return: -999.9915912894492
episode: 397 training return: -999.9895566422847
episode: 398 training return: -999.9932027786227
episode: 399 training return: -999.9569092512412
epoch: 100 test_true_pfm: -0.483192174113171 sim_pfm: -999.957595020435
episode: 400 training return: -1000.0301590916315
episode: 401 training return: -999.9862197853167
episode: 402 training return: -999.976813976128
episode: 403 training return: -999.9966838436336
epoch: 101 test_true_pfm: -0.6505036275780297 sim_pfm: -999.9550034107378
episode: 404 training return: -999.9858074980045
episode: 405 training return: -999.98667402241
episode: 406 training return: -999.9821617265393
episode: 407 training return: -999.9817180449766
epoch: 102 test_true_pfm: -0.366220743524947 sim_pfm: -999.9550590765128
episode: 408 training return: -999.990770752111
episode: 409 training return: -999.969790057226
episode: 410 training return: -999.9953731469828
episode: 411 training return: -999.9893256696092
epoch: 103 test_true_pfm: -0.43195992546390355 sim_pfm: -999.9566073739724
episode: 412 training return: -999.9755647780456
episode: 413 training return: -999.9755525633487
episode: 414 training return: -999.9906307490386
episode: 415 training return: -999.9936884008912
epoch: 104 test_true_pfm: 0.05490246845687996 sim_pfm: -999.9538442165143
episode: 416 training return: -999.9809489958759
episode: 417 training return: -999.9734305433924
episode: 418 training return: -999.9948249442865
episode: 419 training return: -999.9858797666965
epoch: 105 test_true_pfm: -0.3889464023378392 sim_pfm: -999.9555079036613
episode: 420 training return: -999.9918291201186
episode: 421 training return: -999.9766034270314
episode: 422 training return: -1000.0004258305543
episode: 423 training return: -999.9863540332516
epoch: 106 test_true_pfm: 0.22505650203757752 sim_pfm: -999.9569788052282
episode: 424 training return: -999.985082632036
episode: 425 training return: -999.9820010247921
episode: 426 training return: -999.9883215221159
episode: 427 training return: -999.9814954974241
epoch: 107 test_true_pfm: -0.5025844387403446 sim_pfm: -999.955748572657
episode: 428 training return: -999.9829919815888
episode: 429 training return: -999.9602920484949
episode: 430 training return: -999.9893529185832
episode: 431 training return: -999.9926278847033
epoch: 108 test_true_pfm: -0.5980295233988738 sim_pfm: -999.955236915008
episode: 432 training return: -999.9948817611898
episode: 433 training return: -999.9818808414084
episode: 434 training return: -999.9877016911521
episode: 435 training return: -999.9778996106103
epoch: 109 test_true_pfm: 0.27120833746345485 sim_pfm: -999.9573518065584
episode: 436 training return: -999.983742496438
episode: 437 training return: -999.9749612598296
episode: 438 training return: -999.98939733756
episode: 439 training return: -999.992326846259
epoch: 110 test_true_pfm: 0.6073521740694042 sim_pfm: -999.9563500596996
episode: 440 training return: -999.9759461903418
episode: 441 training return: -999.9791739913514
episode: 442 training return: -999.9798277986872
episode: 443 training return: -999.98414393502
epoch: 111 test_true_pfm: -0.20442667189217625 sim_pfm: -999.9553891074305
episode: 444 training return: -999.9826235076152
episode: 445 training return: -999.9599439093131
episode: 446 training return: -1000.0169098065891
episode: 447 training return: -999.9842211096234
epoch: 112 test_true_pfm: -0.8756745508179429 sim_pfm: -999.9556900507055
episode: 448 training return: -999.9845649042117
episode: 449 training return: -999.99167836851
episode: 450 training return: -999.9910755965973
episode: 451 training return: -999.9325892354786
epoch: 113 test_true_pfm: 0.3962209423521217 sim_pfm: -999.9543569858303
episode: 452 training return: -999.9916574501639
episode: 453 training return: -999.9820536086129
episode: 454 training return: -999.9738544888086
episode: 455 training return: -999.9736439037503
epoch: 114 test_true_pfm: -0.03949892812868633 sim_pfm: -999.9565484247
episode: 456 training return: -999.9911450684416
episode: 457 training return: -999.9890715723068
episode: 458 training return: -999.97849386982
episode: 459 training return: -999.9853720477463
epoch: 115 test_true_pfm: -0.4953691430681823 sim_pfm: -999.9562235771451
episode: 460 training return: -999.9704684916038
episode: 461 training return: -999.9882730369724
episode: 462 training return: -999.9821879688665
episode: 463 training return: -999.9922007562943
epoch: 116 test_true_pfm: -0.4172309218315504 sim_pfm: -999.957756529266
episode: 464 training return: -999.9892790778755
episode: 465 training return: -999.9843465071659
episode: 466 training return: -999.9758034236407
episode: 467 training return: -999.9723935206077
epoch: 117 test_true_pfm: -0.3927166424768109 sim_pfm: -999.9536324786471
episode: 468 training return: -999.9102357501338
episode: 469 training return: -999.9761584590258
episode: 470 training return: -999.9948764708728
episode: 471 training return: -999.9745584426854
epoch: 118 test_true_pfm: -0.7227540974420218 sim_pfm: -999.9544708604489
episode: 472 training return: -999.9805572471214
episode: 473 training return: -999.9856309469159
episode: 474 training return: -999.9731103831717
episode: 475 training return: -999.8857560371564
epoch: 119 test_true_pfm: -0.5210465508744848 sim_pfm: -999.9572791263977
episode: 476 training return: -999.9809058028661
episode: 477 training return: -1000.1834122335086
episode: 478 training return: -999.977239214607
episode: 479 training return: -999.9539167321016
epoch: 120 test_true_pfm: 0.3038978955902842 sim_pfm: -999.9546686267119
episode: 480 training return: -999.9858625466551
episode: 481 training return: -999.9709642258255
episode: 482 training return: -999.9748324891173
episode: 483 training return: -999.9809785211419
epoch: 121 test_true_pfm: -1.0401832823556294 sim_pfm: -999.9556743837675
episode: 484 training return: -999.9943275872466
episode: 485 training return: -999.9878926015977
episode: 486 training return: -999.9750463495553
episode: 487 training return: -999.9898665707846
epoch: 122 test_true_pfm: -0.948001863782823 sim_pfm: -999.9571820442665
episode: 488 training return: -999.993224594348
episode: 489 training return: -999.9801092220736
episode: 490 training return: -999.9608933240021
episode: 491 training return: -999.9866999505872
epoch: 123 test_true_pfm: -0.8495104619573963 sim_pfm: -999.9559203971388
episode: 492 training return: -999.9765229079588
episode: 493 training return: -999.9898705646261
episode: 494 training return: -999.9642331998151
episode: 495 training return: -999.9869629919048
epoch: 124 test_true_pfm: -0.16190501806408178 sim_pfm: -999.9551751031885
episode: 496 training return: -999.989416110868
episode: 497 training return: -999.9892877239131
episode: 498 training return: -999.9541988703199
episode: 499 training return: -999.9947438273841
epoch: 125 test_true_pfm: -0.4504566761171879 sim_pfm: -999.9554079773103
episode: 500 training return: -999.996810262347
episode: 501 training return: -999.9729034515386
episode: 502 training return: -999.9883718874854
episode: 503 training return: -1000.0949750852635
epoch: 126 test_true_pfm: -0.34909546969933514 sim_pfm: -999.9560654403118
episode: 504 training return: -999.9586761566645
episode: 505 training return: -999.9889690835599
episode: 506 training return: -999.8939625202597
episode: 507 training return: -999.9896795204095
epoch: 127 test_true_pfm: 0.056048717683730355 sim_pfm: -999.9558515762207
episode: 508 training return: -999.9841204962831
episode: 509 training return: -999.9671820147028
episode: 510 training return: -999.9623799665455
episode: 511 training return: -999.9708496092899
epoch: 128 test_true_pfm: -0.1289031707079232 sim_pfm: -999.9574171753952
episode: 512 training return: -999.9879643318401
episode: 513 training return: -999.9926687786913
episode: 514 training return: -999.9770585466683
episode: 515 training return: -999.9898290858382
epoch: 129 test_true_pfm: -0.6602642625192595 sim_pfm: -999.9564175216309
episode: 516 training return: -999.9626859828851
episode: 517 training return: -999.9770149160368
episode: 518 training return: -999.9793672274192
episode: 519 training return: -999.975758245175
epoch: 130 test_true_pfm: -0.6487435499523648 sim_pfm: -999.9549395589866
episode: 520 training return: -999.9936580750633
episode: 521 training return: -999.9472095702173
episode: 522 training return: -999.9748482902542
episode: 523 training return: -999.9746047924649
epoch: 131 test_true_pfm: -0.19396603334006665 sim_pfm: -999.9552068304776
episode: 524 training return: -999.9741906395188
episode: 525 training return: -999.986369363012
episode: 526 training return: -999.9917727735333
episode: 527 training return: -999.9942867537568
epoch: 132 test_true_pfm: 0.006153338267553839 sim_pfm: -999.9556207312311
episode: 528 training return: -999.980330014582
episode: 529 training return: -999.9605522295506
episode: 530 training return: -999.939298107711
episode: 531 training return: -1000.0426942068779
epoch: 133 test_true_pfm: 0.04397110680794982 sim_pfm: -999.955440814612
episode: 532 training return: -1000.0178045735021
episode: 533 training return: -999.9862139416367
episode: 534 training return: -999.9903036092182
episode: 535 training return: -999.9882120792674
epoch: 134 test_true_pfm: 0.0439100491435556 sim_pfm: -999.9553456258473
episode: 536 training return: -999.9917589964738
episode: 537 training return: -999.9877267941645
episode: 538 training return: -999.9822983110328
episode: 539 training return: -999.9588591093523
epoch: 135 test_true_pfm: -0.47265946749984145 sim_pfm: -999.954528210728
episode: 540 training return: -1000.0452532072544
episode: 541 training return: -999.9917151173714
episode: 542 training return: -999.9921144630948
episode: 543 training return: -999.9676336388296
epoch: 136 test_true_pfm: -0.5372920147442529 sim_pfm: -999.9571931898009
episode: 544 training return: -999.9849255300671
episode: 545 training return: -999.9734236279236
episode: 546 training return: -999.987952438589
episode: 547 training return: -999.9797253969717
epoch: 137 test_true_pfm: -0.002680776228961313 sim_pfm: -999.9561199151493
episode: 548 training return: -999.9723550899043
episode: 549 training return: -1000.1265991384055
episode: 550 training return: -999.9772855313952
episode: 551 training return: -999.9496800619195
epoch: 138 test_true_pfm: -0.4784709020937494 sim_pfm: -999.9558096669501
episode: 552 training return: -999.9974450195211
episode: 553 training return: -999.9870416956709
episode: 554 training return: -999.9925179919487
episode: 555 training return: -999.9724565043575
epoch: 139 test_true_pfm: -0.718446561372208 sim_pfm: -999.9569767959662
episode: 556 training return: -999.9804812770387
episode: 557 training return: -999.9927110172223
episode: 558 training return: -999.9838826284348
episode: 559 training return: -999.9910163367533
epoch: 140 test_true_pfm: -0.6525212069571892 sim_pfm: -999.9560032114878
episode: 560 training return: -999.9948225552855
episode: 561 training return: -999.9939759893573
episode: 562 training return: -999.9941366384213
episode: 563 training return: -999.9448479233738
epoch: 141 test_true_pfm: -0.6563366083512668 sim_pfm: -999.9567637163449
episode: 564 training return: -999.9922234954461
episode: 565 training return: -999.9903803953284
episode: 566 training return: -999.9625322590185
episode: 567 training return: -999.9939140606449
epoch: 142 test_true_pfm: -0.0370982363652431 sim_pfm: -999.9555981610924
episode: 568 training return: -999.9618415301787
episode: 569 training return: -999.9651197170705
episode: 570 training return: -999.9898414255945
episode: 571 training return: -999.9826821252462
epoch: 143 test_true_pfm: -0.17888659506700502 sim_pfm: -999.956424476674
episode: 572 training return: -999.980691376579
episode: 573 training return: -999.9799448861777
episode: 574 training return: -999.9675820822557
episode: 575 training return: -999.9940088714839
epoch: 144 test_true_pfm: 0.5163014642811506 sim_pfm: -999.9555229573957
episode: 576 training return: -999.9677798745836
episode: 577 training return: -999.9692769942412
episode: 578 training return: -999.9844799590636
episode: 579 training return: -999.970583564605
epoch: 145 test_true_pfm: 0.05524431771952279 sim_pfm: -999.9556402722129
episode: 580 training return: -999.9900631088954
episode: 581 training return: -999.9723902795017
episode: 582 training return: -999.9945714586453
episode: 583 training return: -999.9701513132657
epoch: 146 test_true_pfm: -0.4469730497528252 sim_pfm: -999.9577266094794
episode: 584 training return: -999.991201778995
episode: 585 training return: -999.9499764328311
episode: 586 training return: -999.9699684703513
episode: 587 training return: -999.9826935528682
epoch: 147 test_true_pfm: -0.1193967796448822 sim_pfm: -999.9550736424503
episode: 588 training return: -999.9890281727859
episode: 589 training return: -1000.0096127323313
episode: 590 training return: -999.9926910457368
episode: 591 training return: -999.9973148834789
epoch: 148 test_true_pfm: 0.3754990579402244 sim_pfm: -999.9570475529581
episode: 592 training return: -999.9798703003942
episode: 593 training return: -999.9288928587399
episode: 594 training return: -999.9522727372836
episode: 595 training return: -999.9721957432341
epoch: 149 test_true_pfm: 0.17400624793816932 sim_pfm: -999.957412317573
episode: 596 training return: -999.9766824219188
episode: 597 training return: -999.9540624418955
episode: 598 training return: -1000.0033153374718
episode: 599 training return: -999.9924694818031
epoch: 150 test_true_pfm: -0.07970756194585032 sim_pfm: -999.9533283808631
