['--alg', 'sac', '--env', 'Walker2d-v3', '--learn', 'uncertainty', '--traj', 'medium', '--seed', '4', '--data', '100000']
epoch: 0 training_loss 0.26356348738074303 test_loss: 0.19475678205490113
epoch: 1 training_loss 0.15834338162094355 test_loss: 0.1472030758857727
epoch: 2 training_loss 0.12646568730473517 test_loss: 0.14736698865890502
epoch: 3 training_loss 0.12066680401563644 test_loss: 0.13129887580871583
epoch: 4 training_loss 0.11300028424710035 test_loss: 0.12916772365570067
epoch: 5 training_loss 0.10696240492165089 test_loss: 0.1271906852722168
epoch: 6 training_loss 0.12280047822743655 test_loss: 0.10544053316116334
epoch: 7 training_loss 0.10918919526040555 test_loss: 0.13424007892608641
epoch: 8 training_loss 0.12029665779322386 test_loss: 0.1055712103843689
epoch: 9 training_loss 0.1072207742743194 test_loss: 0.10454486608505249
epoch: 10 training_loss 0.09943421620875598 test_loss: 0.11298043727874756
epoch: 11 training_loss 0.10415370557457208 test_loss: 0.10324028730392457
epoch: 12 training_loss 0.10698806289583444 test_loss: 0.1061941385269165
epoch: 13 training_loss 0.10383620193228126 test_loss: 0.12592856884002684
epoch: 14 training_loss 0.10080355986952781 test_loss: 0.10358794927597045
epoch: 15 training_loss 0.09586178693920373 test_loss: 0.10819298028945923
epoch: 16 training_loss 0.10228311166167259 test_loss: 0.09525695443153381
epoch: 17 training_loss 0.1025633355602622 test_loss: 0.12369965314865113
epoch: 18 training_loss 0.10137931929901242 test_loss: 0.10748466253280639
epoch: 19 training_loss 0.09754019118845463 test_loss: 0.11017425060272217
epoch: 20 training_loss 0.09908050958067179 test_loss: 0.10144401788711548
epoch: 21 training_loss 0.09698233438655735 test_loss: 0.10940136909484863
epoch: 22 training_loss 0.09381214331835508 test_loss: 0.09945561289787293
epoch: 23 training_loss 0.09527138767763972 test_loss: 0.10772252082824707
epoch: 24 training_loss 0.09167938604950905 test_loss: 0.10575461387634277
epoch: 25 training_loss 0.09287419107742607 test_loss: 0.09182420372962952
epoch: 26 training_loss 0.09355905197560788 test_loss: 0.1059154748916626
epoch: 27 training_loss 0.0980582313798368 test_loss: 0.10016345977783203
epoch: 28 training_loss 0.09040090816095471 test_loss: 0.12359167337417602
epoch: 29 training_loss 0.09161226645112037 test_loss: 0.11242331266403198
epoch: 30 training_loss 0.09280920661985874 test_loss: 0.10334600210189819
epoch: 31 training_loss 0.09735071498900652 test_loss: 0.0933886706829071
epoch: 32 training_loss 0.08730332259088755 test_loss: 0.10359834432601929
epoch: 33 training_loss 0.08716910421848297 test_loss: 0.11252316236495971
epoch: 34 training_loss 0.09626666817814111 test_loss: 0.10900083780288697
epoch: 35 training_loss 0.0930723301321268 test_loss: 0.11612542867660522
epoch: 36 training_loss 0.09666831966489553 test_loss: 0.10247551202774048
epoch: 37 training_loss 0.09676179714500904 test_loss: 0.099009108543396
epoch: 38 training_loss 0.09371289117261768 test_loss: 0.10209065675735474
epoch: 39 training_loss 0.09635358287021517 test_loss: 0.09294485449790954
epoch: 40 training_loss 0.08824751613661647 test_loss: 0.10686453580856323
epoch: 41 training_loss 0.09407216789200902 test_loss: 0.10260076522827148
epoch: 42 training_loss 0.0903597417473793 test_loss: 0.09481111168861389
epoch: 43 training_loss 0.0914174884185195 test_loss: 0.10805977582931518
epoch: 44 training_loss 0.09307105826213956 test_loss: 0.11786904335021972
epoch: 45 training_loss 0.08824353359639645 test_loss: 0.09350399971008301
epoch: 46 training_loss 0.09927545659244061 test_loss: 0.10414038896560669
epoch: 47 training_loss 0.08961688747629523 test_loss: 0.10489729642868043
epoch: 48 training_loss 0.08449351092800499 test_loss: 0.09075393080711365
epoch: 49 training_loss 0.08663635717704893 test_loss: 0.08279652595520019
epoch: 50 training_loss 0.09728274589404463 test_loss: 0.08560864925384522
epoch: 51 training_loss 0.08300493003800512 test_loss: 0.09748376607894897
epoch: 52 training_loss 0.09314863637089729 test_loss: 0.1073195219039917
epoch: 53 training_loss 0.09528218621388078 test_loss: 0.10007317066192627
epoch: 54 training_loss 0.0895584612339735 test_loss: 0.1066091775894165
epoch: 55 training_loss 0.09003071328625083 test_loss: 0.11600937843322753
epoch: 56 training_loss 0.08556201589293778 test_loss: 0.09126710891723633
epoch: 57 training_loss 0.09120899399742484 test_loss: 0.11090638637542724
epoch: 58 training_loss 0.08578946527093649 test_loss: 0.08568653464317322
epoch: 59 training_loss 0.09001155731268227 test_loss: 0.1115949034690857
epoch: 60 training_loss 0.09026396717876196 test_loss: 0.09847835302352906
epoch: 61 training_loss 0.0831469669751823 test_loss: 0.11442471742630005
epoch: 62 training_loss 0.0940562374331057 test_loss: 0.09093435406684876
epoch: 63 training_loss 0.09288283284753561 test_loss: 0.11803927421569824
epoch: 64 training_loss 0.08780438434332609 test_loss: 0.08692275285720825
epoch: 65 training_loss 0.08755897738039493 test_loss: 0.10189554691314698
epoch: 66 training_loss 0.09022277500480413 test_loss: 0.1015138030052185
epoch: 67 training_loss 0.08662736162543297 test_loss: 0.09529694318771362
epoch: 68 training_loss 0.08649742983281612 test_loss: 0.10759247541427612
epoch: 69 training_loss 0.0876286391634494 test_loss: 0.10198673009872436
epoch: 70 training_loss 0.08983500743284821 test_loss: 0.1140315055847168
epoch: 71 training_loss 0.08924345454201102 test_loss: 0.10199851989746093
epoch: 72 training_loss 0.08793224304914475 test_loss: 0.097021484375
epoch: 73 training_loss 0.09101595958694815 test_loss: 0.10970925092697144
epoch: 74 training_loss 0.08298421947285534 test_loss: 0.10922714471817016
epoch: 75 training_loss 0.08744542431086302 test_loss: 0.1098359227180481
epoch: 76 training_loss 0.08554193506017327 test_loss: 0.11527698040008545
epoch: 77 training_loss 0.08282342072576285 test_loss: 0.10116777420043946
epoch: 78 training_loss 0.09188889913260936 test_loss: 0.10402209758758545
epoch: 79 training_loss 0.07964038584381342 test_loss: 0.114119553565979
epoch: 80 training_loss 0.08574239818379283 test_loss: 0.12342411279678345
epoch: 81 training_loss 0.08356032989919186 test_loss: 0.10141997337341309
epoch: 82 training_loss 0.08192794151604176 test_loss: 0.10763078927993774
epoch: 83 training_loss 0.07522003496065736 test_loss: 0.10271965265274048
epoch: 84 training_loss 0.08528800813481212 test_loss: 0.11131747961044311
epoch: 85 training_loss 0.08561605339869857 test_loss: 0.105242919921875
epoch: 86 training_loss 0.08815394178032875 test_loss: 0.10159595012664795
epoch: 87 training_loss 0.08414074150845409 test_loss: 0.10669856071472168
epoch: 88 training_loss 0.07934931505471468 test_loss: 0.11752161979675294
epoch: 89 training_loss 0.08772944223135709 test_loss: 0.1064615249633789
epoch: 90 training_loss 0.07999173186719417 test_loss: 0.10090159177780152
epoch: 91 training_loss 0.08274408472701907 test_loss: 0.10185884237289429
epoch: 92 training_loss 0.08588166011497378 test_loss: 0.10201730728149414
epoch: 93 training_loss 0.07924158953130245 test_loss: 0.10207945108413696
epoch: 94 training_loss 0.0883616740256548 test_loss: 0.10402029752731323
epoch: 95 training_loss 0.08829386372119188 test_loss: 0.09609156847000122
epoch: 96 training_loss 0.08505336116999387 test_loss: 0.08856046199798584
epoch: 97 training_loss 0.08821679754182696 test_loss: 0.10204554796218872
epoch: 98 training_loss 0.08921715267002582 test_loss: 0.1013339877128601
epoch: 99 training_loss 0.08112929478287696 test_loss: 0.10574322938919067
epoch: 100 training_loss 0.08289964143186808 test_loss: 0.10863521099090576
epoch: 101 training_loss 0.0818372555822134 test_loss: 0.10401867628097534
epoch: 102 training_loss 0.08342226728796959 test_loss: 0.0921432375907898
epoch: 103 training_loss 0.08709020324051381 test_loss: 0.0865069031715393
epoch: 104 training_loss 0.08633362960070372 test_loss: 0.11689046621322632
epoch: 105 training_loss 0.08600827530026436 test_loss: 0.0950382649898529
epoch: 106 training_loss 0.08356479366309941 test_loss: 0.11403471231460571
epoch: 107 training_loss 0.080272012418136 test_loss: 0.09963979125022888
epoch: 108 training_loss 0.08675270423293113 test_loss: 0.11719694137573242
epoch: 109 training_loss 0.08072717109695077 test_loss: 0.09836091995239257
epoch: 110 training_loss 0.08499756105244159 test_loss: 0.08297790884971619
epoch: 111 training_loss 0.08033982552587986 test_loss: 0.10855191946029663
epoch: 112 training_loss 0.0815263312496245 test_loss: 0.09997566342353821
epoch: 113 training_loss 0.08000367739237844 test_loss: 0.12050682306289673
epoch: 114 training_loss 0.08487887309864163 test_loss: 0.08774280548095703
epoch: 115 training_loss 0.08726158482953907 test_loss: 0.10530996322631836
epoch: 116 training_loss 0.0835667970776558 test_loss: 0.08974713087081909
epoch: 117 training_loss 0.08276479734107851 test_loss: 0.10403062105178833
epoch: 118 training_loss 0.08142678508535027 test_loss: 0.09471688270568848
epoch: 119 training_loss 0.07798942961730063 test_loss: 0.1004646897315979
epoch: 120 training_loss 0.07741223770193756 test_loss: 0.11217129230499268
epoch: 121 training_loss 0.08279224872589111 test_loss: 0.11489818096160889
epoch: 122 training_loss 0.08335365230217576 test_loss: 0.09176315069198608
epoch: 123 training_loss 0.08047349546104669 test_loss: 0.11349546909332275
epoch: 124 training_loss 0.08123136905953288 test_loss: 0.10205380916595459
epoch: 125 training_loss 0.08100999727845191 test_loss: 0.1038215160369873
epoch: 126 training_loss 0.08365367699414492 test_loss: 0.10674790143966675
epoch: 127 training_loss 0.08008237052708864 test_loss: 0.10514595508575439
epoch: 128 training_loss 0.08716807786375284 test_loss: 0.11194591522216797
epoch: 129 training_loss 0.07820080481469631 test_loss: 0.11058520078659058
epoch: 130 training_loss 0.08234263814985752 test_loss: 0.08115057349205017
epoch: 131 training_loss 0.08315117405727505 test_loss: 0.10134423971176147
epoch: 132 training_loss 0.08130806656554342 test_loss: 0.11911630630493164
epoch: 133 training_loss 0.08406577777117491 test_loss: 0.1019667148590088
epoch: 134 training_loss 0.08818667694926262 test_loss: 0.12367655038833618
epoch: 135 training_loss 0.07821704315021634 test_loss: 0.08208388686180115
epoch: 136 training_loss 0.07546545784920454 test_loss: 0.08716685771942138
epoch: 137 training_loss 0.08220847299322485 test_loss: 0.09890081882476806
epoch: 138 training_loss 0.08379967534448951 test_loss: 0.08986158967018128
epoch: 139 training_loss 0.08596599782817066 test_loss: 0.09253995418548584
epoch: 140 training_loss 0.08310770770534873 test_loss: 0.09323261976242066
epoch: 141 training_loss 0.07865815035998822 test_loss: 0.10608799457550049
epoch: 142 training_loss 0.08818442797288299 test_loss: 0.09963228702545165
epoch: 143 training_loss 0.08240599419921636 test_loss: 0.12166240215301513
epoch: 144 training_loss 0.08475352525711059 test_loss: 0.09212075471878052
epoch: 145 training_loss 0.08053116785362363 test_loss: 0.10270828008651733
epoch: 146 training_loss 0.08088857593014837 test_loss: 0.10811516046524047
epoch: 147 training_loss 0.07932614980265498 test_loss: 0.10408915281295776
epoch: 148 training_loss 0.08252996837720275 test_loss: 0.08983257412910461
epoch: 149 training_loss 0.08418025750666856 test_loss: 0.10366703271865844
epoch: 0 training_loss 0.2636613041907549 test_loss: 0.16652872562408447
epoch: 1 training_loss 0.1572399700433016 test_loss: 0.13697271347045897
epoch: 2 training_loss 0.13409163482487202 test_loss: 0.11589293479919434
epoch: 3 training_loss 0.1289191590808332 test_loss: 0.11882848739624023
epoch: 4 training_loss 0.12450231987982989 test_loss: 0.10005478858947754
epoch: 5 training_loss 0.10974682800471783 test_loss: 0.1044427514076233
epoch: 6 training_loss 0.1107581852376461 test_loss: 0.0905115246772766
epoch: 7 training_loss 0.10603229638189077 test_loss: 0.09942733645439147
epoch: 8 training_loss 0.10969737268984318 test_loss: 0.11516414880752564
epoch: 9 training_loss 0.0999320776388049 test_loss: 0.11585659980773926
epoch: 10 training_loss 0.09816695470362902 test_loss: 0.09892546534538268
epoch: 11 training_loss 0.10228293508291245 test_loss: 0.0980195939540863
epoch: 12 training_loss 0.10148743972182274 test_loss: 0.08516423106193542
epoch: 13 training_loss 0.10188204981386662 test_loss: 0.09403817653656006
epoch: 14 training_loss 0.11284881211817265 test_loss: 0.1230006456375122
epoch: 15 training_loss 0.10101773886010051 test_loss: 0.09049975872039795
epoch: 16 training_loss 0.09441248610615731 test_loss: 0.10403275489807129
epoch: 17 training_loss 0.1021588839031756 test_loss: 0.09703209400177001
epoch: 18 training_loss 0.09780362175777554 test_loss: 0.09378836750984192
epoch: 19 training_loss 0.10709394428879022 test_loss: 0.10442080497741699
epoch: 20 training_loss 0.09229303123429418 test_loss: 0.08925906419754029
epoch: 21 training_loss 0.09727623809128999 test_loss: 0.10753611326217652
epoch: 22 training_loss 0.08807490764185787 test_loss: 0.12228065729141235
epoch: 23 training_loss 0.09546218447387218 test_loss: 0.10524414777755738
epoch: 24 training_loss 0.1054543524608016 test_loss: 0.08889957070350647
epoch: 25 training_loss 0.0975325122475624 test_loss: 0.11820032596588134
epoch: 26 training_loss 0.0963546247407794 test_loss: 0.088751620054245
epoch: 27 training_loss 0.09125486642122269 test_loss: 0.0912543535232544
epoch: 28 training_loss 0.08762005204334855 test_loss: 0.10626794099807739
epoch: 29 training_loss 0.09182635810226202 test_loss: 0.09131178259849548
epoch: 30 training_loss 0.09835855530574918 test_loss: 0.10244497060775756
epoch: 31 training_loss 0.08820752993226051 test_loss: 0.092136150598526
epoch: 32 training_loss 0.0898376682586968 test_loss: 0.11062308549880981
epoch: 33 training_loss 0.09645288672298193 test_loss: 0.09226049184799194
epoch: 34 training_loss 0.08682006780058145 test_loss: 0.10074125528335572
epoch: 35 training_loss 0.08669679466634989 test_loss: 0.10483146905899048
epoch: 36 training_loss 0.09188825491815805 test_loss: 0.10479142665863037
epoch: 37 training_loss 0.0862811222113669 test_loss: 0.09876923561096192
epoch: 38 training_loss 0.08936941368505359 test_loss: 0.09722735285758972
epoch: 39 training_loss 0.0914420253969729 test_loss: 0.09840568900108337
epoch: 40 training_loss 0.08269843447953462 test_loss: 0.11222285032272339
epoch: 41 training_loss 0.0908183965459466 test_loss: 0.10054060220718383
epoch: 42 training_loss 0.08146349931135774 test_loss: 0.09523076415061951
epoch: 43 training_loss 0.08573565050959588 test_loss: 0.0976466715335846
epoch: 44 training_loss 0.08439905292354524 test_loss: 0.10058887004852295
epoch: 45 training_loss 0.08579977937042713 test_loss: 0.10910521745681763
epoch: 46 training_loss 0.08762252772226929 test_loss: 0.10090014934539795
epoch: 47 training_loss 0.09599756002426148 test_loss: 0.09617051482200623
epoch: 48 training_loss 0.09564054057002068 test_loss: 0.09530662298202515
epoch: 49 training_loss 0.09158877670764923 test_loss: 0.08685705065727234
epoch: 50 training_loss 0.08412340730428695 test_loss: 0.0854791522026062
epoch: 51 training_loss 0.08243304740637541 test_loss: 0.08369524478912353
epoch: 52 training_loss 0.08304957522079348 test_loss: 0.10563375949859619
epoch: 53 training_loss 0.08132978189736605 test_loss: 0.09038804173469543
epoch: 54 training_loss 0.08850720487534999 test_loss: 0.08986990451812744
epoch: 55 training_loss 0.0859995255805552 test_loss: 0.09382858872413635
epoch: 56 training_loss 0.08482700298540294 test_loss: 0.10393673181533813
epoch: 57 training_loss 0.09045803794637322 test_loss: 0.10293575525283813
epoch: 58 training_loss 0.08256347203627229 test_loss: 0.11585898399353027
epoch: 59 training_loss 0.0888274654187262 test_loss: 0.11098264455795288
epoch: 60 training_loss 0.08841304752975702 test_loss: 0.1030699372291565
epoch: 61 training_loss 0.08431262120604516 test_loss: 0.09301073551177978
epoch: 62 training_loss 0.08145840166136623 test_loss: 0.09621542096138
epoch: 63 training_loss 0.08389760777354241 test_loss: 0.10384935140609741
epoch: 64 training_loss 0.08154033061116933 test_loss: 0.10723375082015991
epoch: 65 training_loss 0.08644130568951368 test_loss: 0.1033029317855835
epoch: 66 training_loss 0.08940651975572109 test_loss: 0.0924898624420166
epoch: 67 training_loss 0.08735813692212105 test_loss: 0.09990423917770386
epoch: 68 training_loss 0.07764074865728617 test_loss: 0.09258995652198791
epoch: 69 training_loss 0.0791784287430346 test_loss: 0.10572108030319213
epoch: 70 training_loss 0.08718268143944442 test_loss: 0.09596234560012817
epoch: 71 training_loss 0.08362012112513184 test_loss: 0.10366008281707764
epoch: 72 training_loss 0.08470448788255452 test_loss: 0.12278746366500855
epoch: 73 training_loss 0.08943902600556612 test_loss: 0.08761342763900756
epoch: 74 training_loss 0.0865390608087182 test_loss: 0.09252036213874817
epoch: 75 training_loss 0.07939088804647326 test_loss: 0.10329209566116333
epoch: 76 training_loss 0.08486669819802045 test_loss: 0.11368094682693482
epoch: 77 training_loss 0.07616464603692293 test_loss: 0.09353035688400269
epoch: 78 training_loss 0.08307567639276385 test_loss: 0.10389366149902343
epoch: 79 training_loss 0.08128358403220773 test_loss: 0.09575649499893188
epoch: 80 training_loss 0.08319485848769545 test_loss: 0.10669620037078857
epoch: 81 training_loss 0.08806677162647247 test_loss: 0.11799921989440917
epoch: 82 training_loss 0.09310753885656595 test_loss: 0.10486526489257812
epoch: 83 training_loss 0.08170726343989372 test_loss: 0.09546667337417603
epoch: 84 training_loss 0.08861832166090608 test_loss: 0.09265046715736389
epoch: 85 training_loss 0.08023562043905258 test_loss: 0.10260063409805298
epoch: 86 training_loss 0.07760918766260147 test_loss: 0.1014238715171814
epoch: 87 training_loss 0.08148283088579773 test_loss: 0.11364179849624634
epoch: 88 training_loss 0.08937663005664945 test_loss: 0.09256750345230103
epoch: 89 training_loss 0.08776714794337749 test_loss: 0.09291201829910278
epoch: 90 training_loss 0.08442926190793515 test_loss: 0.09010720849037171
epoch: 91 training_loss 0.08545486938208341 test_loss: 0.0953458309173584
epoch: 92 training_loss 0.08677682111039758 test_loss: 0.08891109824180603
epoch: 93 training_loss 0.07288831327110529 test_loss: 0.08805816769599914
epoch: 94 training_loss 0.08572440888732671 test_loss: 0.0906832754611969
epoch: 95 training_loss 0.08459996147081256 test_loss: 0.0888033926486969
epoch: 96 training_loss 0.08345428798347712 test_loss: 0.09821697473526
epoch: 97 training_loss 0.08698793224059045 test_loss: 0.08717876076698303
epoch: 98 training_loss 0.07645629001781344 test_loss: 0.10672560930252076
epoch: 99 training_loss 0.07795031357556581 test_loss: 0.10093482732772827
epoch: 100 training_loss 0.07992463463917375 test_loss: 0.0968558132648468
epoch: 101 training_loss 0.08114495220594108 test_loss: 0.10484298467636108
epoch: 102 training_loss 0.0787865155376494 test_loss: 0.11346253156661987
epoch: 103 training_loss 0.07668449749238788 test_loss: 0.09714361429214477
epoch: 104 training_loss 0.08470597024075686 test_loss: 0.10014828443527221
epoch: 105 training_loss 0.08187683629803359 test_loss: 0.08538535237312317
epoch: 106 training_loss 0.08113808685913682 test_loss: 0.10287307500839234
epoch: 107 training_loss 0.08624321505427361 test_loss: 0.10114012956619263
epoch: 108 training_loss 0.08378862744197249 test_loss: 0.0954203188419342
epoch: 109 training_loss 0.07976235341280699 test_loss: 0.10679030418395996
epoch: 110 training_loss 0.07807749373838306 test_loss: 0.0823790967464447
epoch: 111 training_loss 0.06947198800742627 test_loss: 0.09638231992721558
epoch: 112 training_loss 0.08900507595390081 test_loss: 0.11354355812072754
epoch: 113 training_loss 0.07601207409054041 test_loss: 0.09748308658599854
epoch: 114 training_loss 0.07537869028747082 test_loss: 0.09133291840553284
epoch: 115 training_loss 0.08169605519622564 test_loss: 0.10526008605957031
epoch: 116 training_loss 0.0738378532603383 test_loss: 0.10941733121871948
epoch: 117 training_loss 0.07184865490533411 test_loss: 0.10156670808792115
epoch: 118 training_loss 0.07966865304857493 test_loss: 0.08334960341453553
epoch: 119 training_loss 0.08040212636813521 test_loss: 0.0955064356327057
epoch: 120 training_loss 0.08138728663325309 test_loss: 0.0964396357536316
epoch: 121 training_loss 0.08147232677787543 test_loss: 0.10605888366699219
epoch: 122 training_loss 0.07184974974021316 test_loss: 0.10547314882278443
epoch: 123 training_loss 0.07848570082336664 test_loss: 0.09659794569015503
epoch: 124 training_loss 0.07745053960010409 test_loss: 0.08666670918464661
epoch: 125 training_loss 0.07143506072461606 test_loss: 0.08903972506523132
epoch: 126 training_loss 0.07798173125833273 test_loss: 0.11096848249435425
epoch: 127 training_loss 0.08286965379491448 test_loss: 0.09595599174499511
epoch: 128 training_loss 0.08102273766882717 test_loss: 0.10305223464965821
epoch: 129 training_loss 0.07903760895133019 test_loss: 0.10827280282974243
epoch: 130 training_loss 0.07674012687057256 test_loss: 0.10273566246032714
epoch: 131 training_loss 0.07812631836161017 test_loss: 0.09569419622421264
epoch: 132 training_loss 0.07591349821537734 test_loss: 0.10202196836471558
epoch: 133 training_loss 0.07267708126455545 test_loss: 0.10376003980636597
epoch: 134 training_loss 0.07446918752044439 test_loss: 0.09720824956893921
epoch: 135 training_loss 0.08204809132963418 test_loss: 0.09574653506278992
epoch: 136 training_loss 0.07618681337684392 test_loss: 0.09376391768455505
epoch: 137 training_loss 0.07709405079483986 test_loss: 0.0881991982460022
epoch: 138 training_loss 0.0725321161467582 test_loss: 0.09893213510513306
epoch: 139 training_loss 0.07514165261760354 test_loss: 0.08484486341476441
epoch: 140 training_loss 0.07087925027124584 test_loss: 0.10653020143508911
epoch: 141 training_loss 0.08550524728372694 test_loss: 0.10485187768936158
epoch: 142 training_loss 0.07863292375579477 test_loss: 0.10900505781173705
epoch: 143 training_loss 0.07183420015498995 test_loss: 0.11015760898590088
epoch: 144 training_loss 0.08145861662924289 test_loss: 0.09344638586044311
epoch: 145 training_loss 0.07553783867508174 test_loss: 0.09931616187095642
epoch: 146 training_loss 0.07234101636335254 test_loss: 0.08899311423301696
epoch: 147 training_loss 0.07239210030063986 test_loss: 0.11943233013153076
epoch: 148 training_loss 0.07331653693690895 test_loss: 0.10102804899215698
epoch: 149 training_loss 0.08191804446280003 test_loss: 0.11287083625793456
epoch: 0 training_loss 0.27271803773939607 test_loss: 0.1818198084831238
epoch: 1 training_loss 0.15898357294499874 test_loss: 0.13868054151535034
epoch: 2 training_loss 0.15359281845390796 test_loss: 0.14468059539794922
epoch: 3 training_loss 0.11959672702476382 test_loss: 0.14720968008041382
epoch: 4 training_loss 0.1180820221081376 test_loss: 0.1224327564239502
epoch: 5 training_loss 0.11600202757865191 test_loss: 0.13118144273757934
epoch: 6 training_loss 0.10461908709257842 test_loss: 0.11642899513244628
epoch: 7 training_loss 0.10349060196429491 test_loss: 0.1448846936225891
epoch: 8 training_loss 0.10197851438075305 test_loss: 0.10098420381546021
epoch: 9 training_loss 0.10224814763292671 test_loss: 0.12317299842834473
epoch: 10 training_loss 0.11378370352089405 test_loss: 0.11111000776290894
epoch: 11 training_loss 0.10883544297888875 test_loss: 0.1316152811050415
epoch: 12 training_loss 0.09558407709002495 test_loss: 0.10669382810592651
epoch: 13 training_loss 0.10056625485420227 test_loss: 0.09853017330169678
epoch: 14 training_loss 0.09623156258836389 test_loss: 0.12233786582946778
epoch: 15 training_loss 0.09577418714761735 test_loss: 0.10656503438949586
epoch: 16 training_loss 0.09833116255700589 test_loss: 0.1166157841682434
epoch: 17 training_loss 0.09499436415731907 test_loss: 0.09284634590148926
epoch: 18 training_loss 0.10143141482025385 test_loss: 0.119086754322052
epoch: 19 training_loss 0.09623063910752534 test_loss: 0.09010656476020813
epoch: 20 training_loss 0.09467101465910673 test_loss: 0.10541081428527832
epoch: 21 training_loss 0.10098132759332656 test_loss: 0.09661167860031128
epoch: 22 training_loss 0.09560811396688224 test_loss: 0.09854142069816589
epoch: 23 training_loss 0.09222265234217047 test_loss: 0.0963184654712677
epoch: 24 training_loss 0.09290191516280175 test_loss: 0.0897141456604004
epoch: 25 training_loss 0.09288388792425394 test_loss: 0.10018661022186279
epoch: 26 training_loss 0.09688763830810786 test_loss: 0.09984752535820007
epoch: 27 training_loss 0.09569960681721568 test_loss: 0.09208655953407288
epoch: 28 training_loss 0.0916383820399642 test_loss: 0.09306831955909729
epoch: 29 training_loss 0.0986160577274859 test_loss: 0.11143147945404053
epoch: 30 training_loss 0.0960451596789062 test_loss: 0.11386398077011109
epoch: 31 training_loss 0.08904048155993223 test_loss: 0.10853418111801147
epoch: 32 training_loss 0.08703338041901588 test_loss: 0.1051950454711914
epoch: 33 training_loss 0.08470900816842913 test_loss: 0.09374052286148071
epoch: 34 training_loss 0.09100957781076431 test_loss: 0.0993939459323883
epoch: 35 training_loss 0.08522860880941152 test_loss: 0.09775657057762147
epoch: 36 training_loss 0.08924833033233881 test_loss: 0.10200903415679932
epoch: 37 training_loss 0.09048255505040288 test_loss: 0.09774651527404785
epoch: 38 training_loss 0.08912792585790158 test_loss: 0.10478016138076782
epoch: 39 training_loss 0.08933169133961201 test_loss: 0.08130170106887817
epoch: 40 training_loss 0.09121114857494832 test_loss: 0.0854918360710144
epoch: 41 training_loss 0.09354039080440998 test_loss: 0.10218577384948731
epoch: 42 training_loss 0.09160711005330086 test_loss: 0.11250813007354736
epoch: 43 training_loss 0.08608042174950242 test_loss: 0.09626461267471313
epoch: 44 training_loss 0.09684756230562926 test_loss: 0.090136057138443
epoch: 45 training_loss 0.08861239925026894 test_loss: 0.10454298257827759
epoch: 46 training_loss 0.09275041572749615 test_loss: 0.0955108106136322
epoch: 47 training_loss 0.08732968204654754 test_loss: 0.09102838039398194
epoch: 48 training_loss 0.08599845139309764 test_loss: 0.09547977447509766
epoch: 49 training_loss 0.08602338312193751 test_loss: 0.12017197608947754
epoch: 50 training_loss 0.08633503204211593 test_loss: 0.09634209871292114
epoch: 51 training_loss 0.08715877434238792 test_loss: 0.11096253395080566
epoch: 52 training_loss 0.08917072288691998 test_loss: 0.08806527256965638
epoch: 53 training_loss 0.08982516311109066 test_loss: 0.10822359323501587
epoch: 54 training_loss 0.08911586264148355 test_loss: 0.09788019061088563
epoch: 55 training_loss 0.08304089328274131 test_loss: 0.08935949802398682
epoch: 56 training_loss 0.0879907809663564 test_loss: 0.1019899606704712
epoch: 57 training_loss 0.08671598693355918 test_loss: 0.08900367617607116
epoch: 58 training_loss 0.08620390200987459 test_loss: 0.0969315767288208
epoch: 59 training_loss 0.08976372644305229 test_loss: 0.09637108445167542
epoch: 60 training_loss 0.08990294246003032 test_loss: 0.09624622464179992
epoch: 61 training_loss 0.08586501196026802 test_loss: 0.09042735695838929
epoch: 62 training_loss 0.08378189317882061 test_loss: 0.08480452299118042
epoch: 63 training_loss 0.0867226804792881 test_loss: 0.08498402237892151
epoch: 64 training_loss 0.08462875096127391 test_loss: 0.09074788689613342
epoch: 65 training_loss 0.09341853588819504 test_loss: 0.08698663711547852
epoch: 66 training_loss 0.08652989456430077 test_loss: 0.09261662364006043
epoch: 67 training_loss 0.09149634934961796 test_loss: 0.09983130097389221
epoch: 68 training_loss 0.08457244766876101 test_loss: 0.09923210144042968
epoch: 69 training_loss 0.08303875464946031 test_loss: 0.10044939517974853
epoch: 70 training_loss 0.07978470649570227 test_loss: 0.08164632320404053
epoch: 71 training_loss 0.0838780621625483 test_loss: 0.10609005689620972
epoch: 72 training_loss 0.08267356997355818 test_loss: 0.1072360634803772
epoch: 73 training_loss 0.08253064967691898 test_loss: 0.09692207574844361
epoch: 74 training_loss 0.08256324842572212 test_loss: 0.09465066194534302
epoch: 75 training_loss 0.08530675014480948 test_loss: 0.09522238373756409
epoch: 76 training_loss 0.08232119521126151 test_loss: 0.10292634963989258
epoch: 77 training_loss 0.07748410657048226 test_loss: 0.09188722372055054
epoch: 78 training_loss 0.08198757257312536 test_loss: 0.09797984957695008
epoch: 79 training_loss 0.08348033944144845 test_loss: 0.08272086381912232
epoch: 80 training_loss 0.08785782465711237 test_loss: 0.10017514228820801
epoch: 81 training_loss 0.08420115202665329 test_loss: 0.1041684627532959
epoch: 82 training_loss 0.08518956925719977 test_loss: 0.09450628757476806
epoch: 83 training_loss 0.08872669458389282 test_loss: 0.08331226706504821
epoch: 84 training_loss 0.07719604069367052 test_loss: 0.10009323358535767
epoch: 85 training_loss 0.08639249691739678 test_loss: 0.10923638343811035
epoch: 86 training_loss 0.07935678604990244 test_loss: 0.09289223551750184
epoch: 87 training_loss 0.08350242134183646 test_loss: 0.08894010782241821
epoch: 88 training_loss 0.08645007042214274 test_loss: 0.09532666206359863
epoch: 89 training_loss 0.0770440568216145 test_loss: 0.10229573249816895
epoch: 90 training_loss 0.08532334031537175 test_loss: 0.0868608295917511
epoch: 91 training_loss 0.08265057090669871 test_loss: 0.09281431436538697
epoch: 92 training_loss 0.08305413290858268 test_loss: 0.10543867349624633
epoch: 93 training_loss 0.08045627363026142 test_loss: 0.10127980709075927
epoch: 94 training_loss 0.08633732935413718 test_loss: 0.09309473037719726
epoch: 95 training_loss 0.07711410956457257 test_loss: 0.08957167267799378
epoch: 96 training_loss 0.07609797578305005 test_loss: 0.1039473533630371
epoch: 97 training_loss 0.08191806273534893 test_loss: 0.09573518633842468
epoch: 98 training_loss 0.08321411335840821 test_loss: 0.10365141630172729
epoch: 99 training_loss 0.07921119451522828 test_loss: 0.10253592729568481
epoch: 100 training_loss 0.07913098501041532 test_loss: 0.0926367998123169
epoch: 101 training_loss 0.08128527009859682 test_loss: 0.08139973282814025
epoch: 102 training_loss 0.08351316602900624 test_loss: 0.10344266891479492
epoch: 103 training_loss 0.07732790838927031 test_loss: 0.08736015558242798
epoch: 104 training_loss 0.08376567607745528 test_loss: 0.08900835514068603
epoch: 105 training_loss 0.07593689477071167 test_loss: 0.09257926344871521
epoch: 106 training_loss 0.08048836318776012 test_loss: 0.10892164707183838
epoch: 107 training_loss 0.08064224922098219 test_loss: 0.08627501726150513
epoch: 108 training_loss 0.07846314510330558 test_loss: 0.09656856656074524
epoch: 109 training_loss 0.08673540882766247 test_loss: 0.09206509590148926
epoch: 110 training_loss 0.08211762946099042 test_loss: 0.09546135663986206
epoch: 111 training_loss 0.0825577993132174 test_loss: 0.08393159508705139
epoch: 112 training_loss 0.07926988128572703 test_loss: 0.0981947124004364
epoch: 113 training_loss 0.07956278765574097 test_loss: 0.11039036512374878
epoch: 114 training_loss 0.08458884403109551 test_loss: 0.10506275892257691
epoch: 115 training_loss 0.07784722683951259 test_loss: 0.08920911550521851
epoch: 116 training_loss 0.0779541466385126 test_loss: 0.10071063041687012
epoch: 117 training_loss 0.07948289734311402 test_loss: 0.10754817724227905
epoch: 118 training_loss 0.08345454275608062 test_loss: 0.1022344946861267
epoch: 119 training_loss 0.07962672544643283 test_loss: 0.10398163795471191
epoch: 120 training_loss 0.08049057274125516 test_loss: 0.09487441778182984
epoch: 121 training_loss 0.07543707836419344 test_loss: 0.11482391357421876
epoch: 122 training_loss 0.08788308832794428 test_loss: 0.09882078170776368
epoch: 123 training_loss 0.07701679218560457 test_loss: 0.096761816740036
epoch: 124 training_loss 0.0767056380212307 test_loss: 0.11320775747299194
epoch: 125 training_loss 0.08165854375809431 test_loss: 0.10907891988754273
epoch: 126 training_loss 0.07868948189541697 test_loss: 0.10532163381576538
epoch: 127 training_loss 0.0790259769745171 test_loss: 0.10631762742996216
epoch: 128 training_loss 0.07608759950846433 test_loss: 0.09153923988342286
epoch: 129 training_loss 0.07941990744322538 test_loss: 0.09834173321723938
epoch: 130 training_loss 0.07705057879909873 test_loss: 0.0998540461063385
epoch: 131 training_loss 0.07739567369222641 test_loss: 0.08500919342041016
epoch: 132 training_loss 0.08537505870684982 test_loss: 0.10674096345901489
epoch: 133 training_loss 0.07677751377224923 test_loss: 0.08759960532188416
epoch: 134 training_loss 0.08855546120554209 test_loss: 0.10255415439605713
epoch: 135 training_loss 0.08536401411518454 test_loss: 0.08849769234657287
epoch: 136 training_loss 0.07726357093080878 test_loss: 0.09066290259361268
epoch: 137 training_loss 0.076501848641783 test_loss: 0.09302166104316711
epoch: 138 training_loss 0.08586218949407338 test_loss: 0.08597099184989929
epoch: 139 training_loss 0.07713545816019178 test_loss: 0.10579110383987426
epoch: 140 training_loss 0.07272296076640487 test_loss: 0.09849520921707153
epoch: 141 training_loss 0.08475506119430065 test_loss: 0.10077052116394043
epoch: 142 training_loss 0.07624165527522564 test_loss: 0.09367391467094421
epoch: 143 training_loss 0.0734889417886734 test_loss: 0.0940226972103119
epoch: 144 training_loss 0.08589126385748386 test_loss: 0.11041275262832642
epoch: 145 training_loss 0.07788840675726533 test_loss: 0.10021930932998657
epoch: 146 training_loss 0.07415729627013207 test_loss: 0.09768592119216919
epoch: 147 training_loss 0.07791258163750171 test_loss: 0.10446780920028687
epoch: 148 training_loss 0.07425008211284875 test_loss: 0.09658971428871155
epoch: 149 training_loss 0.07480796649120748 test_loss: 0.08978695273399354
epoch: 0 training_loss 0.2489599910378456 test_loss: 0.1678330659866333
epoch: 1 training_loss 0.14854721520096065 test_loss: 0.1458588123321533
epoch: 2 training_loss 0.13778383500874042 test_loss: 0.15593847036361694
epoch: 3 training_loss 0.13822474244982005 test_loss: 0.11707751750946045
epoch: 4 training_loss 0.12298069655895233 test_loss: 0.11293646097183227
epoch: 5 training_loss 0.10914089608937502 test_loss: 0.12782238721847533
epoch: 6 training_loss 0.11726894095540047 test_loss: 0.09735992550849915
epoch: 7 training_loss 0.10368657641112805 test_loss: 0.10234709978103637
epoch: 8 training_loss 0.1019817129522562 test_loss: 0.10720359086990357
epoch: 9 training_loss 0.10861853301525116 test_loss: 0.1045525074005127
epoch: 10 training_loss 0.10704653061926366 test_loss: 0.08955671191215515
epoch: 11 training_loss 0.10427415821701289 test_loss: 0.1321729302406311
epoch: 12 training_loss 0.11383044380694628 test_loss: 0.11805195808410644
epoch: 13 training_loss 0.11143521862104536 test_loss: 0.09796847701072693
epoch: 14 training_loss 0.10089968776330352 test_loss: 0.09880686402320862
epoch: 15 training_loss 0.10239049857482314 test_loss: 0.10228641033172607
epoch: 16 training_loss 0.10160575633868575 test_loss: 0.11504552364349366
epoch: 17 training_loss 0.1002634054608643 test_loss: 0.10261977910995483
epoch: 18 training_loss 0.09505951043218375 test_loss: 0.08451048135757447
epoch: 19 training_loss 0.09534918442368508 test_loss: 0.10415595769882202
epoch: 20 training_loss 0.09828684467822313 test_loss: 0.09846147894859314
epoch: 21 training_loss 0.10017403844743968 test_loss: 0.09904546737670898
epoch: 22 training_loss 0.09709817809984088 test_loss: 0.09211809039115906
epoch: 23 training_loss 0.09730139059945941 test_loss: 0.08864104747772217
epoch: 24 training_loss 0.09350077524781227 test_loss: 0.09049711227416993
epoch: 25 training_loss 0.09015529489144683 test_loss: 0.09626505374908448
epoch: 26 training_loss 0.10223922614008188 test_loss: 0.09835579991340637
epoch: 27 training_loss 0.09349085254594684 test_loss: 0.09191145300865174
epoch: 28 training_loss 0.10069136921316385 test_loss: 0.09343419671058655
epoch: 29 training_loss 0.09202184874564409 test_loss: 0.10754019021987915
epoch: 30 training_loss 0.09787851125001908 test_loss: 0.10104963779449463
epoch: 31 training_loss 0.09938529919832945 test_loss: 0.08979551792144776
epoch: 32 training_loss 0.09589467268437148 test_loss: 0.08747557401657105
epoch: 33 training_loss 0.09286775056272745 test_loss: 0.10839412212371827
epoch: 34 training_loss 0.09447786854580045 test_loss: 0.09943827390670776
epoch: 35 training_loss 0.08523082938045264 test_loss: 0.10899670124053955
epoch: 36 training_loss 0.09033011192455888 test_loss: 0.07990406155586242
epoch: 37 training_loss 0.09282496878877282 test_loss: 0.11036721467971802
epoch: 38 training_loss 0.09124259801581502 test_loss: 0.07857528328895569
epoch: 39 training_loss 0.0906099071726203 test_loss: 0.08120601177215576
epoch: 40 training_loss 0.09225601954385639 test_loss: 0.09669641256332398
epoch: 41 training_loss 0.0970825557783246 test_loss: 0.09171444773674012
epoch: 42 training_loss 0.09901095310226082 test_loss: 0.10011423826217651
epoch: 43 training_loss 0.09830033633857965 test_loss: 0.09281875491142273
epoch: 44 training_loss 0.08992264030501246 test_loss: 0.10451818704605102
epoch: 45 training_loss 0.09365806225687265 test_loss: 0.08911830186843872
epoch: 46 training_loss 0.09064830675721168 test_loss: 0.10360623598098755
epoch: 47 training_loss 0.07633985361084342 test_loss: 0.10060479640960693
epoch: 48 training_loss 0.08978849370032549 test_loss: 0.10031884908676147
epoch: 49 training_loss 0.08851414881646633 test_loss: 0.100630784034729
epoch: 50 training_loss 0.08765479711815714 test_loss: 0.09576866030693054
epoch: 51 training_loss 0.09087526085786521 test_loss: 0.08640576004981995
epoch: 52 training_loss 0.0918978226557374 test_loss: 0.08821308016777038
epoch: 53 training_loss 0.09008361479267478 test_loss: 0.08758881092071533
epoch: 54 training_loss 0.08752892799675464 test_loss: 0.10034602880477905
epoch: 55 training_loss 0.08978125759400428 test_loss: 0.0927365005016327
epoch: 56 training_loss 0.0933604633435607 test_loss: 0.08876720666885377
epoch: 57 training_loss 0.08418001405894757 test_loss: 0.07689272165298462
epoch: 58 training_loss 0.09409489031881094 test_loss: 0.08066796660423278
epoch: 59 training_loss 0.08666053747758269 test_loss: 0.09610730409622192
epoch: 60 training_loss 0.09032253731042146 test_loss: 0.0942300021648407
epoch: 61 training_loss 0.08580372171476483 test_loss: 0.09809430241584778
epoch: 62 training_loss 0.09604619195684791 test_loss: 0.09246476292610169
epoch: 63 training_loss 0.0940805346891284 test_loss: 0.09861714243888856
epoch: 64 training_loss 0.08952380253002047 test_loss: 0.08627856969833374
epoch: 65 training_loss 0.08156759578734636 test_loss: 0.09554688334465027
epoch: 66 training_loss 0.0878980665653944 test_loss: 0.10005550384521485
epoch: 67 training_loss 0.09105477044358849 test_loss: 0.09146094918251038
epoch: 68 training_loss 0.08653984442353249 test_loss: 0.08100876212120056
epoch: 69 training_loss 0.09280204191803933 test_loss: 0.09566644430160523
epoch: 70 training_loss 0.0831433067843318 test_loss: 0.09307894110679626
epoch: 71 training_loss 0.08897182114422321 test_loss: 0.08968409299850463
epoch: 72 training_loss 0.08612874368205667 test_loss: 0.09562294483184815
epoch: 73 training_loss 0.08584215078502894 test_loss: 0.08450801968574524
epoch: 74 training_loss 0.08852664329111576 test_loss: 0.10298844575881957
epoch: 75 training_loss 0.09016446620225907 test_loss: 0.10466684103012085
epoch: 76 training_loss 0.08937362033873797 test_loss: 0.08751165270805358
epoch: 77 training_loss 0.08696359282359481 test_loss: 0.09331905245780944
epoch: 78 training_loss 0.0916831030882895 test_loss: 0.09506561160087586
epoch: 79 training_loss 0.09114688109606504 test_loss: 0.09624510407447814
epoch: 80 training_loss 0.08591463172808289 test_loss: 0.09001520276069641
epoch: 81 training_loss 0.08201775478199125 test_loss: 0.09776386022567748
epoch: 82 training_loss 0.08429220641031861 test_loss: 0.09617818593978882
epoch: 83 training_loss 0.09163210645318032 test_loss: 0.10885522365570069
epoch: 84 training_loss 0.09383114049211144 test_loss: 0.09309982061386109
epoch: 85 training_loss 0.09279898956418037 test_loss: 0.10273306369781494
epoch: 86 training_loss 0.08937860321253538 test_loss: 0.09958104491233825
epoch: 87 training_loss 0.08056860564276576 test_loss: 0.08504295349121094
epoch: 88 training_loss 0.08874549627304078 test_loss: 0.09196277260780335
epoch: 89 training_loss 0.08153551302850247 test_loss: 0.09542372226715087
epoch: 90 training_loss 0.07766856296919286 test_loss: 0.09708224534988404
epoch: 91 training_loss 0.08593609182164073 test_loss: 0.08715105652809144
epoch: 92 training_loss 0.08275385623797775 test_loss: 0.09036780595779419
epoch: 93 training_loss 0.08166262831538916 test_loss: 0.08972578048706055
epoch: 94 training_loss 0.0809905577916652 test_loss: 0.10000503063201904
epoch: 95 training_loss 0.07923000156879426 test_loss: 0.11042497158050538
epoch: 96 training_loss 0.08865836028009653 test_loss: 0.09220426082611084
epoch: 97 training_loss 0.084414741396904 test_loss: 0.08870779871940612
epoch: 98 training_loss 0.08461745662614703 test_loss: 0.0853985607624054
epoch: 99 training_loss 0.08560815945267677 test_loss: 0.08600893020629882
epoch: 100 training_loss 0.08947278222069144 test_loss: 0.09168172478675843
epoch: 101 training_loss 0.08228605315089225 test_loss: 0.10209454298019409
epoch: 102 training_loss 0.08448642471805215 test_loss: 0.08943560123443603
epoch: 103 training_loss 0.08224101895466447 test_loss: 0.08169282674789428
epoch: 104 training_loss 0.07566573571413755 test_loss: 0.08923038244247436
epoch: 105 training_loss 0.08200376533903181 test_loss: 0.09944701790809632
epoch: 106 training_loss 0.08337791027501225 test_loss: 0.10704492330551148
epoch: 107 training_loss 0.07915794532746076 test_loss: 0.09017726182937622
epoch: 108 training_loss 0.08287889221683145 test_loss: 0.08611149787902832
epoch: 109 training_loss 0.0796401882916689 test_loss: 0.11263229846954345
epoch: 110 training_loss 0.0826618168503046 test_loss: 0.10076981782913208
epoch: 111 training_loss 0.08755371583625675 test_loss: 0.10556133985519409
epoch: 112 training_loss 0.07758045015856624 test_loss: 0.0797203779220581
epoch: 113 training_loss 0.09178415989503265 test_loss: 0.08115327954292298
epoch: 114 training_loss 0.08149499265477061 test_loss: 0.10442328453063965
epoch: 115 training_loss 0.08699068380519748 test_loss: 0.10061701536178588
epoch: 116 training_loss 0.08645553490146994 test_loss: 0.10929783582687377
epoch: 117 training_loss 0.08699473230168224 test_loss: 0.09099465608596802
epoch: 118 training_loss 0.08388564329594374 test_loss: 0.08185378313064576
epoch: 119 training_loss 0.07820161297917366 test_loss: 0.09400832653045654
epoch: 120 training_loss 0.07703183325007558 test_loss: 0.10701904296875
epoch: 121 training_loss 0.07561660550534725 test_loss: 0.10272111892700195
epoch: 122 training_loss 0.0749432269204408 test_loss: 0.10085283517837525
epoch: 123 training_loss 0.07912704946473241 test_loss: 0.08293197751045227
epoch: 124 training_loss 0.08192586591467262 test_loss: 0.08766775727272033
epoch: 125 training_loss 0.08437610195949674 test_loss: 0.0885928988456726
epoch: 126 training_loss 0.0883247919473797 test_loss: 0.10252943038940429
epoch: 127 training_loss 0.08225714487954974 test_loss: 0.1032942771911621
epoch: 128 training_loss 0.08120389608666301 test_loss: 0.08858537673950195
epoch: 129 training_loss 0.08880639538168907 test_loss: 0.09468982815742492
epoch: 130 training_loss 0.08331378221511841 test_loss: 0.09362654685974121
epoch: 131 training_loss 0.08282765286043287 test_loss: 0.08585116267204285
epoch: 132 training_loss 0.07736158786341547 test_loss: 0.08981278538703918
epoch: 133 training_loss 0.08289197539910674 test_loss: 0.08769170641899109
epoch: 134 training_loss 0.08517654403112829 test_loss: 0.0921911358833313
epoch: 135 training_loss 0.07484876437112689 test_loss: 0.09799376130104065
epoch: 136 training_loss 0.0828884844854474 test_loss: 0.0969043493270874
epoch: 137 training_loss 0.08135967433452607 test_loss: 0.08529385328292846
epoch: 138 training_loss 0.08066918723285198 test_loss: 0.0955899178981781
epoch: 139 training_loss 0.08226831894367934 test_loss: 0.11391998529434204
epoch: 140 training_loss 0.07780799323692918 test_loss: 0.10630278587341309
epoch: 141 training_loss 0.08166120134294033 test_loss: 0.09240805506706237
epoch: 142 training_loss 0.0825971625559032 test_loss: 0.09904182553291321
epoch: 143 training_loss 0.08639780789613724 test_loss: 0.0895705759525299
epoch: 144 training_loss 0.07326824152842164 test_loss: 0.09002625942230225
epoch: 145 training_loss 0.08692891705781221 test_loss: 0.08414592146873474
epoch: 146 training_loss 0.08228028075769543 test_loss: 0.1132959246635437
epoch: 147 training_loss 0.08556077132001519 test_loss: 0.09810147285461426
epoch: 148 training_loss 0.08512118753045797 test_loss: 0.09557799100875855
epoch: 149 training_loss 0.0789720012061298 test_loss: 0.09695742726325988
episode: 0 training return: -822.787476100788
episode: 1 training return: -760.7224588367898
episode: 2 training return: -765.4709310648593
episode: 3 training return: -823.9461565784724
epoch: 1 test_true_pfm: 116.20874649571367 sim_pfm: -521.7308560700206
episode: 4 training return: -901.8722281088882
episode: 5 training return: -1025.3585189303667
episode: 6 training return: -893.3998956817718
episode: 7 training return: -947.1995699239619
epoch: 2 test_true_pfm: 88.48010879073261 sim_pfm: -844.1813493776049
episode: 8 training return: -844.437945271679
episode: 9 training return: -770.9390625260188
episode: 10 training return: -784.461178099982
episode: 11 training return: -750.4679003703287
epoch: 3 test_true_pfm: 5.369185152027065 sim_pfm: -642.4729054143648
episode: 12 training return: -614.9258649832927
episode: 13 training return: -752.0420391135556
episode: 14 training return: -654.0277429352798
episode: 15 training return: -827.2807320352141
epoch: 4 test_true_pfm: -77.21096128848887 sim_pfm: -807.410467990357
episode: 16 training return: -641.2415016511
episode: 17 training return: -704.1706841399515
episode: 18 training return: -836.567677853899
episode: 19 training return: -708.4454211107889
epoch: 5 test_true_pfm: 259.70488109743116 sim_pfm: -771.2856856620369
episode: 20 training return: -810.7393440185574
episode: 21 training return: -914.0625568946901
episode: 22 training return: -722.0595648356842
episode: 23 training return: -761.925079477528
epoch: 6 test_true_pfm: -84.14297566797984 sim_pfm: -762.0338697276561
episode: 24 training return: -875.2034060895983
episode: 25 training return: -675.7562202215553
episode: 26 training return: -796.2235522376552
episode: 27 training return: -712.8922832454733
epoch: 7 test_true_pfm: 14.567575072104782 sim_pfm: -511.95892235097494
episode: 28 training return: -816.2839483177021
episode: 29 training return: -772.1927076887509
episode: 30 training return: -734.0274768901716
episode: 31 training return: -793.2458822169618
epoch: 8 test_true_pfm: 12.496904106061246 sim_pfm: -563.7758725155877
episode: 32 training return: -679.825534056432
episode: 33 training return: -764.34304925105
episode: 34 training return: -853.5872633720547
episode: 35 training return: -792.6382294675188
epoch: 9 test_true_pfm: 180.3506502964258 sim_pfm: -705.6380134298153
episode: 36 training return: -795.8478931274623
episode: 37 training return: -783.2062156067974
episode: 38 training return: -809.5986242783275
episode: 39 training return: -819.0030635848746
epoch: 10 test_true_pfm: 184.34777655388348 sim_pfm: -634.6397158141054
episode: 40 training return: -820.8563083261573
episode: 41 training return: -805.3964777058138
episode: 42 training return: -817.5091663408807
episode: 43 training return: -823.9571253255667
epoch: 11 test_true_pfm: 192.75073879042858 sim_pfm: -649.3287769748977
episode: 44 training return: -765.3815052223733
episode: 45 training return: -822.0616259522409
episode: 46 training return: -806.3409666558878
episode: 47 training return: -819.2207763868944
epoch: 12 test_true_pfm: 135.84255392222113 sim_pfm: -629.0984957486285
episode: 48 training return: -777.5728671782776
episode: 49 training return: -840.9039248197449
episode: 50 training return: -826.6092913065327
episode: 51 training return: -834.7098166625084
epoch: 13 test_true_pfm: 124.49162442108843 sim_pfm: -629.1961282959154
episode: 52 training return: -816.4572046183963
episode: 53 training return: -828.8608871729878
episode: 54 training return: -748.376257686178
episode: 55 training return: -784.4749931092516
epoch: 14 test_true_pfm: 119.28271753951458 sim_pfm: -635.3231845432234
episode: 56 training return: -763.7018893434919
episode: 57 training return: -785.339367733647
episode: 58 training return: -764.6966979675407
episode: 59 training return: -789.8626199240462
epoch: 15 test_true_pfm: 144.2349062960113 sim_pfm: -648.5042348781112
episode: 60 training return: -765.9091961314489
episode: 61 training return: -780.0945666368863
episode: 62 training return: -775.6504289818188
episode: 63 training return: -716.2867781001884
epoch: 16 test_true_pfm: 209.12907108552966 sim_pfm: -604.3079301217689
episode: 64 training return: -781.438642569289
episode: 65 training return: -709.1060271111475
episode: 66 training return: -722.7433778565487
episode: 67 training return: -695.2653202873374
epoch: 17 test_true_pfm: 170.35580744537262 sim_pfm: -629.9335266339416
episode: 68 training return: -717.4670544843615
episode: 69 training return: -686.4317682916856
episode: 70 training return: -655.8856990257816
episode: 71 training return: -715.6589746760118
epoch: 18 test_true_pfm: 245.19129358672146 sim_pfm: -597.2385733089255
episode: 72 training return: -620.9780427860189
episode: 73 training return: -645.2646644798696
episode: 74 training return: -668.5001071538111
episode: 75 training return: -637.9703747568631
epoch: 19 test_true_pfm: 90.23866592114253 sim_pfm: -585.5662466738625
episode: 76 training return: -635.7931035949772
episode: 77 training return: -621.1397844402195
episode: 78 training return: -633.491949413833
episode: 79 training return: -597.2608697320878
epoch: 20 test_true_pfm: 319.54988587577714 sim_pfm: -527.8553785629898
episode: 80 training return: -600.5423237592778
episode: 81 training return: -593.5734222494729
episode: 82 training return: -624.3370402111216
episode: 83 training return: -609.1680455355585
epoch: 21 test_true_pfm: 100.03157625199408 sim_pfm: -500.02389869496983
episode: 84 training return: -562.9686523517387
episode: 85 training return: -606.0286287907473
episode: 86 training return: -727.3318330890321
episode: 87 training return: -596.4916094767652
epoch: 22 test_true_pfm: 8.671568073733871 sim_pfm: -526.6497357848497
episode: 88 training return: -572.7670250895878
episode: 89 training return: -620.0765602467575
episode: 90 training return: -559.3593778163859
episode: 91 training return: -604.2071021847522
epoch: 23 test_true_pfm: 76.70567121372471 sim_pfm: -524.3701965150179
episode: 92 training return: -584.7700930028412
episode: 93 training return: -567.262273358419
episode: 94 training return: -570.014491898916
episode: 95 training return: -546.8338828133791
epoch: 24 test_true_pfm: 85.72804259493527 sim_pfm: -524.1451586801994
episode: 96 training return: -619.6629433014828
episode: 97 training return: -537.7051153199315
episode: 98 training return: -606.7735384018143
episode: 99 training return: -622.9015616579348
epoch: 25 test_true_pfm: -12.407799537974654 sim_pfm: -539.4288283365745
episode: 100 training return: -577.7533323759761
episode: 101 training return: -565.6385692481826
episode: 102 training return: -586.9922019791658
episode: 103 training return: -640.3264470063838
epoch: 26 test_true_pfm: 184.03315513436428 sim_pfm: -524.5061737142768
episode: 104 training return: -583.023778370212
episode: 105 training return: -681.8023794508421
episode: 106 training return: -527.8261633181967
episode: 107 training return: -605.0069732026376
epoch: 27 test_true_pfm: -51.204212120452745 sim_pfm: -516.9329720126052
episode: 108 training return: -568.6408084281244
episode: 109 training return: -563.097486154907
episode: 110 training return: -514.4641837488169
episode: 111 training return: -572.3256531047058
epoch: 28 test_true_pfm: 165.6726427466658 sim_pfm: -483.67126505291634
episode: 112 training return: -607.5274583208244
episode: 113 training return: -548.3373307871276
episode: 114 training return: -603.103726885492
episode: 115 training return: -790.8619436071067
epoch: 29 test_true_pfm: 35.48548952530901 sim_pfm: -508.26358641430596
episode: 116 training return: -715.5707915982186
episode: 117 training return: -576.7921163846471
episode: 118 training return: -569.4816789405105
episode: 119 training return: -615.4437667342075
epoch: 30 test_true_pfm: 162.5224916502185 sim_pfm: -516.9486153040152
episode: 120 training return: -617.107602078339
episode: 121 training return: -505.34678769115
episode: 122 training return: -582.0907951188788
episode: 123 training return: -514.800242466024
epoch: 31 test_true_pfm: 166.92198249989994 sim_pfm: -476.4798207409836
episode: 124 training return: -602.6975156603318
episode: 125 training return: -532.7233865655544
episode: 126 training return: -537.8569664024084
episode: 127 training return: -525.4091999317478
epoch: 32 test_true_pfm: 141.29643522856892 sim_pfm: -471.905291915632
episode: 128 training return: -569.0019827974627
episode: 129 training return: -536.4995092265297
episode: 130 training return: -584.4588867773737
episode: 131 training return: -543.3508788729553
epoch: 33 test_true_pfm: -96.41946411132301 sim_pfm: -516.2042786166318
episode: 132 training return: -543.999483359261
episode: 133 training return: -508.1485394020166
episode: 134 training return: -545.4347690712576
episode: 135 training return: -524.0459138896185
epoch: 34 test_true_pfm: 142.88953858188606 sim_pfm: -498.7842646862137
episode: 136 training return: -516.6457311457082
episode: 137 training return: -584.6927399042471
episode: 138 training return: -541.8046221024396
episode: 139 training return: -520.4293261302616
epoch: 35 test_true_pfm: 232.33081651591897 sim_pfm: -497.5618286922984
episode: 140 training return: -584.7433957319993
episode: 141 training return: -551.9294557024086
episode: 142 training return: -593.7115099730257
episode: 143 training return: -523.320810742348
epoch: 36 test_true_pfm: 207.66824846494364 sim_pfm: -483.1594126843216
episode: 144 training return: -569.5029780605729
episode: 145 training return: -542.572914283823
episode: 146 training return: -668.8973676305085
episode: 147 training return: -766.3064160507521
epoch: 37 test_true_pfm: 327.629416760226 sim_pfm: -440.9589668452791
episode: 148 training return: -610.1620409968689
episode: 149 training return: -580.9056602522473
episode: 150 training return: -561.393555979229
episode: 151 training return: -535.0143605313707
epoch: 38 test_true_pfm: 87.92330431182666 sim_pfm: -497.1709550640739
episode: 152 training return: -529.6076111009937
episode: 153 training return: -513.813554277389
episode: 154 training return: -582.1774667078503
episode: 155 training return: -579.3047214993021
epoch: 39 test_true_pfm: 131.1952798700189 sim_pfm: -499.49887907582206
episode: 156 training return: -517.8337230487999
episode: 157 training return: -586.5570993520718
episode: 158 training return: -711.0113620361683
episode: 159 training return: -584.3950573490022
epoch: 40 test_true_pfm: 135.27524859966104 sim_pfm: -496.1492005895521
episode: 160 training return: -522.1527344558841
episode: 161 training return: -600.6499537414277
episode: 162 training return: -518.5786482005726
episode: 163 training return: -552.4536345067894
epoch: 41 test_true_pfm: 201.65535874129384 sim_pfm: -494.1903290459391
episode: 164 training return: -560.1977129171955
episode: 165 training return: -543.309601261492
episode: 166 training return: -522.4730468883586
episode: 167 training return: -535.9902481621825
epoch: 42 test_true_pfm: 186.77813136628137 sim_pfm: -488.39508566302135
episode: 168 training return: -536.0394192982096
episode: 169 training return: -538.7782186089047
episode: 170 training return: -574.513668286195
episode: 171 training return: -531.7581869985631
epoch: 43 test_true_pfm: 119.29257200283205 sim_pfm: -473.56838111541884
episode: 172 training return: -532.7793012641288
episode: 173 training return: -555.3733403577168
episode: 174 training return: -531.435948503752
episode: 175 training return: -561.0180687960568
epoch: 44 test_true_pfm: -283.2474783214907 sim_pfm: -558.7839651166827
episode: 176 training return: -557.8131720308925
episode: 177 training return: -619.0264657163518
episode: 178 training return: -539.6994083664782
episode: 179 training return: -609.4128554227067
epoch: 45 test_true_pfm: 80.04093028586169 sim_pfm: -491.6598754437329
episode: 180 training return: -542.2846075314518
episode: 181 training return: -531.2561433043323
episode: 182 training return: -552.2049176643121
episode: 183 training return: -529.4267852662066
epoch: 46 test_true_pfm: 70.78209379066689 sim_pfm: -480.5257545970598
episode: 184 training return: -588.2161454704653
episode: 185 training return: -570.3462001343737
episode: 186 training return: -573.9198438430766
episode: 187 training return: -546.064403182254
epoch: 47 test_true_pfm: 149.51877450311648 sim_pfm: -514.1372547558224
episode: 188 training return: -531.1355384748639
episode: 189 training return: -542.7755779710121
episode: 190 training return: -527.2134489626209
episode: 191 training return: -585.0209629219646
epoch: 48 test_true_pfm: 115.06442761267256 sim_pfm: -481.55739147132743
episode: 192 training return: -564.828018467281
episode: 193 training return: -554.297877343716
episode: 194 training return: -558.7124695643594
episode: 195 training return: -566.8825981516608
epoch: 49 test_true_pfm: 159.22706416832037 sim_pfm: -495.96149402580323
episode: 196 training return: -676.7382639774139
episode: 197 training return: -544.4490370114671
episode: 198 training return: -587.2179888204118
episode: 199 training return: -560.300590203562
epoch: 50 test_true_pfm: 175.56530067878705 sim_pfm: -488.1941269141187
episode: 200 training return: -574.1749108678703
episode: 201 training return: -542.4260128368298
episode: 202 training return: -553.7021814873924
episode: 203 training return: -544.114066962092
epoch: 51 test_true_pfm: 158.87337269415016 sim_pfm: -456.4092355936182
episode: 204 training return: -597.2085721276861
episode: 205 training return: -566.8044013762233
episode: 206 training return: -557.9789684346041
episode: 207 training return: -517.0649311504035
epoch: 52 test_true_pfm: 343.94130147763593 sim_pfm: -471.2671408928673
episode: 208 training return: -558.2304764592378
episode: 209 training return: -535.9126666742594
episode: 210 training return: -576.6090011527025
episode: 211 training return: -659.3344496179088
epoch: 53 test_true_pfm: 170.5969342785383 sim_pfm: -501.30799294706594
episode: 212 training return: -538.0174397503855
episode: 213 training return: -581.8827518808874
episode: 214 training return: -553.57656933352
episode: 215 training return: -599.8220852211128
epoch: 54 test_true_pfm: 97.23717328876019 sim_pfm: -497.4260401149309
episode: 216 training return: -553.1632578586688
episode: 217 training return: -509.1830698157611
episode: 218 training return: -526.7221666931308
episode: 219 training return: -555.7974911373251
epoch: 55 test_true_pfm: 257.0823750216212 sim_pfm: -476.73906022943123
episode: 220 training return: -584.3648984391481
episode: 221 training return: -542.1244313888535
episode: 222 training return: -542.8161365770208
episode: 223 training return: -554.7738689113241
epoch: 56 test_true_pfm: 258.92542228479425 sim_pfm: -486.01531103777984
episode: 224 training return: -532.0434087500693
episode: 225 training return: -530.9128504148866
episode: 226 training return: -566.1430304680151
episode: 227 training return: -510.847636964128
epoch: 57 test_true_pfm: 201.0021459201953 sim_pfm: -507.06687096542015
episode: 228 training return: -555.0563449409537
episode: 229 training return: -528.8657133013324
episode: 230 training return: -552.9605085811639
episode: 231 training return: -602.778229125623
epoch: 58 test_true_pfm: 108.65791453767402 sim_pfm: -443.5759482721766
episode: 232 training return: -776.305227998653
episode: 233 training return: -583.1962151208959
episode: 234 training return: -534.1896339810509
episode: 235 training return: -559.3276989067415
epoch: 59 test_true_pfm: 379.91210574535256 sim_pfm: -459.32324148316184
episode: 236 training return: -567.9417208939713
episode: 237 training return: -563.0358290336898
episode: 238 training return: -545.3731601586672
episode: 239 training return: -518.6605320691456
epoch: 60 test_true_pfm: 18.948398955703244 sim_pfm: -492.2658926887698
episode: 240 training return: -579.9493407965014
episode: 241 training return: -516.0752469801272
episode: 242 training return: -524.6770305062314
episode: 243 training return: -804.0493204408967
epoch: 61 test_true_pfm: 118.83217582580149 sim_pfm: -455.5884595051286
episode: 244 training return: -528.3919699100124
episode: 245 training return: -541.469415023463
episode: 246 training return: -480.6517927464182
episode: 247 training return: -647.3469290499526
epoch: 62 test_true_pfm: 174.31818616129559 sim_pfm: -431.4013795950388
episode: 248 training return: -570.8565184817156
episode: 249 training return: -584.9527670926384
episode: 250 training return: -611.7072857214107
episode: 251 training return: -591.3881100833723
epoch: 63 test_true_pfm: 159.7684503084007 sim_pfm: -495.50651055514686
episode: 252 training return: -527.2179233139593
episode: 253 training return: -554.106934837279
episode: 254 training return: -518.5631847368086
episode: 255 training return: -763.108663200075
epoch: 64 test_true_pfm: 46.03205841036705 sim_pfm: -459.5529189251412
episode: 256 training return: -549.1434336119145
episode: 257 training return: -641.7025212866538
episode: 258 training return: -508.1023582857738
episode: 259 training return: -544.5039814687433
epoch: 65 test_true_pfm: 200.52363605422636 sim_pfm: -434.2047731317945
episode: 260 training return: -545.6634839037627
episode: 261 training return: -550.2334910557078
episode: 262 training return: -493.57842124752113
episode: 263 training return: -584.365547238877
epoch: 66 test_true_pfm: -60.95306642687208 sim_pfm: -495.6130344959658
episode: 264 training return: -799.9684786110155
episode: 265 training return: -526.919234197473
episode: 266 training return: -558.1024444270842
episode: 267 training return: -570.0277560215678
epoch: 67 test_true_pfm: 128.6903664211815 sim_pfm: -484.1099733053814
episode: 268 training return: -565.7262811981811
episode: 269 training return: -568.9930675511982
episode: 270 training return: -550.1332026227424
episode: 271 training return: -549.0482484250978
epoch: 68 test_true_pfm: 58.37987511404606 sim_pfm: -530.187916094165
episode: 272 training return: -540.9524067515284
episode: 273 training return: -515.4118155222
episode: 274 training return: -534.2563999252576
episode: 275 training return: -545.0088365717434
epoch: 69 test_true_pfm: 137.13509493538712 sim_pfm: -449.77845507160504
episode: 276 training return: -588.848868882142
episode: 277 training return: -519.6142720745594
episode: 278 training return: -552.4965476662912
episode: 279 training return: -558.4040699445886
epoch: 70 test_true_pfm: 147.4531234301462 sim_pfm: -464.2531609600754
episode: 280 training return: -521.3219403081546
episode: 281 training return: -569.5720729317374
episode: 282 training return: -521.5965530928939
episode: 283 training return: -573.3201289266078
epoch: 71 test_true_pfm: 182.0831711246731 sim_pfm: -465.0584991715562
episode: 284 training return: -544.347111432742
episode: 285 training return: -577.86105924684
episode: 286 training return: -559.1597181064463
episode: 287 training return: -528.4071520854009
epoch: 72 test_true_pfm: 103.72068161210747 sim_pfm: -508.5972552928597
episode: 288 training return: -534.9505637178958
episode: 289 training return: -512.8402747825677
episode: 290 training return: -510.22401121602314
episode: 291 training return: -550.1120296357179
epoch: 73 test_true_pfm: 34.08679141682983 sim_pfm: -501.9952563903853
episode: 292 training return: -510.41805349151196
episode: 293 training return: -536.5327127804244
episode: 294 training return: -593.0732531330545
episode: 295 training return: -545.400876455029
epoch: 74 test_true_pfm: 105.21902584752213 sim_pfm: -472.8312649368506
episode: 296 training return: -596.9176533880443
episode: 297 training return: -601.62846066757
episode: 298 training return: -543.5490674727605
episode: 299 training return: -521.8940115780364
epoch: 75 test_true_pfm: 171.25478068274256 sim_pfm: -475.0253529579249
episode: 300 training return: -587.8896414263709
episode: 301 training return: -548.3810281594957
episode: 302 training return: -580.4820660972505
episode: 303 training return: -546.4082814103261
epoch: 76 test_true_pfm: 335.6038859486983 sim_pfm: -445.7303221650941
episode: 304 training return: -787.872527550353
episode: 305 training return: -536.4732575614244
episode: 306 training return: -531.7761011867975
episode: 307 training return: -543.9088521397211
epoch: 77 test_true_pfm: -105.19693390043805 sim_pfm: -512.5237273258826
episode: 308 training return: -520.3787290600585
episode: 309 training return: -586.9914595760223
episode: 310 training return: -554.8850836172926
episode: 311 training return: -576.4400355220935
epoch: 78 test_true_pfm: 45.44979556585565 sim_pfm: -529.50153647036
episode: 312 training return: -554.5289666029543
episode: 313 training return: -496.71019869324977
episode: 314 training return: -556.6435335883178
episode: 315 training return: -539.0692554059539
epoch: 79 test_true_pfm: 8.67551621508804 sim_pfm: -510.6884354227912
episode: 316 training return: -518.4984641145369
episode: 317 training return: -473.3718340302058
episode: 318 training return: -550.1196713400456
episode: 319 training return: -600.079879030245
epoch: 80 test_true_pfm: 152.5941638471761 sim_pfm: -474.981825172392
episode: 320 training return: -512.4931809877137
episode: 321 training return: -532.5635855463642
episode: 322 training return: -518.3437150175902
episode: 323 training return: -543.2453375048634
epoch: 81 test_true_pfm: 294.67456252236906 sim_pfm: -457.52612320175285
episode: 324 training return: -527.6103955141923
episode: 325 training return: -538.6761321152766
episode: 326 training return: -535.175175181807
episode: 327 training return: -542.6827340004158
epoch: 82 test_true_pfm: 100.74690836566576 sim_pfm: -535.018465871408
episode: 328 training return: -564.1959924179789
episode: 329 training return: -529.1314162708909
episode: 330 training return: -512.3003223412181
episode: 331 training return: -555.3745517707571
epoch: 83 test_true_pfm: 327.0122350071567 sim_pfm: -429.77047607783567
episode: 332 training return: -540.7284599020553
episode: 333 training return: -501.45605316625137
episode: 334 training return: -543.1212535041067
episode: 335 training return: -540.7920079842027
epoch: 84 test_true_pfm: 73.51289875764259 sim_pfm: -477.9397939413786
episode: 336 training return: -579.2323463628885
episode: 337 training return: -506.1632589479268
episode: 338 training return: -551.7999085361757
episode: 339 training return: -532.4380596028353
epoch: 85 test_true_pfm: 29.525299989672106 sim_pfm: -453.9811157758022
episode: 340 training return: -522.1902160446879
episode: 341 training return: -554.7864844218535
episode: 342 training return: -535.8030268640721
episode: 343 training return: -533.3069461493227
epoch: 86 test_true_pfm: 125.56867857932497 sim_pfm: -445.07307139438086
episode: 344 training return: -558.2729214959975
episode: 345 training return: -520.5845970200029
episode: 346 training return: -530.4171394624007
episode: 347 training return: -534.190390489897
epoch: 87 test_true_pfm: 183.72297079895884 sim_pfm: -466.4922878170276
episode: 348 training return: -507.34048869804695
episode: 349 training return: -481.3526413193965
episode: 350 training return: -525.8976542733294
episode: 351 training return: -520.1095013831512
epoch: 88 test_true_pfm: 88.07617811187212 sim_pfm: -434.35043100763346
episode: 352 training return: -551.7841593806453
episode: 353 training return: -606.2637804794607
episode: 354 training return: -526.5136366469923
episode: 355 training return: -531.6151785505955
epoch: 89 test_true_pfm: -21.2617713474834 sim_pfm: -509.85517518585385
episode: 356 training return: -545.9472107753752
episode: 357 training return: -507.6493907646245
episode: 358 training return: -542.155686524878
episode: 359 training return: -498.15569208717847
epoch: 90 test_true_pfm: 20.752368050763174 sim_pfm: -448.51953903331287
episode: 360 training return: -508.87204885750896
episode: 361 training return: -548.5707287063972
episode: 362 training return: -540.2329651990124
episode: 363 training return: -548.0536988559278
epoch: 91 test_true_pfm: 221.48999710057907 sim_pfm: -451.85576098042793
episode: 364 training return: -548.0536986001362
episode: 365 training return: -511.8330733775016
episode: 366 training return: -538.8029852964481
episode: 367 training return: -518.3649107317347
epoch: 92 test_true_pfm: 309.5443870150439 sim_pfm: -403.3004913659449
episode: 368 training return: -543.2214728234861
episode: 369 training return: -483.6690265739885
episode: 370 training return: -489.60851108026554
episode: 371 training return: -532.1296357939791
epoch: 93 test_true_pfm: -14.600646316718311 sim_pfm: -461.15402852384705
episode: 372 training return: -515.9122746980747
episode: 373 training return: -550.5770622127262
episode: 374 training return: -521.5956924926232
episode: 375 training return: -554.1898121321775
epoch: 94 test_true_pfm: 258.7901492589304 sim_pfm: -431.8034425112798
episode: 376 training return: -517.3897454727353
episode: 377 training return: -564.0910770553002
episode: 378 training return: -557.9943370718547
episode: 379 training return: -512.6372678114487
epoch: 95 test_true_pfm: 271.82328652840437 sim_pfm: -414.8222092880911
episode: 380 training return: -497.80609128733954
episode: 381 training return: -474.53298922585196
episode: 382 training return: -552.9634603901454
episode: 383 training return: -527.8441681134769
epoch: 96 test_true_pfm: 250.87200771087646 sim_pfm: -436.44533471334444
episode: 384 training return: -511.77284362067184
episode: 385 training return: -517.3724139363561
episode: 386 training return: -527.9612956549921
episode: 387 training return: -520.2983189136353
epoch: 97 test_true_pfm: 319.7558186199966 sim_pfm: -453.4266901226568
episode: 388 training return: -497.72886860225555
episode: 389 training return: -493.0432505221064
episode: 390 training return: -572.5352153145524
episode: 391 training return: -521.3463061907457
epoch: 98 test_true_pfm: 335.09011405308297 sim_pfm: -445.29065524675633
episode: 392 training return: -505.52919236576867
episode: 393 training return: -510.7958882004064
episode: 394 training return: -522.6499833049315
episode: 395 training return: -525.5910731206327
epoch: 99 test_true_pfm: 335.8067519650477 sim_pfm: -412.4265137484471
episode: 396 training return: -512.0614830555319
episode: 397 training return: -512.3423168213391
episode: 398 training return: -523.0530533728879
episode: 399 training return: -562.6580586965931
epoch: 100 test_true_pfm: 177.08734432246374 sim_pfm: -475.1808953665439
episode: 400 training return: -508.23627797300105
episode: 401 training return: -513.8270377594522
episode: 402 training return: -565.259115211302
episode: 403 training return: -496.55527261946446
epoch: 101 test_true_pfm: 15.841221450356144 sim_pfm: -511.4291807902949
episode: 404 training return: -522.9828044736565
episode: 405 training return: -549.2611942664197
episode: 406 training return: -472.069374228781
episode: 407 training return: -490.3383672160698
epoch: 102 test_true_pfm: 90.50773202235375 sim_pfm: -440.0889404841509
episode: 408 training return: -551.0508869273308
episode: 409 training return: -538.7470368713662
episode: 410 training return: -539.4772202931787
episode: 411 training return: -516.2707086573165
epoch: 103 test_true_pfm: 156.78715910407087 sim_pfm: -445.05461111445624
episode: 412 training return: -491.8624745377684
episode: 413 training return: -496.62207958354395
episode: 414 training return: -536.4708477868813
episode: 415 training return: -535.7216156024507
epoch: 104 test_true_pfm: 277.1233453193591 sim_pfm: -415.34163983336504
episode: 416 training return: -526.7317695810423
episode: 417 training return: -498.7978800887728
episode: 418 training return: -532.2721424868568
episode: 419 training return: -543.7048366185593
epoch: 105 test_true_pfm: 263.206668429294 sim_pfm: -419.3963371801736
episode: 420 training return: -534.8656804105572
episode: 421 training return: -756.1130986644405
episode: 422 training return: -480.52613913116096
episode: 423 training return: -675.7728804492442
epoch: 106 test_true_pfm: 94.55567570760128 sim_pfm: -440.7602325501546
episode: 424 training return: -565.2044040058275
episode: 425 training return: -535.4343314625212
episode: 426 training return: -557.9891571323983
episode: 427 training return: -540.2884108711522
epoch: 107 test_true_pfm: 291.12907284098054 sim_pfm: -469.08429423827414
episode: 428 training return: -496.7143175913454
episode: 429 training return: -522.8013291456197
episode: 430 training return: -561.9250538953498
episode: 431 training return: -540.8434223085395
epoch: 108 test_true_pfm: 209.59389536747378 sim_pfm: -428.00572132086273
episode: 432 training return: -527.9330709692298
episode: 433 training return: -537.1646844816387
episode: 434 training return: -514.7319125410296
episode: 435 training return: -727.413417030975
epoch: 109 test_true_pfm: 352.0180218134238 sim_pfm: -419.8413570038635
episode: 436 training return: -533.0719381933068
episode: 437 training return: -507.29747111426514
episode: 438 training return: -518.291868571617
episode: 439 training return: -496.2106053818736
epoch: 110 test_true_pfm: 256.67350407504233 sim_pfm: -449.10790063070027
episode: 440 training return: -510.6846462618905
episode: 441 training return: -506.6362470032078
episode: 442 training return: -516.5316154768675
episode: 443 training return: -494.29715343861704
epoch: 111 test_true_pfm: 286.12825392400526 sim_pfm: -411.67663794549344
episode: 444 training return: -547.2002938551199
episode: 445 training return: -507.2860647674659
episode: 446 training return: -500.84278957464034
episode: 447 training return: -541.0606877335888
epoch: 112 test_true_pfm: 307.09500496635064 sim_pfm: -450.2978911756811
episode: 448 training return: -717.7627068699596
episode: 449 training return: -478.2781072789517
episode: 450 training return: -504.11120131274527
episode: 451 training return: -542.233715299758
epoch: 113 test_true_pfm: 157.86961508011768 sim_pfm: -479.38904868324136
episode: 452 training return: -556.9431304591903
episode: 453 training return: -546.778329010031
episode: 454 training return: -510.0620227107226
episode: 455 training return: -517.521952257555
epoch: 114 test_true_pfm: 80.55889693254453 sim_pfm: -514.9060771368565
episode: 456 training return: -546.2568349469844
episode: 457 training return: -506.3589926995137
episode: 458 training return: -515.3393276130512
episode: 459 training return: -531.6870821970277
epoch: 115 test_true_pfm: 260.10127248602566 sim_pfm: -428.1593677930502
episode: 460 training return: -530.379718530991
episode: 461 training return: -530.6346950569124
episode: 462 training return: -773.3589038073734
episode: 463 training return: -503.58151275169325
epoch: 116 test_true_pfm: 270.55597858227776 sim_pfm: -443.1250927021901
episode: 464 training return: -526.1522414715234
episode: 465 training return: -489.0530144450045
episode: 466 training return: -507.7139294078762
episode: 467 training return: -519.3834653962631
epoch: 117 test_true_pfm: 126.36047576754562 sim_pfm: -421.77230901227693
episode: 468 training return: -593.3902302064248
episode: 469 training return: -522.3626059477392
episode: 470 training return: -555.7624454689771
episode: 471 training return: -504.4686736547024
epoch: 118 test_true_pfm: 119.21873854592985 sim_pfm: -434.4292905592021
episode: 472 training return: -518.4219844826529
episode: 473 training return: -494.61057771966676
episode: 474 training return: -519.9705906547883
episode: 475 training return: -451.4312934129498
epoch: 119 test_true_pfm: 360.83946492908876 sim_pfm: -430.4392622444434
episode: 476 training return: -491.8383286783686
episode: 477 training return: -655.5225018449665
episode: 478 training return: -563.333077078585
episode: 479 training return: -515.4807952937895
epoch: 120 test_true_pfm: 149.08163933849062 sim_pfm: -435.63195514006907
episode: 480 training return: -521.7162381474632
episode: 481 training return: -573.886825802991
episode: 482 training return: -538.3800582987195
episode: 483 training return: -527.1268283057802
epoch: 121 test_true_pfm: 282.0858000222673 sim_pfm: -416.7896207859183
episode: 484 training return: -518.192586057655
episode: 485 training return: -538.8826668468992
episode: 486 training return: -512.7657016801203
episode: 487 training return: -519.957022173835
epoch: 122 test_true_pfm: 452.6853293819295 sim_pfm: -416.97668824743124
episode: 488 training return: -491.84446571695577
episode: 489 training return: -510.3286714258163
episode: 490 training return: -505.48415187342573
episode: 491 training return: -496.9214408357973
epoch: 123 test_true_pfm: 345.41951377967854 sim_pfm: -421.66451626560576
episode: 492 training return: -521.3577156830818
episode: 493 training return: -549.7931192691375
episode: 494 training return: -490.7907100811692
episode: 495 training return: -528.368177521965
epoch: 124 test_true_pfm: 465.83572868788275 sim_pfm: -380.2384931170299
episode: 496 training return: -485.7270603230537
episode: 497 training return: -523.2972840886771
episode: 498 training return: -528.9809004632032
episode: 499 training return: -723.1982962822169
epoch: 125 test_true_pfm: 220.48705100849972 sim_pfm: -462.9261027173863
episode: 500 training return: -526.5097582808698
episode: 501 training return: -505.0327767079092
episode: 502 training return: -560.367275134215
episode: 503 training return: -543.0549230355431
epoch: 126 test_true_pfm: 163.34947085377283 sim_pfm: -426.1492055502362
episode: 504 training return: -522.4080070407042
episode: 505 training return: -520.1784522322029
episode: 506 training return: -525.7784639924366
episode: 507 training return: -539.2048353306265
epoch: 127 test_true_pfm: 333.10716285722697 sim_pfm: -440.5267674504894
episode: 508 training return: -568.6211859426624
episode: 509 training return: -517.5136372397186
episode: 510 training return: -488.396807847426
episode: 511 training return: -504.0090997180469
epoch: 128 test_true_pfm: 286.30967432794876 sim_pfm: -409.9843237627615
episode: 512 training return: -553.4860865808583
episode: 513 training return: -516.2287928164302
episode: 514 training return: -536.0675536243862
episode: 515 training return: -537.8388598335897
epoch: 129 test_true_pfm: 215.66572989743858 sim_pfm: -477.15703219584316
episode: 516 training return: -512.0463662093666
episode: 517 training return: -534.5736107014961
episode: 518 training return: -518.5525642949144
episode: 519 training return: -528.5836206390256
epoch: 130 test_true_pfm: 325.70683528920364 sim_pfm: -379.62561379014045
episode: 520 training return: -545.3591183024256
episode: 521 training return: -503.35684471005163
episode: 522 training return: -523.0260765571329
episode: 523 training return: -498.6965039731455
epoch: 131 test_true_pfm: 218.4544088586982 sim_pfm: -453.4983420686322
episode: 524 training return: -501.77740414915485
episode: 525 training return: -470.4622262126356
episode: 526 training return: -532.3688365629694
episode: 527 training return: -526.4836822950323
epoch: 132 test_true_pfm: 71.65699208001189 sim_pfm: -472.66770614428884
episode: 528 training return: -538.4364805550154
episode: 529 training return: -502.9850286619228
episode: 530 training return: -519.0535140708461
episode: 531 training return: -542.4430566751093
epoch: 133 test_true_pfm: 442.081944759097 sim_pfm: -411.39120988421865
episode: 532 training return: -485.5531039745121
episode: 533 training return: -530.1698646792888
episode: 534 training return: -559.7036516351112
episode: 535 training return: -526.3363719511012
epoch: 134 test_true_pfm: 397.76943401318255 sim_pfm: -447.2666819241526
episode: 536 training return: -543.4959233103998
episode: 537 training return: -502.0047641250118
episode: 538 training return: -489.2453063598157
episode: 539 training return: -507.5405310484813
epoch: 135 test_true_pfm: 387.3362263870869 sim_pfm: -420.2413549191345
episode: 540 training return: -495.1368475149452
episode: 541 training return: -616.7742185847613
episode: 542 training return: -510.78807999900414
episode: 543 training return: -534.61874543391
epoch: 136 test_true_pfm: 220.9498319869208 sim_pfm: -485.1788488485686
episode: 544 training return: -515.6821669449363
episode: 545 training return: -449.7410754844456
episode: 546 training return: -492.54116199089947
episode: 547 training return: -559.6506230022552
epoch: 137 test_true_pfm: 316.68205802517673 sim_pfm: -409.23493153959254
episode: 548 training return: -500.8088259845229
episode: 549 training return: -542.6836720770679
episode: 550 training return: -496.50086967409993
episode: 551 training return: -587.6334115228196
epoch: 138 test_true_pfm: 208.1045049258954 sim_pfm: -459.27893545989537
episode: 552 training return: -497.64268082122885
episode: 553 training return: -498.4302217941217
episode: 554 training return: -479.24489927437514
episode: 555 training return: -516.0077681598169
epoch: 139 test_true_pfm: 162.8334802154984 sim_pfm: -456.4929942702911
episode: 556 training return: -531.5404800265666
episode: 557 training return: -574.143557312769
episode: 558 training return: -489.220067036285
episode: 559 training return: -481.2029943401613
epoch: 140 test_true_pfm: 430.2053390959746 sim_pfm: -422.7903724497972
episode: 560 training return: -503.35706228802366
episode: 561 training return: -592.5369467223696
episode: 562 training return: -540.1018242449474
episode: 563 training return: -477.7324509100979
epoch: 141 test_true_pfm: 350.8864937716635 sim_pfm: -398.71159659172105
episode: 564 training return: -528.3403795045128
episode: 565 training return: -482.42752091989985
episode: 566 training return: -537.8839044562147
episode: 567 training return: -505.2363115707439
epoch: 142 test_true_pfm: 345.34119505874827 sim_pfm: -441.57162595130916
episode: 568 training return: -481.1932701225741
episode: 569 training return: -519.4812470915681
episode: 570 training return: -755.2120944134227
episode: 571 training return: -522.5820838117572
epoch: 143 test_true_pfm: 327.4645324054836 sim_pfm: -430.40571118054066
episode: 572 training return: -490.51468933734657
episode: 573 training return: -536.4665015250969
episode: 574 training return: -500.10479060285314
episode: 575 training return: -481.28725842742267
epoch: 144 test_true_pfm: 213.6549829955372 sim_pfm: -446.5007632852931
episode: 576 training return: -553.0138156890866
episode: 577 training return: -548.969919946292
episode: 578 training return: -546.7808395165134
episode: 579 training return: -527.5216464325797
epoch: 145 test_true_pfm: 331.4046574199863 sim_pfm: -410.19077490753835
episode: 580 training return: -516.6220946291979
episode: 581 training return: -515.7867342182459
episode: 582 training return: -504.61323412322315
episode: 583 training return: -503.09160017219693
epoch: 146 test_true_pfm: 279.30741363694887 sim_pfm: -464.4375491161841
episode: 584 training return: -505.17487868537876
episode: 585 training return: -483.53962512577743
episode: 586 training return: -472.9356337050817
episode: 587 training return: -511.07986235376165
epoch: 147 test_true_pfm: 390.03924728232363 sim_pfm: -415.2780154057306
episode: 588 training return: -494.0894410937814
episode: 589 training return: -525.4341184770611
episode: 590 training return: -522.478857296265
episode: 591 training return: -516.2881175182595
epoch: 148 test_true_pfm: 406.9480799288988 sim_pfm: -414.19116771849804
episode: 592 training return: -511.8863802481004
episode: 593 training return: -486.4111216885881
episode: 594 training return: -524.317015329659
episode: 595 training return: -474.75754258824196
epoch: 149 test_true_pfm: 148.47315031436315 sim_pfm: -464.61975848639185
episode: 596 training return: -488.65205440989286
episode: 597 training return: -535.4618579411476
episode: 598 training return: -501.6413498993175
episode: 599 training return: -503.6358143722105
epoch: 150 test_true_pfm: 252.5447684126456 sim_pfm: -423.7257531599591
