['--alg', 'ddpg', '--env', 'Swimmer-v3', '--learn', 'uncertainty', '--traj', 'expert', '--seed', '4', '--data', '100000']
epoch: 0 training_loss 0.21759330593049525 test_loss: 0.14473949670791625
epoch: 1 training_loss 0.14354296531528235 test_loss: 0.13713252544403076
epoch: 2 training_loss 0.1404813291877508 test_loss: 0.13984698057174683
epoch: 3 training_loss 0.12960158959031104 test_loss: 0.1495985746383667
epoch: 4 training_loss 0.13114040717482567 test_loss: 0.1212590217590332
epoch: 5 training_loss 0.12858391009271145 test_loss: 0.12198429107666016
epoch: 6 training_loss 0.12134840227663517 test_loss: 0.11833270788192748
epoch: 7 training_loss 0.12228440001606941 test_loss: 0.13241738080978394
epoch: 8 training_loss 0.12933315198868514 test_loss: 0.11984329223632813
epoch: 9 training_loss 0.12499163925647735 test_loss: 0.10381442308425903
epoch: 10 training_loss 0.11821830444037915 test_loss: 0.12064335346221924
epoch: 11 training_loss 0.11687121547758579 test_loss: 0.11963832378387451
epoch: 12 training_loss 0.11528037164360284 test_loss: 0.13131085634231568
epoch: 13 training_loss 0.11244629137217999 test_loss: 0.11742033958435058
epoch: 14 training_loss 0.11724681001156569 test_loss: 0.11499524116516113
epoch: 15 training_loss 0.11576197374612093 test_loss: 0.11688191890716552
epoch: 16 training_loss 0.11660651519894599 test_loss: 0.11628831624984741
epoch: 17 training_loss 0.11398048117756844 test_loss: 0.12392288446426392
epoch: 18 training_loss 0.11464556749910117 test_loss: 0.10672236680984497
epoch: 19 training_loss 0.11483492281287909 test_loss: 0.12529126405715943
epoch: 20 training_loss 0.11913376457989217 test_loss: 0.11087853908538818
epoch: 21 training_loss 0.11914720423519612 test_loss: 0.10414912700653076
epoch: 22 training_loss 0.1147613699734211 test_loss: 0.11329551935195922
epoch: 23 training_loss 0.12033826977014542 test_loss: 0.1153862714767456
epoch: 24 training_loss 0.11618643447756767 test_loss: 0.11282972097396851
epoch: 25 training_loss 0.11824020329862833 test_loss: 0.10997045040130615
epoch: 26 training_loss 0.11679074607789516 test_loss: 0.10985323190689086
epoch: 27 training_loss 0.12292841985821724 test_loss: 0.1146553635597229
epoch: 28 training_loss 0.10855711232870817 test_loss: 0.10978213548660279
epoch: 29 training_loss 0.1126911659911275 test_loss: 0.11520917415618896
epoch: 30 training_loss 0.1105844684317708 test_loss: 0.11086763143539428
epoch: 31 training_loss 0.1185135556012392 test_loss: 0.11616287231445313
epoch: 32 training_loss 0.1159479320421815 test_loss: 0.10737432241439819
epoch: 33 training_loss 0.11272616725414991 test_loss: 0.12595123052597046
epoch: 34 training_loss 0.11280432190746069 test_loss: 0.12044544219970703
epoch: 35 training_loss 0.11557688944041729 test_loss: 0.1137162208557129
epoch: 36 training_loss 0.11132698263972998 test_loss: 0.11965535879135132
epoch: 37 training_loss 0.10764009989798069 test_loss: 0.10981645584106445
epoch: 38 training_loss 0.10750631961971521 test_loss: 0.11157944202423095
epoch: 39 training_loss 0.11695553250610828 test_loss: 0.11220111846923828
epoch: 40 training_loss 0.11562380153685808 test_loss: 0.11200259923934937
epoch: 41 training_loss 0.11708688210695982 test_loss: 0.10872814655303956
epoch: 42 training_loss 0.1193751772493124 test_loss: 0.11547189950942993
epoch: 43 training_loss 0.10665500920265913 test_loss: 0.10821483135223389
epoch: 44 training_loss 0.11443593058735133 test_loss: 0.11547104120254517
epoch: 45 training_loss 0.11429534684866667 test_loss: 0.10681453943252564
epoch: 46 training_loss 0.11486704129725694 test_loss: 0.12294789552688598
epoch: 47 training_loss 0.11229280292987824 test_loss: 0.10661396980285645
epoch: 48 training_loss 0.11511781062930822 test_loss: 0.1201697826385498
epoch: 49 training_loss 0.11144573289901018 test_loss: 0.10500484704971313
epoch: 50 training_loss 0.1083293205499649 test_loss: 0.12006322145462037
epoch: 51 training_loss 0.10609601587057113 test_loss: 0.11873514652252197
epoch: 52 training_loss 0.11895336616784334 test_loss: 0.1208385705947876
epoch: 53 training_loss 0.11586805738508701 test_loss: 0.11505200862884521
epoch: 54 training_loss 0.11462115470319986 test_loss: 0.11611534357070923
epoch: 55 training_loss 0.11246029615402221 test_loss: 0.1058579444885254
epoch: 56 training_loss 0.11410597082227468 test_loss: 0.11555969715118408
epoch: 57 training_loss 0.10809646815061569 test_loss: 0.11753246784210206
epoch: 58 training_loss 0.10734626617282629 test_loss: 0.11035065650939942
epoch: 59 training_loss 0.1101300185546279 test_loss: 0.11516203880310058
epoch: 60 training_loss 0.11531836815178394 test_loss: 0.11896191835403443
epoch: 61 training_loss 0.10861775506287813 test_loss: 0.11437122821807862
epoch: 62 training_loss 0.10919357314705849 test_loss: 0.10946903228759766
epoch: 63 training_loss 0.11144692726433277 test_loss: 0.11315776109695434
epoch: 64 training_loss 0.11046599578112364 test_loss: 0.11405029296875
epoch: 65 training_loss 0.1146922643110156 test_loss: 0.10839544534683228
epoch: 66 training_loss 0.11275717716664076 test_loss: 0.11553387641906739
epoch: 67 training_loss 0.11086412928998471 test_loss: 0.11177935600280761
epoch: 68 training_loss 0.11712651304900647 test_loss: 0.11846230030059815
epoch: 69 training_loss 0.1117268230766058 test_loss: 0.11017557382583618
epoch: 70 training_loss 0.113810924179852 test_loss: 0.11404951810836791
epoch: 71 training_loss 0.11390674423426389 test_loss: 0.10273648500442505
epoch: 72 training_loss 0.10945835184305906 test_loss: 0.11245416402816773
epoch: 73 training_loss 0.11187525823712349 test_loss: 0.10299500226974487
epoch: 74 training_loss 0.11147706232964992 test_loss: 0.11893084049224853
epoch: 75 training_loss 0.11451569195836782 test_loss: 0.11332910060882569
epoch: 76 training_loss 0.11967009488493204 test_loss: 0.10950150489807128
epoch: 77 training_loss 0.11034841738641261 test_loss: 0.1144218921661377
epoch: 78 training_loss 0.11210080035030842 test_loss: 0.1169017195701599
epoch: 79 training_loss 0.11335628118366003 test_loss: 0.10989124774932861
epoch: 80 training_loss 0.10673097599297762 test_loss: 0.10302411317825318
epoch: 81 training_loss 0.10620210539549589 test_loss: 0.11377745866775513
epoch: 82 training_loss 0.10942942589521408 test_loss: 0.10753209590911865
epoch: 83 training_loss 0.10558319080621004 test_loss: 0.11302604675292968
epoch: 84 training_loss 0.11160498470067978 test_loss: 0.10832720994949341
epoch: 85 training_loss 0.11102204822003842 test_loss: 0.10943738222122193
epoch: 86 training_loss 0.11195526791736483 test_loss: 0.11196342706680298
epoch: 87 training_loss 0.10836677573621273 test_loss: 0.11458479166030884
epoch: 88 training_loss 0.11143737755715848 test_loss: 0.10911473035812377
epoch: 89 training_loss 0.11144751947373152 test_loss: 0.11831471920013428
epoch: 90 training_loss 0.11403399009257555 test_loss: 0.1124947190284729
epoch: 91 training_loss 0.10594781037420034 test_loss: 0.11254345178604126
epoch: 92 training_loss 0.10726476304233074 test_loss: 0.12634713649749757
epoch: 93 training_loss 0.10929766044020653 test_loss: 0.11380908489227295
epoch: 94 training_loss 0.10758988950401545 test_loss: 0.10762373208999634
epoch: 95 training_loss 0.11388674199581146 test_loss: 0.10729012489318848
epoch: 96 training_loss 0.11245794832706452 test_loss: 0.10923893451690674
epoch: 97 training_loss 0.11440474819391966 test_loss: 0.11446462869644165
epoch: 98 training_loss 0.1169571390748024 test_loss: 0.11614153385162354
epoch: 99 training_loss 0.1075134776160121 test_loss: 0.11122341156005859
epoch: 100 training_loss 0.11043471515178681 test_loss: 0.10773484706878662
epoch: 101 training_loss 0.10208815902471542 test_loss: 0.10341615676879883
epoch: 102 training_loss 0.11496657520532608 test_loss: 0.1051061987876892
epoch: 103 training_loss 0.10707231841981411 test_loss: 0.11668338775634765
epoch: 104 training_loss 0.10742029622197151 test_loss: 0.10584126710891724
epoch: 105 training_loss 0.11291726402938367 test_loss: 0.1061759352684021
epoch: 106 training_loss 0.10505950041115283 test_loss: 0.11668573617935181
epoch: 107 training_loss 0.11453255359083414 test_loss: 0.114873206615448
epoch: 108 training_loss 0.11155016258358956 test_loss: 0.11278817653656006
epoch: 109 training_loss 0.10710863038897514 test_loss: 0.11630897521972657
epoch: 110 training_loss 0.1029764973372221 test_loss: 0.1156607985496521
epoch: 111 training_loss 0.1091732157021761 test_loss: 0.10997656583786011
epoch: 112 training_loss 0.111267250739038 test_loss: 0.1179389476776123
epoch: 113 training_loss 0.10405055414885282 test_loss: 0.11125658750534058
epoch: 114 training_loss 0.11321182414889336 test_loss: 0.11755520105361938
epoch: 115 training_loss 0.11273819033056498 test_loss: 0.10876702070236206
epoch: 116 training_loss 0.10914732798933983 test_loss: 0.1064666748046875
epoch: 117 training_loss 0.10410341586917639 test_loss: 0.11022360324859619
epoch: 118 training_loss 0.10616590451449155 test_loss: 0.11128568649291992
epoch: 119 training_loss 0.10808203004300594 test_loss: 0.1061893343925476
epoch: 120 training_loss 0.11520625282078982 test_loss: 0.11604157686233521
epoch: 121 training_loss 0.10521530583500863 test_loss: 0.10831987857818604
epoch: 122 training_loss 0.1093749924749136 test_loss: 0.10951088666915894
epoch: 123 training_loss 0.10988019566982984 test_loss: 0.1085325837135315
epoch: 124 training_loss 0.11362586677074432 test_loss: 0.10843825340270996
epoch: 125 training_loss 0.10765636038035155 test_loss: 0.11460903882980347
epoch: 126 training_loss 0.10622970502823591 test_loss: 0.11041340827941895
epoch: 127 training_loss 0.10932320550084114 test_loss: 0.11518422365188599
epoch: 128 training_loss 0.10733605924993754 test_loss: 0.10929234027862549
epoch: 129 training_loss 0.11625903461128473 test_loss: 0.10442255735397339
epoch: 130 training_loss 0.10926211170852185 test_loss: 0.10315238237380982
epoch: 131 training_loss 0.10453098472207785 test_loss: 0.11234790086746216
epoch: 132 training_loss 0.10880812369287014 test_loss: 0.09941056370735168
epoch: 133 training_loss 0.11306074224412441 test_loss: 0.12092475891113282
epoch: 134 training_loss 0.10532495249062776 test_loss: 0.11884835958480836
epoch: 135 training_loss 0.10645393330603838 test_loss: 0.11621875762939453
epoch: 136 training_loss 0.10864760734140873 test_loss: 0.10329039096832275
epoch: 137 training_loss 0.11322096530348062 test_loss: 0.10990171432495117
epoch: 138 training_loss 0.10695540715008973 test_loss: 0.10988270044326783
epoch: 139 training_loss 0.11496229484677314 test_loss: 0.10633243322372436
epoch: 140 training_loss 0.11385039281100034 test_loss: 0.10410484075546264
epoch: 141 training_loss 0.11418223947286606 test_loss: 0.10439517498016357
epoch: 142 training_loss 0.1112095669656992 test_loss: 0.09557012915611267
epoch: 143 training_loss 0.10325043249875307 test_loss: 0.11691944599151612
epoch: 144 training_loss 0.10896668259054422 test_loss: 0.11633751392364503
epoch: 145 training_loss 0.10606966201215982 test_loss: 0.10438247919082641
epoch: 146 training_loss 0.10679789640009403 test_loss: 0.11433137655258178
epoch: 147 training_loss 0.1046823363006115 test_loss: 0.10213372707366944
epoch: 148 training_loss 0.11105043955147266 test_loss: 0.10078270435333252
epoch: 149 training_loss 0.10985541105270386 test_loss: 0.11724435091018677
epoch: 0 training_loss 0.23574880801141262 test_loss: 0.15704612731933593
epoch: 1 training_loss 0.14512826006859542 test_loss: 0.1390824556350708
epoch: 2 training_loss 0.1327184746414423 test_loss: 0.142694354057312
epoch: 3 training_loss 0.12769583128392697 test_loss: 0.13755189180374144
epoch: 4 training_loss 0.12436507951468229 test_loss: 0.13163942098617554
epoch: 5 training_loss 0.12143533129245043 test_loss: 0.1316758155822754
epoch: 6 training_loss 0.1222344509512186 test_loss: 0.1381916880607605
epoch: 7 training_loss 0.12815966363996267 test_loss: 0.13214638233184814
epoch: 8 training_loss 0.11765136439353227 test_loss: 0.12114652395248413
epoch: 9 training_loss 0.11982913419604302 test_loss: 0.11808128356933593
epoch: 10 training_loss 0.11625527743250132 test_loss: 0.11547180414199829
epoch: 11 training_loss 0.12104583971202373 test_loss: 0.13474446535110474
epoch: 12 training_loss 0.11740039404481649 test_loss: 0.1210512399673462
epoch: 13 training_loss 0.12046728610992431 test_loss: 0.12255446910858155
epoch: 14 training_loss 0.11782364301383495 test_loss: 0.12857452630996705
epoch: 15 training_loss 0.11482269778847694 test_loss: 0.1365921139717102
epoch: 16 training_loss 0.11992569904774428 test_loss: 0.14466512203216553
epoch: 17 training_loss 0.11857847649604082 test_loss: 0.12830095291137694
epoch: 18 training_loss 0.11056512415409088 test_loss: 0.11573898792266846
epoch: 19 training_loss 0.12395517494529486 test_loss: 0.11542068719863892
epoch: 20 training_loss 0.11107420306652785 test_loss: 0.12368313074111939
epoch: 21 training_loss 0.10992532264441252 test_loss: 0.12484217882156372
epoch: 22 training_loss 0.11029167950153351 test_loss: 0.1161091685295105
epoch: 23 training_loss 0.11635881382972002 test_loss: 0.1310879111289978
epoch: 24 training_loss 0.11601890683174133 test_loss: 0.13324646949768065
epoch: 25 training_loss 0.11682841572910548 test_loss: 0.11787018775939942
epoch: 26 training_loss 0.11038303591310977 test_loss: 0.12545788288116455
epoch: 27 training_loss 0.1097304280847311 test_loss: 0.13514444828033448
epoch: 28 training_loss 0.12143131509423256 test_loss: 0.11054610013961792
epoch: 29 training_loss 0.11414128929376602 test_loss: 0.11986467838287354
epoch: 30 training_loss 0.11301617205142975 test_loss: 0.12018851041793824
epoch: 31 training_loss 0.11559089001268148 test_loss: 0.12906858921051026
epoch: 32 training_loss 0.11172088410705328 test_loss: 0.11689597368240356
epoch: 33 training_loss 0.11418645404279232 test_loss: 0.13107759952545167
epoch: 34 training_loss 0.11251823287457227 test_loss: 0.12743306159973145
epoch: 35 training_loss 0.11199080113321543 test_loss: 0.11459633111953735
epoch: 36 training_loss 0.1105612687766552 test_loss: 0.12891712188720703
epoch: 37 training_loss 0.11692262630909682 test_loss: 0.11727792024612427
epoch: 38 training_loss 0.11268486570566892 test_loss: 0.13250911235809326
epoch: 39 training_loss 0.11198526747524738 test_loss: 0.11841542720794677
epoch: 40 training_loss 0.10893697690218687 test_loss: 0.11695479154586792
epoch: 41 training_loss 0.11347186416387559 test_loss: 0.12231322526931762
epoch: 42 training_loss 0.11406504988670349 test_loss: 0.12211322784423828
epoch: 43 training_loss 0.11545960023999215 test_loss: 0.11585403680801391
epoch: 44 training_loss 0.11359536074101925 test_loss: 0.12253692150115966
epoch: 45 training_loss 0.11393995225429535 test_loss: 0.13519976139068604
epoch: 46 training_loss 0.11380561370402574 test_loss: 0.11406124830245971
epoch: 47 training_loss 0.11322613414376974 test_loss: 0.12674134969711304
epoch: 48 training_loss 0.11158700026571751 test_loss: 0.11348679065704345
epoch: 49 training_loss 0.11140076722949743 test_loss: 0.10179424285888672
epoch: 50 training_loss 0.11539950843900443 test_loss: 0.11374926567077637
epoch: 51 training_loss 0.11946471229195595 test_loss: 0.11562721729278565
epoch: 52 training_loss 0.11591307215392589 test_loss: 0.12976417541503907
epoch: 53 training_loss 0.11712225154042244 test_loss: 0.12588801383972167
epoch: 54 training_loss 0.11425869148224592 test_loss: 0.10851900577545166
epoch: 55 training_loss 0.10777285106480122 test_loss: 0.11862028837203979
epoch: 56 training_loss 0.1152429361641407 test_loss: 0.11321462392807007
epoch: 57 training_loss 0.11336073625832796 test_loss: 0.12677830457687378
epoch: 58 training_loss 0.11151368245482444 test_loss: 0.12472010850906372
epoch: 59 training_loss 0.10621419109404087 test_loss: 0.12379932403564453
epoch: 60 training_loss 0.10707807447761297 test_loss: 0.13273004293441773
epoch: 61 training_loss 0.1099161933362484 test_loss: 0.11973494291305542
epoch: 62 training_loss 0.11218767803162337 test_loss: 0.11807123422622681
epoch: 63 training_loss 0.10784387171268463 test_loss: 0.12497526407241821
epoch: 64 training_loss 0.10830502081662416 test_loss: 0.11251636743545532
epoch: 65 training_loss 0.1126229402795434 test_loss: 0.12467324733734131
epoch: 66 training_loss 0.1104436805844307 test_loss: 0.10078866481781006
epoch: 67 training_loss 0.10957761730998755 test_loss: 0.12014377117156982
epoch: 68 training_loss 0.11304881945252418 test_loss: 0.11541370153427125
epoch: 69 training_loss 0.11175536599010229 test_loss: 0.12248444557189941
epoch: 70 training_loss 0.10685558646917342 test_loss: 0.11939841508865356
epoch: 71 training_loss 0.10836172234266997 test_loss: 0.11830720901489258
epoch: 72 training_loss 0.10876094549894333 test_loss: 0.1252022862434387
epoch: 73 training_loss 0.10314822684973478 test_loss: 0.12659498453140258
epoch: 74 training_loss 0.11558264348655939 test_loss: 0.11473946571350098
epoch: 75 training_loss 0.1111986269056797 test_loss: 0.11901062726974487
epoch: 76 training_loss 0.1094973210990429 test_loss: 0.1238139510154724
epoch: 77 training_loss 0.11303457494825125 test_loss: 0.11099342107772828
epoch: 78 training_loss 0.11714132934808731 test_loss: 0.11781395673751831
epoch: 79 training_loss 0.11926199428737164 test_loss: 0.11618406772613525
epoch: 80 training_loss 0.11145466815680266 test_loss: 0.11655668020248414
epoch: 81 training_loss 0.10616001585498452 test_loss: 0.12252525091171265
epoch: 82 training_loss 0.11384402722120285 test_loss: 0.1155624508857727
epoch: 83 training_loss 0.11697587624192238 test_loss: 0.12427759170532227
epoch: 84 training_loss 0.10839365001767874 test_loss: 0.10498253107070923
epoch: 85 training_loss 0.11417313512414694 test_loss: 0.0945048213005066
epoch: 86 training_loss 0.10956852670758963 test_loss: 0.12165058851242065
epoch: 87 training_loss 0.11389039348810912 test_loss: 0.12623493671417235
epoch: 88 training_loss 0.11765156537294388 test_loss: 0.12020947933197021
epoch: 89 training_loss 0.1058827893435955 test_loss: 0.12333741188049316
epoch: 90 training_loss 0.11196913108229638 test_loss: 0.12334150075912476
epoch: 91 training_loss 0.11376500405371189 test_loss: 0.12637977600097655
epoch: 92 training_loss 0.11545149009674788 test_loss: 0.11891031265258789
epoch: 93 training_loss 0.11051325980573892 test_loss: 0.11996073722839355
epoch: 94 training_loss 0.11078044187277555 test_loss: 0.11404378414154052
epoch: 95 training_loss 0.10707753524184227 test_loss: 0.11828194856643677
epoch: 96 training_loss 0.10830717753618956 test_loss: 0.13511537313461303
epoch: 97 training_loss 0.10873874757438898 test_loss: 0.13142547607421876
epoch: 98 training_loss 0.11307403061538934 test_loss: 0.12450100183486938
epoch: 99 training_loss 0.10901623297482729 test_loss: 0.12359510660171509
epoch: 100 training_loss 0.11188988912850618 test_loss: 0.10546613931655884
epoch: 101 training_loss 0.10718527022749186 test_loss: 0.11143876314163208
epoch: 102 training_loss 0.11001640025526285 test_loss: 0.1310349941253662
epoch: 103 training_loss 0.11079006819054485 test_loss: 0.11204147338867188
epoch: 104 training_loss 0.10591271102428436 test_loss: 0.13210666179656982
epoch: 105 training_loss 0.1065123887360096 test_loss: 0.123046612739563
epoch: 106 training_loss 0.11365364242345094 test_loss: 0.1158254861831665
epoch: 107 training_loss 0.1091421503573656 test_loss: 0.12849096059799195
epoch: 108 training_loss 0.10687857005745173 test_loss: 0.11044362783432007
epoch: 109 training_loss 0.10715218063443899 test_loss: 0.1156996726989746
epoch: 110 training_loss 0.10623870372772216 test_loss: 0.11818780899047851
epoch: 111 training_loss 0.11409307908266783 test_loss: 0.11332422494888306
epoch: 112 training_loss 0.11489871345460415 test_loss: 0.1138546347618103
epoch: 113 training_loss 0.10383008282631635 test_loss: 0.11141371726989746
epoch: 114 training_loss 0.10411632940173149 test_loss: 0.1208720088005066
epoch: 115 training_loss 0.10873686082661152 test_loss: 0.12338413000106811
epoch: 116 training_loss 0.10810591872781515 test_loss: 0.10766525268554687
epoch: 117 training_loss 0.11247203644365072 test_loss: 0.12161864042282104
epoch: 118 training_loss 0.10823302563279867 test_loss: 0.1321970820426941
epoch: 119 training_loss 0.10609326926991343 test_loss: 0.11583795547485351
epoch: 120 training_loss 0.10957741163671017 test_loss: 0.13311778306961058
epoch: 121 training_loss 0.10857455369085073 test_loss: 0.11875940561294555
epoch: 122 training_loss 0.115443728081882 test_loss: 0.12093237638473511
epoch: 123 training_loss 0.11268911451101303 test_loss: 0.10955971479415894
epoch: 124 training_loss 0.11016169302165509 test_loss: 0.11385101079940796
epoch: 125 training_loss 0.10621980674564839 test_loss: 0.12915632724761963
epoch: 126 training_loss 0.10486133050173521 test_loss: 0.11540753841400146
epoch: 127 training_loss 0.10214765023440123 test_loss: 0.11331126689910889
epoch: 128 training_loss 0.10773474924266338 test_loss: 0.12005358934402466
epoch: 129 training_loss 0.10978545892983675 test_loss: 0.11519453525543213
epoch: 130 training_loss 0.10435265559703112 test_loss: 0.10854369401931763
epoch: 131 training_loss 0.10966648980975151 test_loss: 0.12833305597305297
epoch: 132 training_loss 0.11591693446040154 test_loss: 0.11987640857696533
epoch: 133 training_loss 0.10781355254352093 test_loss: 0.11980721950531006
epoch: 134 training_loss 0.11191760327666998 test_loss: 0.11541790962219238
epoch: 135 training_loss 0.10439459573477507 test_loss: 0.12585844993591308
epoch: 136 training_loss 0.11214801106601953 test_loss: 0.1197290301322937
epoch: 137 training_loss 0.10792523238807916 test_loss: 0.11235660314559937
epoch: 138 training_loss 0.11297352463006974 test_loss: 0.11421446800231934
epoch: 139 training_loss 0.10546493861824274 test_loss: 0.11070033311843872
epoch: 140 training_loss 0.10871638467535377 test_loss: 0.10823636054992676
epoch: 141 training_loss 0.10854276604950427 test_loss: 0.135054349899292
epoch: 142 training_loss 0.1092726831883192 test_loss: 0.14235764741897583
epoch: 143 training_loss 0.11335003543645143 test_loss: 0.12847763299942017
epoch: 144 training_loss 0.10726826395839453 test_loss: 0.1206371545791626
epoch: 145 training_loss 0.11022808995097876 test_loss: 0.1160075306892395
epoch: 146 training_loss 0.10442803528159857 test_loss: 0.11657174825668334
epoch: 147 training_loss 0.11385116238147021 test_loss: 0.11415905952453613
epoch: 148 training_loss 0.10342205889523029 test_loss: 0.10756397247314453
epoch: 149 training_loss 0.10759908892214298 test_loss: 0.11282830238342285
epoch: 0 training_loss 0.20823379553854465 test_loss: 0.1848752975463867
epoch: 1 training_loss 0.15932592086493969 test_loss: 0.12708849906921388
epoch: 2 training_loss 0.13073421105742455 test_loss: 0.12246657609939575
epoch: 3 training_loss 0.1365593546628952 test_loss: 0.13900452852249146
epoch: 4 training_loss 0.13049628220498563 test_loss: 0.12103675603866577
epoch: 5 training_loss 0.12944550685584544 test_loss: 0.14570796489715576
epoch: 6 training_loss 0.12821463987231255 test_loss: 0.13289635181427
epoch: 7 training_loss 0.12748469665646553 test_loss: 0.13455352783203126
epoch: 8 training_loss 0.1240565226972103 test_loss: 0.10953414440155029
epoch: 9 training_loss 0.13240943510085346 test_loss: 0.11467906236648559
epoch: 10 training_loss 0.1133082240447402 test_loss: 0.11414047479629516
epoch: 11 training_loss 0.12920203886926174 test_loss: 0.11222453117370605
epoch: 12 training_loss 0.12029381688684225 test_loss: 0.11516388654708862
epoch: 13 training_loss 0.12497850000858307 test_loss: 0.11721553802490234
epoch: 14 training_loss 0.12177031196653842 test_loss: 0.12171227931976318
epoch: 15 training_loss 0.12288167949765921 test_loss: 0.13243412971496582
epoch: 16 training_loss 0.12091084554791451 test_loss: 0.11415185928344726
epoch: 17 training_loss 0.12014077089726925 test_loss: 0.13000794649124145
epoch: 18 training_loss 0.11781354255974293 test_loss: 0.1118021011352539
epoch: 19 training_loss 0.12213661156594753 test_loss: 0.12295575141906738
epoch: 20 training_loss 0.11752265725284815 test_loss: 0.11203482151031494
epoch: 21 training_loss 0.12069970417767763 test_loss: 0.12288321256637573
epoch: 22 training_loss 0.1196480256319046 test_loss: 0.13174155950546265
epoch: 23 training_loss 0.11880139015614986 test_loss: 0.120460844039917
epoch: 24 training_loss 0.11471967888996006 test_loss: 0.10229136943817138
epoch: 25 training_loss 0.11889450326561928 test_loss: 0.12150071859359741
epoch: 26 training_loss 0.11263461366295814 test_loss: 0.11594065427780151
epoch: 27 training_loss 0.12177911672741175 test_loss: 0.12001068592071533
epoch: 28 training_loss 0.11804600793868303 test_loss: 0.11413731575012206
epoch: 29 training_loss 0.11879492539912462 test_loss: 0.11831234693527222
epoch: 30 training_loss 0.11755275133997202 test_loss: 0.11977612972259521
epoch: 31 training_loss 0.11739529673010111 test_loss: 0.10860216617584229
epoch: 32 training_loss 0.1156703868880868 test_loss: 0.11188247203826904
epoch: 33 training_loss 0.111648550927639 test_loss: 0.11345645189285278
epoch: 34 training_loss 0.11552096527069806 test_loss: 0.11199824810028076
epoch: 35 training_loss 0.11838552858680487 test_loss: 0.11736156940460205
epoch: 36 training_loss 0.11649456799030304 test_loss: 0.11498311758041382
epoch: 37 training_loss 0.11379166949540377 test_loss: 0.12812037467956544
epoch: 38 training_loss 0.11842792466282845 test_loss: 0.10423234701156617
epoch: 39 training_loss 0.11547876227647066 test_loss: 0.12680554389953613
epoch: 40 training_loss 0.11005492813885212 test_loss: 0.1101191520690918
epoch: 41 training_loss 0.11660434704273939 test_loss: 0.11683032512664795
epoch: 42 training_loss 0.1129030941426754 test_loss: 0.11304231882095336
epoch: 43 training_loss 0.1149923550710082 test_loss: 0.10861709117889404
epoch: 44 training_loss 0.11281613953411579 test_loss: 0.12876427173614502
epoch: 45 training_loss 0.11533714573830366 test_loss: 0.10326759815216065
epoch: 46 training_loss 0.11455592233687639 test_loss: 0.11565145254135131
epoch: 47 training_loss 0.11174205504357815 test_loss: 0.1083060622215271
epoch: 48 training_loss 0.1169907283782959 test_loss: 0.11620464324951171
epoch: 49 training_loss 0.11770321562886238 test_loss: 0.12263041734695435
epoch: 50 training_loss 0.11771964259445668 test_loss: 0.11504731178283692
epoch: 51 training_loss 0.11985194072127342 test_loss: 0.1104900598526001
epoch: 52 training_loss 0.11943841092288494 test_loss: 0.13260473012924195
epoch: 53 training_loss 0.11766304604709149 test_loss: 0.11487010717391968
epoch: 54 training_loss 0.11787566259503364 test_loss: 0.10624018907546998
epoch: 55 training_loss 0.11231254082173109 test_loss: 0.11839306354522705
epoch: 56 training_loss 0.11501838341355324 test_loss: 0.11586239337921142
epoch: 57 training_loss 0.10882252808660269 test_loss: 0.10079858303070069
epoch: 58 training_loss 0.11462337724864483 test_loss: 0.12725625038146973
epoch: 59 training_loss 0.11460782036185264 test_loss: 0.10641069412231445
epoch: 60 training_loss 0.1163205773383379 test_loss: 0.12181323766708374
epoch: 61 training_loss 0.11274750601500273 test_loss: 0.12132219076156617
epoch: 62 training_loss 0.11978098206222057 test_loss: 0.10671486854553222
epoch: 63 training_loss 0.11974868282675744 test_loss: 0.1309073805809021
epoch: 64 training_loss 0.11897431295365095 test_loss: 0.12395752668380737
epoch: 65 training_loss 0.11659137215465307 test_loss: 0.10912108421325684
epoch: 66 training_loss 0.11653182465583085 test_loss: 0.12760845422744752
epoch: 67 training_loss 0.11865728225558997 test_loss: 0.11604521274566651
epoch: 68 training_loss 0.11356403738260269 test_loss: 0.11976102590560914
epoch: 69 training_loss 0.11707416918128728 test_loss: 0.11601635217666625
epoch: 70 training_loss 0.1123516957461834 test_loss: 0.11742222309112549
epoch: 71 training_loss 0.11511941120028496 test_loss: 0.11988521814346313
epoch: 72 training_loss 0.12365313354879617 test_loss: 0.1087518572807312
epoch: 73 training_loss 0.11288781251758337 test_loss: 0.1172560691833496
epoch: 74 training_loss 0.11781154293566942 test_loss: 0.11803381443023682
epoch: 75 training_loss 0.11750040363520384 test_loss: 0.13166432380676268
epoch: 76 training_loss 0.12186103992164135 test_loss: 0.10814752578735351
epoch: 77 training_loss 0.11860501047223806 test_loss: 0.12899646759033204
epoch: 78 training_loss 0.11310480251908302 test_loss: 0.11898626089096069
epoch: 79 training_loss 0.11187514200806618 test_loss: 0.10045789480209351
epoch: 80 training_loss 0.12068987529724837 test_loss: 0.12359036207199096
epoch: 81 training_loss 0.11189453784376382 test_loss: 0.11844315528869628
epoch: 82 training_loss 0.11428591135889292 test_loss: 0.10947214365005493
epoch: 83 training_loss 0.1063485635817051 test_loss: 0.11186903715133667
epoch: 84 training_loss 0.10770631097257137 test_loss: 0.11294987201690673
epoch: 85 training_loss 0.11745004832744599 test_loss: 0.12679996490478515
epoch: 86 training_loss 0.11155102130025625 test_loss: 0.11887573003768921
epoch: 87 training_loss 0.11876795589923858 test_loss: 0.12534840106964112
epoch: 88 training_loss 0.11410782380029559 test_loss: 0.11773141622543334
epoch: 89 training_loss 0.10881563555449247 test_loss: 0.1332395315170288
epoch: 90 training_loss 0.11656684909015894 test_loss: 0.113774573802948
epoch: 91 training_loss 0.11425358351320028 test_loss: 0.12186667919158936
epoch: 92 training_loss 0.11200515858829022 test_loss: 0.11510312557220459
epoch: 93 training_loss 0.11784496795386076 test_loss: 0.1105473518371582
epoch: 94 training_loss 0.11153284847736358 test_loss: 0.1211902379989624
epoch: 95 training_loss 0.11093333754688502 test_loss: 0.11558091640472412
epoch: 96 training_loss 0.1091187220811844 test_loss: 0.11729630231857299
epoch: 97 training_loss 0.11321970086544753 test_loss: 0.12180944681167602
epoch: 98 training_loss 0.11775019757449627 test_loss: 0.1064741849899292
epoch: 99 training_loss 0.11471618283540011 test_loss: 0.11444219350814819
epoch: 100 training_loss 0.12071971450001001 test_loss: 0.11899523735046387
epoch: 101 training_loss 0.11208977751433849 test_loss: 0.12248177528381347
epoch: 102 training_loss 0.12174250811338425 test_loss: 0.10489128828048706
epoch: 103 training_loss 0.10777596533298492 test_loss: 0.11849274635314941
epoch: 104 training_loss 0.11000173438340426 test_loss: 0.1153481125831604
epoch: 105 training_loss 0.11272635526955127 test_loss: 0.1196533203125
epoch: 106 training_loss 0.11475220784544944 test_loss: 0.11104133129119872
epoch: 107 training_loss 0.11541558358818292 test_loss: 0.12429301738739014
epoch: 108 training_loss 0.11282340556383133 test_loss: 0.11331809759140014
epoch: 109 training_loss 0.12187161918729544 test_loss: 0.11273661851882935
epoch: 110 training_loss 0.11246687836945057 test_loss: 0.11680554151535034
epoch: 111 training_loss 0.11043580144643783 test_loss: 0.11604534387588501
epoch: 112 training_loss 0.11972981378436089 test_loss: 0.13971049785614015
epoch: 113 training_loss 0.11585027817636728 test_loss: 0.10818228721618653
epoch: 114 training_loss 0.11285326637327671 test_loss: 0.1068604588508606
epoch: 115 training_loss 0.11347891569137573 test_loss: 0.11220124959945679
epoch: 116 training_loss 0.11921843409538269 test_loss: 0.1150824785232544
epoch: 117 training_loss 0.11122027192264795 test_loss: 0.11155409812927246
epoch: 118 training_loss 0.11275745071470737 test_loss: 0.12168186902999878
epoch: 119 training_loss 0.10724112708121539 test_loss: 0.11997396945953369
epoch: 120 training_loss 0.11645651951432229 test_loss: 0.10566523075103759
epoch: 121 training_loss 0.11217276297509671 test_loss: 0.10904207229614257
epoch: 122 training_loss 0.11205426532775163 test_loss: 0.1071688175201416
epoch: 123 training_loss 0.1153727613016963 test_loss: 0.12453320026397705
epoch: 124 training_loss 0.11169966813176871 test_loss: 0.11432744264602661
epoch: 125 training_loss 0.11214893322438002 test_loss: 0.11846511363983155
epoch: 126 training_loss 0.11101056180894375 test_loss: 0.1100847601890564
epoch: 127 training_loss 0.1137770789861679 test_loss: 0.12430565357208252
epoch: 128 training_loss 0.11275759529322386 test_loss: 0.12536908388137818
epoch: 129 training_loss 0.11443829916417599 test_loss: 0.11543622016906738
epoch: 130 training_loss 0.11260797806084157 test_loss: 0.11255558729171752
epoch: 131 training_loss 0.11190078407526016 test_loss: 0.11698799133300782
epoch: 132 training_loss 0.11742445282638073 test_loss: 0.1069139838218689
epoch: 133 training_loss 0.11267265979200601 test_loss: 0.11198076009750366
epoch: 134 training_loss 0.10840105816721916 test_loss: 0.11235010623931885
epoch: 135 training_loss 0.10865351840853692 test_loss: 0.10742754936218261
epoch: 136 training_loss 0.11646134175360202 test_loss: 0.1097598671913147
epoch: 137 training_loss 0.11351636555045844 test_loss: 0.11251932382583618
epoch: 138 training_loss 0.11198653869330882 test_loss: 0.12306292057037353
epoch: 139 training_loss 0.11035910662263632 test_loss: 0.12182831764221191
epoch: 140 training_loss 0.11273892767727375 test_loss: 0.10283788442611694
epoch: 141 training_loss 0.11339489690959453 test_loss: 0.10663497447967529
epoch: 142 training_loss 0.11733833357691764 test_loss: 0.11746182441711425
epoch: 143 training_loss 0.11165575090795755 test_loss: 0.11049902439117432
epoch: 144 training_loss 0.1089443802088499 test_loss: 0.12904151678085327
epoch: 145 training_loss 0.11036035887897015 test_loss: 0.11915758848190308
epoch: 146 training_loss 0.11378033705055714 test_loss: 0.1169277548789978
epoch: 147 training_loss 0.11469668328762055 test_loss: 0.09863104224205017
epoch: 148 training_loss 0.10905172605067491 test_loss: 0.11573525667190551
epoch: 149 training_loss 0.1097576716542244 test_loss: 0.10563774108886718
epoch: 0 training_loss 0.19003509640693664 test_loss: 0.17001994848251342
epoch: 1 training_loss 0.14322703152894975 test_loss: 0.12820832729339598
epoch: 2 training_loss 0.1432522586733103 test_loss: 0.13009583950042725
epoch: 3 training_loss 0.1354129769280553 test_loss: 0.14266210794448853
epoch: 4 training_loss 0.13131784588098527 test_loss: 0.1196141242980957
epoch: 5 training_loss 0.13372778665274382 test_loss: 0.1196099042892456
epoch: 6 training_loss 0.13427095256745816 test_loss: 0.13102407455444337
epoch: 7 training_loss 0.130162351988256 test_loss: 0.13244318962097168
epoch: 8 training_loss 0.12164633065462112 test_loss: 0.1282685160636902
epoch: 9 training_loss 0.12203850235790015 test_loss: 0.11933643817901611
epoch: 10 training_loss 0.12489330213516951 test_loss: 0.12324005365371704
epoch: 11 training_loss 0.13163270015269518 test_loss: 0.11435673236846924
epoch: 12 training_loss 0.13019816391170025 test_loss: 0.12056387662887573
epoch: 13 training_loss 0.11932275496423245 test_loss: 0.11634541749954223
epoch: 14 training_loss 0.12490463249385357 test_loss: 0.12205870151519775
epoch: 15 training_loss 0.11990136444568635 test_loss: 0.12111526727676392
epoch: 16 training_loss 0.11863648395985366 test_loss: 0.11252744197845459
epoch: 17 training_loss 0.11982732828706504 test_loss: 0.12187011241912842
epoch: 18 training_loss 0.12052515134215355 test_loss: 0.11716951131820678
epoch: 19 training_loss 0.12882235649973153 test_loss: 0.12433531284332275
epoch: 20 training_loss 0.11895733542740344 test_loss: 0.11670572757720947
epoch: 21 training_loss 0.12326909076422453 test_loss: 0.12080907821655273
epoch: 22 training_loss 0.11818785782903433 test_loss: 0.130712628364563
epoch: 23 training_loss 0.12298696961253881 test_loss: 0.11493505239486694
epoch: 24 training_loss 0.11629012353718281 test_loss: 0.1242447853088379
epoch: 25 training_loss 0.11794014591723681 test_loss: 0.11848000288009644
epoch: 26 training_loss 0.11961907483637332 test_loss: 0.12709951400756836
epoch: 27 training_loss 0.12149647992104291 test_loss: 0.11905373334884643
epoch: 28 training_loss 0.11429225023835897 test_loss: 0.11650557518005371
epoch: 29 training_loss 0.11968840554356575 test_loss: 0.11625006198883056
epoch: 30 training_loss 0.11919835761189461 test_loss: 0.11577787399291992
epoch: 31 training_loss 0.11426063321530819 test_loss: 0.12432183027267456
epoch: 32 training_loss 0.12093886602669954 test_loss: 0.10717967748641968
epoch: 33 training_loss 0.12222914714366198 test_loss: 0.12065715789794922
epoch: 34 training_loss 0.12005633339285851 test_loss: 0.1343567967414856
epoch: 35 training_loss 0.11746275819838047 test_loss: 0.10628485679626465
epoch: 36 training_loss 0.12011170446872711 test_loss: 0.10822337865829468
epoch: 37 training_loss 0.11938028782606125 test_loss: 0.10608400106430053
epoch: 38 training_loss 0.1160358302667737 test_loss: 0.11678166389465332
epoch: 39 training_loss 0.12447752740234136 test_loss: 0.12526193857192994
epoch: 40 training_loss 0.1183243428170681 test_loss: 0.11543989181518555
epoch: 41 training_loss 0.11473143428564071 test_loss: 0.10818480253219605
epoch: 42 training_loss 0.11186041094362736 test_loss: 0.11982486248016358
epoch: 43 training_loss 0.12219577949494123 test_loss: 0.1324641466140747
epoch: 44 training_loss 0.11353991795331239 test_loss: 0.12055379152297974
epoch: 45 training_loss 0.11858378302305937 test_loss: 0.11635429859161377
epoch: 46 training_loss 0.11893350157886744 test_loss: 0.10271053314208985
epoch: 47 training_loss 0.11206438530236483 test_loss: 0.12266280651092529
epoch: 48 training_loss 0.12247558157891035 test_loss: 0.11756731271743774
epoch: 49 training_loss 0.1115972663462162 test_loss: 0.10513881444931031
epoch: 50 training_loss 0.11903382148593664 test_loss: 0.11517727375030518
epoch: 51 training_loss 0.11195429723709821 test_loss: 0.11598085165023804
epoch: 52 training_loss 0.11861705858260393 test_loss: 0.111488938331604
epoch: 53 training_loss 0.10726680912077427 test_loss: 0.13026751279830934
epoch: 54 training_loss 0.12045120317488908 test_loss: 0.11267162561416626
epoch: 55 training_loss 0.11481012608855963 test_loss: 0.11127927303314208
epoch: 56 training_loss 0.11560890294611453 test_loss: 0.11738735437393188
epoch: 57 training_loss 0.1144659049063921 test_loss: 0.11696633100509643
epoch: 58 training_loss 0.11812703691422939 test_loss: 0.12520275115966797
epoch: 59 training_loss 0.1100234641134739 test_loss: 0.1170278549194336
epoch: 60 training_loss 0.11121813181787729 test_loss: 0.11112431287765503
epoch: 61 training_loss 0.11567443091422319 test_loss: 0.11598814725875854
epoch: 62 training_loss 0.11491672165691852 test_loss: 0.12207639217376709
epoch: 63 training_loss 0.12022294010967016 test_loss: 0.11854677200317383
epoch: 64 training_loss 0.11443352483212949 test_loss: 0.11963490247726441
epoch: 65 training_loss 0.12128018446266652 test_loss: 0.12589662075042723
epoch: 66 training_loss 0.11323328349739313 test_loss: 0.11891437768936157
epoch: 67 training_loss 0.11635037239640951 test_loss: 0.11765226125717163
epoch: 68 training_loss 0.10876459531486034 test_loss: 0.12139343023300171
epoch: 69 training_loss 0.11895188793540001 test_loss: 0.12174860239028931
epoch: 70 training_loss 0.10835765415802598 test_loss: 0.12039622068405151
epoch: 71 training_loss 0.11024146981537342 test_loss: 0.10899018049240113
epoch: 72 training_loss 0.12030034568160772 test_loss: 0.1146500587463379
epoch: 73 training_loss 0.11418967016041279 test_loss: 0.1064152479171753
epoch: 74 training_loss 0.11247897576540708 test_loss: 0.11347064971923829
epoch: 75 training_loss 0.10993907403200864 test_loss: 0.11420964002609253
epoch: 76 training_loss 0.11409641370177269 test_loss: 0.10469893217086793
epoch: 77 training_loss 0.11359907910227776 test_loss: 0.11033258438110352
epoch: 78 training_loss 0.11526602797210217 test_loss: 0.10422167778015137
epoch: 79 training_loss 0.11309207066893577 test_loss: 0.1182225227355957
epoch: 80 training_loss 0.11170573696494103 test_loss: 0.11490654945373535
epoch: 81 training_loss 0.11488390501588583 test_loss: 0.11595044136047364
epoch: 82 training_loss 0.11466387011110783 test_loss: 0.10504387617111206
epoch: 83 training_loss 0.11344767469912767 test_loss: 0.12013856172561646
epoch: 84 training_loss 0.11421340733766555 test_loss: 0.12576252222061157
epoch: 85 training_loss 0.10827091921120882 test_loss: 0.10651198625564576
epoch: 86 training_loss 0.11547789458185434 test_loss: 0.10747047662734985
epoch: 87 training_loss 0.11344618853181601 test_loss: 0.123932945728302
epoch: 88 training_loss 0.11814499810338021 test_loss: 0.10536342859268188
epoch: 89 training_loss 0.11427656136453151 test_loss: 0.09741843938827514
epoch: 90 training_loss 0.11718615725636482 test_loss: 0.10728498697280883
epoch: 91 training_loss 0.11873296782374382 test_loss: 0.1230156421661377
epoch: 92 training_loss 0.11693505018949509 test_loss: 0.11354955434799194
epoch: 93 training_loss 0.10872853130102157 test_loss: 0.11156731843948364
epoch: 94 training_loss 0.11073701586574317 test_loss: 0.1273134469985962
epoch: 95 training_loss 0.10917840905487537 test_loss: 0.11906393766403198
epoch: 96 training_loss 0.11511129654943943 test_loss: 0.11273880004882812
epoch: 97 training_loss 0.11362136974930763 test_loss: 0.12399221658706665
epoch: 98 training_loss 0.11270825877785683 test_loss: 0.12518792152404784
epoch: 99 training_loss 0.11540244843810797 test_loss: 0.12319433689117432
epoch: 100 training_loss 0.11799589157104493 test_loss: 0.11175979375839233
epoch: 101 training_loss 0.11990039218217134 test_loss: 0.11262445449829102
epoch: 102 training_loss 0.11572763621807099 test_loss: 0.12261344194412231
epoch: 103 training_loss 0.11446803160011769 test_loss: 0.10689791440963745
epoch: 104 training_loss 0.11047090068459511 test_loss: 0.11037523746490478
epoch: 105 training_loss 0.11203987587243319 test_loss: 0.11622250080108643
epoch: 106 training_loss 0.11505765367299318 test_loss: 0.12037663459777832
epoch: 107 training_loss 0.1191823273897171 test_loss: 0.1098742127418518
epoch: 108 training_loss 0.11369000542908907 test_loss: 0.1202722430229187
epoch: 109 training_loss 0.11138292111456394 test_loss: 0.10411982536315918
epoch: 110 training_loss 0.11376121390610933 test_loss: 0.11090965270996093
epoch: 111 training_loss 0.10961514692753553 test_loss: 0.09824791550636292
epoch: 112 training_loss 0.11972926940768958 test_loss: 0.11605740785598755
epoch: 113 training_loss 0.10769684717059136 test_loss: 0.11311939954757691
epoch: 114 training_loss 0.11475228559225797 test_loss: 0.12089463472366332
epoch: 115 training_loss 0.11385262355208398 test_loss: 0.11268390417098999
epoch: 116 training_loss 0.11171214614063502 test_loss: 0.09936262965202332
epoch: 117 training_loss 0.11997552648186684 test_loss: 0.12094632387161255
epoch: 118 training_loss 0.1116493508592248 test_loss: 0.11003968715667725
epoch: 119 training_loss 0.11228439256548882 test_loss: 0.11480864286422729
epoch: 120 training_loss 0.10866560343652963 test_loss: 0.11428828239440918
epoch: 121 training_loss 0.11631870806217194 test_loss: 0.11479877233505249
epoch: 122 training_loss 0.11106648176908493 test_loss: 0.10666077136993408
epoch: 123 training_loss 0.11292140487581491 test_loss: 0.12574228048324584
epoch: 124 training_loss 0.11392704088240863 test_loss: 0.10809921026229859
epoch: 125 training_loss 0.10942592374980449 test_loss: 0.11159834861755372
epoch: 126 training_loss 0.1105414054915309 test_loss: 0.11542237997055053
epoch: 127 training_loss 0.1126970997825265 test_loss: 0.09923680424690247
epoch: 128 training_loss 0.10904593411833048 test_loss: 0.1253417730331421
epoch: 129 training_loss 0.11211354669183493 test_loss: 0.11329180002212524
epoch: 130 training_loss 0.10758425660431385 test_loss: 0.10204091072082519
epoch: 131 training_loss 0.11502402406185866 test_loss: 0.10638948678970336
epoch: 132 training_loss 0.10703867537900806 test_loss: 0.10050727128982544
epoch: 133 training_loss 0.11036926336586475 test_loss: 0.10442528724670411
epoch: 134 training_loss 0.11301584996283054 test_loss: 0.11649826765060425
epoch: 135 training_loss 0.10606774196028709 test_loss: 0.10862888097763061
epoch: 136 training_loss 0.11723280614241957 test_loss: 0.11306130886077881
epoch: 137 training_loss 0.12050316162407398 test_loss: 0.11782661676406861
epoch: 138 training_loss 0.11326420169323682 test_loss: 0.09809167981147766
epoch: 139 training_loss 0.10824069358408452 test_loss: 0.11173714399337768
epoch: 140 training_loss 0.11620688766241073 test_loss: 0.12866226434707642
epoch: 141 training_loss 0.1078955464437604 test_loss: 0.10681418180465699
epoch: 142 training_loss 0.11169501721858978 test_loss: 0.10985047817230224
epoch: 143 training_loss 0.11138151325285435 test_loss: 0.10857220888137817
epoch: 144 training_loss 0.1069876029342413 test_loss: 0.10846562385559082
epoch: 145 training_loss 0.11487155552953482 test_loss: 0.12506451606750488
epoch: 146 training_loss 0.11136777207255363 test_loss: 0.11177483797073365
epoch: 147 training_loss 0.11101985119283199 test_loss: 0.10814530849456787
epoch: 148 training_loss 0.11025831911712886 test_loss: 0.11377301216125488
epoch: 149 training_loss 0.11638886380940676 test_loss: 0.12223025560379028
episode: 0 training return: -562.4732338872087
episode: 1 training return: -1061.2743418480397
episode: 2 training return: -439.14723411518554
episode: 3 training return: -1013.0219060210432
epoch: 1 test_true_pfm: 27.388910535979882 sim_pfm: -461.65918863885173
episode: 4 training return: -1115.3487552686954
episode: 5 training return: -414.4615067236815
episode: 6 training return: -482.2415660844097
episode: 7 training return: -1071.6201698282123
epoch: 2 test_true_pfm: 46.55590745156498 sim_pfm: -278.1195254447004
episode: 8 training return: -997.5422226762821
episode: 9 training return: -585.4305503543862
episode: 10 training return: 539.1981866181397
episode: 11 training return: 381.2335489547003
epoch: 3 test_true_pfm: 13.666388704020576 sim_pfm: 112.96948512436828
episode: 12 training return: 538.2466578540464
episode: 13 training return: 531.3130105196079
episode: 14 training return: 644.40267121229
episode: 15 training return: 650.1667724396643
epoch: 4 test_true_pfm: 18.223095182679298 sim_pfm: -986.3114810578458
episode: 16 training return: -1213.9827076518159
episode: 17 training return: -646.7564500324772
episode: 18 training return: -253.1492224169054
episode: 19 training return: -51.410797787126775
epoch: 5 test_true_pfm: 6.760560247682048 sim_pfm: -144.29562175819314
episode: 20 training return: -31.359064528517308
episode: 21 training return: 73.3213610498695
episode: 22 training return: 14.096633498890286
episode: 23 training return: 137.65541011568703
epoch: 6 test_true_pfm: -9.273077950872253 sim_pfm: 37.54836116084934
episode: 24 training return: 113.58105761119296
episode: 25 training return: 118.86719292586693
episode: 26 training return: 177.67167467350404
episode: 27 training return: 212.01144170872934
epoch: 7 test_true_pfm: -58.94945296395222 sim_pfm: 220.27736685986605
episode: 28 training return: 185.82621243891904
episode: 29 training return: 272.9718611982958
episode: 30 training return: 531.6367829237229
episode: 31 training return: 908.9488720801571
epoch: 8 test_true_pfm: -8.33960434677562 sim_pfm: 616.988111943337
episode: 32 training return: 601.1651202533347
episode: 33 training return: 764.8638061917247
episode: 34 training return: 732.9036008696372
episode: 35 training return: 383.2135649331847
epoch: 9 test_true_pfm: 50.80750298152791 sim_pfm: 459.8584073159729
episode: 36 training return: 472.80481881135904
episode: 37 training return: 9.951419658963372
episode: 38 training return: -67.55482836803303
episode: 39 training return: 207.59455544303916
epoch: 10 test_true_pfm: 51.179376236090015 sim_pfm: 160.39431076731006
episode: 40 training return: -14.046478938179378
episode: 41 training return: 179.00907179263078
episode: 42 training return: 768.7860860217792
episode: 43 training return: 668.9954152058233
epoch: 11 test_true_pfm: 25.18976574444243 sim_pfm: 922.7933747306746
episode: 44 training return: 877.3997575877999
episode: 45 training return: 915.2725859460668
episode: 46 training return: 872.7739571818831
episode: 47 training return: 925.9775327373552
epoch: 12 test_true_pfm: 19.053021760246377 sim_pfm: 602.1459672825592
episode: 48 training return: 920.667068306932
episode: 49 training return: 901.3298294678616
episode: 50 training return: 934.9805913499741
episode: 51 training return: 920.0944806727542
epoch: 13 test_true_pfm: 17.966257875560835 sim_pfm: 944.5881955475622
episode: 52 training return: 920.0420803796646
episode: 53 training return: 880.3581211659017
episode: 54 training return: 928.1352229420052
episode: 55 training return: 923.1429264432816
epoch: 14 test_true_pfm: 21.511879611373107 sim_pfm: 949.0582835015491
episode: 56 training return: 920.928430631302
episode: 57 training return: 914.900751098277
episode: 58 training return: 906.7659465498332
episode: 59 training return: 886.0073832087104
epoch: 15 test_true_pfm: 22.633506233422235 sim_pfm: 946.9280227511088
episode: 60 training return: 918.4587849225408
episode: 61 training return: 891.938469958188
episode: 62 training return: 895.6045028401028
episode: 63 training return: 910.5852769291309
epoch: 16 test_true_pfm: 21.4963082906771 sim_pfm: 946.2920977315828
episode: 64 training return: 857.3643801044295
episode: 65 training return: 862.1574478858975
episode: 66 training return: 890.2120600956438
episode: 67 training return: 919.895030486108
epoch: 17 test_true_pfm: 21.48626778186516 sim_pfm: 952.4057224120455
episode: 68 training return: 922.2091462897932
episode: 69 training return: 936.5792417865584
episode: 70 training return: 923.4745156412199
episode: 71 training return: 919.134951782668
epoch: 18 test_true_pfm: 20.22227130778748 sim_pfm: 950.8398575180142
episode: 72 training return: 920.4533223404429
episode: 73 training return: 937.5208892800371
episode: 74 training return: 937.3453284879316
episode: 75 training return: 934.7451001172349
epoch: 19 test_true_pfm: 20.200771148039983 sim_pfm: 952.199111587508
episode: 76 training return: 924.8366505041006
episode: 77 training return: 934.4888226523769
episode: 78 training return: 931.1573646871796
episode: 79 training return: 925.6597555702278
epoch: 20 test_true_pfm: 19.975217268619964 sim_pfm: 948.7263823851123
episode: 80 training return: 915.148087493076
episode: 81 training return: 922.1629391846641
episode: 82 training return: 925.656172315121
episode: 83 training return: 931.1885959795694
epoch: 21 test_true_pfm: 19.59233795759403 sim_pfm: 947.8203025492137
episode: 84 training return: 918.8035035228186
episode: 85 training return: 924.4782310129794
episode: 86 training return: 942.9478971583476
episode: 87 training return: 937.0444308446507
epoch: 22 test_true_pfm: 19.807239745737636 sim_pfm: 957.8631678303057
episode: 88 training return: 943.8525268013702
episode: 89 training return: 947.7924604865497
episode: 90 training return: 939.2602986966793
episode: 91 training return: 928.638589595754
epoch: 23 test_true_pfm: 19.362208405605866 sim_pfm: 942.6216655097509
episode: 92 training return: 926.3175925357187
episode: 93 training return: 930.4248967898848
episode: 94 training return: 934.693141701163
episode: 95 training return: 924.0784966562709
epoch: 24 test_true_pfm: 21.614435916384863 sim_pfm: 951.8957860991901
episode: 96 training return: 939.4425274492651
episode: 97 training return: 950.7844355122027
episode: 98 training return: 934.9552835986599
episode: 99 training return: 941.6449732003058
epoch: 25 test_true_pfm: 22.855357664100943 sim_pfm: 956.9568091963814
episode: 100 training return: 931.5855983820342
episode: 101 training return: 940.1686064796658
episode: 102 training return: 932.8282831237902
episode: 103 training return: 928.5954880205621
epoch: 26 test_true_pfm: 17.408432668250377 sim_pfm: 939.3529046074291
episode: 104 training return: 921.1254579119228
episode: 105 training return: 940.6470111056093
episode: 106 training return: 941.477204559305
episode: 107 training return: 927.8856246630717
epoch: 27 test_true_pfm: 20.269750220664083 sim_pfm: 958.2978255054404
episode: 108 training return: 943.8237571644585
episode: 109 training return: 929.1363815691001
episode: 110 training return: 918.0747264124984
episode: 111 training return: 898.3986683584243
epoch: 28 test_true_pfm: 16.543500161949744 sim_pfm: 949.7767155033458
episode: 112 training return: 920.7078867113942
episode: 113 training return: 926.3892888442704
episode: 114 training return: 933.4290563215997
episode: 115 training return: 956.4932122340766
epoch: 29 test_true_pfm: 23.645636072501752 sim_pfm: 964.7981031036825
episode: 116 training return: 938.4366125791615
episode: 117 training return: 945.6811641167834
episode: 118 training return: 944.4840231889818
episode: 119 training return: 887.0683322604036
epoch: 30 test_true_pfm: 22.935676625962692 sim_pfm: 953.0527122576747
episode: 120 training return: 934.5225960751884
episode: 121 training return: 946.134819815743
episode: 122 training return: 955.8536231117487
episode: 123 training return: 951.490601336911
epoch: 31 test_true_pfm: 22.58718793837493 sim_pfm: 966.4876073463893
episode: 124 training return: 953.6628819324895
episode: 125 training return: 949.0619419367403
episode: 126 training return: 949.3782385075268
episode: 127 training return: 951.7991579179253
epoch: 32 test_true_pfm: 17.812821389663966 sim_pfm: 960.1857643269972
episode: 128 training return: 944.6615346772905
episode: 129 training return: 932.0573894110756
episode: 130 training return: 955.0527632462254
episode: 131 training return: 945.6804570645655
epoch: 33 test_true_pfm: 17.613136284501632 sim_pfm: 959.7797556121029
episode: 132 training return: 944.8077461609885
episode: 133 training return: 948.8095421019839
episode: 134 training return: 942.4313971860047
episode: 135 training return: 940.3758517664476
epoch: 34 test_true_pfm: 24.48816270451567 sim_pfm: 966.2625490765984
episode: 136 training return: 941.5638852233466
episode: 137 training return: 926.5288498571308
episode: 138 training return: 932.5997268556763
episode: 139 training return: 924.9818559846905
epoch: 35 test_true_pfm: 23.947008947083447 sim_pfm: 959.3225715584856
episode: 140 training return: 943.8320906896661
episode: 141 training return: 940.8057633926633
episode: 142 training return: 949.6199410195733
episode: 143 training return: 952.2446578208397
epoch: 36 test_true_pfm: 25.10277750008482 sim_pfm: 966.0275872627195
episode: 144 training return: 931.5396811486511
episode: 145 training return: 946.2808037624302
episode: 146 training return: 943.351022276185
episode: 147 training return: 939.3296535148161
epoch: 37 test_true_pfm: 22.30617468177209 sim_pfm: 962.8687797957048
episode: 148 training return: 951.3612542567004
episode: 149 training return: 942.9334010467635
episode: 150 training return: 945.9090064726199
episode: 151 training return: 953.8661542229617
epoch: 38 test_true_pfm: 22.077373969010317 sim_pfm: 966.0416848773402
episode: 152 training return: 950.4106477475216
episode: 153 training return: 946.2866839235569
episode: 154 training return: 942.1891299866957
episode: 155 training return: 952.4837686197867
epoch: 39 test_true_pfm: 24.25700148900576 sim_pfm: 959.2349357296389
episode: 156 training return: 952.8872979872377
episode: 157 training return: 935.1864540999136
episode: 158 training return: 947.1087998544406
episode: 159 training return: 944.0581794188881
epoch: 40 test_true_pfm: 19.82313696749211 sim_pfm: 965.5544396360619
episode: 160 training return: 928.351873739116
episode: 161 training return: 939.6842316262506
episode: 162 training return: 936.1194315512478
episode: 163 training return: 946.9288772034893
epoch: 41 test_true_pfm: 22.69231957306467 sim_pfm: 964.1931336925893
episode: 164 training return: 940.286932266802
episode: 165 training return: 935.633764856624
episode: 166 training return: 937.7604139902095
episode: 167 training return: 943.4759833182928
epoch: 42 test_true_pfm: 24.772087975517223 sim_pfm: 967.4723074478109
episode: 168 training return: 955.5318724165105
episode: 169 training return: 937.8247739954872
episode: 170 training return: 934.0103849216227
episode: 171 training return: 946.1004987322019
epoch: 43 test_true_pfm: 22.749382103962358 sim_pfm: 965.6565438194291
episode: 172 training return: 943.5538420905013
episode: 173 training return: 949.8721168828342
episode: 174 training return: 943.8464285011448
episode: 175 training return: 938.0256514486603
epoch: 44 test_true_pfm: 19.789908456481058 sim_pfm: 954.7582980337427
episode: 176 training return: 938.0921706061844
episode: 177 training return: 943.8635062570389
episode: 178 training return: 941.9809486496322
episode: 179 training return: 942.2491346845816
epoch: 45 test_true_pfm: 22.4036113205037 sim_pfm: 966.9740327439649
episode: 180 training return: 939.9120042311541
episode: 181 training return: 931.685965296517
episode: 182 training return: 941.9279797794621
episode: 183 training return: 921.4881364456829
epoch: 46 test_true_pfm: 20.23077215393768 sim_pfm: 961.3493948814308
episode: 184 training return: 935.6446484614289
episode: 185 training return: 952.2775101494873
episode: 186 training return: 947.3379532735564
episode: 187 training return: 953.4726894311937
epoch: 47 test_true_pfm: 22.301216586899532 sim_pfm: 965.9086251920883
episode: 188 training return: 936.8073305134734
episode: 189 training return: 952.2502546687853
episode: 190 training return: 953.2834557551624
episode: 191 training return: 936.8770054542249
epoch: 48 test_true_pfm: 21.42647878199681 sim_pfm: 962.0087393068967
episode: 192 training return: 949.2725269147792
episode: 193 training return: 939.9257725161077
episode: 194 training return: 947.6003447634363
episode: 195 training return: 951.1803352627026
epoch: 49 test_true_pfm: 23.24437144510487 sim_pfm: 967.943014510042
episode: 196 training return: 948.8773324653301
episode: 197 training return: 939.0095256175515
episode: 198 training return: 952.0065309311323
episode: 199 training return: 955.9394608679687
epoch: 50 test_true_pfm: 21.627129497773133 sim_pfm: 964.4586340058971
episode: 200 training return: 955.1345624894527
episode: 201 training return: 942.8121426487379
episode: 202 training return: 947.5865747227709
episode: 203 training return: 945.7691885507676
epoch: 51 test_true_pfm: 25.344723810214163 sim_pfm: 967.571015028822
episode: 204 training return: 958.0646118996048
episode: 205 training return: 955.0933189938704
episode: 206 training return: 955.3436332019602
episode: 207 training return: 952.9070868987585
epoch: 52 test_true_pfm: 22.877017617609233 sim_pfm: 964.6836509922589
episode: 208 training return: 957.0930237074452
episode: 209 training return: 958.0480503924935
episode: 210 training return: 947.7375369052091
episode: 211 training return: 947.6605026403223
epoch: 53 test_true_pfm: 21.916652207300398 sim_pfm: 966.2528958039672
episode: 212 training return: 951.5925208243922
episode: 213 training return: 951.3813369946424
episode: 214 training return: 946.4936522483372
episode: 215 training return: 945.8397105900285
epoch: 54 test_true_pfm: 18.465664658725892 sim_pfm: 963.5250834542227
episode: 216 training return: 924.7010478855804
episode: 217 training return: 931.7333599268702
episode: 218 training return: 941.3615441687307
episode: 219 training return: 929.0320477163348
epoch: 55 test_true_pfm: 21.845091150605107 sim_pfm: 962.7118303867292
episode: 220 training return: 950.7149925396941
episode: 221 training return: 940.2772655941583
episode: 222 training return: 951.1289626207191
episode: 223 training return: 954.7729700857312
epoch: 56 test_true_pfm: 21.333866501964305 sim_pfm: 967.1962755972361
episode: 224 training return: 955.181272908811
episode: 225 training return: 946.7233613909249
episode: 226 training return: 952.4639554273064
episode: 227 training return: 941.9996069308015
epoch: 57 test_true_pfm: 19.631218379308223 sim_pfm: 961.7860902533018
episode: 228 training return: 949.5665485615045
episode: 229 training return: 953.4824228517754
episode: 230 training return: 958.5261723972832
episode: 231 training return: 945.9732629564334
epoch: 58 test_true_pfm: 20.392411071739097 sim_pfm: 967.0271433322366
episode: 232 training return: 954.525245260615
episode: 233 training return: 955.135470731151
episode: 234 training return: 934.6443606278168
episode: 235 training return: 934.8227176542154
epoch: 59 test_true_pfm: 20.421882054912018 sim_pfm: 960.5621509767423
episode: 236 training return: 954.7551467352706
episode: 237 training return: 940.5352934702482
episode: 238 training return: 934.6161968501581
episode: 239 training return: 943.5169709392104
epoch: 60 test_true_pfm: 19.894943794567887 sim_pfm: 965.1522955886883
episode: 240 training return: 948.1290873792958
episode: 241 training return: 953.3451608655452
episode: 242 training return: 952.4087566889389
episode: 243 training return: 952.7690626872223
epoch: 61 test_true_pfm: 21.441176176982772 sim_pfm: 964.5140988404597
episode: 244 training return: 954.2987016093764
episode: 245 training return: 954.9605235431851
episode: 246 training return: 946.2971886204459
episode: 247 training return: 895.5524683030773
epoch: 62 test_true_pfm: 23.278838056191603 sim_pfm: 346.8970516755695
episode: 248 training return: 948.9914699105071
episode: 249 training return: 930.4918365321323
episode: 250 training return: 934.7957193217684
episode: 251 training return: 940.1009462494978
epoch: 63 test_true_pfm: 21.111582847002655 sim_pfm: 961.808313671291
episode: 252 training return: 928.946107654909
episode: 253 training return: 928.200569050615
episode: 254 training return: 951.0295198321592
episode: 255 training return: 956.3370666479505
epoch: 64 test_true_pfm: 20.850002741169718 sim_pfm: 965.1078763799163
episode: 256 training return: 959.409703061402
episode: 257 training return: 955.1026193335945
episode: 258 training return: 953.0654078135072
episode: 259 training return: 955.8432511445804
epoch: 65 test_true_pfm: 23.31764604498452 sim_pfm: 967.3520648394439
episode: 260 training return: 953.2266384472252
episode: 261 training return: 950.2854826807566
episode: 262 training return: 954.6858151243839
episode: 263 training return: 955.0345854132806
epoch: 66 test_true_pfm: 22.774294968133038 sim_pfm: 965.1318705918411
episode: 264 training return: 950.9305369465592
episode: 265 training return: 944.3219425928726
episode: 266 training return: 936.393439705517
episode: 267 training return: 943.5294770561519
epoch: 67 test_true_pfm: 25.003062036578367 sim_pfm: 964.9240303877402
episode: 268 training return: 949.4230800094514
episode: 269 training return: 934.9884939137193
episode: 270 training return: 908.4660251751071
episode: 271 training return: 937.0321615694386
epoch: 68 test_true_pfm: 22.74207491816541 sim_pfm: 962.4036783079995
episode: 272 training return: 937.2971511146536
episode: 273 training return: 941.5650240658733
episode: 274 training return: 950.1845244746537
episode: 275 training return: 935.7461006626963
epoch: 69 test_true_pfm: 21.835399311149743 sim_pfm: 963.233918060549
episode: 276 training return: 945.7382270763798
episode: 277 training return: 940.5114428049234
episode: 278 training return: 944.6598677967143
episode: 279 training return: 943.5285454244465
epoch: 70 test_true_pfm: 21.225731707617133 sim_pfm: 963.4571049677352
episode: 280 training return: 948.5074813536995
episode: 281 training return: 949.3336525447676
episode: 282 training return: 936.9169715593705
episode: 283 training return: 941.2683709051147
epoch: 71 test_true_pfm: 25.794998524422038 sim_pfm: 967.1245380519862
episode: 284 training return: 952.0567746633616
episode: 285 training return: 952.0975634681915
episode: 286 training return: 952.2426949216828
episode: 287 training return: 956.7102545292436
epoch: 72 test_true_pfm: 24.117142178758773 sim_pfm: 967.3018068631891
episode: 288 training return: 953.6270517561588
episode: 289 training return: 946.7557954289219
episode: 290 training return: 937.983056856204
episode: 291 training return: 937.20700527931
epoch: 73 test_true_pfm: 24.534762704231518 sim_pfm: 966.1956028033416
episode: 292 training return: 945.6320937426844
episode: 293 training return: 958.7319483408753
episode: 294 training return: 952.7833520344643
episode: 295 training return: 952.8667255913277
epoch: 74 test_true_pfm: 21.467577995684337 sim_pfm: 965.324475619346
episode: 296 training return: 948.1248065056324
episode: 297 training return: 959.4699679776566
episode: 298 training return: 945.9195690623479
episode: 299 training return: 949.2491459465498
epoch: 75 test_true_pfm: 25.292653036471258 sim_pfm: 967.7015157538602
episode: 300 training return: 957.7786623199668
episode: 301 training return: 956.4152875135339
episode: 302 training return: 949.9672192681
episode: 303 training return: 946.1178571749753
epoch: 76 test_true_pfm: 22.683247065385512 sim_pfm: 966.3846508232194
episode: 304 training return: 953.3783339786135
episode: 305 training return: 952.528812259492
episode: 306 training return: 950.0638489414611
episode: 307 training return: 944.2418986670802
epoch: 77 test_true_pfm: 28.83467226964226 sim_pfm: 968.5845148809664
episode: 308 training return: 940.302991748397
episode: 309 training return: 933.7311252879034
episode: 310 training return: 937.8845856158096
episode: 311 training return: 961.0268973668949
epoch: 78 test_true_pfm: 22.113935159964324 sim_pfm: 966.6772487523983
episode: 312 training return: 953.1923180532897
episode: 313 training return: 955.7178830878946
episode: 314 training return: 943.0561511208339
episode: 315 training return: 954.8750921955854
epoch: 79 test_true_pfm: 26.717172410332562 sim_pfm: 968.2962253000863
episode: 316 training return: 948.6941551030739
episode: 317 training return: 915.4826552760381
episode: 318 training return: 948.1118648527558
episode: 319 training return: 940.3811154531003
epoch: 80 test_true_pfm: 23.385602238028493 sim_pfm: 964.7451502641707
episode: 320 training return: 939.5451047905641
episode: 321 training return: 930.0079994654792
episode: 322 training return: 935.582366441975
episode: 323 training return: 941.3397040034594
epoch: 81 test_true_pfm: 21.33506531451784 sim_pfm: 966.9399520553659
episode: 324 training return: 949.4367071546549
episode: 325 training return: 956.0224612837063
episode: 326 training return: 958.1857875560232
episode: 327 training return: 941.1091668912613
epoch: 82 test_true_pfm: 23.532686196490697 sim_pfm: 968.2572065379334
episode: 328 training return: 958.0317195603395
episode: 329 training return: 960.7398688310363
episode: 330 training return: 950.6470468406246
episode: 331 training return: 951.1915062820973
epoch: 83 test_true_pfm: 29.29536211690255 sim_pfm: 966.7134637921345
episode: 332 training return: 960.0745275742976
episode: 333 training return: 950.4110635634945
episode: 334 training return: 945.957675287303
episode: 335 training return: 951.4760741253388
epoch: 84 test_true_pfm: 24.540552208191386 sim_pfm: 967.5946684304499
episode: 336 training return: 955.9107277183311
episode: 337 training return: 956.5591540971932
episode: 338 training return: 948.3276796069671
episode: 339 training return: 962.8944511111912
epoch: 85 test_true_pfm: 22.210271421190928 sim_pfm: 966.0458704070898
episode: 340 training return: 959.6372680986241
episode: 341 training return: 942.9226688655696
episode: 342 training return: 939.2823468012039
episode: 343 training return: 930.1980936499891
epoch: 86 test_true_pfm: 26.613324626504852 sim_pfm: 966.7915676288898
episode: 344 training return: 962.0415879950209
episode: 345 training return: 956.4357941809249
episode: 346 training return: 956.2979810326659
episode: 347 training return: 958.1205981954093
epoch: 87 test_true_pfm: 21.930643421306446 sim_pfm: 967.7762970888964
episode: 348 training return: 955.5131568620073
episode: 349 training return: 948.7521397372776
episode: 350 training return: 954.5804430431858
episode: 351 training return: 955.9691628876869
epoch: 88 test_true_pfm: 27.244761983099625 sim_pfm: 968.4439806471737
episode: 352 training return: 962.0485153662565
episode: 353 training return: 957.1130849850647
episode: 354 training return: 938.395372099054
episode: 355 training return: 928.5046079066157
epoch: 89 test_true_pfm: 22.69198771877706 sim_pfm: 962.5140306914324
episode: 356 training return: 933.3124759689745
episode: 357 training return: 961.5806845388993
episode: 358 training return: 953.573927197339
episode: 359 training return: 948.5797653143378
epoch: 90 test_true_pfm: 22.2435612806731 sim_pfm: 966.9811846657125
episode: 360 training return: 950.4005038323791
episode: 361 training return: 937.5765351072266
episode: 362 training return: 943.4691283936589
episode: 363 training return: 933.2503739500847
epoch: 91 test_true_pfm: 23.50361242028236 sim_pfm: 961.763788662974
episode: 364 training return: 949.2346380651088
episode: 365 training return: 955.3797384512333
episode: 366 training return: 952.0838668436927
episode: 367 training return: 951.0426674649348
epoch: 92 test_true_pfm: 20.953180854421486 sim_pfm: 964.4364045951286
episode: 368 training return: 947.0867665888412
episode: 369 training return: 954.739958013056
episode: 370 training return: 956.3978370782603
episode: 371 training return: 944.4483849162488
epoch: 93 test_true_pfm: 22.791207754768028 sim_pfm: 966.4210682988671
episode: 372 training return: 959.5712032688342
episode: 373 training return: 958.7444345296113
episode: 374 training return: 937.2924455055476
episode: 375 training return: 956.3099948850827
epoch: 94 test_true_pfm: 22.037451193607176 sim_pfm: 965.5125599144006
episode: 376 training return: 958.3443873190099
episode: 377 training return: 954.0108245846207
episode: 378 training return: 958.9186574591264
episode: 379 training return: 961.3677796144003
epoch: 95 test_true_pfm: 22.621826734285722 sim_pfm: 966.5473813693791
episode: 380 training return: 949.6991419346031
episode: 381 training return: 948.9002264742497
episode: 382 training return: 952.7489602121576
episode: 383 training return: 957.5294405389552
epoch: 96 test_true_pfm: 21.83044600870814 sim_pfm: 962.9851171270584
episode: 384 training return: 949.2650857557653
episode: 385 training return: 958.9848816509658
episode: 386 training return: 955.3398423929262
episode: 387 training return: 961.2232512297982
epoch: 97 test_true_pfm: 22.472833950734383 sim_pfm: 966.5769815372394
episode: 388 training return: 947.4073628163491
episode: 389 training return: 952.3775678614568
episode: 390 training return: 957.1582385506396
episode: 391 training return: 961.9549528495492
epoch: 98 test_true_pfm: 20.922945277115137 sim_pfm: 965.4545536063058
episode: 392 training return: 954.7712055116834
episode: 393 training return: 959.1863332672688
episode: 394 training return: 950.14352426864
episode: 395 training return: 957.9048325484644
epoch: 99 test_true_pfm: 22.5269420015488 sim_pfm: 964.7651657402687
episode: 396 training return: 953.7541801895555
episode: 397 training return: 953.5982982887153
episode: 398 training return: 944.4928833727814
episode: 399 training return: 941.7330135506077
epoch: 100 test_true_pfm: 23.620814681262026 sim_pfm: 961.0245600778908
episode: 400 training return: 933.2991547138427
episode: 401 training return: 924.1799664137765
episode: 402 training return: 936.8428810901888
episode: 403 training return: 948.9088800772989
epoch: 101 test_true_pfm: 28.106073709385335 sim_pfm: 965.7175279021658
episode: 404 training return: 947.1884357865296
episode: 405 training return: 920.6820673980267
episode: 406 training return: 929.3147987854994
episode: 407 training return: 926.1306556327265
epoch: 102 test_true_pfm: 23.773006504324158 sim_pfm: 955.983805898388
episode: 408 training return: 939.1957991655747
episode: 409 training return: 928.9099245252627
episode: 410 training return: 949.2323355618986
episode: 411 training return: 950.015016015694
epoch: 103 test_true_pfm: 22.743936679733793 sim_pfm: 965.0731766089
episode: 412 training return: 950.8970781279893
episode: 413 training return: 949.7218164062309
episode: 414 training return: 948.7967805619007
episode: 415 training return: 939.6625000187952
epoch: 104 test_true_pfm: 26.01617472161165 sim_pfm: 967.1969321980669
episode: 416 training return: 954.306417924197
episode: 417 training return: 935.8111474139051
episode: 418 training return: 941.5456212153456
episode: 419 training return: 952.294745167744
epoch: 105 test_true_pfm: 29.100812026201567 sim_pfm: 967.5339257056296
episode: 420 training return: 958.8871007355342
episode: 421 training return: 951.496383494973
episode: 422 training return: 959.4539956294543
episode: 423 training return: 959.6108362461755
epoch: 106 test_true_pfm: 24.76692363571332 sim_pfm: 966.3800980826218
episode: 424 training return: 945.5540663815242
episode: 425 training return: 946.3812208373273
episode: 426 training return: 948.0996176302481
episode: 427 training return: 950.4683541459507
epoch: 107 test_true_pfm: 24.22205219580556 sim_pfm: 966.211839019352
episode: 428 training return: 957.3214402067869
episode: 429 training return: 945.0290522460159
episode: 430 training return: 931.2847166816892
episode: 431 training return: 936.3636907118344
epoch: 108 test_true_pfm: 20.217542930642942 sim_pfm: 960.7769638675524
episode: 432 training return: 928.6460118486896
episode: 433 training return: 918.1793195155594
episode: 434 training return: 919.7629727703696
episode: 435 training return: 923.3822361471312
epoch: 109 test_true_pfm: 20.268764943347726 sim_pfm: 959.3113413744029
episode: 436 training return: 933.12677298542
episode: 437 training return: 953.2253932339919
episode: 438 training return: 954.3716156979125
episode: 439 training return: 959.68358991148
epoch: 110 test_true_pfm: 25.619264557188092 sim_pfm: 967.9483207490827
episode: 440 training return: 959.487307621934
episode: 441 training return: 945.6420828454097
episode: 442 training return: 955.9342962861527
episode: 443 training return: 952.2825770017828
epoch: 111 test_true_pfm: 20.911381831166537 sim_pfm: 963.9708518957029
episode: 444 training return: 954.2714135031715
episode: 445 training return: 955.0446670815841
episode: 446 training return: 952.9713938897555
episode: 447 training return: 945.6923464204526
epoch: 112 test_true_pfm: 22.16248602233241 sim_pfm: 965.2117500705841
episode: 448 training return: 956.4745409369524
episode: 449 training return: 956.0096771049194
episode: 450 training return: 962.2658257115788
episode: 451 training return: 959.6862191016045
epoch: 113 test_true_pfm: 27.00461647957325 sim_pfm: 967.1950276147642
episode: 452 training return: 958.4997056999966
episode: 453 training return: 948.8726952687927
episode: 454 training return: 942.6957002239849
episode: 455 training return: 941.7027035492746
epoch: 114 test_true_pfm: 21.926981204382287 sim_pfm: 966.8637383946449
episode: 456 training return: 956.5349792607734
episode: 457 training return: 960.2737511341035
episode: 458 training return: 948.494921771994
episode: 459 training return: 935.4902252496756
epoch: 115 test_true_pfm: 25.210611181477127 sim_pfm: 967.534832448674
episode: 460 training return: 959.654071181209
episode: 461 training return: 960.983738259311
episode: 462 training return: 957.9822816665129
episode: 463 training return: 961.3955221353539
epoch: 116 test_true_pfm: 22.835975684404282 sim_pfm: 967.8471349755484
episode: 464 training return: 962.3140206309768
episode: 465 training return: 955.725084438181
episode: 466 training return: 957.4377049109081
episode: 467 training return: 955.0470936186173
epoch: 117 test_true_pfm: 31.203516041251408 sim_pfm: 968.497348059849
episode: 468 training return: 941.7358435127545
episode: 469 training return: 943.1604698968587
episode: 470 training return: 957.8220354090023
episode: 471 training return: 933.5149572916326
epoch: 118 test_true_pfm: 22.731604030436813 sim_pfm: 966.5695295229527
episode: 472 training return: 920.7314421343243
episode: 473 training return: 929.8351606161856
episode: 474 training return: 928.3275812374744
episode: 475 training return: 914.8873241632119
epoch: 119 test_true_pfm: 23.891118355283506 sim_pfm: 944.2803415512283
episode: 476 training return: 905.0355862018328
episode: 477 training return: 887.0739977399762
episode: 478 training return: 917.5061769887722
episode: 479 training return: 925.3522203180188
epoch: 120 test_true_pfm: 23.547061227794384 sim_pfm: 960.6557136434083
episode: 480 training return: 952.6194315451976
episode: 481 training return: 944.0089698390806
episode: 482 training return: 956.2997288174057
episode: 483 training return: 958.3068499364597
epoch: 121 test_true_pfm: 28.716516857670513 sim_pfm: 968.6728106320454
episode: 484 training return: 957.3761774196068
episode: 485 training return: 956.5866374778954
episode: 486 training return: 952.9030452076062
episode: 487 training return: 950.5753777928194
epoch: 122 test_true_pfm: 28.158879550723622 sim_pfm: 966.1093833806115
episode: 488 training return: 947.6430468529552
episode: 489 training return: 961.2081370147373
episode: 490 training return: 948.6582281365348
episode: 491 training return: 952.6030388668194
epoch: 123 test_true_pfm: 23.191933600598055 sim_pfm: 965.1216786947459
episode: 492 training return: 940.7694237652228
episode: 493 training return: 918.1314461650303
episode: 494 training return: 938.1697654566974
episode: 495 training return: 931.5174331970787
epoch: 124 test_true_pfm: 26.064596157056588 sim_pfm: 963.7921171555729
episode: 496 training return: 944.840216688812
episode: 497 training return: 947.315090373264
episode: 498 training return: 948.0973657410738
episode: 499 training return: 958.2410280922604
epoch: 125 test_true_pfm: 26.562200061355618 sim_pfm: 968.8345170262219
episode: 500 training return: 950.9696136442723
episode: 501 training return: 947.6870417306644
episode: 502 training return: 955.1413411309333
episode: 503 training return: 949.152742315711
epoch: 126 test_true_pfm: 27.38200728345499 sim_pfm: 967.3400185178891
episode: 504 training return: 950.6660734159209
episode: 505 training return: 945.1511358033146
episode: 506 training return: 925.4042148198328
episode: 507 training return: 955.7750354904763
epoch: 127 test_true_pfm: 29.50110973090812 sim_pfm: 966.9686113861313
episode: 508 training return: 958.4252920372058
episode: 509 training return: 961.3286337985932
episode: 510 training return: 957.6184551800267
episode: 511 training return: 936.3917747574213
epoch: 128 test_true_pfm: 31.211672349660216 sim_pfm: 963.7141136615095
episode: 512 training return: 944.9997517681796
episode: 513 training return: 929.0964620528858
episode: 514 training return: 941.2816336168197
episode: 515 training return: 942.630047699135
epoch: 129 test_true_pfm: 21.070904788070628 sim_pfm: 963.2805684539242
episode: 516 training return: 934.31646531103
episode: 517 training return: 935.5067063152792
episode: 518 training return: 951.8097457403805
episode: 519 training return: 935.6276886306306
epoch: 130 test_true_pfm: 30.270749179820665 sim_pfm: 968.5184156156608
episode: 520 training return: 956.6294613911082
episode: 521 training return: 953.4619743904435
episode: 522 training return: 950.4374521152479
episode: 523 training return: 923.5311524452701
epoch: 131 test_true_pfm: 25.628147177854668 sim_pfm: 962.3592851571918
episode: 524 training return: 951.1532382544751
episode: 525 training return: 946.9161573500138
episode: 526 training return: 935.1713473652026
episode: 527 training return: 956.9899405446669
epoch: 132 test_true_pfm: 20.97126886076658 sim_pfm: 963.7091822279832
episode: 528 training return: 950.3037207326648
episode: 529 training return: 963.6027616347334
episode: 530 training return: 952.0446216534232
episode: 531 training return: 942.7316826430166
epoch: 133 test_true_pfm: 28.031150023201313 sim_pfm: 964.3486114551239
episode: 532 training return: 926.9763693200566
episode: 533 training return: 954.0915240971937
episode: 534 training return: 957.2329332555615
episode: 535 training return: 935.9848229682544
epoch: 134 test_true_pfm: 26.832161755468462 sim_pfm: 966.5178894057681
episode: 536 training return: 959.0802818431474
episode: 537 training return: 957.5184501057705
episode: 538 training return: 946.882186731533
episode: 539 training return: 942.6770457854594
epoch: 135 test_true_pfm: 24.904818478366632 sim_pfm: 968.4084527333473
episode: 540 training return: 935.3526324952597
episode: 541 training return: 929.8871710193899
episode: 542 training return: 925.9184931162433
episode: 543 training return: 929.0918974030224
epoch: 136 test_true_pfm: 26.80552729136084 sim_pfm: 962.0372082689944
episode: 544 training return: 937.9914782583462
episode: 545 training return: 931.3293744181303
episode: 546 training return: 934.6339982501339
episode: 547 training return: 920.925887962978
epoch: 137 test_true_pfm: 23.0795315566103 sim_pfm: 958.0979257712925
episode: 548 training return: 935.920342050049
episode: 549 training return: 952.6071458721233
episode: 550 training return: 938.9412601116509
episode: 551 training return: 930.2087838463101
epoch: 138 test_true_pfm: 25.980787964414596 sim_pfm: 961.1961907615896
episode: 552 training return: 922.3153696971
episode: 553 training return: 928.6540052542484
episode: 554 training return: 942.8866667443133
episode: 555 training return: 923.6031278602164
epoch: 139 test_true_pfm: 22.488820937976833 sim_pfm: 961.0636167857447
episode: 556 training return: 935.3505389323068
episode: 557 training return: 923.8267360164227
episode: 558 training return: 953.8901704568228
episode: 559 training return: 960.4783989250274
epoch: 140 test_true_pfm: 23.149933363128714 sim_pfm: 968.4320152009157
episode: 560 training return: 954.5375376233077
episode: 561 training return: 955.584071610336
episode: 562 training return: 944.1436762297492
episode: 563 training return: 957.7573037155028
epoch: 141 test_true_pfm: 22.172503439403535 sim_pfm: 966.0513572138847
episode: 564 training return: 933.5814344458302
episode: 565 training return: 942.5130417733396
episode: 566 training return: 951.8297895801888
episode: 567 training return: 957.6527686834174
epoch: 142 test_true_pfm: 21.03920371971092 sim_pfm: 966.9438216425124
episode: 568 training return: 943.8950370112431
episode: 569 training return: 945.4955095234778
episode: 570 training return: 955.4227989617267
episode: 571 training return: 954.6634644020826
epoch: 143 test_true_pfm: 22.009441618322494 sim_pfm: 969.416734566726
episode: 572 training return: 940.9769118858366
episode: 573 training return: 945.624980488561
episode: 574 training return: 959.8595162065236
episode: 575 training return: 950.2346975955846
epoch: 144 test_true_pfm: 26.434362257676487 sim_pfm: 967.4885927379198
episode: 576 training return: 945.8510599887325
episode: 577 training return: 938.3900241492674
episode: 578 training return: 954.4466007978642
episode: 579 training return: 941.671265034838
epoch: 145 test_true_pfm: 24.088799369574843 sim_pfm: 954.0340273501213
episode: 580 training return: 939.9386505628445
episode: 581 training return: 952.7266160687891
episode: 582 training return: 939.6951773146403
episode: 583 training return: 957.6759709896925
epoch: 146 test_true_pfm: 24.768468985191078 sim_pfm: 967.2812547552458
episode: 584 training return: 958.5932988544578
episode: 585 training return: 960.2819693278171
episode: 586 training return: 961.2243544035468
episode: 587 training return: 960.2839043535522
epoch: 147 test_true_pfm: 28.71700254986257 sim_pfm: 968.3046459246405
episode: 588 training return: 948.2823372945378
episode: 589 training return: 961.5980050440846
episode: 590 training return: 959.3287271182991
episode: 591 training return: 956.7016291258333
epoch: 148 test_true_pfm: 25.062517654727365 sim_pfm: 966.3053428238356
episode: 592 training return: 961.7639757373711
episode: 593 training return: 957.9884917140487
episode: 594 training return: 957.7418459404228
episode: 595 training return: 958.7505865971169
epoch: 149 test_true_pfm: 25.925717479974328 sim_pfm: 969.150403416116
episode: 596 training return: 961.9940033397013
episode: 597 training return: 962.0031305736807
episode: 598 training return: 963.7894964002077
episode: 599 training return: 961.026154222479
epoch: 150 test_true_pfm: 27.579277137201096 sim_pfm: 967.3964537590542
