['--alg', 'sac', '--env', 'HalfCheetah-v2', '--learn', 'uncertainty', '--traj', 'expert', '--seed', '3', '--data', '100000']
epoch: 0 training_loss 0.3433282969892025 test_loss: 0.24861655235290528
epoch: 1 training_loss 0.2110559345781803 test_loss: 0.1906452178955078
epoch: 2 training_loss 0.17626856043934822 test_loss: 0.17467797994613649
epoch: 3 training_loss 0.16263878278434277 test_loss: 0.162342631816864
epoch: 4 training_loss 0.15962018378078938 test_loss: 0.14702675342559815
epoch: 5 training_loss 0.15285569027066231 test_loss: 0.14839205741882325
epoch: 6 training_loss 0.14473895147442817 test_loss: 0.1449442744255066
epoch: 7 training_loss 0.12765587523579597 test_loss: 0.12857042551040648
epoch: 8 training_loss 0.12818801008164882 test_loss: 0.13628157377243041
epoch: 9 training_loss 0.1289034053310752 test_loss: 0.13336905241012573
epoch: 10 training_loss 0.13324047476053238 test_loss: 0.1357298970222473
epoch: 11 training_loss 0.12091250076889992 test_loss: 0.1273522138595581
epoch: 12 training_loss 0.12803376510739325 test_loss: 0.1152004599571228
epoch: 13 training_loss 0.11869010251015424 test_loss: 0.13010501861572266
epoch: 14 training_loss 0.11676191816106439 test_loss: 0.12258645296096801
epoch: 15 training_loss 0.12001862969249487 test_loss: 0.12961727380752563
epoch: 16 training_loss 0.12321859745308757 test_loss: 0.13090150356292723
epoch: 17 training_loss 0.12742686063051223 test_loss: 0.10972309112548828
epoch: 18 training_loss 0.11485454957932234 test_loss: 0.11592155694961548
epoch: 19 training_loss 0.11801456529647111 test_loss: 0.12818225622177123
epoch: 20 training_loss 0.11646655913442373 test_loss: 0.12340010404586792
epoch: 21 training_loss 0.11801745319738983 test_loss: 0.11790363788604737
epoch: 22 training_loss 0.11303302254527807 test_loss: 0.13709733486175538
epoch: 23 training_loss 0.11628498189151287 test_loss: 0.13186885118484498
epoch: 24 training_loss 0.12286029543727636 test_loss: 0.1193662405014038
epoch: 25 training_loss 0.1150168239697814 test_loss: 0.11540130376815796
epoch: 26 training_loss 0.1202067257836461 test_loss: 0.12208619117736816
epoch: 27 training_loss 0.10969897091388703 test_loss: 0.12118707895278931
epoch: 28 training_loss 0.11119183383882046 test_loss: 0.1365075469017029
epoch: 29 training_loss 0.12416547387838364 test_loss: 0.12344914674758911
epoch: 30 training_loss 0.11750403955578805 test_loss: 0.12456675767898559
epoch: 31 training_loss 0.11025946598500014 test_loss: 0.10858603715896606
epoch: 32 training_loss 0.1140004438534379 test_loss: 0.12560527324676513
epoch: 33 training_loss 0.11523042438551784 test_loss: 0.10312052965164184
epoch: 34 training_loss 0.11782276455312968 test_loss: 0.12624506950378417
epoch: 35 training_loss 0.10444827971979975 test_loss: 0.14191304445266723
epoch: 36 training_loss 0.11651759535074234 test_loss: 0.12887485027313234
epoch: 37 training_loss 0.11518506130203604 test_loss: 0.11165258884429932
epoch: 38 training_loss 0.10280308958142996 test_loss: 0.12645093202590943
epoch: 39 training_loss 0.11755636930465699 test_loss: 0.11633156538009644
epoch: 40 training_loss 0.11320834282785654 test_loss: 0.12313693761825562
epoch: 41 training_loss 0.1063038333877921 test_loss: 0.10639986991882325
epoch: 42 training_loss 0.1131903599575162 test_loss: 0.12111231088638305
epoch: 43 training_loss 0.10804049087688326 test_loss: 0.12207925319671631
epoch: 44 training_loss 0.1134260057657957 test_loss: 0.10238472223281861
epoch: 45 training_loss 0.10568298749625683 test_loss: 0.10759948492050171
epoch: 46 training_loss 0.10855637533590197 test_loss: 0.11344966888427735
epoch: 47 training_loss 0.10816167768090963 test_loss: 0.11764742136001587
epoch: 48 training_loss 0.11282205641269684 test_loss: 0.11046913862228394
epoch: 49 training_loss 0.11702385649085045 test_loss: 0.12036182880401611
epoch: 50 training_loss 0.1164166660234332 test_loss: 0.10867896080017089
epoch: 51 training_loss 0.10951035626232625 test_loss: 0.11656330823898316
epoch: 52 training_loss 0.10771719992160797 test_loss: 0.1046918511390686
epoch: 53 training_loss 0.10713651403784752 test_loss: 0.1078567385673523
epoch: 54 training_loss 0.10895690210163593 test_loss: 0.12279813289642334
epoch: 55 training_loss 0.1103712297976017 test_loss: 0.10878102779388428
epoch: 56 training_loss 0.11314152367413044 test_loss: 0.1234639048576355
epoch: 57 training_loss 0.10267169073224068 test_loss: 0.12138991355895996
epoch: 58 training_loss 0.11740261681377888 test_loss: 0.1443387508392334
epoch: 59 training_loss 0.10580121949315072 test_loss: 0.11763955354690551
epoch: 60 training_loss 0.10512542825192213 test_loss: 0.10268092155456543
epoch: 61 training_loss 0.1078760877624154 test_loss: 0.12644560337066652
epoch: 62 training_loss 0.10763495383784175 test_loss: 0.11662020683288574
epoch: 63 training_loss 0.10065450187772512 test_loss: 0.10397108793258666
epoch: 64 training_loss 0.11260163150727749 test_loss: 0.11014367341995239
epoch: 65 training_loss 0.11346761928871274 test_loss: 0.12770459651947022
epoch: 66 training_loss 0.10592689491808414 test_loss: 0.1150476574897766
epoch: 67 training_loss 0.1041234640777111 test_loss: 0.117745041847229
epoch: 68 training_loss 0.11464698683470488 test_loss: 0.11167794466018677
epoch: 69 training_loss 0.11957767751067877 test_loss: 0.11120349168777466
epoch: 70 training_loss 0.11194357171654701 test_loss: 0.10898777246475219
epoch: 71 training_loss 0.10974702123552561 test_loss: 0.11238807439804077
epoch: 72 training_loss 0.10770182657986879 test_loss: 0.1214857816696167
epoch: 73 training_loss 0.1091640792414546 test_loss: 0.10936921834945679
epoch: 74 training_loss 0.1139562600106001 test_loss: 0.12984489202499389
epoch: 75 training_loss 0.11097457375377416 test_loss: 0.12174967527389527
epoch: 76 training_loss 0.10799178671091796 test_loss: 0.10990092754364014
epoch: 77 training_loss 0.10061570074409247 test_loss: 0.11160883903503419
epoch: 78 training_loss 0.10145473446696997 test_loss: 0.12127398252487183
epoch: 79 training_loss 0.1067344393208623 test_loss: 0.12040466070175171
epoch: 80 training_loss 0.11194944582879543 test_loss: 0.10975598096847534
epoch: 81 training_loss 0.10983956530690193 test_loss: 0.094802987575531
epoch: 82 training_loss 0.11262295853346586 test_loss: 0.1449831247329712
epoch: 83 training_loss 0.10903621919453144 test_loss: 0.12750834226608276
epoch: 84 training_loss 0.11550733992829919 test_loss: 0.11394163370132446
epoch: 85 training_loss 0.10897360820323229 test_loss: 0.10242612361907959
epoch: 86 training_loss 0.1099561870843172 test_loss: 0.10968682765960694
epoch: 87 training_loss 0.108022181391716 test_loss: 0.11896185874938965
epoch: 88 training_loss 0.10523903220891953 test_loss: 0.1150658369064331
epoch: 89 training_loss 0.10480226170271635 test_loss: 0.1251581072807312
epoch: 90 training_loss 0.11180780787020922 test_loss: 0.10383583307266235
epoch: 91 training_loss 0.10854020915925502 test_loss: 0.1160635232925415
epoch: 92 training_loss 0.10273832727223635 test_loss: 0.09946793913841248
epoch: 93 training_loss 0.10277892794460058 test_loss: 0.12134207487106323
epoch: 94 training_loss 0.10155640866607428 test_loss: 0.12236635684967041
epoch: 95 training_loss 0.10538518119603396 test_loss: 0.10920984745025634
epoch: 96 training_loss 0.10234689872711897 test_loss: 0.10310146808624268
epoch: 97 training_loss 0.10934192502871155 test_loss: 0.1099839448928833
epoch: 98 training_loss 0.11585342723876238 test_loss: 0.10687354803085328
epoch: 99 training_loss 0.11054622758179904 test_loss: 0.13150568008422853
epoch: 100 training_loss 0.10785973031073809 test_loss: 0.1096356987953186
epoch: 101 training_loss 0.11263136636465788 test_loss: 0.1062209129333496
epoch: 102 training_loss 0.10911473359912634 test_loss: 0.11797021627426148
epoch: 103 training_loss 0.10455500140786171 test_loss: 0.10655078887939454
epoch: 104 training_loss 0.10284586068242789 test_loss: 0.11826038360595703
epoch: 105 training_loss 0.10796072669327259 test_loss: 0.11487650871276855
epoch: 106 training_loss 0.10566791277378798 test_loss: 0.11608515977859497
epoch: 107 training_loss 0.10725849254056811 test_loss: 0.0887902021408081
epoch: 108 training_loss 0.10423903631046415 test_loss: 0.09200078248977661
epoch: 109 training_loss 0.10234267324209213 test_loss: 0.12364665269851685
epoch: 110 training_loss 0.10433544281870127 test_loss: 0.10953356027603149
epoch: 111 training_loss 0.10640686212107539 test_loss: 0.13012319803237915
epoch: 112 training_loss 0.10829955717548728 test_loss: 0.10850237607955933
epoch: 113 training_loss 0.10384583802893758 test_loss: 0.12618507146835328
epoch: 114 training_loss 0.11080724067986011 test_loss: 0.09908640384674072
epoch: 115 training_loss 0.11041601428762078 test_loss: 0.11728644371032715
epoch: 116 training_loss 0.1069129279628396 test_loss: 0.12619439363479615
epoch: 117 training_loss 0.10297830671072006 test_loss: 0.10200170278549195
epoch: 118 training_loss 0.11151526367291809 test_loss: 0.11809400320053101
epoch: 119 training_loss 0.10165445771068335 test_loss: 0.10908244848251343
epoch: 120 training_loss 0.10954418051987887 test_loss: 0.11282806396484375
epoch: 121 training_loss 0.10197634356096387 test_loss: 0.10308083295822143
epoch: 122 training_loss 0.10577369909733533 test_loss: 0.12515029907226563
epoch: 123 training_loss 0.10203755237162113 test_loss: 0.11869965791702271
epoch: 124 training_loss 0.11170812614262104 test_loss: 0.09582310318946838
epoch: 125 training_loss 0.10306212071329356 test_loss: 0.11724997758865356
epoch: 126 training_loss 0.10505864463746548 test_loss: 0.13366434574127198
epoch: 127 training_loss 0.10862304760143161 test_loss: 0.10659985542297364
epoch: 128 training_loss 0.09957018179818988 test_loss: 0.10290590524673462
epoch: 129 training_loss 0.11024336755275727 test_loss: 0.12058331966400146
epoch: 130 training_loss 0.1024994482845068 test_loss: 0.13695621490478516
epoch: 131 training_loss 0.10735010605305434 test_loss: 0.10670523643493653
epoch: 132 training_loss 0.11430906347930431 test_loss: 0.10807926654815674
epoch: 133 training_loss 0.09993659362196922 test_loss: 0.11013362407684327
epoch: 134 training_loss 0.1065524773299694 test_loss: 0.11015589237213134
epoch: 135 training_loss 0.10206379007548094 test_loss: 0.11026911735534668
epoch: 136 training_loss 0.1065706890448928 test_loss: 0.10181012153625488
epoch: 137 training_loss 0.1035809887945652 test_loss: 0.11579926013946533
epoch: 138 training_loss 0.09855338547378778 test_loss: 0.11880942583084106
epoch: 139 training_loss 0.10269290728494525 test_loss: 0.09156867265701293
epoch: 140 training_loss 0.09874882301315666 test_loss: 0.13203481435775757
epoch: 141 training_loss 0.10364447318017483 test_loss: 0.10911195278167725
epoch: 142 training_loss 0.11156173232942819 test_loss: 0.10004931688308716
epoch: 143 training_loss 0.09637633493170142 test_loss: 0.11305856704711914
epoch: 144 training_loss 0.10094935053959489 test_loss: 0.11036221981048584
epoch: 145 training_loss 0.09763761755079031 test_loss: 0.10214277505874633
epoch: 146 training_loss 0.10752326531335711 test_loss: 0.1056679368019104
epoch: 147 training_loss 0.10170304536819458 test_loss: 0.1207341194152832
epoch: 148 training_loss 0.09846967108547687 test_loss: 0.1105384111404419
epoch: 149 training_loss 0.09702084152027965 test_loss: 0.11335326433181762
epoch: 0 training_loss 0.3534562349319458 test_loss: 0.2656482934951782
epoch: 1 training_loss 0.2351784184575081 test_loss: 0.23511085510253907
epoch: 2 training_loss 0.1978233603388071 test_loss: 0.22610888481140137
epoch: 3 training_loss 0.1824589602649212 test_loss: 0.19433000087738037
epoch: 4 training_loss 0.1625623530894518 test_loss: 0.19666388034820556
epoch: 5 training_loss 0.15499642118811607 test_loss: 0.18963407278060912
epoch: 6 training_loss 0.15502272997051478 test_loss: 0.1691324830055237
epoch: 7 training_loss 0.15381462134420873 test_loss: 0.1681071400642395
epoch: 8 training_loss 0.14239627297967672 test_loss: 0.15291838645935057
epoch: 9 training_loss 0.13250635609030723 test_loss: 0.15541908740997315
epoch: 10 training_loss 0.1291005066409707 test_loss: 0.15080490112304687
epoch: 11 training_loss 0.12443826120346785 test_loss: 0.13858773708343505
epoch: 12 training_loss 0.12971162538975478 test_loss: 0.18282678127288818
epoch: 13 training_loss 0.12780364248901604 test_loss: 0.13639732599258422
epoch: 14 training_loss 0.12276700139045715 test_loss: 0.15157530307769776
epoch: 15 training_loss 0.11791333058848977 test_loss: 0.1332682967185974
epoch: 16 training_loss 0.12423814594745636 test_loss: 0.14000500440597535
epoch: 17 training_loss 0.11536330252885818 test_loss: 0.11986087560653687
epoch: 18 training_loss 0.12544546272605658 test_loss: 0.1460290312767029
epoch: 19 training_loss 0.1195583551749587 test_loss: 0.13788484334945678
epoch: 20 training_loss 0.1169258901849389 test_loss: 0.12836982011795045
epoch: 21 training_loss 0.1182153007760644 test_loss: 0.14055417776107787
epoch: 22 training_loss 0.11537330579012632 test_loss: 0.14367951154708863
epoch: 23 training_loss 0.1141273057088256 test_loss: 0.11310908794403077
epoch: 24 training_loss 0.12135644525289535 test_loss: 0.12717734575271605
epoch: 25 training_loss 0.1058659103512764 test_loss: 0.1562490463256836
epoch: 26 training_loss 0.11669160068035125 test_loss: 0.13362770080566405
epoch: 27 training_loss 0.11172944886609912 test_loss: 0.12515631914138795
epoch: 28 training_loss 0.11042902145534754 test_loss: 0.12916960716247558
epoch: 29 training_loss 0.11304017838090658 test_loss: 0.12838913202285768
epoch: 30 training_loss 0.11387860134243966 test_loss: 0.12528128623962403
epoch: 31 training_loss 0.1058262138068676 test_loss: 0.12811040878295898
epoch: 32 training_loss 0.10697704188525677 test_loss: 0.12654770612716676
epoch: 33 training_loss 0.11531216744333506 test_loss: 0.1443654179573059
epoch: 34 training_loss 0.11239896647632122 test_loss: 0.13471412658691406
epoch: 35 training_loss 0.11766065262258053 test_loss: 0.12974332571029662
epoch: 36 training_loss 0.10774839874356985 test_loss: 0.1288876414299011
epoch: 37 training_loss 0.10848624495789409 test_loss: 0.13224165439605712
epoch: 38 training_loss 0.11250561967492104 test_loss: 0.13517544269561768
epoch: 39 training_loss 0.10954807655885816 test_loss: 0.12951852083206178
epoch: 40 training_loss 0.10334328439086676 test_loss: 0.1206942081451416
epoch: 41 training_loss 0.11571456424891949 test_loss: 0.12080519199371338
epoch: 42 training_loss 0.111089888215065 test_loss: 0.15132689476013184
epoch: 43 training_loss 0.1062224594131112 test_loss: 0.12912185192108155
epoch: 44 training_loss 0.11106639131903648 test_loss: 0.142915940284729
epoch: 45 training_loss 0.10710556073114276 test_loss: 0.1352367401123047
epoch: 46 training_loss 0.11322757966816425 test_loss: 0.12733694314956664
epoch: 47 training_loss 0.11118859134614467 test_loss: 0.12124770879745483
epoch: 48 training_loss 0.10189441163092852 test_loss: 0.12380012273788452
epoch: 49 training_loss 0.1081686718761921 test_loss: 0.14298015832901
epoch: 50 training_loss 0.11429088911041617 test_loss: 0.1355869174003601
epoch: 51 training_loss 0.11102625880390406 test_loss: 0.12965189218521117
epoch: 52 training_loss 0.10767537007108331 test_loss: 0.1191985011100769
epoch: 53 training_loss 0.11290350027382373 test_loss: 0.1309908628463745
epoch: 54 training_loss 0.10335247494280338 test_loss: 0.11688511371612549
epoch: 55 training_loss 0.11543324198573827 test_loss: 0.10677903890609741
epoch: 56 training_loss 0.10723216971382499 test_loss: 0.12522473335266113
epoch: 57 training_loss 0.10523607067763806 test_loss: 0.12507855892181396
epoch: 58 training_loss 0.10318986516445876 test_loss: 0.12339539527893066
epoch: 59 training_loss 0.10432391345500946 test_loss: 0.12626032829284667
epoch: 60 training_loss 0.1108992138132453 test_loss: 0.13429057598114014
epoch: 61 training_loss 0.10514296781271697 test_loss: 0.117637300491333
epoch: 62 training_loss 0.10231868714094162 test_loss: 0.1190290093421936
epoch: 63 training_loss 0.10265051055699587 test_loss: 0.11705456972122193
epoch: 64 training_loss 0.10277990221977235 test_loss: 0.13820030689239501
epoch: 65 training_loss 0.11076574318110943 test_loss: 0.11606774330139161
epoch: 66 training_loss 0.10256551261991262 test_loss: 0.12169859409332276
epoch: 67 training_loss 0.10208475667983294 test_loss: 0.11894762516021729
epoch: 68 training_loss 0.10857027482241392 test_loss: 0.12889288663864135
epoch: 69 training_loss 0.11282994017004967 test_loss: 0.12175732851028442
epoch: 70 training_loss 0.1068597635254264 test_loss: 0.1295274615287781
epoch: 71 training_loss 0.11067232567816973 test_loss: 0.12701406478881835
epoch: 72 training_loss 0.10793765902519226 test_loss: 0.13545864820480347
epoch: 73 training_loss 0.1067751957476139 test_loss: 0.13395357131958008
epoch: 74 training_loss 0.11731610450893641 test_loss: 0.13534666299819947
epoch: 75 training_loss 0.1116177399083972 test_loss: 0.12608580589294432
epoch: 76 training_loss 0.10776266198605298 test_loss: 0.1223333239555359
epoch: 77 training_loss 0.11211362585425377 test_loss: 0.13019782304763794
epoch: 78 training_loss 0.10647514723241329 test_loss: 0.13009703159332275
epoch: 79 training_loss 0.10297388002276421 test_loss: 0.12027225494384766
epoch: 80 training_loss 0.10132564947009087 test_loss: 0.14310064315795898
epoch: 81 training_loss 0.10507647473365068 test_loss: 0.14308732748031616
epoch: 82 training_loss 0.10349326461553573 test_loss: 0.11736305952072143
epoch: 83 training_loss 0.10470335271209479 test_loss: 0.12840250730514527
epoch: 84 training_loss 0.10298307981342077 test_loss: 0.12145001888275146
epoch: 85 training_loss 0.10576442720368505 test_loss: 0.1248321771621704
epoch: 86 training_loss 0.09977940881624818 test_loss: 0.14299498796463012
epoch: 87 training_loss 0.10042704552412034 test_loss: 0.13017398118972778
epoch: 88 training_loss 0.10014367939904333 test_loss: 0.11776748895645142
epoch: 89 training_loss 0.10511242315173148 test_loss: 0.10882337093353271
epoch: 90 training_loss 0.10447860412299632 test_loss: 0.11710679531097412
epoch: 91 training_loss 0.10394660729914904 test_loss: 0.12345658540725708
epoch: 92 training_loss 0.10220156420022249 test_loss: 0.12480977773666382
epoch: 93 training_loss 0.10435662601143121 test_loss: 0.11840462684631348
epoch: 94 training_loss 0.10103373356163502 test_loss: 0.12211422920227051
epoch: 95 training_loss 0.10124741580337286 test_loss: 0.11134617328643799
epoch: 96 training_loss 0.09869637610390783 test_loss: 0.1090014934539795
epoch: 97 training_loss 0.1014117220044136 test_loss: 0.09845932126045227
epoch: 98 training_loss 0.10275107458233833 test_loss: 0.11849852800369262
epoch: 99 training_loss 0.1017033389210701 test_loss: 0.12418479919433593
epoch: 100 training_loss 0.10041767165064812 test_loss: 0.14561212062835693
epoch: 101 training_loss 0.09983265589922667 test_loss: 0.13422569036483764
epoch: 102 training_loss 0.09780817130580545 test_loss: 0.12040627002716064
epoch: 103 training_loss 0.10124195463955403 test_loss: 0.13104928731918336
epoch: 104 training_loss 0.10038930032402277 test_loss: 0.12468621730804444
epoch: 105 training_loss 0.09576876323670148 test_loss: 0.1364327549934387
epoch: 106 training_loss 0.10979288348928094 test_loss: 0.10588953495025635
epoch: 107 training_loss 0.09313388798385859 test_loss: 0.12172813415527343
epoch: 108 training_loss 0.10380968894809485 test_loss: 0.12247942686080933
epoch: 109 training_loss 0.09993294777348638 test_loss: 0.11572402715682983
epoch: 110 training_loss 0.10096317213028669 test_loss: 0.13501020669937133
epoch: 111 training_loss 0.10868183668702841 test_loss: 0.11600067615509033
epoch: 112 training_loss 0.10386859111487866 test_loss: 0.12903344631195068
epoch: 113 training_loss 0.10147133313119411 test_loss: 0.11299053430557252
epoch: 114 training_loss 0.10115335311740636 test_loss: 0.12267402410507203
epoch: 115 training_loss 0.10097451467067003 test_loss: 0.12145575284957885
epoch: 116 training_loss 0.11149097442626953 test_loss: 0.12701843976974486
epoch: 117 training_loss 0.09875788619741797 test_loss: 0.11746982336044312
epoch: 118 training_loss 0.09986808322370053 test_loss: 0.11812608242034912
epoch: 119 training_loss 0.09837048059329391 test_loss: 0.12747009992599487
epoch: 120 training_loss 0.10948586836457253 test_loss: 0.11288316249847412
epoch: 121 training_loss 0.10241048093885183 test_loss: 0.11516025066375732
epoch: 122 training_loss 0.10470080379396678 test_loss: 0.12325712442398071
epoch: 123 training_loss 0.10132110077887774 test_loss: 0.11939781904220581
epoch: 124 training_loss 0.09661762114614249 test_loss: 0.1339605450630188
epoch: 125 training_loss 0.10460768422111869 test_loss: 0.12339451313018798
epoch: 126 training_loss 0.09738430084660649 test_loss: 0.11198235750198364
epoch: 127 training_loss 0.09869267333298921 test_loss: 0.12358219623565674
epoch: 128 training_loss 0.10081886850297452 test_loss: 0.12587151527404786
epoch: 129 training_loss 0.10692753180861474 test_loss: 0.12513277530670167
epoch: 130 training_loss 0.10133965965360403 test_loss: 0.10807667970657349
epoch: 131 training_loss 0.1061706456542015 test_loss: 0.12902821302413942
epoch: 132 training_loss 0.10575851647183299 test_loss: 0.10453574657440186
epoch: 133 training_loss 0.10718219105154275 test_loss: 0.12527161836624146
epoch: 134 training_loss 0.10180051207542419 test_loss: 0.13061445951461792
epoch: 135 training_loss 0.10309699798002839 test_loss: 0.11821988821029664
epoch: 136 training_loss 0.1096227740496397 test_loss: 0.12579070329666137
epoch: 137 training_loss 0.0913977748528123 test_loss: 0.12152388095855712
epoch: 138 training_loss 0.09667389309033751 test_loss: 0.13316378593444825
epoch: 139 training_loss 0.10206401308998465 test_loss: 0.10595961809158325
epoch: 140 training_loss 0.09712443435564637 test_loss: 0.12819745540618896
epoch: 141 training_loss 0.09602244300767779 test_loss: 0.12872252464294434
epoch: 142 training_loss 0.10216222248971463 test_loss: 0.1335177779197693
epoch: 143 training_loss 0.10253686882555485 test_loss: 0.1317509174346924
epoch: 144 training_loss 0.10239805793389678 test_loss: 0.11688450574874878
epoch: 145 training_loss 0.10771069038659334 test_loss: 0.12803220748901367
epoch: 146 training_loss 0.10942896269261837 test_loss: 0.10879853963851929
epoch: 147 training_loss 0.10381862357258796 test_loss: 0.13833262920379638
epoch: 148 training_loss 0.0992856707982719 test_loss: 0.13153039216995238
epoch: 149 training_loss 0.09723520029336213 test_loss: 0.1203794002532959
epoch: 0 training_loss 0.31088798701763154 test_loss: 0.24459171295166016
epoch: 1 training_loss 0.21876999974250794 test_loss: 0.2194052219390869
epoch: 2 training_loss 0.19438948221504687 test_loss: 0.17371079921722413
epoch: 3 training_loss 0.18132774852216244 test_loss: 0.17011829614639282
epoch: 4 training_loss 0.15702391803264618 test_loss: 0.14907795190811157
epoch: 5 training_loss 0.15613072901964187 test_loss: 0.14793753623962402
epoch: 6 training_loss 0.14285392535850405 test_loss: 0.15186334848403932
epoch: 7 training_loss 0.13595831487327814 test_loss: 0.1427859663963318
epoch: 8 training_loss 0.13083929777145387 test_loss: 0.1339145064353943
epoch: 9 training_loss 0.1360173001885414 test_loss: 0.14561315774917602
epoch: 10 training_loss 0.13690140694379807 test_loss: 0.14384974241256715
epoch: 11 training_loss 0.1214657450094819 test_loss: 0.13953189849853515
epoch: 12 training_loss 0.12506776057183744 test_loss: 0.12793986797332763
epoch: 13 training_loss 0.11803977280855178 test_loss: 0.149414587020874
epoch: 14 training_loss 0.12292331244796514 test_loss: 0.11667708158493043
epoch: 15 training_loss 0.11559214731678366 test_loss: 0.1471721649169922
epoch: 16 training_loss 0.1125391522794962 test_loss: 0.12622106075286865
epoch: 17 training_loss 0.1198070864751935 test_loss: 0.10918763875961304
epoch: 18 training_loss 0.1153229934349656 test_loss: 0.12847502231597902
epoch: 19 training_loss 0.11099863033741712 test_loss: 0.12512072324752807
epoch: 20 training_loss 0.12170064792037011 test_loss: 0.13134249448776245
epoch: 21 training_loss 0.11568061262369156 test_loss: 0.12399835586547851
epoch: 22 training_loss 0.11950888970866799 test_loss: 0.1150244951248169
epoch: 23 training_loss 0.11202653001993895 test_loss: 0.13098922967910767
epoch: 24 training_loss 0.11554012205451727 test_loss: 0.11088749170303344
epoch: 25 training_loss 0.11352522514760494 test_loss: 0.10623731613159179
epoch: 26 training_loss 0.10603762179613113 test_loss: 0.11083369255065918
epoch: 27 training_loss 0.10915745114907623 test_loss: 0.1152844786643982
epoch: 28 training_loss 0.10984265707433223 test_loss: 0.11174309253692627
epoch: 29 training_loss 0.10919252928346396 test_loss: 0.12460815906524658
epoch: 30 training_loss 0.10840282801538706 test_loss: 0.11767756938934326
epoch: 31 training_loss 0.11146866727620364 test_loss: 0.10811383724212646
epoch: 32 training_loss 0.10884092641994357 test_loss: 0.1165063738822937
epoch: 33 training_loss 0.10764749109745025 test_loss: 0.11716581583023071
epoch: 34 training_loss 0.10952495478093624 test_loss: 0.11448928117752075
epoch: 35 training_loss 0.10442283309996128 test_loss: 0.12140462398529053
epoch: 36 training_loss 0.11108104642480612 test_loss: 0.11852420568466186
epoch: 37 training_loss 0.11256681837141513 test_loss: 0.11455936431884765
epoch: 38 training_loss 0.1143317092768848 test_loss: 0.10373903512954712
epoch: 39 training_loss 0.10447229642421008 test_loss: 0.11930246353149414
epoch: 40 training_loss 0.10813706554472446 test_loss: 0.10807613134384156
epoch: 41 training_loss 0.10991358857601881 test_loss: 0.107977294921875
epoch: 42 training_loss 0.10724047306925058 test_loss: 0.11787430047988892
epoch: 43 training_loss 0.11775466375052929 test_loss: 0.13629598617553712
epoch: 44 training_loss 0.1077085298858583 test_loss: 0.11325559616088868
epoch: 45 training_loss 0.11510360386222601 test_loss: 0.12442880868911743
epoch: 46 training_loss 0.10619089368730783 test_loss: 0.11383858919143677
epoch: 47 training_loss 0.10975903242826462 test_loss: 0.10794661045074463
epoch: 48 training_loss 0.1100509749352932 test_loss: 0.11420358419418335
epoch: 49 training_loss 0.10735191129148007 test_loss: 0.12323029041290283
epoch: 50 training_loss 0.10480163678526878 test_loss: 0.11767398118972779
epoch: 51 training_loss 0.10637878704816103 test_loss: 0.1021373987197876
epoch: 52 training_loss 0.10335512885823846 test_loss: 0.12235033512115479
epoch: 53 training_loss 0.10797370215877891 test_loss: 0.10059806108474731
epoch: 54 training_loss 0.11641614302992821 test_loss: 0.1271388053894043
epoch: 55 training_loss 0.10801835369318724 test_loss: 0.11373703479766846
epoch: 56 training_loss 0.1098237780481577 test_loss: 0.14865950345993043
epoch: 57 training_loss 0.10899941600859166 test_loss: 0.12497935295104981
epoch: 58 training_loss 0.11081765044480563 test_loss: 0.10704739093780517
epoch: 59 training_loss 0.10628784723579883 test_loss: 0.10811610221862793
epoch: 60 training_loss 0.10998043328523636 test_loss: 0.10541545152664185
epoch: 61 training_loss 0.10877933654934167 test_loss: 0.11379187107086182
epoch: 62 training_loss 0.10610914304852485 test_loss: 0.10015201568603516
epoch: 63 training_loss 0.10011478293687105 test_loss: 0.12111884355545044
epoch: 64 training_loss 0.10823516240343452 test_loss: 0.11116588115692139
epoch: 65 training_loss 0.10951382828876376 test_loss: 0.11392461061477661
epoch: 66 training_loss 0.10174741867929697 test_loss: 0.11463127136230469
epoch: 67 training_loss 0.11194064367562533 test_loss: 0.09164764285087586
epoch: 68 training_loss 0.10128307092934846 test_loss: 0.10649561882019043
epoch: 69 training_loss 0.10685608129948378 test_loss: 0.11130084991455078
epoch: 70 training_loss 0.1066046579182148 test_loss: 0.1151999831199646
epoch: 71 training_loss 0.10738998364657164 test_loss: 0.10344253778457642
epoch: 72 training_loss 0.10469866074621677 test_loss: 0.11913117170333862
epoch: 73 training_loss 0.10458982409909368 test_loss: 0.11517279148101807
epoch: 74 training_loss 0.11663086190819741 test_loss: 0.10853657722473145
epoch: 75 training_loss 0.10452733805403114 test_loss: 0.11666214466094971
epoch: 76 training_loss 0.10946793682873249 test_loss: 0.10113916397094727
epoch: 77 training_loss 0.09607234422117472 test_loss: 0.1275620460510254
epoch: 78 training_loss 0.09901173826307058 test_loss: 0.10171583890914918
epoch: 79 training_loss 0.11059302292764187 test_loss: 0.0994726538658142
epoch: 80 training_loss 0.10471056468784809 test_loss: 0.1074273943901062
epoch: 81 training_loss 0.10369059406220912 test_loss: 0.11836665868759155
epoch: 82 training_loss 0.10643620319664478 test_loss: 0.12349274158477783
epoch: 83 training_loss 0.10686250992119312 test_loss: 0.13129725456237792
epoch: 84 training_loss 0.10552459429949522 test_loss: 0.0987876296043396
epoch: 85 training_loss 0.10192150752991438 test_loss: 0.10151124000549316
epoch: 86 training_loss 0.09937690163031221 test_loss: 0.11635104417800904
epoch: 87 training_loss 0.10659672910347581 test_loss: 0.12560948133468627
epoch: 88 training_loss 0.10208470325917006 test_loss: 0.11486572027206421
epoch: 89 training_loss 0.10433013800531626 test_loss: 0.11851952075958253
epoch: 90 training_loss 0.10811545286327601 test_loss: 0.10624135732650757
epoch: 91 training_loss 0.10719777010381222 test_loss: 0.11497364044189454
epoch: 92 training_loss 0.10039907477796078 test_loss: 0.11377333402633667
epoch: 93 training_loss 0.1007045205682516 test_loss: 0.09547011852264405
epoch: 94 training_loss 0.10197534129023551 test_loss: 0.1137113094329834
epoch: 95 training_loss 0.10256558544933796 test_loss: 0.1150891661643982
epoch: 96 training_loss 0.09646949773654341 test_loss: 0.10603597164154052
epoch: 97 training_loss 0.1086903290078044 test_loss: 0.12363083362579345
epoch: 98 training_loss 0.09656998869031667 test_loss: 0.1264784336090088
epoch: 99 training_loss 0.09995012013241648 test_loss: 0.11413977146148682
epoch: 100 training_loss 0.10154313366860152 test_loss: 0.10399192571640015
epoch: 101 training_loss 0.10699718315154313 test_loss: 0.1112105131149292
epoch: 102 training_loss 0.10032957199960947 test_loss: 0.11275779008865357
epoch: 103 training_loss 0.0982754180766642 test_loss: 0.1233946442604065
epoch: 104 training_loss 0.09827782306820154 test_loss: 0.09803450107574463
epoch: 105 training_loss 0.1015565269626677 test_loss: 0.10793548822402954
epoch: 106 training_loss 0.099780653975904 test_loss: 0.10950222015380859
epoch: 107 training_loss 0.10023791067302228 test_loss: 0.11578412055969238
epoch: 108 training_loss 0.09825364753603935 test_loss: 0.11124019622802735
epoch: 109 training_loss 0.10662680743262172 test_loss: 0.12323870658874511
epoch: 110 training_loss 0.10068981107324362 test_loss: 0.10251986980438232
epoch: 111 training_loss 0.10154657430946827 test_loss: 0.10007452964782715
epoch: 112 training_loss 0.09591683186590672 test_loss: 0.10284727811813354
epoch: 113 training_loss 0.0990849850513041 test_loss: 0.11924277544021607
epoch: 114 training_loss 0.10750982642173768 test_loss: 0.10101536512374878
epoch: 115 training_loss 0.09836752224713564 test_loss: 0.10819222927093505
epoch: 116 training_loss 0.10187465839087963 test_loss: 0.1116673469543457
epoch: 117 training_loss 0.1056229417771101 test_loss: 0.09636563658714295
epoch: 118 training_loss 0.10020943000912666 test_loss: 0.12087892293930054
epoch: 119 training_loss 0.10824754107743502 test_loss: 0.10147069692611695
epoch: 120 training_loss 0.10044859517365694 test_loss: 0.11149414777755737
epoch: 121 training_loss 0.1012397201731801 test_loss: 0.10651835203170776
epoch: 122 training_loss 0.1025985849276185 test_loss: 0.10858342647552491
epoch: 123 training_loss 0.10440714668482542 test_loss: 0.11126455068588256
epoch: 124 training_loss 0.099469801671803 test_loss: 0.09282475113868713
epoch: 125 training_loss 0.10398437112569808 test_loss: 0.11050831079483033
epoch: 126 training_loss 0.10240155026316643 test_loss: 0.11918423175811768
epoch: 127 training_loss 0.10043274499475956 test_loss: 0.10675333738327027
epoch: 128 training_loss 0.1016399035975337 test_loss: 0.09823270440101624
epoch: 129 training_loss 0.10113295868039131 test_loss: 0.1090929388999939
epoch: 130 training_loss 0.09274789525195956 test_loss: 0.10159260034561157
epoch: 131 training_loss 0.10090222343802452 test_loss: 0.09341732263565064
epoch: 132 training_loss 0.09336073098704219 test_loss: 0.11510617733001709
epoch: 133 training_loss 0.09392913771793246 test_loss: 0.10885031223297119
epoch: 134 training_loss 0.09468755289912224 test_loss: 0.11140422821044922
epoch: 135 training_loss 0.10825534835457802 test_loss: 0.10042759180068969
epoch: 136 training_loss 0.09495651002973318 test_loss: 0.11054724454879761
epoch: 137 training_loss 0.0962191628292203 test_loss: 0.0983855664730072
epoch: 138 training_loss 0.10097236532717943 test_loss: 0.11514112949371338
epoch: 139 training_loss 0.09958817148581148 test_loss: 0.10103520154953002
epoch: 140 training_loss 0.09955960370600224 test_loss: 0.11331990957260132
epoch: 141 training_loss 0.10021647207438945 test_loss: 0.10474838018417358
epoch: 142 training_loss 0.10665392626076936 test_loss: 0.11541318893432617
epoch: 143 training_loss 0.10449631366878748 test_loss: 0.09817183613777161
epoch: 144 training_loss 0.09973067803308368 test_loss: 0.10552575588226318
epoch: 145 training_loss 0.09484014825895429 test_loss: 0.10851150751113892
epoch: 146 training_loss 0.09742715418338775 test_loss: 0.10311268568038941
epoch: 147 training_loss 0.10524078994989396 test_loss: 0.10045204162597657
epoch: 148 training_loss 0.09929021185263992 test_loss: 0.09747594594955444
epoch: 149 training_loss 0.09677108054980636 test_loss: 0.11606086492538452
epoch: 0 training_loss 0.3304969102144241 test_loss: 0.23777971267700196
epoch: 1 training_loss 0.21934855833649636 test_loss: 0.20075149536132814
epoch: 2 training_loss 0.1958101648837328 test_loss: 0.1870598554611206
epoch: 3 training_loss 0.1730158518999815 test_loss: 0.16837482452392577
epoch: 4 training_loss 0.15877626717090607 test_loss: 0.16674351692199707
epoch: 5 training_loss 0.14739960715174674 test_loss: 0.1592464566230774
epoch: 6 training_loss 0.1537191680818796 test_loss: 0.1426318883895874
epoch: 7 training_loss 0.13872420974075794 test_loss: 0.15015243291854857
epoch: 8 training_loss 0.12621799793094396 test_loss: 0.1394650936126709
epoch: 9 training_loss 0.1302985429018736 test_loss: 0.13231099843978883
epoch: 10 training_loss 0.12428128931671381 test_loss: 0.14989334344863892
epoch: 11 training_loss 0.14301885027438402 test_loss: 0.18079670667648315
epoch: 12 training_loss 0.12970151007175446 test_loss: 0.14104728698730468
epoch: 13 training_loss 0.12308290444314479 test_loss: 0.13577871322631835
epoch: 14 training_loss 0.11873067565262317 test_loss: 0.13715869188308716
epoch: 15 training_loss 0.11160355972126126 test_loss: 0.14351840019226075
epoch: 16 training_loss 0.12234192918986082 test_loss: 0.12608975172042847
epoch: 17 training_loss 0.1211375686340034 test_loss: 0.12352941036224366
epoch: 18 training_loss 0.11932260252535343 test_loss: 0.1343658447265625
epoch: 19 training_loss 0.11410748589783908 test_loss: 0.11566727161407471
epoch: 20 training_loss 0.1194329860061407 test_loss: 0.11616417169570922
epoch: 21 training_loss 0.11655026223510503 test_loss: 0.12770556211471557
epoch: 22 training_loss 0.11444393184036017 test_loss: 0.1280422806739807
epoch: 23 training_loss 0.11711460016667843 test_loss: 0.13037946224212646
epoch: 24 training_loss 0.11098616072908044 test_loss: 0.12788245677947999
epoch: 25 training_loss 0.1109572746232152 test_loss: 0.114300537109375
epoch: 26 training_loss 0.1148381019383669 test_loss: 0.13800948858261108
epoch: 27 training_loss 0.12246005360037088 test_loss: 0.10296149253845215
epoch: 28 training_loss 0.12154963381588459 test_loss: 0.11227909326553345
epoch: 29 training_loss 0.1115746358409524 test_loss: 0.1135177731513977
epoch: 30 training_loss 0.11063678111881017 test_loss: 0.14302487373352052
epoch: 31 training_loss 0.11301246721297503 test_loss: 0.13697646856307982
epoch: 32 training_loss 0.11003014465793967 test_loss: 0.12515945434570314
epoch: 33 training_loss 0.10785846760496497 test_loss: 0.1288774847984314
epoch: 34 training_loss 0.11527120424434542 test_loss: 0.13446520566940307
epoch: 35 training_loss 0.1098330757021904 test_loss: 0.1278347373008728
epoch: 36 training_loss 0.10705904453992844 test_loss: 0.11618549823760986
epoch: 37 training_loss 0.11242326075211168 test_loss: 0.11732584238052368
epoch: 38 training_loss 0.11280128806829452 test_loss: 0.11883049011230469
epoch: 39 training_loss 0.11031614653766156 test_loss: 0.14168254137039185
epoch: 40 training_loss 0.1108132810331881 test_loss: 0.1253623604774475
epoch: 41 training_loss 0.11169899623841047 test_loss: 0.12852942943572998
epoch: 42 training_loss 0.10992677241563797 test_loss: 0.12358076572418213
epoch: 43 training_loss 0.10575491830706596 test_loss: 0.11278411149978637
epoch: 44 training_loss 0.10518109977245331 test_loss: 0.11509751081466675
epoch: 45 training_loss 0.11375307984650135 test_loss: 0.12604126930236817
epoch: 46 training_loss 0.11420357648283243 test_loss: 0.13140879869461058
epoch: 47 training_loss 0.10990088555961847 test_loss: 0.11908100843429566
epoch: 48 training_loss 0.11371347220614553 test_loss: 0.12627936601638795
epoch: 49 training_loss 0.11187156144529581 test_loss: 0.10430750846862794
epoch: 50 training_loss 0.10798214670270681 test_loss: 0.12274529933929443
epoch: 51 training_loss 0.10379131894558669 test_loss: 0.12444934844970704
epoch: 52 training_loss 0.10862120997160674 test_loss: 0.1134401798248291
epoch: 53 training_loss 0.10628324275836348 test_loss: 0.11728067398071289
epoch: 54 training_loss 0.10782961128279567 test_loss: 0.11288163661956788
epoch: 55 training_loss 0.11002628296613694 test_loss: 0.10853888988494872
epoch: 56 training_loss 0.10711926467716694 test_loss: 0.12915396690368652
epoch: 57 training_loss 0.10231587931513786 test_loss: 0.12029459476470947
epoch: 58 training_loss 0.10898027336224914 test_loss: 0.12976948022842408
epoch: 59 training_loss 0.11301993992179632 test_loss: 0.12452220916748047
epoch: 60 training_loss 0.11428170442581177 test_loss: 0.1151654601097107
epoch: 61 training_loss 0.10546173211187124 test_loss: 0.12242236137390136
epoch: 62 training_loss 0.10398538742214442 test_loss: 0.1277993679046631
epoch: 63 training_loss 0.11143724642693996 test_loss: 0.1388258934020996
epoch: 64 training_loss 0.11509903330355882 test_loss: 0.1019944429397583
epoch: 65 training_loss 0.11271825402975083 test_loss: 0.10810707807540894
epoch: 66 training_loss 0.10320691844448447 test_loss: 0.11033895015716552
epoch: 67 training_loss 0.11209650013595819 test_loss: 0.1112164855003357
epoch: 68 training_loss 0.10556934241205454 test_loss: 0.11853188276290894
epoch: 69 training_loss 0.10678430465981364 test_loss: 0.12947444915771483
epoch: 70 training_loss 0.10319794535636902 test_loss: 0.1163596510887146
epoch: 71 training_loss 0.10404857855290174 test_loss: 0.11515570878982544
epoch: 72 training_loss 0.10089239915832877 test_loss: 0.13736777305603026
epoch: 73 training_loss 0.10291038382798433 test_loss: 0.13306736946105957
epoch: 74 training_loss 0.10854142352938652 test_loss: 0.10739092826843262
epoch: 75 training_loss 0.11067716628313065 test_loss: 0.11895816326141358
epoch: 76 training_loss 0.10745021667331457 test_loss: 0.11514577865600586
epoch: 77 training_loss 0.10655891846865416 test_loss: 0.11903653144836426
epoch: 78 training_loss 0.10734890304505824 test_loss: 0.1217107892036438
epoch: 79 training_loss 0.103498679921031 test_loss: 0.10950888395309448
epoch: 80 training_loss 0.10682265216484665 test_loss: 0.1109502673149109
epoch: 81 training_loss 0.10374303035438061 test_loss: 0.13066747188568115
epoch: 82 training_loss 0.10308918137103319 test_loss: 0.12534860372543336
epoch: 83 training_loss 0.10650192975997924 test_loss: 0.1358138918876648
epoch: 84 training_loss 0.11376448845490814 test_loss: 0.10913918018341065
epoch: 85 training_loss 0.10543519880622626 test_loss: 0.11171096563339233
epoch: 86 training_loss 0.10753694131970405 test_loss: 0.11293290853500366
epoch: 87 training_loss 0.10198436595499516 test_loss: 0.12235218286514282
epoch: 88 training_loss 0.09938228014856577 test_loss: 0.12469384670257569
epoch: 89 training_loss 0.10675169378519059 test_loss: 0.12272976636886597
epoch: 90 training_loss 0.11332644578069448 test_loss: 0.11933461427688599
epoch: 91 training_loss 0.11109971947968006 test_loss: 0.11025509834289551
epoch: 92 training_loss 0.10100311858579517 test_loss: 0.1223032832145691
epoch: 93 training_loss 0.11192743867635727 test_loss: 0.10297428369522095
epoch: 94 training_loss 0.10869121376425028 test_loss: 0.11685166358947754
epoch: 95 training_loss 0.10904343178495765 test_loss: 0.0989081859588623
epoch: 96 training_loss 0.09887693736702204 test_loss: 0.12036278247833251
epoch: 97 training_loss 0.10666009932756423 test_loss: 0.10960060358047485
epoch: 98 training_loss 0.10389240918681025 test_loss: 0.12253034114837646
epoch: 99 training_loss 0.10994072172790766 test_loss: 0.11713454723358155
epoch: 100 training_loss 0.10299714647233486 test_loss: 0.12668575048446656
epoch: 101 training_loss 0.10384461998939515 test_loss: 0.1215989351272583
epoch: 102 training_loss 0.10107406806200743 test_loss: 0.12602952718734742
epoch: 103 training_loss 0.10601053182035684 test_loss: 0.1134679675102234
epoch: 104 training_loss 0.10698480181396007 test_loss: 0.1118796706199646
epoch: 105 training_loss 0.10558982443064452 test_loss: 0.10377275943756104
epoch: 106 training_loss 0.10738317070528865 test_loss: 0.12137563228607177
epoch: 107 training_loss 0.10856295026838779 test_loss: 0.11909080743789673
epoch: 108 training_loss 0.1024174559302628 test_loss: 0.11095740795135497
epoch: 109 training_loss 0.10385509394109249 test_loss: 0.11869888305664063
epoch: 110 training_loss 0.1027202856913209 test_loss: 0.12291890382766724
epoch: 111 training_loss 0.10254907999187708 test_loss: 0.11865066289901734
epoch: 112 training_loss 0.10407540716230869 test_loss: 0.1160771131515503
epoch: 113 training_loss 0.10462545283138752 test_loss: 0.1098166823387146
epoch: 114 training_loss 0.09360410165041685 test_loss: 0.1264696478843689
epoch: 115 training_loss 0.0975827574543655 test_loss: 0.10458292961120605
epoch: 116 training_loss 0.10494145266711712 test_loss: 0.09909983873367309
epoch: 117 training_loss 0.0994512078911066 test_loss: 0.11825857162475586
epoch: 118 training_loss 0.10956262584775686 test_loss: 0.1296010971069336
epoch: 119 training_loss 0.10253137774765492 test_loss: 0.11883249282836914
epoch: 120 training_loss 0.09638358969241381 test_loss: 0.11327571868896484
epoch: 121 training_loss 0.09830137583427132 test_loss: 0.10289665460586547
epoch: 122 training_loss 0.10064238380640746 test_loss: 0.1222449541091919
epoch: 123 training_loss 0.10391149267554284 test_loss: 0.10736867189407348
epoch: 124 training_loss 0.1022443563863635 test_loss: 0.11741299629211426
epoch: 125 training_loss 0.09748916877433658 test_loss: 0.12537959814071656
epoch: 126 training_loss 0.1081696667894721 test_loss: 0.12408392429351807
epoch: 127 training_loss 0.1021393122524023 test_loss: 0.12752510309219361
epoch: 128 training_loss 0.10708836214616894 test_loss: 0.11827610731124878
epoch: 129 training_loss 0.10731576964259147 test_loss: 0.11476588249206543
epoch: 130 training_loss 0.10538221877068281 test_loss: 0.11977188587188721
epoch: 131 training_loss 0.1019313507899642 test_loss: 0.10838379859924316
epoch: 132 training_loss 0.10770586382597686 test_loss: 0.11348563432693481
epoch: 133 training_loss 0.10132916357368231 test_loss: 0.1164284110069275
epoch: 134 training_loss 0.1005310908704996 test_loss: 0.12317043542861938
epoch: 135 training_loss 0.10713540989905596 test_loss: 0.11115642786026
epoch: 136 training_loss 0.0980700197443366 test_loss: 0.12124036550521851
epoch: 137 training_loss 0.10402735970914363 test_loss: 0.10790736675262451
epoch: 138 training_loss 0.1019408182054758 test_loss: 0.09796545505523682
epoch: 139 training_loss 0.10661707965657115 test_loss: 0.13465858697891236
epoch: 140 training_loss 0.10733943639323115 test_loss: 0.119837486743927
epoch: 141 training_loss 0.10596863344311715 test_loss: 0.10450586080551147
epoch: 142 training_loss 0.09544553983956576 test_loss: 0.09941021800041198
epoch: 143 training_loss 0.1058911183848977 test_loss: 0.10613453388214111
epoch: 144 training_loss 0.1079119236022234 test_loss: 0.11946237087249756
epoch: 145 training_loss 0.10239446787163616 test_loss: 0.12546329498291015
epoch: 146 training_loss 0.10188578279688955 test_loss: 0.11776407957077026
epoch: 147 training_loss 0.10356406182050705 test_loss: 0.10446388721466064
epoch: 148 training_loss 0.1081505073979497 test_loss: 0.12481518983840942
epoch: 149 training_loss 0.10206829261034728 test_loss: 0.11170424222946167
episode: 0 training return: -999.9789481704375
episode: 1 training return: -999.9713672339117
episode: 2 training return: -999.9790207731996
episode: 3 training return: -999.9830111065411
epoch: 1 test_true_pfm: -0.5502130177388437 sim_pfm: -999.5038245412165
episode: 4 training return: -999.9930475128906
episode: 5 training return: -999.989003214295
episode: 6 training return: -999.9837595149445
episode: 7 training return: -999.9892524240898
epoch: 2 test_true_pfm: -0.18782786362053505 sim_pfm: -999.4700302807217
episode: 8 training return: -999.9898817963059
episode: 9 training return: -999.9766310036573
episode: 10 training return: -999.9740574957317
episode: 11 training return: -999.9633525679144
epoch: 3 test_true_pfm: 0.2072000810531299 sim_pfm: -999.492527274792
episode: 12 training return: -999.9820048902172
episode: 13 training return: -999.9807998459514
episode: 14 training return: -999.9737866676307
episode: 15 training return: -999.9898903622942
epoch: 4 test_true_pfm: -0.046531515038125505 sim_pfm: -999.4914888613507
episode: 16 training return: -999.9829989707664
episode: 17 training return: -999.948352278163
episode: 18 training return: -999.9893934967218
episode: 19 training return: -999.9851570950345
epoch: 5 test_true_pfm: -0.7633808763434007 sim_pfm: -999.4924372404812
episode: 20 training return: -999.9824025396001
episode: 21 training return: -999.9730189181073
episode: 22 training return: -999.9876068051336
episode: 23 training return: -999.9914936884925
epoch: 6 test_true_pfm: -0.07685661095504666 sim_pfm: -999.4749128453473
episode: 24 training return: -999.9894921003255
episode: 25 training return: -999.9878260504257
episode: 26 training return: -1000.007057383943
episode: 27 training return: -999.9729207849822
epoch: 7 test_true_pfm: -0.29089815516757106 sim_pfm: -999.4898249058779
episode: 28 training return: -999.9958592227105
episode: 29 training return: -999.9826109434605
episode: 30 training return: -999.9803870879422
episode: 31 training return: -999.9789679521642
epoch: 8 test_true_pfm: -0.3870591719974315 sim_pfm: -999.4848576206738
episode: 32 training return: -999.9670743192629
episode: 33 training return: -999.9788350225331
episode: 34 training return: -999.9978687834912
episode: 35 training return: -999.9958752718956
epoch: 9 test_true_pfm: -0.021566209253660245 sim_pfm: -999.478262610898
episode: 36 training return: -999.9971265909323
episode: 37 training return: -999.9965647302928
episode: 38 training return: -999.9858586743289
episode: 39 training return: -999.9984070843391
epoch: 10 test_true_pfm: -0.0019525661609414806 sim_pfm: -999.4827195798249
episode: 40 training return: -999.9924808108069
episode: 41 training return: -999.9909777435266
episode: 42 training return: -999.9778905266373
episode: 43 training return: -999.9735741725633
epoch: 11 test_true_pfm: -0.333658800706142 sim_pfm: -999.472389186871
episode: 44 training return: -999.9819469515975
episode: 45 training return: -999.9947955500612
episode: 46 training return: -999.9866332949306
episode: 47 training return: -999.9761550458772
epoch: 12 test_true_pfm: -0.33925566361968323 sim_pfm: -999.4904194930917
episode: 48 training return: -999.995003744721
episode: 49 training return: -999.9785749318826
episode: 50 training return: -999.9700255233657
episode: 51 training return: -999.9930552028135
epoch: 13 test_true_pfm: -0.5577401072846054 sim_pfm: -999.4974612907918
episode: 52 training return: -999.9756931991585
episode: 53 training return: -999.9720459689725
episode: 54 training return: -999.9924625968688
episode: 55 training return: -999.9841815080457
epoch: 14 test_true_pfm: -1.141372845732674 sim_pfm: -999.4666297027139
episode: 56 training return: -999.9803240066094
episode: 57 training return: -999.9684344426518
episode: 58 training return: -999.9769175595409
episode: 59 training return: -999.9665985089049
epoch: 15 test_true_pfm: -0.4975781114412971 sim_pfm: -999.4858927691224
episode: 60 training return: -999.9959652088259
episode: 61 training return: -999.9859700964881
episode: 62 training return: -999.9681291660936
episode: 63 training return: -999.9787208941729
epoch: 16 test_true_pfm: -0.9054979996522753 sim_pfm: -999.4850176801457
episode: 64 training return: -999.9969038787551
episode: 65 training return: -999.9832180408567
episode: 66 training return: -999.9775793002318
episode: 67 training return: -999.9771410641515
epoch: 17 test_true_pfm: -0.45842557153358127 sim_pfm: -999.4955348045863
episode: 68 training return: -999.9576281961716
episode: 69 training return: -999.9793173328621
episode: 70 training return: -999.9628443728236
episode: 71 training return: -999.9979790158443
epoch: 18 test_true_pfm: -0.02911336689488068 sim_pfm: -999.49069614364
episode: 72 training return: -999.9929535043699
episode: 73 training return: -999.963750361078
episode: 74 training return: -999.9901097483622
episode: 75 training return: -999.9784866733519
epoch: 19 test_true_pfm: -0.7018603995213669 sim_pfm: -999.4751926123828
episode: 76 training return: -999.9917400488133
episode: 77 training return: -999.9889516617035
episode: 78 training return: -999.9942563399608
episode: 79 training return: -999.97999352252
epoch: 20 test_true_pfm: -0.10236153457898567 sim_pfm: -999.4950205182696
episode: 80 training return: -999.9936980173281
episode: 81 training return: -999.9665633904864
episode: 82 training return: -999.9913500528157
episode: 83 training return: -999.9952740532248
epoch: 21 test_true_pfm: -0.2980907492539584 sim_pfm: -999.4839309281048
episode: 84 training return: -999.9901133548865
episode: 85 training return: -999.9621807165081
episode: 86 training return: -999.9769123310919
episode: 87 training return: -999.9845734571423
epoch: 22 test_true_pfm: -0.6992867145698058 sim_pfm: -999.4802314697077
episode: 88 training return: -999.9856602281047
episode: 89 training return: -999.9685933587451
episode: 90 training return: -999.968503600214
episode: 91 training return: -999.989258217919
epoch: 23 test_true_pfm: -0.19944093849662928 sim_pfm: -999.4995315172806
episode: 92 training return: -999.9816832768132
episode: 93 training return: -999.9805744100393
episode: 94 training return: -999.9972620364146
episode: 95 training return: -999.9935241991026
epoch: 24 test_true_pfm: 0.2587410211088978 sim_pfm: -999.4969564752746
episode: 96 training return: -999.9986235120978
episode: 97 training return: -999.9834263016587
episode: 98 training return: -999.9939081485344
episode: 99 training return: -999.9812977508692
epoch: 25 test_true_pfm: -0.07580580905883368 sim_pfm: -999.46928422651
episode: 100 training return: -999.9922011533727
episode: 101 training return: -999.9628460205258
episode: 102 training return: -999.969756833133
episode: 103 training return: -999.955542178312
epoch: 26 test_true_pfm: 0.03409050468888134 sim_pfm: -999.4842108624152
episode: 104 training return: -999.9913108463022
episode: 105 training return: -999.9960991062754
episode: 106 training return: -999.9774320480809
episode: 107 training return: -999.9484409603938
epoch: 27 test_true_pfm: -0.0904699708294487 sim_pfm: -999.4953709930091
episode: 108 training return: -999.986352353847
episode: 109 training return: -999.9959232720024
episode: 110 training return: -999.9509131596117
episode: 111 training return: -999.9909832325421
epoch: 28 test_true_pfm: 0.21879485119888176 sim_pfm: -999.4633236471744
episode: 112 training return: -999.9918413772521
episode: 113 training return: -999.985641820964
episode: 114 training return: -999.9456768414882
episode: 115 training return: -999.9714460940171
epoch: 29 test_true_pfm: -0.0034776075486312084 sim_pfm: -999.4876114526388
episode: 116 training return: -999.9914563333947
episode: 117 training return: -999.9845704564121
episode: 118 training return: -999.9617570183324
episode: 119 training return: -999.9594449557661
epoch: 30 test_true_pfm: -0.45315827748209747 sim_pfm: -999.4874755392565
episode: 120 training return: -999.9646870427406
episode: 121 training return: -999.9487784955944
episode: 122 training return: -999.9735460577155
episode: 123 training return: -999.9926956045158
epoch: 31 test_true_pfm: -0.34888769417588333 sim_pfm: -999.4753136526937
episode: 124 training return: -999.9814687500756
episode: 125 training return: -999.9960499880625
episode: 126 training return: -999.9781162921753
episode: 127 training return: -999.9835718358185
epoch: 32 test_true_pfm: -0.6791545570657836 sim_pfm: -999.4973383377913
episode: 128 training return: -999.9707507706721
episode: 129 training return: -999.991379249492
episode: 130 training return: -999.9819182778805
episode: 131 training return: -999.9874501316458
epoch: 33 test_true_pfm: 0.1040120479093148 sim_pfm: -999.4925245152908
episode: 132 training return: -999.9817561065727
episode: 133 training return: -999.9594729364468
episode: 134 training return: -999.9857982526094
episode: 135 training return: -999.9403953187809
epoch: 34 test_true_pfm: -0.11483034613926985 sim_pfm: -999.4632122670478
episode: 136 training return: -999.984317296669
episode: 137 training return: -999.9343289042484
episode: 138 training return: -999.9647179263602
episode: 139 training return: -999.9948592995329
epoch: 35 test_true_pfm: -0.39424779303820556 sim_pfm: -999.4892308285083
episode: 140 training return: -999.9884594776447
episode: 141 training return: -999.9810669927471
episode: 142 training return: -999.9628464783658
episode: 143 training return: -999.984311075197
epoch: 36 test_true_pfm: 0.23577955427840983 sim_pfm: -999.4949174570556
episode: 144 training return: -999.9691310101808
episode: 145 training return: -999.9862570793302
episode: 146 training return: -999.9735890645595
episode: 147 training return: -999.9830747791583
epoch: 37 test_true_pfm: -0.03130471480703515 sim_pfm: -999.4945935195891
episode: 148 training return: -999.9710213988271
episode: 149 training return: -999.9892787325554
episode: 150 training return: -999.9730878063239
episode: 151 training return: -999.9833762178124
epoch: 38 test_true_pfm: -0.5418587054750748 sim_pfm: -999.4760436073411
episode: 152 training return: -999.9834234470267
episode: 153 training return: -999.9822205722942
episode: 154 training return: -999.9981573456348
episode: 155 training return: -999.9773020583294
epoch: 39 test_true_pfm: -1.4679014909009573 sim_pfm: -999.4993366056328
episode: 156 training return: -999.9556247285118
episode: 157 training return: -999.9694565345732
episode: 158 training return: -999.9957201334591
episode: 159 training return: -999.986095864975
epoch: 40 test_true_pfm: 0.35319040335938534 sim_pfm: -999.4744821853955
episode: 160 training return: -999.9830754782507
episode: 161 training return: -999.9880315567522
episode: 162 training return: -999.9789081768881
episode: 163 training return: -999.9719331127311
epoch: 41 test_true_pfm: -0.27582485580402505 sim_pfm: -999.4738074423485
episode: 164 training return: -999.9648133649581
episode: 165 training return: -999.9710636341766
episode: 166 training return: -999.9902004395675
episode: 167 training return: -999.9620567347588
epoch: 42 test_true_pfm: 0.4083223521327833 sim_pfm: -999.4792163720014
episode: 168 training return: -999.9792206099014
episode: 169 training return: -999.9853188261397
episode: 170 training return: -999.9853368422183
episode: 171 training return: -999.9760812248236
epoch: 43 test_true_pfm: 0.36356561743764787 sim_pfm: -999.495011529517
episode: 172 training return: -999.9956924653796
episode: 173 training return: -999.980047801412
episode: 174 training return: -999.9966562002409
episode: 175 training return: -999.9934108900165
epoch: 44 test_true_pfm: -1.0797033193208219 sim_pfm: -999.5087978629326
episode: 176 training return: -999.9897418732713
episode: 177 training return: -999.9896383428314
episode: 178 training return: -999.9671664767873
episode: 179 training return: -999.9817235983942
epoch: 45 test_true_pfm: -0.5747273826935159 sim_pfm: -999.4856547343792
episode: 180 training return: -999.9807287111581
episode: 181 training return: -999.9864713421454
episode: 182 training return: -999.9871455466587
episode: 183 training return: -999.9791018841519
epoch: 46 test_true_pfm: -0.27393351409188194 sim_pfm: -999.4768905184375
episode: 184 training return: -999.9670159100012
episode: 185 training return: -999.9877504871566
episode: 186 training return: -999.9645791774082
episode: 187 training return: -999.9768870998669
epoch: 47 test_true_pfm: -1.0201903891063264 sim_pfm: -999.4780072543386
episode: 188 training return: -999.99265438038
episode: 189 training return: -999.9795810536059
episode: 190 training return: -999.9822857194722
episode: 191 training return: -999.9860228065104
epoch: 48 test_true_pfm: 0.21593204873838848 sim_pfm: -999.4902989946621
episode: 192 training return: -999.9919573458455
episode: 193 training return: -999.9680833063027
episode: 194 training return: -999.9875967371817
episode: 195 training return: -999.9753762063011
epoch: 49 test_true_pfm: 0.2344235675755949 sim_pfm: -999.4853147332313
episode: 196 training return: -999.9886100715329
episode: 197 training return: -999.9682102080736
episode: 198 training return: -999.9863807802345
episode: 199 training return: -999.9829519879902
epoch: 50 test_true_pfm: -0.6362310515087013 sim_pfm: -999.4889089014772
episode: 200 training return: -1000.0250892110988
episode: 201 training return: -999.993197135195
episode: 202 training return: -999.9974242525168
episode: 203 training return: -999.9755465773786
epoch: 51 test_true_pfm: -0.43539356928483636 sim_pfm: -999.5017171980502
episode: 204 training return: -999.977313748096
episode: 205 training return: -999.9851378789167
episode: 206 training return: -999.9705356887378
episode: 207 training return: -999.9829180155568
epoch: 52 test_true_pfm: -0.39096261370317636 sim_pfm: -999.4905090032674
episode: 208 training return: -999.9712248529993
episode: 209 training return: -999.9559811520821
episode: 210 training return: -999.9835900732396
episode: 211 training return: -999.9803434367703
epoch: 53 test_true_pfm: -0.29814439200512105 sim_pfm: -999.4911462630581
episode: 212 training return: -999.9719794157362
episode: 213 training return: -999.9733088994651
episode: 214 training return: -999.9940249632252
episode: 215 training return: -999.9812440689813
epoch: 54 test_true_pfm: -0.19950633133527765 sim_pfm: -999.4671186250657
episode: 216 training return: -999.9761916734127
episode: 217 training return: -999.9942773275391
episode: 218 training return: -999.9804965594141
episode: 219 training return: -999.9582806968323
epoch: 55 test_true_pfm: -0.12600033554790857 sim_pfm: -999.4918103922131
episode: 220 training return: -999.9709395496216
episode: 221 training return: -999.9808458120866
episode: 222 training return: -999.9604450326068
episode: 223 training return: -999.9516310712872
epoch: 56 test_true_pfm: -0.3273921084351676 sim_pfm: -999.4776449920936
episode: 224 training return: -999.9720430348941
episode: 225 training return: -999.9932391351969
episode: 226 training return: -999.9822838104768
episode: 227 training return: -999.9823064221398
epoch: 57 test_true_pfm: -0.49306479399730657 sim_pfm: -999.4927270797239
episode: 228 training return: -999.9933767422283
episode: 229 training return: -999.9696150596474
episode: 230 training return: -999.9821664235911
episode: 231 training return: -999.9819635099271
epoch: 58 test_true_pfm: -0.7970238036643646 sim_pfm: -999.5016263715473
episode: 232 training return: -999.9790898245589
episode: 233 training return: -999.9752143975979
episode: 234 training return: -999.9922360621463
episode: 235 training return: -999.9830982605932
epoch: 59 test_true_pfm: 0.021354012868413527 sim_pfm: -999.4852949338821
episode: 236 training return: -999.9907412027887
episode: 237 training return: -999.975924929528
episode: 238 training return: -999.9740952277574
episode: 239 training return: -999.9739772785592
epoch: 60 test_true_pfm: -0.35390062029357444 sim_pfm: -999.492586555723
episode: 240 training return: -999.9861357010832
episode: 241 training return: -999.9957573301107
episode: 242 training return: -999.9719059590836
episode: 243 training return: -999.9924047779674
epoch: 61 test_true_pfm: -0.23569050280816484 sim_pfm: -999.4927568630537
episode: 244 training return: -999.9776964712847
episode: 245 training return: -999.9960805333897
episode: 246 training return: -999.9743561102202
episode: 247 training return: -999.9762990568896
epoch: 62 test_true_pfm: -0.6672535947018837 sim_pfm: -999.4881685291548
episode: 248 training return: -999.9877046808834
episode: 249 training return: -999.9857344332055
episode: 250 training return: -999.9931280379049
episode: 251 training return: -999.9820233020747
epoch: 63 test_true_pfm: 0.1563480663725182 sim_pfm: -999.4822861678089
episode: 252 training return: -999.9665209389294
episode: 253 training return: -999.9941653363692
episode: 254 training return: -999.9521977212792
episode: 255 training return: -999.987273902859
epoch: 64 test_true_pfm: -0.2912336127545924 sim_pfm: -999.5014398921818
episode: 256 training return: -999.987165138927
episode: 257 training return: -999.9796134017804
episode: 258 training return: -999.9932610510584
episode: 259 training return: -999.9859948476133
epoch: 65 test_true_pfm: -0.10423330159814742 sim_pfm: -999.4880485548483
episode: 260 training return: -999.985572590035
episode: 261 training return: -999.9657506998637
episode: 262 training return: -999.9828813620795
episode: 263 training return: -999.9901973308761
epoch: 66 test_true_pfm: -0.5848895712269089 sim_pfm: -999.4953360649607
episode: 264 training return: -999.9870257454284
episode: 265 training return: -999.9945210122246
episode: 266 training return: -999.9931544329288
episode: 267 training return: -999.9876871565205
epoch: 67 test_true_pfm: 0.14340840019798348 sim_pfm: -999.4837415193783
episode: 268 training return: -999.9914836006855
episode: 269 training return: -999.9784387130214
episode: 270 training return: -999.9840241927577
episode: 271 training return: -999.9909096590347
epoch: 68 test_true_pfm: -0.6473724545320093 sim_pfm: -999.4842361864827
episode: 272 training return: -999.9866083734099
episode: 273 training return: -999.9763091787739
episode: 274 training return: -999.9812939137611
episode: 275 training return: -999.98547973608
epoch: 69 test_true_pfm: -0.008277048946352644 sim_pfm: -999.4933162375792
episode: 276 training return: -999.946914375889
episode: 277 training return: -999.9699364274709
episode: 278 training return: -999.9778600087861
episode: 279 training return: -999.9878433826734
epoch: 70 test_true_pfm: -1.3848915238541564 sim_pfm: -999.5079931484562
episode: 280 training return: -999.9644927993681
episode: 281 training return: -999.9727117099158
episode: 282 training return: -999.9850048304787
episode: 283 training return: -999.984572866083
epoch: 71 test_true_pfm: -0.16131154657139912 sim_pfm: -999.4971130290111
episode: 284 training return: -999.9755044564201
episode: 285 training return: -999.969142776266
episode: 286 training return: -999.9828050509411
episode: 287 training return: -999.9516258958471
epoch: 72 test_true_pfm: -7.083629030783338e-05 sim_pfm: -999.4782851896503
episode: 288 training return: -999.9853048201837
episode: 289 training return: -999.9700692082903
episode: 290 training return: -999.9864964214206
episode: 291 training return: -999.98038650559
epoch: 73 test_true_pfm: -0.40453601124298433 sim_pfm: -999.4911863584912
episode: 292 training return: -999.9728365900005
episode: 293 training return: -999.9765873160495
episode: 294 training return: -999.9677334261859
episode: 295 training return: -999.9760761605368
epoch: 74 test_true_pfm: 0.11265474337061938 sim_pfm: -999.4907840506115
episode: 296 training return: -999.9780532101015
episode: 297 training return: -999.9876031453801
episode: 298 training return: -999.9739169075917
episode: 299 training return: -999.9822664508034
epoch: 75 test_true_pfm: -0.4349759940216911 sim_pfm: -999.4812778289048
episode: 300 training return: -999.9751981534964
episode: 301 training return: -999.9816404781245
episode: 302 training return: -999.9832690090522
episode: 303 training return: -999.9850934070108
epoch: 76 test_true_pfm: -0.05622836961901533 sim_pfm: -999.4777448661243
episode: 304 training return: -999.951128558197
episode: 305 training return: -999.9736759352493
episode: 306 training return: -999.9726994819048
episode: 307 training return: -999.9878197197517
epoch: 77 test_true_pfm: -0.17441064669950376 sim_pfm: -999.4991958247874
episode: 308 training return: -999.9697361399301
episode: 309 training return: -999.9635654856575
episode: 310 training return: -999.965922249177
episode: 311 training return: -999.974163625566
epoch: 78 test_true_pfm: -0.013260134976028478 sim_pfm: -999.4958921708052
episode: 312 training return: -999.9659207546542
episode: 313 training return: -999.9811141792883
episode: 314 training return: -999.9931700590058
episode: 315 training return: -999.9621816122016
epoch: 79 test_true_pfm: -0.07086094965335966 sim_pfm: -999.4925623869555
episode: 316 training return: -999.9920957305103
episode: 317 training return: -999.9927123217036
episode: 318 training return: -999.9640671102255
episode: 319 training return: -999.9888145854103
epoch: 80 test_true_pfm: 0.11722882651021131 sim_pfm: -999.4938629201602
episode: 320 training return: -999.9922728892398
episode: 321 training return: -999.9824935680915
episode: 322 training return: -999.9829540503139
episode: 323 training return: -999.990998294444
epoch: 81 test_true_pfm: -0.9057655591534983 sim_pfm: -999.4710433322774
episode: 324 training return: -999.9834821130277
episode: 325 training return: -999.9951076231397
episode: 326 training return: -999.9912575163038
episode: 327 training return: -999.9792436636017
epoch: 82 test_true_pfm: -0.4936705753541338 sim_pfm: -999.4833997389895
episode: 328 training return: -999.9870590768663
episode: 329 training return: -999.9769825059308
episode: 330 training return: -999.9508390930238
episode: 331 training return: -999.9841098620143
epoch: 83 test_true_pfm: -0.3511462628554345 sim_pfm: -999.4919641752589
episode: 332 training return: -999.9812221243483
episode: 333 training return: -999.9775778502509
episode: 334 training return: -999.9576582161548
episode: 335 training return: -999.9949618385028
epoch: 84 test_true_pfm: -0.25826405898331933 sim_pfm: -999.4994939584336
episode: 336 training return: -999.9823526639071
episode: 337 training return: -999.9839744321217
episode: 338 training return: -999.9914307886736
episode: 339 training return: -999.9828566854837
epoch: 85 test_true_pfm: -0.3453425562021393 sim_pfm: -999.482729043007
episode: 340 training return: -999.9875442315667
episode: 341 training return: -999.9758151007741
episode: 342 training return: -999.9746815915115
episode: 343 training return: -999.9898246929923
epoch: 86 test_true_pfm: -0.46557799742759176 sim_pfm: -999.4970073602436
episode: 344 training return: -999.9660546220808
episode: 345 training return: -999.9905836449626
episode: 346 training return: -999.9895508781786
episode: 347 training return: -999.9916048919745
epoch: 87 test_true_pfm: 0.10716397382654182 sim_pfm: -999.4779817284146
episode: 348 training return: -999.9758660534334
episode: 349 training return: -999.9648385705668
episode: 350 training return: -999.9932267639012
episode: 351 training return: -999.9685766063747
epoch: 88 test_true_pfm: 0.07552292204100959 sim_pfm: -999.4890862928327
episode: 352 training return: -999.9836500161842
episode: 353 training return: -999.9784250654931
episode: 354 training return: -999.9544625330769
episode: 355 training return: -999.9597261276779
epoch: 89 test_true_pfm: -0.20808961056442388 sim_pfm: -999.4799493929199
episode: 356 training return: -999.9808302507365
episode: 357 training return: -999.9823055200243
episode: 358 training return: -999.9768929129415
episode: 359 training return: -999.9865872207223
epoch: 90 test_true_pfm: -0.6134804202835372 sim_pfm: -999.4891676597659
episode: 360 training return: -999.9634015749131
episode: 361 training return: -999.973884775968
episode: 362 training return: -999.9923229718568
episode: 363 training return: -999.9831980371198
epoch: 91 test_true_pfm: -0.12015025322725836 sim_pfm: -999.4799901427932
episode: 364 training return: -999.9826632857021
episode: 365 training return: -999.9697497378523
episode: 366 training return: -999.9908958052899
episode: 367 training return: -999.9786186900878
epoch: 92 test_true_pfm: -0.5407050386904038 sim_pfm: -999.4791032863491
episode: 368 training return: -999.947862931006
episode: 369 training return: -999.9862000838494
episode: 370 training return: -999.9951899716974
episode: 371 training return: -999.9805285259644
epoch: 93 test_true_pfm: -0.019282344921156713 sim_pfm: -999.4830395150625
episode: 372 training return: -999.9905512865189
episode: 373 training return: -999.9776572331359
episode: 374 training return: -999.9930863571607
episode: 375 training return: -999.9743861365484
epoch: 94 test_true_pfm: -0.0709713463243029 sim_pfm: -999.4965663776293
episode: 376 training return: -999.9896638882888
episode: 377 training return: -999.9943694693978
episode: 378 training return: -999.9945196130774
episode: 379 training return: -999.9781805976344
epoch: 95 test_true_pfm: -0.4383926614351514 sim_pfm: -999.4857283489213
episode: 380 training return: -999.9962586972179
episode: 381 training return: -999.9817965148219
episode: 382 training return: -999.9834522172105
episode: 383 training return: -999.9893974735251
epoch: 96 test_true_pfm: -0.5563096305998573 sim_pfm: -999.4889897284502
episode: 384 training return: -999.9746744713184
episode: 385 training return: -999.9881778883047
episode: 386 training return: -999.9955480768301
episode: 387 training return: -999.9840424168925
epoch: 97 test_true_pfm: -0.5148329543216381 sim_pfm: -999.499517898601
episode: 388 training return: -999.9815200695725
episode: 389 training return: -999.9683737694505
episode: 390 training return: -999.9671402898354
episode: 391 training return: -999.9635060818601
epoch: 98 test_true_pfm: -0.40576792106513704 sim_pfm: -999.4880139537245
episode: 392 training return: -999.9928445668611
episode: 393 training return: -999.9961255001887
episode: 394 training return: -999.9861066150812
episode: 395 training return: -999.9659933376287
epoch: 99 test_true_pfm: -0.1880495557038245 sim_pfm: -999.4693920004723
episode: 396 training return: -999.984825895748
episode: 397 training return: -999.9882935972068
episode: 398 training return: -999.9833495424884
episode: 399 training return: -999.9687805443185
epoch: 100 test_true_pfm: -1.7397192463931113 sim_pfm: -999.4992271531047
episode: 400 training return: -999.9947215656558
episode: 401 training return: -999.9901808730639
episode: 402 training return: -999.9929781249789
episode: 403 training return: -999.9560965924529
epoch: 101 test_true_pfm: -1.0440911044374726 sim_pfm: -999.492291599045
episode: 404 training return: -999.9956906033568
episode: 405 training return: -999.9688088085785
episode: 406 training return: -999.9666112371447
episode: 407 training return: -999.9944527246311
epoch: 102 test_true_pfm: -0.17708408193726177 sim_pfm: -999.4869460667784
episode: 408 training return: -999.9515800479985
episode: 409 training return: -999.9950366522133
episode: 410 training return: -999.963789357882
episode: 411 training return: -999.9749656436143
epoch: 103 test_true_pfm: -0.6245851074695664 sim_pfm: -999.4804234189469
episode: 412 training return: -999.9892298620767
episode: 413 training return: -999.9723300037055
episode: 414 training return: -999.9790116137489
episode: 415 training return: -999.979439847929
epoch: 104 test_true_pfm: 0.1004866155761291 sim_pfm: -999.4764918747504
episode: 416 training return: -999.967846274297
episode: 417 training return: -1000.0074143680847
episode: 418 training return: -999.9840845553612
episode: 419 training return: -999.9876890943111
epoch: 105 test_true_pfm: -1.5026597915346878 sim_pfm: -999.4948089928065
episode: 420 training return: -999.9737115063916
episode: 421 training return: -999.9810515883236
episode: 422 training return: -999.9583408443877
episode: 423 training return: -999.9740364195683
epoch: 106 test_true_pfm: -0.0867735828855185 sim_pfm: -999.490689528543
episode: 424 training return: -999.9910829650901
episode: 425 training return: -999.9501371063345
episode: 426 training return: -999.9803276102147
episode: 427 training return: -999.9813540093808
epoch: 107 test_true_pfm: -0.49756704880040853 sim_pfm: -999.4769538146724
episode: 428 training return: -999.9830754741666
episode: 429 training return: -999.9833431143016
episode: 430 training return: -999.9835253920953
episode: 431 training return: -999.9900885515201
epoch: 108 test_true_pfm: 0.09012375723442578 sim_pfm: -999.4847192533234
episode: 432 training return: -999.9849179168203
episode: 433 training return: -999.9798398578052
episode: 434 training return: -999.9866314612971
episode: 435 training return: -999.9862670570102
epoch: 109 test_true_pfm: 0.45081315160020313 sim_pfm: -999.4891880758349
episode: 436 training return: -999.98272104321
episode: 437 training return: -999.9874545418621
episode: 438 training return: -999.9661947670841
episode: 439 training return: -999.9787020633031
epoch: 110 test_true_pfm: 0.19218710271218062 sim_pfm: -999.4995706018329
episode: 440 training return: -999.9740943419663
episode: 441 training return: -999.9612462922025
episode: 442 training return: -999.9844747449645
episode: 443 training return: -999.9782121010039
epoch: 111 test_true_pfm: -0.9656419473606525 sim_pfm: -999.4702510127996
episode: 444 training return: -999.9957618036879
episode: 445 training return: -999.9636058349626
episode: 446 training return: -999.960692052015
episode: 447 training return: -999.9848052681044
epoch: 112 test_true_pfm: 0.4893195051575092 sim_pfm: -999.4874889597871
episode: 448 training return: -999.9862638122926
episode: 449 training return: -999.994134964542
episode: 450 training return: -999.9816942769837
episode: 451 training return: -999.9850523199165
epoch: 113 test_true_pfm: -0.3835367908919814 sim_pfm: -999.49996456605
episode: 452 training return: -999.9891944141135
episode: 453 training return: -999.9852379639835
episode: 454 training return: -999.9910006106034
episode: 455 training return: -999.9768144871365
epoch: 114 test_true_pfm: 0.252264676038696 sim_pfm: -999.4812057148014
episode: 456 training return: -999.9803015376037
episode: 457 training return: -999.9863577878989
episode: 458 training return: -999.9943912178828
episode: 459 training return: -999.9742710128514
epoch: 115 test_true_pfm: -0.8028618011709341 sim_pfm: -999.4779875877321
episode: 460 training return: -999.983712121093
episode: 461 training return: -999.9788485564986
episode: 462 training return: -999.9808032062857
episode: 463 training return: -999.9965101426507
epoch: 116 test_true_pfm: 0.1348111164859004 sim_pfm: -999.4888776177031
episode: 464 training return: -999.9882939557838
episode: 465 training return: -999.9898875029273
episode: 466 training return: -999.9868490178615
episode: 467 training return: -999.9443605237265
epoch: 117 test_true_pfm: -0.5271161650748238 sim_pfm: -999.4830976185987
episode: 468 training return: -999.9797130820605
episode: 469 training return: -999.974517909085
episode: 470 training return: -999.9874982603458
episode: 471 training return: -1000.0278589163255
epoch: 118 test_true_pfm: 0.36200412166757556 sim_pfm: -999.4965834610988
episode: 472 training return: -999.9936555595128
episode: 473 training return: -999.9687255973032
episode: 474 training return: -999.9710023412771
episode: 475 training return: -999.9931052530144
epoch: 119 test_true_pfm: -0.7917067809736014 sim_pfm: -999.4564104163209
episode: 476 training return: -999.9830084475014
episode: 477 training return: -999.9840810595572
episode: 478 training return: -999.9773454908699
episode: 479 training return: -999.965631739083
epoch: 120 test_true_pfm: -0.5682316331754639 sim_pfm: -999.4885257667564
episode: 480 training return: -999.9903194976645
episode: 481 training return: -999.9908915153769
episode: 482 training return: -999.9803053228537
episode: 483 training return: -999.9855556217326
epoch: 121 test_true_pfm: -0.020132511523292474 sim_pfm: -999.4700302096553
episode: 484 training return: -999.9897196957306
episode: 485 training return: -999.9353527965853
episode: 486 training return: -999.9718762691813
episode: 487 training return: -999.9945295926044
epoch: 122 test_true_pfm: -0.4304500667883154 sim_pfm: -999.482114735022
episode: 488 training return: -999.980853843027
episode: 489 training return: -999.9851911452457
episode: 490 training return: -999.9810389382263
episode: 491 training return: -999.9809174592107
epoch: 123 test_true_pfm: -0.4624524792388874 sim_pfm: -999.4866723371393
episode: 492 training return: -999.9788082727132
episode: 493 training return: -999.9716286672419
episode: 494 training return: -999.9865928202422
episode: 495 training return: -999.9928349177236
epoch: 124 test_true_pfm: -0.09968857174177741 sim_pfm: -999.478048343797
episode: 496 training return: -999.9839606300095
episode: 497 training return: -999.9754692669461
episode: 498 training return: -999.974219553619
episode: 499 training return: -999.9836293528382
epoch: 125 test_true_pfm: -0.5089655184018521 sim_pfm: -999.4925060230249
episode: 500 training return: -999.9624241107807
episode: 501 training return: -999.9929693578725
episode: 502 training return: -999.9973251303147
episode: 503 training return: -999.9825441069731
epoch: 126 test_true_pfm: -0.018679893500995654 sim_pfm: -999.495845403182
episode: 504 training return: -999.9973101019772
episode: 505 training return: -999.9893995933736
episode: 506 training return: -999.9895272005737
episode: 507 training return: -999.9658810194135
epoch: 127 test_true_pfm: 0.32883958249199685 sim_pfm: -999.4864852820392
episode: 508 training return: -999.9779505379888
episode: 509 training return: -999.9848544765198
episode: 510 training return: -999.9925682843256
episode: 511 training return: -999.997167468613
epoch: 128 test_true_pfm: 0.9877380587501827 sim_pfm: -999.4914686351343
episode: 512 training return: -999.9735246122824
episode: 513 training return: -999.9622250867026
episode: 514 training return: -999.9583640570307
episode: 515 training return: -999.9816043226855
epoch: 129 test_true_pfm: 0.11373578125890293 sim_pfm: -999.484933168014
episode: 516 training return: -999.9820284319966
episode: 517 training return: -999.9598516967956
episode: 518 training return: -999.9880997159173
episode: 519 training return: -999.9938335804025
epoch: 130 test_true_pfm: -0.37807907083580855 sim_pfm: -999.4849231709995
episode: 520 training return: -999.9983554508367
episode: 521 training return: -999.9948890801296
episode: 522 training return: -999.9915183526208
episode: 523 training return: -999.969047640754
epoch: 131 test_true_pfm: 0.3695736672999546 sim_pfm: -999.5047779729089
episode: 524 training return: -999.9819475553911
episode: 525 training return: -999.9926326916061
episode: 526 training return: -999.9908184116708
episode: 527 training return: -999.9968499505217
epoch: 132 test_true_pfm: -0.07720629417128773 sim_pfm: -999.4860134279703
episode: 528 training return: -999.9901931631906
episode: 529 training return: -999.995067746111
episode: 530 training return: -999.9768703604277
episode: 531 training return: -999.9804126679605
epoch: 133 test_true_pfm: -0.4190053660982274 sim_pfm: -999.4712530597466
episode: 532 training return: -999.9866724151133
episode: 533 training return: -999.9885390237922
episode: 534 training return: -999.9818961187361
episode: 535 training return: -999.9922367425107
epoch: 134 test_true_pfm: -1.1255842506831784 sim_pfm: -999.4950853868953
episode: 536 training return: -999.9886404130497
episode: 537 training return: -999.9736845384402
episode: 538 training return: -999.9920488885076
episode: 539 training return: -999.9901666327471
epoch: 135 test_true_pfm: -0.47865388593280783 sim_pfm: -999.4862340048954
episode: 540 training return: -999.9843311146413
episode: 541 training return: -999.9711505353613
episode: 542 training return: -999.9728881100316
episode: 543 training return: -999.9928949251909
epoch: 136 test_true_pfm: -0.9065389532280852 sim_pfm: -999.4855551427182
episode: 544 training return: -999.9947149965723
episode: 545 training return: -999.9496053570238
episode: 546 training return: -999.9467708048595
episode: 547 training return: -999.9708024186261
epoch: 137 test_true_pfm: -0.7045071865334164 sim_pfm: -999.4954078543205
episode: 548 training return: -999.9775602060648
episode: 549 training return: -999.9735579785932
episode: 550 training return: -999.9859754344369
episode: 551 training return: -999.9827029284554
epoch: 138 test_true_pfm: -0.6126801789111952 sim_pfm: -999.4949248391246
episode: 552 training return: -999.9734506243625
episode: 553 training return: -999.9856335801867
episode: 554 training return: -999.9713572392503
episode: 555 training return: -999.9737797347613
epoch: 139 test_true_pfm: -0.1849402011699649 sim_pfm: -999.4837307342917
episode: 556 training return: -999.9857648643646
episode: 557 training return: -999.9745382446285
episode: 558 training return: -999.9910017718187
episode: 559 training return: -999.996441975686
epoch: 140 test_true_pfm: 0.005974064085509785 sim_pfm: -999.4796797188677
episode: 560 training return: -999.9793761080725
episode: 561 training return: -999.989630460704
episode: 562 training return: -999.9820254921771
episode: 563 training return: -999.9943817196599
epoch: 141 test_true_pfm: -0.7199530929587438 sim_pfm: -999.48635612427
episode: 564 training return: -999.9865040013318
episode: 565 training return: -999.9846398891096
episode: 566 training return: -999.9836076361844
episode: 567 training return: -999.9707309598467
epoch: 142 test_true_pfm: -0.5818032278195634 sim_pfm: -999.4948294457531
episode: 568 training return: -999.9908443395609
episode: 569 training return: -999.9592013855147
episode: 570 training return: -999.9829799101313
episode: 571 training return: -999.9918105027759
epoch: 143 test_true_pfm: -0.4157794515297411 sim_pfm: -999.4864543403777
episode: 572 training return: -999.9655244336648
episode: 573 training return: -999.9816312652749
episode: 574 training return: -999.9696030439829
episode: 575 training return: -999.9557494185775
epoch: 144 test_true_pfm: -0.7309214233809141 sim_pfm: -999.4866174893687
episode: 576 training return: -999.9945375556089
episode: 577 training return: -999.9941457967387
episode: 578 training return: -999.9742610512303
episode: 579 training return: -999.9929311920297
epoch: 145 test_true_pfm: -0.5703455369618172 sim_pfm: -999.4824962257849
episode: 580 training return: -999.996047024423
episode: 581 training return: -999.9747946269758
episode: 582 training return: -999.9847330010891
episode: 583 training return: -999.975442348691
epoch: 146 test_true_pfm: 0.23143781563715812 sim_pfm: -999.500387916059
episode: 584 training return: -999.956814448281
episode: 585 training return: -999.9831417664736
episode: 586 training return: -999.9871542026724
episode: 587 training return: -999.9780808664029
epoch: 147 test_true_pfm: 0.2873240129203027 sim_pfm: -999.4726994608036
episode: 588 training return: -999.9944181892237
episode: 589 training return: -999.9600453784857
episode: 590 training return: -999.9937084974562
episode: 591 training return: -999.9897203547799
epoch: 148 test_true_pfm: 0.30820436343263863 sim_pfm: -999.4687652778242
episode: 592 training return: -999.9786587322528
episode: 593 training return: -999.967792367999
episode: 594 training return: -999.9855162005907
episode: 595 training return: -999.9850567515975
epoch: 149 test_true_pfm: -0.1642971898870743 sim_pfm: -999.5071895875093
episode: 596 training return: -999.9746043875695
episode: 597 training return: -999.9755961977938
episode: 598 training return: -999.9796114839214
episode: 599 training return: -999.97531901467
epoch: 150 test_true_pfm: -0.42671577913778663 sim_pfm: -999.4824133482822
